Parsing of VCF files.
This module implements VCFv4.1, as defined by the 1000 Genomes Project:
http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41
type
vcf_description = string
type
vcf_number =
| Number of int
| OnePerAllele
| OnePerGenotype
| Unknown
type
vcf_format_type = [ `character_value | `float_value | `integer_value | `string_value ]
Types allowed in a VCF FORMAT meta header.
type
vcf_info_type = [ `character_value
| `flag_value
| `float_value
| `integer_value
| `string_value ]
Types allowed in a VCF INFO meta header.
type
vcf_format = [ `character of char
| `float of float
| `integer of int
| `missing
| `string of string ]
type
vcf_info = [ `character of char
| `flag of string
| `float of float
| `integer of int
| `missing
| `string of string ]
type
vcf_row = {
  vcfr_chrom : string ;
  vcfr_pos : int ;
  vcfr_ids : string list ;
  vcfr_ref : string ;
  vcfr_alts : string list ;
  vcfr_qual : float option ;
  vcfr_filter : vcf_id list ;
  vcfr_info : (vcf_id, vcf_info list) Core.Std.Hashtbl.t ;
  vcfr_samples : (vcf_id, (vcf_id * vcf_format list) list) Core.Std.Hashtbl.t ;
}
type
vcf_parse_row_error = [ `duplicate_ids of vcf_id list
| `format_type_coersion_failure of vcf_format_type * string
| `info_type_coersion_failure of vcf_info_type * string
| `invalid_arguments_length of vcf_id * int * int
| `invalid_dna of string
| `invalid_float of string
| `invalid_int of string
| `invalid_row_length of int * int
| `malformed_sample of string
| `unknown_alt of string
| `unknown_filter of vcf_id
| `unknown_format of vcf_id
| `unknown_info of vcf_id ]
type
vcf_parse_error = [ `incomplete_input of
Biocaml_internal_utils.Pos.t * string list * string option
| `malformed_header of Biocaml_internal_utils.Pos.t * string
| `malformed_meta of Biocaml_internal_utils.Pos.t * string
| `malformed_row of
Biocaml_internal_utils.Pos.t * vcf_parse_row_error * string
| `not_ready ]