Parsing of VCF files.
This module implements VCFv4.1, as defined by the 1000 Genomes Project:
http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41
type
vcf_description = string
type
vcf_number =
  | Number of int
  | OnePerAllele
  | OnePerGenotype
  | Unknown
type
vcf_format_type = [ `character_value | `float_value | `integer_value | `string_value ]
Types allowed for the VCF FORMAT meta header.
type
vcf_info_type = [ `character_value
| `flag_value
| `float_value
| `integer_value
| `string_value ]
Types allowed for the VCF INFO meta header.
type
vcf_meta = {
  vcfm_version : string ;
  vcfm_id_cache : vcf_id Biocaml_internal_pervasives.Set.Poly.t ;
  vcfm_info : (vcf_id, vcf_info_meta) Biocaml_internal_pervasives.Hashtbl.t ;
  vcfm_filters : (vcf_id * vcf_filter_meta) list ;
  vcfm_format : (vcf_id, vcf_format_meta) Biocaml_internal_pervasives.Hashtbl.t ;
  vcfm_alt : (string, vcf_alt_meta) Biocaml_internal_pervasives.Hashtbl.t ;
  vcfm_arbitrary : (string, string) Biocaml_internal_pervasives.Hashtbl.t ;
  vcfm_header : string list ; (* NOTE(review): this field's name was lost in extraction; "vcfm_header" is assumed from the vcfm_ naming pattern - confirm against the module interface *)
  vcfm_samples : string list ;
}
type
vcf_format = [ `character of char
| `float of float
| `integer of int
| `missing
| `string of string ]
type
vcf_info = [ `character of char
| `flag of string
| `float of float
| `integer of int
| `missing
| `string of string ]
type
vcf_row = {
  vcfr_chrom : string ;
  vcfr_pos : int ;
  vcfr_ids : string list ;
  vcfr_ref : string ;
  vcfr_alts : string list ;
  vcfr_qual : float option ;
  vcfr_filter : vcf_id list ;
  vcfr_info : (vcf_id, vcf_info list) Biocaml_internal_pervasives.Hashtbl.t ;
  vcfr_samples : (vcf_id, (vcf_id * vcf_format list) list) Biocaml_internal_pervasives.Hashtbl.t ;
}
module Pos: module type of Biocaml_pos
type
vcf_parse_row_error = [ `duplicate_ids of vcf_id list
| `format_type_coersion_failure of vcf_format_type * string
| `info_type_coersion_failure of vcf_info_type * string
| `invalid_arguments_length of vcf_id * int * int
| `invalid_dna of string
| `invalid_float of string
| `invalid_int of string
| `invalid_row_length of int * int
| `malformed_sample of string
| `unknown_alt of string
| `unknown_filter of vcf_id
| `unknown_format of vcf_id
| `unknown_info of vcf_id ]
type
vcf_parse_error = [ `incomplete_input of Pos.t * string list * string option
| `malformed_header of Pos.t * string
| `malformed_meta of Pos.t * string
| `malformed_row of
Pos.t * vcf_parse_row_error * string
| `not_ready ]