Module Biocaml_vcf (.ml)

Parsing of VCF files.

This module implements VCFv4.1, as defined by the 1000 Genomes Project: http://www.1000genomes.org/wiki/Analysis/Variant%20Call%20Format/vcf-variant-call-format-version-41

module Biocaml_vcf: 
sig
(** Identifier represented as a plain [string]. In this signature it is the
    key type of the [vcfm_info] and [vcfm_format] tables, the first
    component of [vcfm_filters] entries, and it appears in the
    [vcfr_filter], [vcfr_info] and [vcfr_samples] fields of [vcf_row]. *)
type vcf_id = string 
(** Description text, represented as a plain [string]. It is the last (or
    only) argument of the [Info], [Filter], [Format] and [Alt]
    constructors. *)
type vcf_description = string 
(** Cardinality component of the [Info] and [Format] declarations.
    NOTE(review): the constructors presumably mirror the Number field of
    VCF 4.1 meta lines (an integer, A, G and a dot respectively) -- confirm
    against the parser, which is not visible here. *)
type vcf_number = 
| Number of int  (** An explicit integer count. *)
| OnePerAllele  (** Presumably one value per alternate allele -- confirm. *)
| OnePerGenotype  (** Presumably one value per genotype -- confirm. *)
| Unknown  (** Count not specified. *)
(** Tags naming the value types usable in a [Format] declaration (see
    [vcf_format_meta]). Compared with [vcf_info_type], this set has no
    [`flag_value] tag. *)
type vcf_format_type = [ `character_value | `float_value | `integer_value | `string_value ] 
Types allowed for the VCF FORMAT meta header.
(** Tags naming the value types usable in an [Info] declaration (see
    [vcf_info_meta]). This is the same set as [vcf_format_type] plus
    [`flag_value]. *)
type vcf_info_type = [ `character_value
| `flag_value
| `float_value
| `integer_value
| `string_value ]
Types allowed for the VCF INFO meta header.
(** Declaration stored per id in the [vcfm_info] table of [vcf_meta]. It
    bundles, in order, a cardinality ([vcf_number]), a value type
    ([vcf_info_type]) and a description. *)
type vcf_info_meta = 
| Info of vcf_number * vcf_info_type
* vcf_description
(** Declaration paired with an id in the [vcfm_filters] list of
    [vcf_meta]. It carries only a description. *)
type vcf_filter_meta = 
| Filter of vcf_description
(** Declaration stored per id in the [vcfm_format] table of [vcf_meta]. It
    bundles, in order, a cardinality ([vcf_number]), a value type
    ([vcf_format_type]) and a description. *)
type vcf_format_meta = 
| Format of vcf_number * vcf_format_type
* vcf_description
(** Declaration stored per string key in the [vcfm_alt] table of
    [vcf_meta]. It carries only a description. *)
type vcf_alt_meta = 
| Alt of vcf_description
(** Meta information record for a VCF file. The comments on individual
    fields that go beyond their types are assumptions based on field names
    and are marked as such; the code populating this record is not visible
    in this signature. *)
type vcf_meta = {
   vcfm_version : string;
   (* NOTE(review): presumably the declared file format version -- confirm. *)
   vcfm_id_cache : vcf_id Core.Std.Set.Poly.t;
   (* Set of ids. NOTE(review): purpose not shown here; possibly related to
      the [`duplicate_ids] row error -- confirm. *)
   vcfm_info : (vcf_id, vcf_info_meta) Core.Std.Hashtbl.t;
   (* Info declarations, keyed by id. *)
   vcfm_filters : (vcf_id * vcf_filter_meta) list;
   (* Filter declarations as an association list (a list, unlike the
      hashtable-backed fields around it). *)
   vcfm_format : (vcf_id, vcf_format_meta) Core.Std.Hashtbl.t;
   (* Format declarations, keyed by id. *)
   vcfm_alt : (string, vcf_alt_meta) Core.Std.Hashtbl.t;
   (* Alt declarations, keyed by a plain string. *)
   vcfm_arbitrary : (string, string) Core.Std.Hashtbl.t;
   (* String-to-string table. NOTE(review): presumably meta entries that
      fit none of the typed tables above -- confirm. *)
   vcfm_header : string list;
   (* NOTE(review): presumably the column names of the header line --
      confirm. *)
   vcfm_samples : string list;
   (* NOTE(review): presumably the sample names -- confirm. *)
}
(** A single typed value as stored in the [vcfr_samples] field of
    [vcf_row]. The payload-carrying tags parallel, by name, the tags of
    [vcf_format_type]; [`missing] carries no payload. *)
type vcf_format = [ `character of char
| `float of float
| `integer of int
| `missing
| `string of string ]
(** A single typed value as stored in the [vcfr_info] field of [vcf_row].
    This is the same set of tags as [vcf_format] plus [`flag], which
    carries a [string]; [`missing] carries no payload. *)
type vcf_info = [ `character of char
| `flag of string
| `float of float
| `integer of int
| `missing
| `string of string ]
(** One parsed row; aliased as [item]. Field names follow the VCF column
    names. Comments that go beyond the field types are assumptions and are
    marked as such. *)
type vcf_row = {
   vcfr_chrom : string;
   vcfr_pos : int;
   vcfr_ids : string list;
   vcfr_ref : string;
   vcfr_alts : string list;
   vcfr_qual : float option;
   (* NOTE(review): [None] presumably stands for an absent quality value --
      confirm. *)
   vcfr_filter : vcf_id list;
   vcfr_info : (vcf_id, vcf_info list) Core.Std.Hashtbl.t;
   (* Table from id to a list of typed values. *)
   vcfr_samples : (vcf_id, (vcf_id * vcf_format list) list)
Core.Std.Hashtbl.t
;
   (* Table from id to an association list of (id, typed values).
      NOTE(review): presumably outer key = sample name and inner key =
      format id -- confirm. *)
}
(** Alias of [vcf_row]; this is the success type produced by
    [Transform.string_to_item]. *)
type item = vcf_row 
(** Errors attached to a single row; wrapped by the [`malformed_row] case
    of [vcf_parse_error].

    The tag spelling [coersion] is part of the public interface and must be
    matched exactly by callers.

    NOTE(review): the meaning of the [int] payloads of
    [`invalid_arguments_length] and [`invalid_row_length] (presumably
    expected and actual lengths, order unknown) and of the [string]
    payloads (presumably the offending text) is not visible here --
    confirm against the implementation. *)
type vcf_parse_row_error = [ `duplicate_ids of vcf_id list
| `format_type_coersion_failure of vcf_format_type * string
| `info_type_coersion_failure of vcf_info_type * string
| `invalid_arguments_length of vcf_id * int * int
| `invalid_dna of string
| `invalid_float of string
| `invalid_int of string
| `invalid_row_length of int * int
| `malformed_sample of string
| `unknown_alt of string
| `unknown_filter of vcf_id
| `unknown_format of vcf_id
| `unknown_info of vcf_id ]
(** Error type of the parser; this is the error side of the result produced
    by [Transform.string_to_item] and the input of
    [parse_error_to_string].

    Every case except [`not_ready] carries a
    [Biocaml_internal_utils.Pos.t]. [`malformed_row] additionally carries
    the row-level cause ([vcf_parse_row_error]) and a [string].
    NOTE(review): the [string] payloads are presumably the offending input
    text -- confirm. *)
type vcf_parse_error = [ `incomplete_input of
Biocaml_internal_utils.Pos.t * string list * string option
| `malformed_header of Biocaml_internal_utils.Pos.t * string
| `malformed_meta of Biocaml_internal_utils.Pos.t * string
| `malformed_row of
Biocaml_internal_utils.Pos.t * vcf_parse_row_error * string
| `not_ready ]
(** [parse_error_to_string e] converts the parse error [e] to a [string].
    The exact output format is not specified by this signature. *)
val parse_error_to_string : vcf_parse_error -> string

(** Transform-based interface: exposes the parser as a
    [Biocaml_transform.t]. *)
module Transform: 
sig
(** [string_to_item ?filename ()] returns a [Biocaml_transform.t] whose
    input type is [string] and whose output type is a result holding
    either a [Biocaml_vcf.item] or a [Biocaml_vcf.vcf_parse_error].

    NOTE(review): [filename] is presumably used only to label positions in
    errors, and the input strings are presumably individual lines --
    neither is visible in this signature; confirm against the
    implementation. *)
val string_to_item : ?filename:string ->
unit ->
(string,
(Biocaml_vcf.item, Biocaml_vcf.vcf_parse_error)
Biocaml_internal_utils.Result.t)
Biocaml_transform.t
end
end