High-level representation of SAM files and SAM alignments.
Basic Types
Low-Level Items
type
raw_alignment = {
|
qname : string ; |
|
flag : int ; |
|
rname : string ; |
|
pos : int ; |
|
mapq : int ; |
|
cigar : string ; |
|
rnext : string ; |
|
pnext : int ; |
|
tlen : int ; |
|
seq : string ; |
|
qual : string ; |
|
optional : (string * char * string) list ; |
}
The contents of an alignment line.
type
raw_item = [ `alignment of raw_alignment
| `comment of string
| `header of string * (string * string) list ]
The "items" of a parsed SAM file stream.
High-Level Items
type
reference_sequence = {
|
ref_name : string ; |
|
ref_length : int ; |
|
ref_assembly_identifier : string option ; |
|
ref_checksum : string option ; |
|
ref_species : string option ; |
|
ref_uri : string option ; |
|
ref_unknown : (string * string) list ; |
}
Definition of a reference sequence.
val reference_sequence : ?assembly_identifier:string ->
?checksum:string ->
?species:string ->
?uri:string ->
?unknown_data:(string * string) list ->
string -> int -> reference_sequence
Create a reference sequence.
module Flags:
sig
Manipulate the alignment flags.
type
t = private int
Flags are represented as a “bit map”.
end
type
cigar_op = [ `D of int
| `Eq of int
| `H of int
| `I of int
| `M of int
| `N of int
| `P of int
| `S of int
| `X of int ]
CIGAR operations.
type
optional_content_value = [ `array of char * optional_content_value array
| `char of char
| `float of float
| `int of int
| `string of string ]
Meta-value used to store “optional content”.
type
alignment = {
|
query_template_name : string ; |
|
flags : Flags.t ; |
|
reference_sequence : [ `name of string | `none | `reference_sequence of reference_sequence ] ; |
|
position : int option ; |
|
mapping_quality : int option ; |
|
cigar_operations : cigar_op array ; |
|
next_reference_sequence : [ `name of string | `none | `qname | `reference_sequence of reference_sequence ] ; |
|
next_position : int option ; |
|
template_length : int option ; |
|
sequence : [ `none | `reference | `string of string ] ; |
|
quality : Biocaml_phred_score.t array ; |
|
optional_content : optional_content ; |
}
High-level representation of a parsed alignment.
type
item = [ `alignment of alignment
| `comment of string
| `header of string * (string * string) list
| `header_line of
string * [ `coordinate | `queryname | `unknown | `unsorted ] *
(string * string) list
| `reference_sequence_dictionary of
reference_sequence array ]
High-level representation of a parsed entity.
Error Types
module Error:
sig
The possible errors.
type
optional_content_parsing = [ `wrong_optional of
(string * char * string) list *
[ `not_a_char of string
| `not_a_float of string
| `not_an_int of string
| `unknown_type of char
| `wrong_array of
[ `not_a_char of string
| `not_a_float of string
| `not_an_int of string
| `unknown_type of char
| `wrong_type of string ]
| `wrong_type of string ] ]
Errors which can happen while parsing optional content.
type
string_to_raw = [ `incomplete_input of
Biocaml_internal_utils.Pos.t * string list * string option
| `invalid_header_tag of Biocaml_internal_utils.Pos.t * string
| `invalid_tag_value_list of Biocaml_internal_utils.Pos.t * string list
| `not_an_int of Biocaml_internal_utils.Pos.t * string * string
| `wrong_alignment of Biocaml_internal_utils.Pos.t * string
| `wrong_optional_field of Biocaml_internal_utils.Pos.t * string ]
The possible errors one can get while parsing SAM files.
type
item_to_raw = [ `wrong_phred_scores of Biocaml_sam_deprecated.alignment ]
The error that may happen while downgrading the
higher-level representation of an alignment.
type
parse = [ `comment_after_end_of_header of int * string
| `duplicate_in_reference_sequence_dictionary of
Biocaml_sam_deprecated.reference_sequence array
| `header_after_end_of_header of int * (string * (string * string) list)
| `header_line_not_first of int
| `header_line_without_version of (string * string) list
| `header_line_wrong_sorting of string
| `incomplete_input of
Biocaml_internal_utils.Pos.t * string list * string option
| `invalid_header_tag of Biocaml_internal_utils.Pos.t * string
| `invalid_tag_value_list of Biocaml_internal_utils.Pos.t * string list
| `missing_ref_sequence_length of (string * string) list
| `missing_ref_sequence_name of (string * string) list
| `not_an_int of Biocaml_internal_utils.Pos.t * string * string
| `wrong_alignment of Biocaml_internal_utils.Pos.t * string
| `wrong_cigar_text of string
| `wrong_flag of Biocaml_sam_deprecated.raw_alignment
| `wrong_mapq of Biocaml_sam_deprecated.raw_alignment
| `wrong_optional of
(string * char * string) list *
[ `not_a_char of string
| `not_a_float of string
| `not_an_int of string
| `unknown_type of char
| `wrong_array of
[ `not_a_char of string
| `not_a_float of string
| `not_an_int of string
| `unknown_type of char
| `wrong_type of string ]
| `wrong_type of string ]
| `wrong_optional_field of Biocaml_internal_utils.Pos.t * string
| `wrong_phred_scores of Biocaml_sam_deprecated.raw_alignment
| `wrong_pnext of Biocaml_sam_deprecated.raw_alignment
| `wrong_pos of Biocaml_sam_deprecated.raw_alignment
| `wrong_qname of Biocaml_sam_deprecated.raw_alignment
| `wrong_ref_sequence_length of (string * string) list
| `wrong_tlen of Biocaml_sam_deprecated.raw_alignment ]
All possible parsing errors. It is defined as:
type parse = [
| string_to_raw
| raw_to_item
]
type
t = parse
The union of all possible errors.
S-Expressions conversions for Errors
end
exception Error of Error.t
The only exception raised by *_exn
functions in this module.
Stream functions
val in_channel_to_item_stream : ?buffer_size:int ->
?filename:string ->
Pervasives.in_channel ->
(item, [> Error.parse ])
Biocaml_internal_utils.Result.t Biocaml_internal_utils.Stream.t
Parse an input-channel into a stream of high-level items.
val in_channel_to_raw_item_stream : ?buffer_size:int ->
?filename:string ->
Pervasives.in_channel ->
(raw_item, [> Error.parse ])
Biocaml_internal_utils.Result.t Biocaml_internal_utils.Stream.t
Parse an input-channel into a stream of low-level (“raw”) items.
val in_channel_to_item_stream_exn : ?buffer_size:int ->
?filename:string ->
Pervasives.in_channel ->
item Biocaml_internal_utils.Stream.t
Like in_channel_to_item_stream, but each call to Stream.next may raise Error _.
val in_channel_to_raw_item_stream_exn : ?buffer_size:int ->
?filename:string ->
Pervasives.in_channel ->
raw_item Biocaml_internal_utils.Stream.t
Like in_channel_to_raw_item_stream, but each call to Stream.next may raise Error _.
Low-level partial parsing
Here we expose functions used both in
Biocaml_sam.Transform
and
Biocaml_bam.Transform
for parsing.
Most users can ignore these functions, but they can be useful when implementing custom parsers.
val parse_cigar_text : string ->
(cigar_op array, [> `wrong_cigar_text of string ])
Biocaml_internal_utils.Result.t
Parse CIGAR operations from a string.
val parse_header_line : 'a ->
string ->
([> `comment of string | `header of string * (string * string) list ],
[> `invalid_header_tag of 'a * string
| `invalid_tag_value_list of 'a * string list ])
Biocaml_internal_utils.Result.t
val expand_header_line : (string * string) list ->
([> `header_line of
string * [ `coordinate | `queryname | `unknown | `unsorted ] *
(string * string) list ],
[> `header_line_without_version of (string * string) list
| `header_line_wrong_sorting of string ])
Biocaml_internal_utils.Result.t
Parse a header line into a more detailed type.
S-Expressions