Module Biocaml_sam (.ml)

SAM files. Documentation here assumes familiarity with the SAM specification.

module Biocaml_sam: 
sig

Types



Header Types


type header_item_tag = private [< `CO | `HD | `Other of string | `PG | `RG | `SQ ] 
Header item tags define the different types of header lines. The term "tag" in this context should not be confused with its use in "tag-value" pairs, which comprise the content of header items.
type tag_value = private string * string 
A tag-value pair comprising the content of header items. Tag-value pairs occur in other places too, but this type is specifically for those in the header.
type sort_order = [ `Coordinate | `Query_name | `Unknown | `Unsorted ] 
type header_line = private {
   version : string; (*VN*)
   sort_order : sort_order option; (*SO*)
}
@HD. A header consists of different types of lines. Confusingly, one of these types is called the "header line", which is what this type refers to. It does not refer generically to any line within a header.
type ref_seq = private {
   name : string; (*SN*)
   length : int; (*LN*)
   assembly : string option; (*AS*)
   md5 : string option; (*M5*)
   species : string option; (*SP*)
   uri : string option; (*UR*)
}
@SQ. Reference sequence.
type platform = [ `Capillary
| `Helicos
| `Illumina
| `Ion_Torrent
| `LS454
| `Pac_Bio
| `Solid ]
type read_group = private {
   id : string; (*ID*)
   seq_center : string option; (*CN*)
   description : string option; (*DS*)
   run_date : [ `Date of Core.Std.Date.t | `Time of Core.Std.Time.t ] option; (*DT*)
   flow_order : string option; (*FO*)
   key_seq : string option; (*KS*)
   library : string option; (*LB*)
   program : string option; (*PG*)
   predicted_median_insert_size : int option; (*PI*)
   platform : platform option; (*PL*)
   platform_unit : string option; (*PU*)
   sample : string option; (*SM*)
}
@RG. Read group.
type program = private {
   id : string; (*ID*)
   name : string option; (*PN*)
   command_line : string option; (*CL*)
   previous_id : string option; (*PP*)
   description : string option; (*DS*)
   version : string option; (*VN*)
}
@PG. Program.
type header_item = private [< `CO of string
| `HD of header_line
| `Other of string * tag_value list
| `PG of program
| `RG of read_group
| `SQ of ref_seq ]
type header = private {
   version : string option;
   sort_order : sort_order option;
   ref_seqs : ref_seq list;
   read_groups : read_group list;
   programs : program list;
   comments : string list;
   others : (string * tag_value list) list;
}
  • sort_order: Guaranteed to be None if version = None.

  • ref_seqs: List of @SQ items. Order matters; it dictates alignment sorting order when sort_order = `Coordinate.
  • read_groups: Unordered list of @RG items.
  • programs: List of @PG lines. Currently unordered; ideally they should be topologically sorted (presumably along the PP "previous program" links between @PG items).
  • comments: Unordered list of @CO lines.

val empty_header : header

Alignment Types


module Flags: 
sig
type t = private int 
Flags are represented as a "bit map".
val of_int : int -> t Core.Std.Or_error.t
val has_multiple_segments : t -> bool
val segment_unmapped : t -> bool
val next_segment_unmapped : t -> bool
val first_segment : t -> bool
val last_segment : t -> bool
val secondary_alignment : t -> bool
val supplementary_alignment : t -> bool
val t_of_sexp : Sexplib.Sexp.t -> t
val sexp_of_t : t -> Sexplib.Sexp.t
end
type cigar_op = private [< `Alignment_match of int
| `Deletion of int
| `Hard_clipping of int
| `Insertion of int
| `Padding of int
| `Seq_match of int
| `Seq_mismatch of int
| `Skipped of int
| `Soft_clipping of int ]
CIGAR operations.
type optional_field_value = private [< `A of char
| `B of char * string list
| `H of string
| `Z of string
| `f of float
| `i of Core.Std.Int32.t ]
The constructor encodes the TYPE and each carries its corresponding VALUE.
type optional_field = private {
   tag : string;
   value : optional_field_value;
}
type rnext = private [< `Equal_to_RNAME | `Value of string ] 
type alignment = private {
   qname : string option; (*QNAME*)
   flags : Flags.t; (*FLAG*)
   rname : string option; (*RNAME*)
   pos : int option; (*POS*)
   mapq : int option; (*MAPQ*)
   cigar : cigar_op list; (*CIGAR*)
   rnext : rnext option; (*RNEXT*)
   pnext : int option; (*PNEXT*)
   tlen : int option; (*TLEN*)
   seq : string option; (*SEQ*)
   qual : Biocaml_phred_score.t list; (*QUAL*)
   optional_fields : optional_field list;
}
For cigar and qual, an empty list indicates that no value was given, i.e. the field was '*'.

Input/Output


module MakeIO: 
functor (Future : Future.S) ->
sig
val read : ?start:Biocaml_internal_utils.Pos.t ->
Future.Reader.t ->
(Biocaml_sam.header *
Biocaml_sam.alignment Core.Std.Or_error.t Future.Pipe.Reader.t)
Core.Std.Or_error.t Future.Deferred.t
val write : Future.Writer.t ->
?header:Biocaml_sam.header ->
Biocaml_sam.alignment Future.Pipe.Reader.t -> unit Future.Deferred.t
val write_file : ?perm:int ->
?append:bool ->
string ->
?header:Biocaml_sam.header ->
Biocaml_sam.alignment Future.Pipe.Reader.t -> unit Future.Deferred.t
end
include ??

Low-level Parsers and Constructors


val header_line : version:string ->
?sort_order:sort_order ->
unit -> header_line Core.Std.Or_error.t

Low-level Header Parsers and Constructors


val ref_seq : name:string ->
length:int ->
?assembly:string ->
?md5:string ->
?species:string ->
?uri:string -> unit -> ref_seq Core.Std.Or_error.t
val read_group : id:string ->
?seq_center:string ->
?description:string ->
?run_date:string ->
?flow_order:string ->
?key_seq:string ->
?library:string ->
?program:string ->
?predicted_median_insert_size:int ->
?platform:platform ->
?platform_unit:string ->
?sample:string -> unit -> read_group Core.Std.Or_error.t
The run_date string will be parsed as a Date.t or a Time.t, whichever is possible. If it is a time without a timezone, the local timezone will be assumed.
val header : ?version:string ->
?sort_order:sort_order ->
?ref_seqs:ref_seq list ->
?read_groups:read_group list ->
?programs:program list ->
?comments:string list ->
?others:(string * tag_value list) list ->
unit -> header Core.Std.Or_error.t
val parse_header_item_tag : string -> header_item_tag Core.Std.Or_error.t
val parse_tag_value : string -> tag_value Core.Std.Or_error.t
val parse_header_version : string -> string Core.Std.Or_error.t
val parse_sort_order : string -> sort_order Core.Std.Or_error.t
val parse_header_line : tag_value list -> header_line Core.Std.Or_error.t
val parse_ref_seq : tag_value list -> ref_seq Core.Std.Or_error.t
val parse_platform : string -> platform Core.Std.Or_error.t
val parse_read_group : tag_value list -> read_group Core.Std.Or_error.t
val parse_program : tag_value list -> program Core.Std.Or_error.t
val parse_header_item : Biocaml_internal_utils.Line.t -> header_item Core.Std.Or_error.t
val parse_header : string -> header Core.Std.Or_error.t
val cigar_op_alignment_match : int -> cigar_op Core.Std.Or_error.t

Low-level CIGAR Parsers and Constructors


val cigar_op_insertion : int -> cigar_op Core.Std.Or_error.t
val cigar_op_deletion : int -> cigar_op Core.Std.Or_error.t
val cigar_op_skipped : int -> cigar_op Core.Std.Or_error.t
val cigar_op_soft_clipping : int -> cigar_op Core.Std.Or_error.t
val cigar_op_hard_clipping : int -> cigar_op Core.Std.Or_error.t
val cigar_op_padding : int -> cigar_op Core.Std.Or_error.t
val cigar_op_seq_match : int -> cigar_op Core.Std.Or_error.t
val cigar_op_seq_mismatch : int -> cigar_op Core.Std.Or_error.t
val optional_field_value_A : char -> optional_field_value Core.Std.Or_error.t

Low-level Optional field Parsers and Constructors


val optional_field_value_i : Core.Std.Int32.t -> optional_field_value
val optional_field_value_f : float -> optional_field_value
val optional_field_value_Z : string -> optional_field_value Core.Std.Or_error.t
val optional_field_value_H : string -> optional_field_value Core.Std.Or_error.t
val optional_field_value_B : char -> string list -> optional_field_value Core.Std.Or_error.t
val optional_field : string ->
optional_field_value ->
optional_field Core.Std.Or_error.t
val parse_optional_field_value : string -> optional_field_value Core.Std.Or_error.t
val parse_optional_field : string -> optional_field Core.Std.Or_error.t
val alignment : ?ref_seqs:Core.Std.String.Set.t ->
?qname:string ->
flags:Flags.t ->
?rname:string ->
?pos:int ->
?mapq:int ->
?cigar:cigar_op list ->
?rnext:rnext ->
?pnext:int ->
?tlen:int ->
?seq:string ->
?qual:Biocaml_phred_score.t list ->
?optional_fields:optional_field list ->
unit -> alignment Core.Std.Or_error.t

Low-level Alignment Parsers and Constructors


val parse_qname : string -> string option Core.Std.Or_error.t
val parse_flags : string -> Flags.t Core.Std.Or_error.t
val parse_rname : string -> string option Core.Std.Or_error.t
val parse_pos : string -> int option Core.Std.Or_error.t
val parse_mapq : string -> int option Core.Std.Or_error.t
val parse_cigar : string -> cigar_op list Core.Std.Or_error.t
val parse_rnext : string -> rnext option Core.Std.Or_error.t
val parse_pnext : string -> int option Core.Std.Or_error.t
val parse_tlen : string -> int option Core.Std.Or_error.t
val parse_seq : string -> string option Core.Std.Or_error.t
val parse_qual : string -> Biocaml_phred_score.t list Core.Std.Or_error.t
val parse_alignment : ?ref_seqs:Core.Std.String.Set.t ->
Biocaml_internal_utils.Line.t -> alignment Core.Std.Or_error.t

Low-level Printers


val print_header_item_tag : header_item_tag -> string

Low-level Header Printers


val print_tag_value : tag_value -> string
val print_header_version : string -> string
val print_sort_order : sort_order -> string
val print_header_line : header_line -> string
val print_ref_seq : ref_seq -> string
val print_platform : platform -> string
val print_read_group : read_group -> string
val print_program : program -> string
val print_other : string * tag_value list -> string
val print_qname : string option -> string

Low-level Alignment Printers


val print_flags : Flags.t -> string
val print_rname : string option -> string
val print_pos : int option -> string
val print_mapq : int option -> string
val print_cigar_op : cigar_op -> string
val print_cigar : cigar_op list -> string
val print_rnext : rnext option -> string
val print_pnext : int option -> string
val print_tlen : int option -> string
val print_seq : string option -> string
val print_qual : Biocaml_phred_score.t list -> string
val print_optional_field : optional_field -> string
val print_alignment : alignment -> string
val header_item_tag_of_sexp : Sexplib.Sexp.t -> header_item_tag
val __header_item_tag_of_sexp__ : Sexplib.Sexp.t -> header_item_tag
val sexp_of_header_item_tag : header_item_tag -> Sexplib.Sexp.t
val tag_value_of_sexp : Sexplib.Sexp.t -> tag_value
A tag-value pair comprising the content of header items. Tag-value pairs occur in other places too, but this type is specifically for those in the header.
val sexp_of_tag_value : tag_value -> Sexplib.Sexp.t
val sort_order_of_sexp : Sexplib.Sexp.t -> sort_order
val __sort_order_of_sexp__ : Sexplib.Sexp.t -> sort_order
val sexp_of_sort_order : sort_order -> Sexplib.Sexp.t
val header_line_of_sexp : Sexplib.Sexp.t -> header_line
@HD. A header consists of different types of lines. Confusingly, one of these types is called the "header line", which is what this type refers to. It does not refer generically to any line within a header.
val sexp_of_header_line : header_line -> Sexplib.Sexp.t
val ref_seq_of_sexp : Sexplib.Sexp.t -> ref_seq
@SQ. Reference sequence.
val sexp_of_ref_seq : ref_seq -> Sexplib.Sexp.t
val platform_of_sexp : Sexplib.Sexp.t -> platform
val __platform_of_sexp__ : Sexplib.Sexp.t -> platform
val sexp_of_platform : platform -> Sexplib.Sexp.t
val read_group_of_sexp : Sexplib.Sexp.t -> read_group
@RG.
val sexp_of_read_group : read_group -> Sexplib.Sexp.t
val program_of_sexp : Sexplib.Sexp.t -> program
@PG.
val sexp_of_program : program -> Sexplib.Sexp.t
val header_item_of_sexp : Sexplib.Sexp.t -> header_item
val __header_item_of_sexp__ : Sexplib.Sexp.t -> header_item
val sexp_of_header_item : header_item -> Sexplib.Sexp.t

- sort_order: Guaranteed to be None if version = None.



Alignment Types



Flags are represented as a "bit map".
val cigar_op_of_sexp : Sexplib.Sexp.t -> cigar_op
CIGAR operations.
val __cigar_op_of_sexp__ : Sexplib.Sexp.t -> cigar_op
val sexp_of_cigar_op : cigar_op -> Sexplib.Sexp.t
val optional_field_value_of_sexp : Sexplib.Sexp.t -> optional_field_value
The constructor encodes the TYPE and each carries its corresponding VALUE.
val __optional_field_value_of_sexp__ : Sexplib.Sexp.t -> optional_field_value
val sexp_of_optional_field_value : optional_field_value -> Sexplib.Sexp.t
val optional_field_of_sexp : Sexplib.Sexp.t -> optional_field
val sexp_of_optional_field : optional_field -> Sexplib.Sexp.t
val rnext_of_sexp : Sexplib.Sexp.t -> rnext
val __rnext_of_sexp__ : Sexplib.Sexp.t -> rnext
val sexp_of_rnext : rnext -> Sexplib.Sexp.t
val alignment_of_sexp : Sexplib.Sexp.t -> alignment
For cigar and qual, empty list indicates no value, i.e. '*', was given.
val sexp_of_alignment : alignment -> Sexplib.Sexp.t

Input/Output



Low-level Parsers and Constructors



Low-level Header Parsers and Constructors



The run_date string will be parsed as a Date.t or Time.t, whichever is possible. If it is a time without a timezone, local timezone will be assumed.

Low-level CIGAR Parsers and Constructors



Low-level Optional field Parsers and Constructors



Low-level Alignment Parsers and Constructors



Low-level Printers



Low-level Header Printers



Low-level Alignment Printers


end