struct
let next_vcf_header meta p =
let open Lines.Buffer in
let { vcfm_info; vcfm_format; _ } = meta in
let l = Option.value_exn (next_line p :> string option) in
let chunks =
List.filter ~f:(fun s -> s <> "") (String.split ~on:' ' l)
in begin match chunks with
| "#CHROM" :: "POS" :: "ID" :: "REF" :: "ALT" :: "QUAL" ::
"FILTER" :: "INFO" :: rest ->
begin match rest with
| "FORMAT" :: ((_ :: _) as samples)
| ([] as samples) ->
let merge_with_reserved ~c = Hashtbl.merge
~f:(fun ~key v ->
match v with
| `Left t -> Some (c Unknown t "<reserved>")
| `Right parsed -> Some parsed
| `Both (_t, parsed) -> Some parsed)
in
let vcfm_info = merge_with_reserved reserved_info vcfm_info
~c:(fun n t description -> Info (n, t, description));
and vcfm_format = merge_with_reserved reserved_format vcfm_format
~c:(fun n t description -> Format (n, t, description));
and (vcfm_header, vcfm_samples) =
if samples = []
then (chunks, samples)
else List.split_n chunks List.(length chunks - length samples)
in Ok (`complete { meta with vcfm_info; vcfm_format;
vcfm_header; vcfm_samples })
| _ :: _ -> Error (`malformed_header (current_position p, l))
end
| _ -> Error (`malformed_header (current_position p, l))
end
let next_vcf_meta meta p =
let open Lines.Buffer in
let { vcfm_info; vcfm_filters; vcfm_format; vcfm_alt; _ } = meta in
match (peek_line p :> string option) with
| Some l when String.is_prefix l ~prefix:"##" ->
let _l = next_line p in
let s = String.suffix l (String.length l - 2) in
begin match String.lsplit2 s ~on:'=' with
| Some ("fileformat", v) ->
Ok (`partial { meta with vcfm_version = v })
| Some ("INFO", v) ->
Scanf.sscanf v "<ID=%s@,Number=%s@,Type=%s@,Description=%S>"
(fun id n t description ->
let info_meta = Info (string_to_vcf_number n,
string_to_vcf_info_type t,
description)
in Hashtbl.set vcfm_info id info_meta);
Ok (`partial meta)
| Some ("FILTER", v) ->
Scanf.sscanf v "<ID=%s@,Description=%S>"
(fun id description ->
let filter_meta = Filter description in
let meta = { meta with
vcfm_filters = (id, filter_meta) :: vcfm_filters }
in Ok (`partial meta))
| Some ("FORMAT", v) ->
Scanf.sscanf v "<ID=%s@,Number=%s@,Type=%s@,Description=%S>"
(fun id n t description ->
let format_meta = Format (string_to_vcf_number n,
string_to_vcf_format_type t,
description)
in Hashtbl.set vcfm_format id format_meta);
Ok (`partial meta)
| Some ("ALT", v) ->
Scanf.sscanf v "<ID=%s@,Description=%S>"
(fun id description ->
let alt_meta = Alt description in
Hashtbl.set vcfm_alt id alt_meta);
Ok (`partial meta)
| Some (k, v) -> begin
Hashtbl.set meta.vcfm_arbitrary ~key:k ~data:v;
Ok (`partial meta)
end
| None -> Error (`malformed_meta (current_position p, s))
end
| Some _l ->
next_vcf_header meta p
| None -> Error `not_ready
let next_vcf_row meta p =
let open Line in
let open Lines.Buffer in
match (next_line p :> string option) with
| Some l when not (String.is_empty l) ->
let chunks =
List.filter ~f:(fun s -> s <> "") (String.split ~on:' ' l)
in begin match list_to_vcf_row meta chunks with
| Ok row -> `output (Ok row)
| Error err ->
`output (Error (`malformed_row (current_position p, err, l)))
end
| Some _ | None -> `not_ready
let rec next meta_ref p =
match !meta_ref with
| `complete meta -> next_vcf_row meta p
| `partial meta ->
begin match next_vcf_meta meta p with
| Ok meta -> meta_ref := meta; `not_ready
| Error `not_ready -> `not_ready
| Error err -> `output (Error err)
end
let string_to_item ?filename () =
let name = sprintf "vcf_parser:%s" Option.(value ~default:"<>" filename) in
let meta_ref = ref (`partial default_meta) in
Lines.Transform.make_merge_error ~name ?filename ~next:(next meta_ref) ()
end