(F : Fetch) = struct
open F
let search_and_fetch database of_xml query =
let query_url = esearch_url database query in
fetch query_url esearch_answer_of_string >>= fun answer ->
let object_url = efetch_url ~retmode:`xml database answer.ids in
fetch object_url (fun x -> x |> tree_of_string |> snd |> of_xml)
let search_and_summary database of_xml query =
let query_url = esearch_url database query in
fetch query_url esearch_answer_of_string >>= fun answer ->
let object_url = esummary_url database answer.ids in
fetch object_url (fun x -> x |> tree_of_string |> snd |> of_xml)
module Object_id = struct
type t = [`int of int | `string of string ]
let to_string = function
| `int i -> string_of_int i
| `string s -> s
let of_xml x =
try `int (ileaf_exn "Object-id_id" x)
with _ -> (
try `string (sleaf_exn "Object-id_str" x)
with _ ->
invalid_arg (sprintf "Entrez.Make.Object_id.of_xml: %s" (string_of_tree x))
)
end
module Dbtag = struct
type t = {
db : string ;
tag : Object_id.t ;
}
let of_xml x = {
db = sleaf_exn "Dbtag_db" x ;
tag = Object_id.of_xml (x |> echild_exn "Dbtag_tag" |> echild_exn "Object-id")
}
end
module Gene_ref = struct
type t = {
locus : string option ;
allele : string option ;
desc : string option ;
maploc : string option ;
pseudo : bool option ;
db : Dbtag.t list ;
}
let of_xml t =
let t = echild_exn "Gene-ref" t in
{
locus = sleaf "Gene-ref_locus" t ;
allele = sleaf "Gene-ref_allele" t ;
desc = sleaf "Gene-ref_desc" t ;
maploc = sleaf "Gene-ref_maploc" t ;
pseudo = Option.bind (echild "Gene-ref_pseudo" t) (battr "value") ;
db =
Option.value_map
(echild "Gene-ref_db" t)
~default:[]
~f:(map_echildren ~tag:"Dbtag" Dbtag.of_xml) ;
}
end
module PubmedSummary = struct
type t = {
pmid : int ;
doi : string option ;
pubdate : string option ;
source : string option ;
title : string ;
}
let parse_article_ids x =
map_echildren
~tag:"ArticleId"
(fun x -> sleaf_exn "IdType" x, sleaf_exn "Value" x)
x
let parse_document_summary x =
let article_ids = parse_article_ids (echild_exn "ArticleIds" x) in
{ pmid = int_of_string (List.Assoc.find_exn article_ids "pubmed") ;
doi = List.Assoc.find article_ids "doi" ;
pubdate = sleaf "PubDate" x ;
source = sleaf "Source" x ;
title = sleaf_exn "Title" x }
let parse_eSummaryResult x =
map_echildren
~tag:"DocumentSummary"
parse_document_summary
(echild_exn "DocumentSummarySet" x)
let search = search_and_summary `pubmed parse_eSummaryResult
end
module Pubmed = struct
type t = {
pmid : int ;
title : string ;
abstract : string ;
}
let parse_book_document bd =
{ pmid = ileaf_exn "PMID" bd ;
title = sleaf_exn "ArticleTitle" bd ;
abstract = echild_exn "Abstract" bd |> sleaf_exn "AbstractText" }
let parse_medline_citation mc =
let article = echild_exn "Article" mc in
{ pmid = ileaf_exn "PMID" mc ;
title = sleaf_exn "ArticleTitle" article ;
abstract = echild_exn "Abstract" article |> sleaf_exn "AbstractText" }
let parse_pubmed_article_set_element x = match tag_of_tree x with
| Some "PubmedArticle" ->
Some (parse_medline_citation (echild_exn "MedlineCitation" x))
| Some "PubmedBookArticle" ->
Some (parse_book_document (echild_exn "BookDocument" x))
| Some t ->
failwith (sprintf "Unexpected %s tag while parsing PubmedArticleSet element" t)
| None -> None
let parse_pubmed_article_set = function
| E ("PubmedArticleSet",_,children) ->
List.filter_map ~f:parse_pubmed_article_set_element children
| _ -> assert false
let search = search_and_fetch `pubmed parse_pubmed_article_set
end
module Gene = struct
type t = {
_type : [ `unknown | `tRNA | `rRNA | `snRNA | `scRNA |
`snoRNA | `protein_coding | `pseudo | `transposon | `miscRNA |
`ncRNA | `other ] ;
summary : string option ;
gene : Gene_ref.t ;
}
let type_of_int = function
| 0 -> `unknown
| 1 -> `tRNA
| 2 -> `rRNA
| 3 -> `snRNA
| 4 -> `scRNA
| 5 -> `snoRNA
| 6 -> `protein_coding
| 7 -> `pseudo
| 8 -> `transposon
| 9 -> `miscRNA
| 10 -> `ncRNA
| 11 -> `ncRNA
| n -> invalid_arg (sprintf "Entrez.Make.Gene.type_of_int: %d" n)
let parse_entrez_gene = function
| E ("Entrezgene",_,_) as x -> Some {
summary = sleaf "Entrezgene_summary" x ;
_type = type_of_int (ileaf_exn "Entrezgene_type" x) ;
gene = Gene_ref.of_xml (echild_exn "Entrezgene_gene" x) ;
}
| _ -> None
let parse_entrez_gene_set = function
| E ("Entrezgene-Set",_,children) ->
List.filter_map ~f:parse_entrez_gene children
| _ -> assert false
let search query =
let database = `gene in
let of_xml = parse_entrez_gene_set in
let query_url = esearch_url database query in
fetch query_url esearch_answer_of_string >>= fun answer ->
let object_url = efetch_url ~retmode:`xml database answer.ids in
fetch object_url (fun x -> x |> tree_of_string |> snd |> of_xml)
end
end