Gitlab is now running v13.9.0 - More info -> here <-

Commit 2bf80010 authored by Philippe Veber's avatar Philippe Veber

Detection_pipeline: added ranking

parent a4977c65
......@@ -69,4 +69,6 @@ module New_API = struct
let tree ~branch_length_unit:_ (d, _) = d.tree
end
let queries d =
List.map d.nucleotide_alignments ~f:(fun al -> d, al)
end
......@@ -69,6 +69,16 @@ module type S = sig
val view_site :
query -> convergent_species:string list -> site_pos:int -> pdf file
val ranking :
?length:int ->
query_descr:(query -> string) ->
meth:(query -> cpt file) ->
column_label:string ->
convergent_species:string list workflow ->
query list ->
Codepitk.Candidate_site.t list workflow
end
module Make (Q : Query) = struct
......@@ -245,4 +255,89 @@ module Make (Q : Query) = struct
|> Base.Fn.flip Biotk_croquis.Croquis.Layout.render_pdf dest
in
Workflow.path_plugin ~descr:"detection_pipeline.view_site" f
let ranking ?(length = 100) ~query_descr ~meth ~column_label ~convergent_species queries =
let open Core_kernel in
let open Codepitk in
let query_ids = Array.of_list (List.map queries ~f:query_descr) in
let trees = List.map queries ~f:(tree ~branch_length_unit:`Amino_acid) in
let alignments = List.map queries ~f:amino_acid_alignment in
let result_files = List.map queries ~f:meth in
let f = fun%workflow () ->
let ranking_length = [%param length] in
let column_label = [%param column_label] in
let query_ids = [%param query_ids] in
let trees = [%eval Workflow.path_list trees] in
let alignments = [%eval Workflow.path_list alignments] in
let result_files = [%eval Workflow.path_list result_files] in
let convergent_species = String.Set.of_list [%eval convergent_species] in
let module Result_table = Codepitk.Result_table in
let lazy_load xs ~f = List.map xs ~f:(fun x -> lazy (f x)) |> Array.of_list in
let results = lazy_load result_files ~f:Result_table.of_file in
let trees = lazy_load trees ~f:Phylogenetics.Newick.from_file in
let alignments = lazy_load alignments ~f:Biotk.Fasta.from_file in
let module S = struct
type t = {
alignment : int ;
pos : int ;
score : float ;
}
let leq s1 s2 = Float.(s1.score <= s2.score)
end in
let module H = CCHeap.Make(S) in
let add_result_line acc ~alignment ~pos ~score =
match score with
| None -> acc
| Some score ->
let candidate = { S.alignment ; pos ; score } in
let acc = H.add acc candidate in
if H.size acc > ranking_length then H.take_exn acc |> fst
else acc
in
let add_result_file alignment acc (rf : Result_table.t Lazy.t) =
let col = List.Assoc.find_exn (Lazy.force rf).scores_per_meth column_label ~equal:String.equal in
Array.foldi col ~init:acc ~f:(fun pos acc score ->
add_result_line acc ~alignment ~pos ~score
)
in
let top_sites =
Array.foldi results ~init:H.empty ~f:add_result_file
|> H.to_list_sorted
|> List.rev
in
let annotate_site (s : S.t) =
let id = query_ids.(s.alignment) in
let tree = Lazy.force trees.(s.alignment) in
let alignment =
match Lazy.force alignments.(s.alignment) with
| Ok (_, fa) -> fa
| Error _ -> assert false
in
let leaf_values = List.map alignment ~f:(fun it ->
it.description, it.sequence.[s.pos]
)
in
let contents =
let open Phylogenetics in
Newick.with_inner_tree tree ~f:(fun t ->
Tree.map t
~node:(fun _ -> ())
~branch:(fun bi -> Option.value_exn bi.length)
~leaf:(fun ni ->
let species = Option.value_exn ni.name in
let state = List.Assoc.find_exn leaf_values species ~equal:String.equal in
let condition =
if String.Set.mem convergent_species species then `Convergent
else `Ancestral
in
{ Codepitk.Candidate_site.species ; state ; condition }
)
)
in
{ Candidate_site.alignment_id = Some id ; contents ; pos = Some s.pos ; score = Some s.score }
in
List.map top_sites ~f:annotate_site
in
Workflow.plugin ~descr:"detection_pipeline.ranking" f
end
......@@ -69,6 +69,15 @@ module type S = sig
val view_site :
query -> convergent_species:string list -> site_pos:int -> pdf file
val ranking :
?length:int ->
query_descr:(query -> string) ->
meth:(query -> cpt file) ->
column_label:string ->
convergent_species:string list workflow ->
query list ->
Codepitk.Candidate_site.t list workflow
end
module Make (Q : Query) : S with type query := Q.t
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment