Commit ac2adfb0 authored by Philippe Veber's avatar Philippe Veber
Browse files

New module Real_dataset: pipeline for real datasets!

parent 7b241068
open Core_kernel
open Bistro
(** Description of a real dataset: one tree file and one directory of
    alignment files. Values are built with [make]. *)
type t = {
(* Path of the tree file; turned into a workflow input in [Family.tree]. *)
tree_path : string ;
(* Directory listed by [families] and used as the prefix of every
   alignment path in [Family.nucleotide_alignment]. *)
alignment_dir_path : string ;
}
(** Detection methods that can be requested. [implementation] maps each tag
    to a call of the [DP] pipeline:
    - [`Multinomial]: [DP.multinomial_asymptotic_lrt]
    - [`Pcoc]: [DP.pcoc ~gamma:false ~ncat:10]
    - [`Pcoc_gamma]: [DP.pcoc ~gamma:true ~ncat:10]
    - [`Pcoc_C60]: [DP.pcoc ~gamma:false ~ncat:60]
    - [`Tdg09]: [DP.tdg09]
    - [`Topological]: [DP.topological]
    - [`Identical]: [DP.identical]
    - [`Diffsel]: [DP.diffsel] *)
type meth =
[`Multinomial |
`Pcoc |
`Pcoc_gamma |
`Pcoc_C60|
`Tdg09 |
`Topological |
`Identical |
`Diffsel]
(** A single alignment file of a dataset, packaged with the operations that
    [Detection_pipeline.Make] expects from its argument. *)
module Family : sig
  type dataset = t
  type t

  (** Name given to the family when it was built with [make]. *)
  val name : t -> string

  (** [make rd ~name ~ext] designates the file [name.ext] located in the
      alignment directory of [rd]. No file access happens here. *)
  val make : dataset -> name:string -> ext:string -> t

  include Detection_pipeline.Dataset with type t := t
end = struct
  type dataset = t

  type t = {
    rd : dataset ;   (* dataset the family belongs to *)
    name : string ;  (* file name without its extension *)
    ext : string ;   (* file extension, without the dot *)
  }

  let name { name ; _ } = name

  let make rd ~name ~ext = { rd ; name ; ext }

  (* The alignment lives at <alignment_dir_path>/<name>.<ext> and is
     declared as a workflow input. *)
  let nucleotide_alignment { rd ; name ; ext } =
    let path = sprintf "%s/%s.%s" rd.alignment_dir_path name ext in
    Workflow.input path

  (* The tree of a family is the dataset-wide tree passed through
     [Raw_dataset.filter_input_tree] together with the family's alignment. *)
  let tree family =
    let fna = nucleotide_alignment family in
    let tree = Workflow.input family.rd.tree_path in
    Raw_dataset.filter_input_tree ~descr:family.name ~tree ~fna ()
end
(* Detection pipeline instantiated on [Family]; [implementation] below
   dispatches every method to one of its functions. *)
module DP = Detection_pipeline.Make(Family)
(** [make ~tree_path ~alignment_dir_path] records the two paths in a dataset
    description. The paths are stored as given; nothing is read or checked. *)
let make ~tree_path:tree ~alignment_dir_path:dir =
  (* FIXME: test if paths exist *)
  { tree_path = tree ; alignment_dir_path = dir }
(** [families rd] lists the alignment families of [rd]: one family per entry
    of [rd.alignment_dir_path] whose extension is [fa], [fna] or [fasta].
    Entries with another extension, or with no extension, are ignored.

    The result is ordered by file name. [Sys.readdir] gives no guarantee on
    the order of the entries it returns, so without the sort the order of
    the families (and of everything derived from them) could change from
    one run or one machine to the next.

    NOTE(review): presumably raises [Sys_error] when the directory cannot be
    read, as the standard [Sys.readdir] does; confirm which [Sys] is in
    scope under [open Core_kernel]. *)
let families rd =
  Sys.readdir rd.alignment_dir_path
  |> Array.to_list
  |> List.sort ~compare:String.compare
  |> List.filter_map ~f:(fun fn ->
      match Filename.split_extension fn with
      | name, Some ("fa" | "fna" | "fasta" as ext) ->
        Some (Family.make rd ~name ~ext)
      | _ -> None
    )
(** [implementation f meth] builds the [DP] result for family [f] with the
    detection method [meth]. The three PCOC tags share [DP.pcoc] and differ
    only by their [gamma] and [ncat] arguments. *)
let implementation f meth =
  let pcoc ~gamma ~ncat = DP.pcoc ~gamma ~ncat f in
  match meth with
  | `Multinomial -> DP.multinomial_asymptotic_lrt f
  | `Tdg09 -> DP.tdg09 f
  | `Topological -> DP.topological f
  | `Identical -> DP.identical f
  | `Diffsel -> DP.diffsel f
  | `Pcoc -> pcoc ~gamma:false ~ncat:10
  | `Pcoc_gamma -> pcoc ~gamma:true ~ncat:10
  | `Pcoc_C60 -> pcoc ~gamma:false ~ncat:60
(** [maybe_apply_method meths f m] is [Some (implementation f m)] when [m]
    belongs to [meths] and [None] otherwise. [implementation] is only called
    for a requested method. *)
let maybe_apply_method meths f m =
  match List.mem meths m ~equal:Poly.equal with
  | true -> Some (implementation f m)
  | false -> None
(** [result_table meths rd] returns, for every family of [rd], its name
    paired with the table produced by
    [Convergence_detection.merge_result_tables] from the requested methods.

    NOTE(review): only [`Multinomial], [`Tdg09], [`Pcoc], [`Topological],
    [`Identical] and [`Diffsel] are forwarded below; [`Pcoc_gamma] and
    [`Pcoc_C60] in [meths] currently have no effect on the table. Check
    whether [merge_result_tables] has arguments for them. *)
let result_table meths rd =
  let table_of_family f =
    let merged =
      Convergence_detection.merge_result_tables
        ?multinomial:(maybe_apply_method meths f `Multinomial)
        ?tdg09:(maybe_apply_method meths f `Tdg09)
        ?pcoc:(maybe_apply_method meths f `Pcoc)
        ?topological:(maybe_apply_method meths f `Topological)
        ?identical:(maybe_apply_method meths f `Identical)
        ?diffsel:(maybe_apply_method meths f `Diffsel)
        ()
    in
    (Family.name f, merged)
  in
  families rd |> List.map ~f:table_of_family
(** [repo meths rd] turns each entry of [result_table meths rd] into a
    repository item stored under the single-component path
    [<family name>.tsv]. *)
let repo meths rd =
  let item_of_result (family_name, table) =
    Bistro_utils.Repo.item [ family_name ^ ".tsv" ] table
  in
  result_table meths rd |> List.map ~f:item_of_result
open Bistro
open File_formats
(** A real dataset: a tree file and a directory of alignment files.
    Built with {!make}. *)
type t
(** Detection methods that can be requested from {!result_table} and
    {!repo}. In the implementation, [`Pcoc], [`Pcoc_gamma] and [`Pcoc_C60]
    all run the PCOC pipeline, with [gamma]/[ncat] set to [false]/[10],
    [true]/[10] and [false]/[60] respectively. *)
type meth =
[`Multinomial |
`Pcoc |
`Pcoc_gamma |
`Pcoc_C60|
`Tdg09 |
`Topological |
`Identical |
`Diffsel]
(** [make ~tree_path ~alignment_dir_path] describes a dataset made of the
    tree file at [tree_path] and the alignment files found in
    [alignment_dir_path]. The paths are not checked at this point. *)
val make :
tree_path:string ->
alignment_dir_path:string ->
t
(** [result_table meths d] returns one [(family name, table)] pair per
    family of [d] (see {!families}); each table merges the results of the
    methods listed in [meths].

    NOTE(review): the implementation does not forward [`Pcoc_gamma] nor
    [`Pcoc_C60] to the merge step, so listing them has no effect on the
    table for now. *)
val result_table :
meth list ->
t ->
(string * text_file pworkflow) list
(** [repo meths d] is a repository with one item per family of [d]: the
    table from {!result_table}, stored as [<family name>.tsv]. *)
val repo :
meth list ->
t -> Bistro_utils.Repo.t
(** A family: one alignment file of a dataset. *)
module Family : sig
type t
(** Tree workflow associated with the family. The implementation obtains it
    by passing the dataset tree and the family's alignment to
    [Raw_dataset.filter_input_tree]. *)
val tree : t -> nhx pworkflow
end
(** [families d] lists the families of [d]: one per file of the alignment
    directory whose extension is [fa], [fna] or [fasta]. The directory is
    read when this function is called. *)
val families : t -> Family.t list
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment