Commit f2d78011 authored by Philippe Veber
Browse files

started new CLI for dataset analysis

parent 000fdac4
......@@ -2,7 +2,6 @@
_build
_bistro
outdir*
run*
reviewphiltrans.install
**/.merlin
example/outdir
......
open Core
open Codepi
open Bistro_utils
(** [first_nhx_in_dir dir] returns the name (not the full path) of an
    entry of [dir] whose name ends in [".nhx"].

    [Sys.readdir] returns entries in no specified order, so when [dir]
    holds several [.nhx] files the one that is picked depends on the
    file system.

    @raise Failure if [dir] contains no entry ending in [".nhx"]. *)
let first_nhx_in_dir dir =
  match
    Sys.readdir dir
    |> Array.find ~f:(String.is_suffix ~suffix:".nhx")
  with
  | Some file -> file
  | None ->
    (* Fail with an actionable message instead of a bare not-found
       exception that does not say which directory was searched. *)
    failwithf "No .nhx tree file found in directory %s" dir ()
(** [sw b x] ("switch") is [Some x] when [b] holds and [None] otherwise. *)
let sw b x =
  match b with
  | true -> Some x
  | false -> None
(** Build and run the real-data pipeline.

    The dataset is read from [indir]: alignments are looked up in the
    ["Alignments"] sub-directory and the tree is the [.nhx] file found
    by [first_nhx_in_dir]. Each [use_*] flag enables the corresponding
    detection method; the multinomial method is enabled unless
    [no_use_multinomial] is set. [calc_dnds] and [calc_gene_trees]
    enable the corresponding extra analyses.

    The resulting repository is built into [outdir] through
    [Bistro_utils.Repo.build_main]; [np] and [mem] (interpreted as GB)
    are forwarded to it when provided. *)
let realdata_main
    ~use_diffsel
    ~use_pcoc
    ~use_pcoc_c60
    ~use_pcoc_gamma
    ~use_pcoc_v2
    ~use_pcoc_pcp
    ~use_tdg09
    ~use_topological
    ~use_identical
    ~no_use_multinomial
    ~calc_dnds
    ~calc_gene_trees
    ~indir ~outdir ~np ~mem () =
  let loggers = [ Console_logger.create () ] in
  let mem = Option.map mem ~f:(fun gb -> `GB gb) in
  let rd =
    let tree_path = Filename.concat indir (first_nhx_in_dir indir) in
    let alignment_dir_path = Filename.concat indir "Alignments" in
    Real_dataset.make ~alignment_dir_path ~tree_path
  in
  (* Keep only the tags whose switch is on, in declaration order. *)
  let enabled choices =
    List.filter_map choices ~f:(fun (active, tag) -> sw active tag)
  in
  let meths = enabled [
      use_diffsel, `Diffsel ;
      use_pcoc, `Pcoc ;
      use_pcoc_c60, `Pcoc_C60 ;
      use_pcoc_gamma, `Pcoc_gamma ;
      use_pcoc_v2, `PCOC_v2 ;
      use_pcoc_pcp, `PCOC_pcp ;
      use_tdg09, `Tdg09 ;
      use_topological, `Topological ;
      use_identical, `Identical ;
      not no_use_multinomial, `Multinomial ;
    ]
  in
  let pal = enabled [
      calc_dnds, `DnDs ;
      calc_gene_trees, `GeneTree ;
    ]
  in
  let repo =
    Repo.shift "Merged_results" (Real_dataset.repo meths rd)
    @ Repo.shift "PreParsed_Dataset" (Real_dataset.repo_parsed_rd pal rd)
  in
  Bistro_utils.Repo.build_main ~outdir ~loggers ?np ?mem repo
(** Command-line wrapper around [realdata_main]: declares one flag per
    labelled argument and forwards the parsed values unchanged. *)
let realdata_command =
  let open Command.Let_syntax in
  (* Every method/analysis toggle is an argument-less boolean flag. *)
  let switch name ~doc = Command.Param.(flag name no_arg ~doc) in
  Command.basic
    ~summary:"Run pipeline on real data"
    (let%map_open outdir =
       flag "--outdir" (required string) ~doc:"PATH Output directory"
     and indir =
       flag "--indir" (required string) ~doc:"PATH Input directory"
     and use_diffsel =
       switch "--diffsel" ~doc:" use the diffsel method (very slow)."
     and use_pcoc =
       switch "--pcoc" ~doc:" use the pcoc method (slow)."
     and use_pcoc_c60 =
       switch "--pcoc-c60" ~doc:" use the pcoc method with c60 profils (very_slow)."
     and use_pcoc_gamma =
       switch "--pcoc-gamma" ~doc:" use the pcoc method with the gamma option (very_slow)."
     and use_pcoc_v2 =
       switch "--pcoc-v2" ~doc:" use the pcoc v2 method with the C10 profiles (slow)."
     and use_pcoc_pcp =
       switch "--pcoc-pcp" ~doc:" use the pcoc v2 method with the physico-chemical profiles (slow)."
     and use_tdg09 =
       switch "--tdg09" ~doc:" use the tdg09 method (slow)."
     and use_topological =
       switch "--topological" ~doc:" use the topological method (fast)."
     and use_identical =
       switch "--identical" ~doc:" use the identical method (fast)."
     and no_use_multinomial =
       switch "--no-multinomial" ~doc:" not use the multinomial method (very fast so by default)."
     and calc_dnds =
       switch "--dnds" ~doc:" calculate dn ds dnds trees (slow)."
     and calc_gene_trees =
       switch "--gt" ~doc:" calculate gene trees (slow)."
     and np =
       flag "--np" (optional int) ~doc:"INT Number of available processors"
     and mem =
       flag "--mem" (optional int) ~doc:"INT Available memory (in GB)"
     in
     realdata_main
       ~use_diffsel
       ~use_pcoc
       ~use_pcoc_c60
       ~use_pcoc_gamma
       ~use_pcoc_v2
       ~use_pcoc_pcp
       ~use_tdg09
       ~use_topological
       ~use_identical
       ~no_use_multinomial
       ~calc_dnds
       ~calc_gene_trees
       ~indir ~outdir ~np ~mem)
(* Program entry point: gathers every sub-command into a single command
   group and hands control to the command-line runner.

   NOTE(review): the original text carried two consecutive
   [Command.group ~summary:... \[] headers (left over from a diff
   rendering), which left a bracket unclosed. Only the "codepi" header
   is kept here; confirm this is the intended summary. *)
let () =
  Command.group ~summary:"codepi" [
    "validation", Pipeline.validation_command ;
    "realdata", realdata_command ;
    "run", Run.command ;
    "alistats", Alistats.command ;
  ]
  |> Command.run
open Core
open Bistro
open Bistro_utils
(* From here on, [Dataset] refers to the [New_API] sub-module. *)
module Dataset = Dataset.New_API
(** Paths describing one analysis run, as supplied on the command line. *)
type t = {
tree_file : string ; (** path of the tree file *)
alignment_dir : string ; (** directory whose entries are all used as alignments *)
convergent_species_file : string ; (** file read line by line to obtain the convergent species *)
}
(** Workflow that reads [run.convergent_species_file] with
    [In_channel.read_lines], i.e. yields its lines as a list of strings.
    The file is registered as a workflow input and its path is obtained
    through the [%path] extension. *)
let convergent_species_workflow run =
[%workflow
In_channel.read_lines
[%path Workflow.input run.convergent_species_file]]
(** Assemble the [Dataset.t] value for [run]: the tree file and every
    entry of [run.alignment_dir] are registered as workflow inputs, and
    the convergent species come from [convergent_species_workflow].

    Alignments are listed in the order returned by [Sys.readdir]. *)
let dataset run =
  let alignment_input file_name =
    Workflow.input (Filename.concat run.alignment_dir file_name)
  in
  let tree = Workflow.input run.tree_file in
  let nucleotide_alignments =
    Sys.readdir run.alignment_dir
    |> Array.map ~f:alignment_input
    |> Array.to_list
  in
  let convergent_species = convergent_species_workflow run in
  { Dataset.tree ; nucleotide_alignments ; convergent_species }
(* Detection pipeline instantiated on [Dataset.Query]. *)
module Pipeline = Detection_pipeline.Make(Dataset.Query)
(** Repository of results produced for [run].

    For now only the first alignment of the dataset is analysed: the
    repository holds a single item, ["multinomial.cpt"], computed by
    [Pipeline.multinomial_asymptotic_lrt] on that alignment.

    @raise Failure if [run.alignment_dir] contains no alignment. *)
let repo run =
  let d = dataset run in
  match d.nucleotide_alignments with
  | [] ->
    (* Report the offending directory explicitly rather than letting
       [List.hd_exn] fail with a message that names neither the cause
       nor the directory. *)
    failwithf "No alignment found in directory %s" run.alignment_dir ()
  | first_alignment :: _ ->
    let q = d, first_alignment in
    Repo.[
      item ["multinomial.cpt"] (Pipeline.multinomial_asymptotic_lrt q) ;
    ]
(** Run the analysis described by the three input paths and build the
    resulting repository into [outdir].

    [np] and [mem] (interpreted as GB) are forwarded to
    [Bistro_utils.Repo.build_main] when provided. *)
let main
    ~tree_file ~alignment_dir ~convergent_species_file
    ~outdir ~np ~mem () =
  let loggers = [ Console_logger.create () ] in
  let mem = Option.map mem ~f:(fun gb -> `GB gb) in
  { tree_file ; alignment_dir ; convergent_species_file }
  |> repo
  |> Bistro_utils.Repo.build_main ~outdir ~loggers ?np ?mem
(** Command-line wrapper around [main]: declares one flag per labelled
    argument and forwards the parsed values unchanged. *)
let command =
  let open Command.Let_syntax in
  Command.basic
    ~summary:"Run pipeline on real data"
    (let%map_open outdir =
       flag "--outdir" (required string)
         ~doc:"PATH Output directory"
     and tree_file =
       flag "--tree" (required string)
         ~doc:"PATH Tree (newick format)"
     and alignment_dir =
       flag "--alignments" (required Filename.arg_type)
         ~doc:"PATH Directory containing all alignments"
     and convergent_species_file =
       flag "--convergent-species" (required Filename.arg_type)
         ~doc:"PATH File providing the list of convergent species"
     and np =
       flag "--np" (optional int)
         ~doc:"INT Number of available processors"
     and mem =
       flag "--mem" (optional int)
         ~doc:"INT Available memory (in GB)"
     in
     main
       ~tree_file ~alignment_dir ~convergent_species_file
       ~outdir ~np ~mem)
open Core
(** Command-line interface exposed by this module. *)
val command : Command.t
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment