Commit 17572c94 authored by Philippe Veber
Browse files

separate pcoc module

parent 2ac8dfe8
......@@ -5,49 +5,27 @@ open File_formats
open Bistro_bioinfo.Std
open Defs
type pcoc_out
type diffsel_out
(** Result of one detection run, tagged by the method that produced it.
    Every constructor carries the output directory workflow of the run;
    the type parameter of [directory] is [`pcoc] for both PCOC variants
    and [`diffsel] for Diffsel. *)
type result = [
| `Pcoc of [`pcoc] directory workflow
| `Pcoc_gamma of [`pcoc] directory workflow
| `Diffsel of [`diffsel] directory workflow
]
(** Tag type for detection outputs, with one constructor per tool family.
    It is used as the type parameter of [directory] in the [det_result]
    record and in the signatures of the detection workflows. *)
type det_out =
| Pcoc_out
| Diffsel_out
(** Outcome of running one detection method on one dataset. *)
type det_result = {
dataset : Dataset.t ; (** dataset the method was run on *)
det_meth : det_meth ; (** detection method that was run *)
det_result : det_out directory workflow ; (** output directory workflow of that run *)
}
(** [meth_string_of_result res] is the lowercase name of the detection
    method that produced [res]: ["pcoc"], ["pcoc_gamma"] or ["diffsel"].
    The payload of [res] is ignored. *)
let meth_string_of_result res =
  match res with
  | `Pcoc _ -> "pcoc"
  | `Pcoc_gamma _ -> "pcoc_gamma"
  | `Diffsel _ -> "diffsel"
type dataset_res = {
model_prefix : string ;
tree_prefix : string ;
res_by_tools: det_result list ;
res_by_tools: result list ;
merged_results : text_file workflow ;
plot_merged_results : svg workflow ;
}
(** [pcoc ?plot_complete ?gamma ~faa ~tree] is the workflow that runs
    [pcoc_det.py] inside the carinerey/pcoc docker image.

    @param plot_complete when supplied, handed to the
      [--plot --plot_complete_ali] flag
    @param gamma when supplied, handed to the [--gamma] flag
    @param faa amino-acid alignment, passed with [-aa]
    @param tree tree workflow, passed with [-t]
    @return the workflow whose destination is given to [-o] *)
let pcoc ?plot_complete ?gamma ~(faa:aminoacid_fasta workflow) ~(tree:_ workflow) : (*`pcoc TODO*) det_out directory workflow =
  let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
  (* Command-line arguments of pcoc_det.py, in the order they are emitted. *)
  let pcoc_det_args = [
    opt "-t" dep tree ;
    opt "-m" string "-" ;
    opt "-aa" dep faa ;
    opt "-o" ident dest ;
    option (flag string "--gamma") gamma ;
    option (flag string "--plot --plot_complete_ali") plot_complete ;
  ]
  in
  workflow ~descr:"convergence_detection.pcoc" [
    cmd "pcoc_det.py" ~env pcoc_det_args ;
  ]
}
(** [pcoc_selector run_pcoc] is a workflow that copies, with [cp], the
    path [RUN*/*.results.tsv] taken inside the output directory of
    [run_pcoc] to its own destination. *)
let pcoc_selector (run_pcoc:det_out directory workflow) : text_file workflow =
  let copy_results =
    cmd "cp" [
      dep run_pcoc // "RUN*/*.results.tsv" ;
      ident dest ;
    ]
  in
  workflow ~descr:"convergence_detection.selector_pcoc" [ copy_results ]
let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:int) ~(n_cycles: int) : (*`diffsel TODO*) det_out directory workflow =
let diffsel ~(phy_n:nucleotide_phylip workflow) ~(tree: _ workflow) ~(w_every:int) ~(n_cycles: int) : [`diffsel] directory workflow =
let env = docker_image ~account:"vlanore" ~name:"diffsel" ~tag:"v1.0" () in
let tmp_tree = tmp // "myrun.tree" in
let tmp_ali = tmp // "myrun.ali" in
......@@ -112,19 +90,18 @@ let diffsel_selector run_diffsel : text_file workflow =
let merge_results ~res_by_tools : text_file workflow =
let env = docker_image ~account:"carinerey" ~name:"ete3" ~tag:"3.0.0b35" () in
let command = List.map res_by_tools ~f:(fun res ->
let def_meth = res.det_meth in
let w = match def_meth with
| Pcoc -> pcoc_selector res.det_result
| Pcoc_gamma -> pcoc_selector res.det_result
| Diffsel -> diffsel_selector res.det_result
in
let opt = match def_meth with
| Pcoc -> string "--pcoc"
| Pcoc_gamma -> string "--pcoc_gamma"
| Diffsel -> string "--diffsel"
in
seq ~sep:" " [opt; dep w]
)
let w = match res with
| `Pcoc d
| `Pcoc_gamma d -> Pcoc.results d
| `Diffsel d -> diffsel_selector d
in
let opt = match res with
| `Pcoc _ -> string "--pcoc"
| `Pcoc_gamma _ -> string "--pcoc_gamma"
| `Diffsel _ -> string "--diffsel"
in
seq ~sep:" " [opt; dep w]
)
in
workflow ~descr:"convergence_detection.merge_results" [
cmd "python" ~env [
......@@ -141,13 +118,12 @@ let plot_merge_results ~res_by_tools ~tree ~faa ~tsv : svg workflow =
let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
(* use of pcoc env due to its working X server for dra plot with ete3 *)
let meths = List.map res_by_tools ~f:(fun res ->
let def_meth = res.det_meth in
let opt = match def_meth with
| Pcoc -> string "PCOC"
| Pcoc_gamma -> string "PCOC_gamma"
| Diffsel -> string "Diffsel"
let opt = match res with
| `Pcoc _ -> "PCOC"
| `Pcoc_gamma _ -> "PCOC_gamma"
| `Diffsel _ -> "Diffsel"
in
opt
string opt
) |> seq ~sep:","
in
let package_diffsel_script_utils = tmp // "diffsel_script_utils.py" in
......@@ -177,4 +153,3 @@ let plot_merge_results ~res_by_tools ~tree ~faa ~tsv : svg workflow =
]
)
]
......@@ -5,56 +5,41 @@ open Bistro_bioinfo.Std
open File_formats
open Defs
type pcoc_out
type diffsel_out
type det_out =
| Pcoc_out
| Diffsel_out
(** Result of one detection run, tagged by the method that produced it.
    Every constructor carries the output directory workflow of the run;
    the type parameter of [directory] is [`pcoc] for both PCOC variants
    and [`diffsel] for Diffsel. *)
type result = [
| `Pcoc of [`pcoc] directory workflow
| `Pcoc_gamma of [`pcoc] directory workflow
| `Diffsel of [`diffsel] directory workflow
]
type det_result = {
dataset : Dataset.t ;
det_meth : det_meth ;
det_result : det_out directory workflow ;
}
(** [meth_string_of_result res] is the lowercase name of the detection
    method that produced [res]: ["pcoc"], ["pcoc_gamma"] or ["diffsel"]. *)
val meth_string_of_result : result -> string
type dataset_res = {
model_prefix : string ;
tree_prefix : string ;
res_by_tools: det_result list ;
res_by_tools: result list ;
merged_results : text_file workflow ;
plot_merged_results : svg workflow
}
(** [pcoc ?plot_complete ?gamma ~faa ~tree] is the workflow that runs
    [pcoc_det.py] on the alignment [faa] (option [-aa]) and the tree
    [tree] (option [-t]). [gamma] and [plot_complete], when supplied,
    are handed to the [--gamma] and [--plot --plot_complete_ali] flags. *)
val pcoc :
?plot_complete : bool ->
?gamma : bool ->
faa : aminoacid_fasta workflow ->
tree : _ workflow ->
(*[`pcoc]*) det_out directory workflow
(** [pcoc_selector run] is a workflow that copies the path
    [RUN*/*.results.tsv] found inside the output directory of [run]
    to its own destination. *)
val pcoc_selector :
(*[`pcoc]*) det_out directory workflow ->
text_file workflow
val diffsel :
phy_n : nucleotide_phylip workflow ->
tree : _ workflow ->
w_every : int ->
n_cycles: int ->
(*[`diffsel]*) det_out directory workflow
[`diffsel] directory workflow
val diffsel_selector :
(*[`diffsel]*) det_out directory workflow ->
[`diffsel] directory workflow ->
text_file workflow
val merge_results :
res_by_tools : det_result list ->
res_by_tools : result list ->
text_file workflow
val plot_merge_results :
res_by_tools : det_result list ->
tree : _ workflow ->
faa : aminoacid_fasta workflow ->
tsv : text_file workflow ->
res_by_tools : result list ->
tree:nhx workflow ->
faa:aminoacid_fasta workflow ->
tsv:text_file workflow ->
svg workflow
......@@ -15,15 +15,6 @@ type det_meth =
| Pcoc_gamma
| Diffsel
(** [string_of_det_meth meth] is the lowercase name of the detection
    method [meth]: ["pcoc"], ["pcoc_gamma"] or ["diffsel"]. *)
let string_of_det_meth meth =
  match meth with
  | Pcoc -> "pcoc"
  | Pcoc_gamma -> "pcoc_gamma"
  | Diffsel -> "diffsel"
(** A value of type ['a] wrapped in a constructor named after a detection
    method (presumably to pair a payload with the method it belongs to;
    no use is visible here).
    NOTE(review): the third constructor is [Diffsel], not [Diffsel_w] like
    its two siblings, so it reuses the name of the [Diffsel] constructor
    of [det_meth] declared earlier in this file. *)
type 'a w_det_meth =
| Pcoc_w of 'a
| Pcoc_gamma_w of 'a
| Diffsel of 'a
open Core
open Bistro.Std
open Bistro.EDSL
open Bistro_bioinfo.Std
open File_formats
(** [pcoc ?plot_complete ?gamma ~faa ~tree] is the workflow that runs
    [pcoc_det.py] inside the carinerey/pcoc docker image.

    @param plot_complete when supplied, handed to the
      [--plot --plot_complete_ali] flag
    @param gamma when supplied, handed to the [--gamma] flag
    @param faa amino-acid alignment, passed with [-aa]
    @param tree tree workflow, passed with [-t]
    @return the workflow whose destination is given to [-o] *)
let pcoc ?plot_complete ?gamma ~(faa:aminoacid_fasta workflow) ~(tree:_ workflow) : [`pcoc] directory workflow =
  let env = docker_image ~account:"carinerey" ~name:"pcoc" ~tag:"06212018" () in
  (* Command-line arguments of pcoc_det.py, in the order they are emitted. *)
  let pcoc_det_args = [
    opt "-t" dep tree ;
    opt "-m" string "-" ;
    opt "-aa" dep faa ;
    opt "-o" ident dest ;
    option (flag string "--gamma") gamma ;
    option (flag string "--plot --plot_complete_ali") plot_complete ;
  ]
  in
  workflow ~descr:"convergence_detection.pcoc" [
    cmd "pcoc_det.py" ~env pcoc_det_args ;
  ]
(** [results run_pcoc] is a workflow that copies, with [cp], the path
    [RUN*/*.results.tsv] taken inside the output directory of [run_pcoc]
    to its own destination. *)
let results run_pcoc : text_file workflow =
  let copy_results =
    cmd "cp" [
      dep run_pcoc // "RUN*/*.results.tsv" ;
      ident dest ;
    ]
  in
  workflow ~descr:"convergence_detection.selector_pcoc" [ copy_results ]
open Bistro.Std
open File_formats
(** [pcoc ?plot_complete ?gamma ~faa ~tree] is the workflow that runs
    [pcoc_det.py] on the alignment [faa] (option [-aa]) and the tree
    [tree] (option [-t]). [gamma] and [plot_complete], when supplied,
    are handed to the [--gamma] and [--plot --plot_complete_ali] flags. *)
val pcoc :
?plot_complete : bool ->
?gamma : bool ->
faa : aminoacid_fasta workflow ->
tree : _ workflow ->
[`pcoc] directory workflow
(** [results run] is a workflow that copies the path
    [RUN*/*.results.tsv] found inside the output directory of [run]
    to its own destination. *)
val results :
[`pcoc] directory workflow ->
text_file workflow
......@@ -40,6 +40,25 @@ let derive_sim ~tree_dir ~trees ~profile_fn ~preview =
|> List.concat
(** [repo_of_detection_result res] is the list of repository items
    published for one detection result:
    - the results table of the tool, named [pcoc.results.tsv],
      [pcoc_gamma.results.tsv] or [diffsel.results.tsv];
    - the untouched output directory of the run, named [raw_results].

    Both items are shifted first by the method name given by
    [Convergence_detection.meth_string_of_result], then by
    [Detection_tools]. *)
let repo_of_detection_result res =
  let tool_dir = Convergence_detection.meth_string_of_result res in
  let summary_item = match res with
    | `Pcoc w -> Repo.item ["pcoc.results.tsv"] (Pcoc.results w)
    | `Pcoc_gamma w -> Repo.item ["pcoc_gamma.results.tsv"] (Pcoc.results w)
    | `Diffsel w -> Repo.item ["diffsel.results.tsv"] (diffsel_selector w)
  in
  (* The Diffsel payload has a different directory type from the PCOC
     ones, so it cannot share an or-pattern with them. *)
  let raw_item = match res with
    | `Pcoc w | `Pcoc_gamma w -> Repo.item ["raw_results"] w
    | `Diffsel w -> Repo.item ["raw_results"] w
  in
  Repo.shift "Detection_tools"
    (Repo.shift tool_dir [ summary_item ; raw_item ])
let repo_of_dataset_results_l ~dataset_results_l =
List.map dataset_results_l ~f:(fun dataset_results ->
let det_results_l = dataset_results.res_by_tools in
......@@ -47,33 +66,14 @@ let repo_of_dataset_results_l ~dataset_results_l =
let plot_merge_results = dataset_results.plot_merged_results in
let merged_results_item = Repo.item ["merged_results.tsv"] merged_results in
let plot_merged_results_item = Repo.item ["plot_merged_results"] plot_merge_results in
[ [ merged_results_item ; plot_merged_results_item ] ;
List.map det_results_l ~f:(fun det_results ->
let det_meth = det_results.det_meth in
let det_meth_prefix = string_of_det_meth det_meth in
let w = det_results.det_result
in
let repo_d = Repo.shift "Detection_tools" (Repo.shift det_meth_prefix (
Repo.[
[match det_meth with
| Pcoc -> item ["pcoc.results.tsv"] (pcoc_selector w)
| Pcoc_gamma -> item ["pcoc_gamma.results.tsv"] (pcoc_selector w)
| Diffsel -> item ["diffsel.results.tsv"] (diffsel_selector w)
];
[match det_meth with
| Pcoc -> item ["raw_results"] w
| Pcoc_gamma -> item ["raw_results"] w
| Diffsel -> item ["raw_results"] w
] ;
] |> List.concat
))
in
repo_d
) |> List.concat
] |> List.concat
|> Repo.shift dataset_results.model_prefix
|> Repo.shift dataset_results.tree_prefix
let repo =
merged_results_item ::
plot_merged_results_item ::
(List.map det_results_l ~f:repo_of_detection_result |> List.concat)
in
repo
|> Repo.shift dataset_results.model_prefix
|> Repo.shift dataset_results.tree_prefix
)
|> List.concat
......@@ -85,18 +85,16 @@ let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview =
let diffsel_tree = Tree_dataset.diffsel_tree dataset.dataset.tree_dataset in
let w_every = if preview then 1 else 10 in
let n_cycles = if preview then 100 else 1000 in
let det_result = match det_meth with
| Pcoc -> Convergence_detection.pcoc ~plot_complete:true ~gamma:false ~faa ~tree:pcoc_tree
| Pcoc_gamma -> Convergence_detection.pcoc ~plot_complete: true ~gamma:true ~faa ~tree:pcoc_tree
| Diffsel -> Convergence_detection.diffsel ~phy_n ~tree:diffsel_tree ~w_every ~n_cycles
in
{det_meth; det_result; dataset}
match det_meth with
| `Pcoc -> `Pcoc (Pcoc.pcoc ~plot_complete:true ~gamma:false ~faa ~tree:pcoc_tree)
| `Pcoc_gamma -> `Pcoc_gamma (Pcoc.pcoc ~plot_complete: true ~gamma:true ~faa ~tree:pcoc_tree)
| `Diffsel -> `Diffsel (Convergence_detection.diffsel ~phy_n ~tree:diffsel_tree ~w_every ~n_cycles)
let derive_from_dataset ~dataset ~preview =
let det_meths = [Pcoc;Pcoc_gamma;Diffsel] in
let det_meths = [`Pcoc;`Pcoc_gamma;`Diffsel] in
let res_by_tools = List.map det_meths ~f:(fun det_meth ->
derive_from_det_meth ~det_meth ~dataset ~preview
) in
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment