Commit 9c9ddd3b authored by Philippe Veber
Browse files

update wrt world

parent a3df223f
open Core open Core
open Phylogenetics open Phylogenetics
open Bistro
let ok_exn err = function let ok_exn err = function
| Ok x -> x | Ok x -> x
...@@ -8,7 +9,7 @@ let ok_exn err = function ...@@ -8,7 +9,7 @@ let ok_exn err = function
let is_gc ?pos i c = let is_gc ?pos i c =
match pos with match pos with
| Some p when not ((i mod 3) = p) -> false | Some p when not ((i mod 3) = p) -> false
| _ -> c = 'g' || c = 'G' || c = 'c' || c = 'C' | _ -> Char.(c = 'g' || c = 'G' || c = 'c' || c = 'C')
let strings_from_fasta fa = let strings_from_fasta fa =
match Alignment.from_fasta fa with match Alignment.from_fasta fa with
...@@ -29,7 +30,7 @@ let seq_gc ?pos seqs = ...@@ -29,7 +30,7 @@ let seq_gc ?pos seqs =
) )
in in
let gc_counts = Array.map seqs ~f:(fun seq -> let gc_counts = Array.map seqs ~f:(fun seq ->
let len = if pos <> None then String.length seq / 3 else String.length seq in let len = if Option.is_some pos then String.length seq / 3 else String.length seq in
let sum = String.foldi ~init:0 ~f:(fun i a c -> a + if is_gc ?pos i c then 1 else 0) seq in let sum = String.foldi ~init:0 ~f:(fun i a c -> a + if is_gc ?pos i c then 1 else 0) seq in
float len, float sum float len, float sum
) )
...@@ -76,7 +77,7 @@ let command = ...@@ -76,7 +77,7 @@ let command =
main ~alignment main ~alignment
] ]
let%pworkflow histogram (fa : #Bistro.fasta Bistro.pworkflow) = let%pworkflow histogram (fa : #fasta file) =
let al = ok_exn Alignment.show_parsing_error @@ Alignment.from_fasta [%path fa] in let al = ok_exn Alignment.show_parsing_error @@ Alignment.from_fasta [%path fa] in
let float_array_of_int_list x = let float_array_of_int_list x =
Array.of_list x Array.of_list x
......
...@@ -142,10 +142,10 @@ rate_distribution=Constant() ...@@ -142,10 +142,10 @@ rate_distribution=Constant()
) )
] ]
let alignment run_bppseqgen_multi_profiles : nucleotide_fasta pworkflow = let alignment run_bppseqgen_multi_profiles : nucleotide_fasta file =
Workflow.select run_bppseqgen_multi_profiles ["seq.fa"] Workflow.select run_bppseqgen_multi_profiles ["seq.fa"]
let info run_bppseqgen_multi_profiles : text_file pworkflow = let info run_bppseqgen_multi_profiles : text file =
Workflow.select run_bppseqgen_multi_profiles ["seq.fa.info"] Workflow.select run_bppseqgen_multi_profiles ["seq.fa.info"]
end end
...@@ -166,7 +166,7 @@ let conf_file_bppseqman_fna2faa ~fna = ...@@ -166,7 +166,7 @@ let conf_file_bppseqman_fna2faa ~fna =
|} |}
] ]
let fna2faa (fna : nucleotide_fasta pworkflow) : aminoacid_fasta pworkflow = let fna2faa (fna : nucleotide_fasta file) : aminoacid_fasta file =
Workflow.shell ~descr:"bppsuite.fna2faa" [ Workflow.shell ~descr:"bppsuite.fna2faa" [
cmd "bppseqman" ~img [ cmd "bppseqman" ~img [
assign "param" (file_dump (conf_file_bppseqman_fna2faa ~fna)) ; assign "param" (file_dump (conf_file_bppseqman_fna2faa ~fna)) ;
...@@ -197,14 +197,14 @@ let conf_file_bppseqman_faa2phy ~faa = ...@@ -197,14 +197,14 @@ let conf_file_bppseqman_faa2phy ~faa =
|} |}
] ]
let fna2phy ~(fna: nucleotide_fasta pworkflow) : nucleotide_phylip pworkflow = let fna2phy ~(fna: nucleotide_fasta file) : nucleotide_phylip file =
Workflow.shell ~descr:"bppsuite.fna2phy_interleaved" [ Workflow.shell ~descr:"bppsuite.fna2phy_interleaved" [
cmd "bppseqman" ~img [ cmd "bppseqman" ~img [
assign "param" (file_dump (conf_file_bppseqman_fna2phy ~fna)) ; assign "param" (file_dump (conf_file_bppseqman_fna2phy ~fna)) ;
] ]
] ]
let faa2phy ~(faa: aminoacid_fasta pworkflow) : aminoacid_phylip pworkflow = let faa2phy ~(faa: aminoacid_fasta file) : aminoacid_phylip file =
Workflow.shell ~descr:"bppsuite.faa2phy_interleaved" [ Workflow.shell ~descr:"bppsuite.faa2phy_interleaved" [
cmd "bppseqman" ~img [ cmd "bppseqman" ~img [
assign "param" (file_dump (conf_file_bppseqman_faa2phy ~faa)) ; assign "param" (file_dump (conf_file_bppseqman_faa2phy ~faa)) ;
...@@ -212,7 +212,7 @@ let faa2phy ~(faa: aminoacid_fasta pworkflow) : aminoacid_phylip pworkflow = ...@@ -212,7 +212,7 @@ let faa2phy ~(faa: aminoacid_fasta pworkflow) : aminoacid_phylip pworkflow =
] ]
let paste_fna ~(fna_l: nucleotide_fasta pworkflow list) : nucleotide_fasta pworkflow = let paste_fna ~(fna_l: nucleotide_fasta file list) : nucleotide_fasta file =
Workflow.shell ~descr:"bppsuite.catfasta" [ Workflow.shell ~descr:"bppsuite.catfasta" [
cmd "catfasta2phyml.pl" ~stdout:dest ~img (List.concat [ cmd "catfasta2phyml.pl" ~stdout:dest ~img (List.concat [
[string "-f" ] ; [string "-f" ] ;
......
...@@ -4,34 +4,34 @@ open File_formats ...@@ -4,34 +4,34 @@ open File_formats
module Bppseqgen : sig module Bppseqgen : sig
val multi_profiles : val multi_profiles :
?descr : string -> ?descr : string ->
profile_f: text_file pworkflow -> profile_f: text file ->
profile_c: text_file pworkflow -> profile_c: text file ->
input_tree: nhx pworkflow -> input_tree: nhx file ->
hypothesis:Convergence_hypothesis.t -> hypothesis:Convergence_hypothesis.t ->
seed:int -> seed:int ->
[`bppseqgen] dworkflow [`bppseqgen] directory
val alignment : val alignment :
[`bppseqgen] dworkflow -> [`bppseqgen] directory ->
nucleotide_fasta pworkflow nucleotide_fasta file
val info : val info :
[`bppseqgen] dworkflow -> [`bppseqgen] directory ->
text_file pworkflow text file
end end
val fna2faa : val fna2faa :
nucleotide_fasta pworkflow -> nucleotide_fasta file ->
aminoacid_fasta pworkflow aminoacid_fasta file
val fna2phy : val fna2phy :
fna: nucleotide_fasta pworkflow -> fna: nucleotide_fasta file ->
nucleotide_phylip pworkflow nucleotide_phylip file
val faa2phy : val faa2phy :
faa: aminoacid_fasta pworkflow -> faa: aminoacid_fasta file ->
aminoacid_phylip pworkflow aminoacid_phylip file
val paste_fna: val paste_fna:
fna_l: nucleotide_fasta pworkflow list -> fna_l: nucleotide_fasta file list ->
nucleotide_fasta pworkflow nucleotide_fasta file
...@@ -3,17 +3,17 @@ open Bistro.Shell_dsl ...@@ -3,17 +3,17 @@ open Bistro.Shell_dsl
open Bistro open Bistro
type result = [ type result = [
| `Pcoc of [`pcoc] dworkflow | `Pcoc of [`pcoc] directory
| `Pcoc_gamma of [`pcoc] dworkflow | `Pcoc_gamma of [`pcoc] directory
| `Pcoc_C60 of [`pcoc] dworkflow | `Pcoc_C60 of [`pcoc] directory
| `Diffsel of [`diffsel] dworkflow | `Diffsel of [`diffsel] directory
| `Identical_LG of [`identical] dworkflow | `Identical_LG of [`identical] directory
| `Identical_WAG of [`identical] dworkflow | `Identical_WAG of [`identical] directory
| `Topological_LG of [`topological] dworkflow | `Topological_LG of [`topological] directory
| `Topological_WAG of [`topological] dworkflow | `Topological_WAG of [`topological] directory
| `Tdg09 of [`tdg09] dworkflow | `Tdg09 of [`tdg09] directory
| `Multinomial of text_file pworkflow | `Multinomial of text file
| `Msd of [`msd] dworkflow * float | `Msd of [`msd] directory * float
] ]
let meth_string_of_result = function let meth_string_of_result = function
...@@ -34,11 +34,11 @@ type dataset_res = { ...@@ -34,11 +34,11 @@ type dataset_res = {
tree_prefix : string ; tree_prefix : string ;
dataset : Dataset.t ; dataset : Dataset.t ;
res_by_tools: result list ; res_by_tools: result list ;
merged_results : text_file pworkflow ; merged_results : text file ;
plot_merged_results : svg pworkflow ; plot_merged_results : svg file ;
} }
let merge_results ?fna_infos ~(res_by_tools : result list) () : text_file pworkflow = let merge_results ?fna_infos ~(res_by_tools : result list) () : text file =
let command = List.map res_by_tools ~f:(fun res -> let command = List.map res_by_tools ~f:(fun res ->
let w = match res with let w = match res with
| `Pcoc d -> Pcoc.results d | `Pcoc d -> Pcoc.results d
...@@ -78,7 +78,7 @@ let merge_results ?fna_infos ~(res_by_tools : result list) () : text_file pworkf ...@@ -78,7 +78,7 @@ let merge_results ?fna_infos ~(res_by_tools : result list) () : text_file pworkf
] ; ] ;
] ]
let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topological ?pcoc ?pcoc_v2 ?pcoc_pcp ?diffsel ?diffseldsparse () : text_file pworkflow = let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topological ?pcoc ?pcoc_v2 ?pcoc_pcp ?diffsel ?diffseldsparse () : text file =
Workflow.shell ~descr:"convergence_detection.merge_results" [ Workflow.shell ~descr:"convergence_detection.merge_results" [
cmd "python" ~img:Env.env_py [ cmd "python" ~img:Env.env_py [
file_dump (string Scripts.merge_det_results) ; file_dump (string Scripts.merge_det_results) ;
...@@ -97,7 +97,7 @@ let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topol ...@@ -97,7 +97,7 @@ let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topol
] ; ] ;
] ]
let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv (): svg pworkflow = let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv (): svg file =
let img = Env.env_pcoc in let img = Env.env_pcoc in
(* use of pcoc env due to its working X server for drawing plots with ete3 *) (* use of pcoc env due to its working X server for drawing plots with ete3 *)
let meths = List.map res_by_tools ~f:(fun res -> let meths = List.map res_by_tools ~f:(fun res ->
......
...@@ -2,17 +2,17 @@ open Bistro ...@@ -2,17 +2,17 @@ open Bistro
open File_formats open File_formats
type result = [ type result = [
| `Pcoc of [`pcoc] dworkflow | `Pcoc of [`pcoc] directory
| `Pcoc_gamma of [`pcoc] dworkflow | `Pcoc_gamma of [`pcoc] directory
| `Pcoc_C60 of [`pcoc] dworkflow | `Pcoc_C60 of [`pcoc] directory
| `Diffsel of [`diffsel] dworkflow | `Diffsel of [`diffsel] directory
| `Identical_LG of [`identical] dworkflow | `Identical_LG of [`identical] directory
| `Identical_WAG of [`identical] dworkflow | `Identical_WAG of [`identical] directory
| `Topological_LG of [`topological] dworkflow | `Topological_LG of [`topological] directory
| `Topological_WAG of [`topological] dworkflow | `Topological_WAG of [`topological] directory
| `Tdg09 of [`tdg09] dworkflow | `Tdg09 of [`tdg09] directory
| `Multinomial of text_file pworkflow | `Multinomial of text file
| `Msd of [`msd] dworkflow * float | `Msd of [`msd] directory * float
] ]
val meth_string_of_result : result -> string val meth_string_of_result : result -> string
...@@ -22,58 +22,58 @@ type dataset_res = { ...@@ -22,58 +22,58 @@ type dataset_res = {
tree_prefix : string ; tree_prefix : string ;
dataset : Dataset.t ; dataset : Dataset.t ;
res_by_tools: result list ; res_by_tools: result list ;
merged_results : text_file pworkflow ; merged_results : text file ;
plot_merged_results : svg pworkflow plot_merged_results : svg file
} }
val merge_results : val merge_results :
?fna_infos:text_file pworkflow -> ?fna_infos:text file ->
res_by_tools : result list -> res_by_tools : result list ->
unit -> unit ->
text_file pworkflow text file
val merge_result_tables : val merge_result_tables :
?fna_infos:text_file pworkflow -> ?fna_infos:text file ->
?oracle:text_file pworkflow -> ?oracle:text file ->
?multinomial:text_file pworkflow -> ?multinomial:text file ->
?tdg09:text_file pworkflow -> ?tdg09:text file ->
?identical:text_file pworkflow -> ?identical:text file ->
?topological:text_file pworkflow -> ?topological:text file ->
?pcoc:text_file pworkflow -> ?pcoc:text file ->
?pcoc_v2:text_file pworkflow -> ?pcoc_v2:text file ->
?pcoc_pcp:text_file pworkflow -> ?pcoc_pcp:text file ->
?diffsel:text_file pworkflow -> ?diffsel:text file ->
?diffseldsparse:text_file pworkflow -> ?diffseldsparse:text file ->
unit -> unit ->
text_file pworkflow text file
val plot_merge_results : val plot_merge_results :
? t_choices : text_file pworkflow -> ? t_choices : text file ->
plot_all_sites: bool -> plot_all_sites: bool ->
res_by_tools : result list -> res_by_tools : result list ->
tree:nhx pworkflow -> tree:nhx file ->
faa:aminoacid_fasta pworkflow -> faa:aminoacid_fasta file ->
tsv:text_file pworkflow -> tsv:text file ->
unit -> unit ->
svg pworkflow svg file
val plot_convergent_sites : val plot_convergent_sites :
?plot_all_sites:bool -> ?plot_all_sites:bool ->
alignment:aminoacid_fasta pworkflow -> alignment:aminoacid_fasta file ->
detection_results:text_file pworkflow -> detection_results:text file ->
tree:nhx pworkflow -> tree:nhx file ->
unit -> unit ->
svg pworkflow svg file
val recall_precision_curve : val recall_precision_curve :
text_file pworkflow -> text file ->
svg pworkflow svg file
val oracle : val oracle :
n_h0:int -> n_h0:int ->
n_ha:int -> n_ha:int ->
text_file pworkflow text file
val recall_precision_auc_table : val recall_precision_auc_table :
text_file pworkflow -> text file ->
(string * float) array workflow (string * float) array workflow
...@@ -14,7 +14,7 @@ type t = model ...@@ -14,7 +14,7 @@ type t = model
let string_of_float_without_dot f = (* to avoid things like "H0_NeG4._NeC2." *) let string_of_float_without_dot f = (* to avoid things like "H0_NeG4._NeC2." *)
let int_value = int_of_float f in let int_value = int_of_float f in
let rounded = int_value |> float_of_int in let rounded = int_value |> float_of_int in
if rounded -. f = 0. then string_of_int int_value else string_of_float f if Float.(rounded -. f = 0.) then string_of_int int_value else string_of_float f
let string_of_nes nes = match nes with let string_of_nes nes = match nes with
| Fixed g -> "NeG" ^ (string_of_float_without_dot g) | Fixed g -> "NeG" ^ (string_of_float_without_dot g)
| Variable (g, c) -> "NeG" ^ (string_of_float_without_dot g) ^ "_NeC_" ^ (string_of_float_without_dot c) | Variable (g, c) -> "NeG" ^ (string_of_float_without_dot g) ^ "_NeC_" ^ (string_of_float_without_dot c)
......
...@@ -24,26 +24,19 @@ let eval w = ...@@ -24,26 +24,19 @@ let eval w =
let path w = let path w =
with_workflow (Workflow.eval_path w) ~f:(fun x -> x) with_workflow (Workflow.eval_path w) ~f:(fun x -> x)
let less w = let command fmt =
Sys.command (sprintf "less %s" (path w)) Printf.ksprintf (fun s -> ignore (Sys.command s : int)) fmt
|> ignore
let firefox w = let less w = command "less %s" (path w)
Sys.command (sprintf "firefox %s" (path w))
|> ignore
let seaview w = let firefox w = command "firefox %s" (path w)
Sys.command (sprintf "seaview %s" (path w))
|> ignore
let evince w = let seaview w = command "seaview %s" (path w)
Sys.command (sprintf "evince %s" (path w))
|> ignore let evince w = command "evince %s" (path w)
let workflow_of_template t = let workflow_of_template t =
let open Bistro.Shell_dsl in let open Bistro.Shell_dsl in
Workflow.shell [ Workflow.shell [
cmd "cp" [ file_dump t ; dest ] cmd "cp" [ file_dump t ; dest ]
] ]
...@@ -3,9 +3,9 @@ open File_formats ...@@ -3,9 +3,9 @@ open File_formats
module type Dataset = sig module type Dataset = sig
type t type t
val tree : t -> nhx pworkflow val tree : t -> nhx file
val nucleotide_alignment : t -> nucleotide_fasta pworkflow val nucleotide_alignment : t -> nucleotide_fasta file
end end
module Make(D : Dataset) = struct module Make(D : Dataset) = struct
......
...@@ -50,7 +50,7 @@ echo end_it=$end_it ...@@ -50,7 +50,7 @@ echo end_it=$end_it
|} |}
let diffsel ~(phy_n:nucleotide_phylip pworkflow) ~(tree: _ pworkflow) ~(w_every:int) ~(n_cycles: int) ?(descr = "") ?seed () : [`diffsel] dworkflow = let diffsel ~(phy_n:nucleotide_phylip file) ~(tree: _ file) ~(w_every:int) ~(n_cycles: int) ?(descr = "") ?seed () : [`diffsel] directory =
let env = Env.env_diffsel in let env = Env.env_diffsel in
let tmp_tree = tmp // "myrun.tree" in let tmp_tree = tmp // "myrun.tree" in
let tmp_ali = tmp // "myrun.ali" in let tmp_ali = tmp // "myrun.ali" in
...@@ -84,7 +84,7 @@ let diffsel ~(phy_n:nucleotide_phylip pworkflow) ~(tree: _ pworkflow) ~(w_every: ...@@ -84,7 +84,7 @@ let diffsel ~(phy_n:nucleotide_phylip pworkflow) ~(tree: _ pworkflow) ~(w_every:
) )
] ]
let check_conv run_diffsel : directory pworkflow = let check_conv run_diffsel : [`diffsel_check_conv] directory =
let env = Env.env_r in let env = Env.env_r in
let script = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in let script = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
let trace = Workflow.select run_diffsel ["myrun.trace"] in let trace = Workflow.select run_diffsel ["myrun.trace"] in
...@@ -110,7 +110,7 @@ let check_conv run_diffsel : directory pworkflow = ...@@ -110,7 +110,7 @@ let check_conv run_diffsel : directory pworkflow =
) )
] ]
let selector run_diffsel : text_file pworkflow = let selector run_diffsel : text file =
let env = Env.env_diffsel in let env = Env.env_diffsel in
let package = tmp // "diffsel_script_utils.py" in let package = tmp // "diffsel_script_utils.py" in
let script = tmp // "diffsel_analyze_result.py" in let script = tmp // "diffsel_analyze_result.py" in
......
...@@ -2,19 +2,19 @@ open Bistro ...@@ -2,19 +2,19 @@ open Bistro
open File_formats open File_formats
val diffsel : val diffsel :
phy_n:nucleotide_phylip pworkflow -> phy_n:nucleotide_phylip file ->
tree:_ pworkflow -> tree:_ file ->
w_every:int -> w_every:int ->
n_cycles:int -> n_cycles:int ->
?descr:string -> ?descr:string ->
?seed:int -> ?seed:int ->
unit -> unit ->
[`diffsel] dworkflow [`diffsel] directory
val selector : val selector :
[`diffsel] dworkflow -> [`diffsel] directory ->
text_file pworkflow text file
val check_conv : val check_conv :
[`diffsel] dworkflow -> [`diffsel] directory ->
directory pworkflow [`diffsel_check_conv] directory
...@@ -43,8 +43,8 @@ echo end_it=$end_it ...@@ -43,8 +43,8 @@ echo end_it=$end_it
let diffseldsparse let diffseldsparse
?pi ?shiftprob ?eps ?pi ?shiftprob ?eps
~(alignment:nucleotide_phylip pworkflow) ~(tree: _ pworkflow) ~(alignment:nucleotide_phylip file) ~(tree: _ file)
~(w_every:int) ~(n_cycles: int) () : [`diffseldsparse] dworkflow = ~(w_every:int) ~(n_cycles: int) () : [`diffseldsparse] directory =
let tmp_tree = tmp // "myrun.tree" in let tmp_tree = tmp // "myrun.tree" in
let tmp_ali = tmp // "myrun.ali" in let tmp_ali = tmp // "myrun.ali" in
let dest_tree = dest // "myrun.tree" in let dest_tree = dest // "myrun.tree" in
...@@ -79,7 +79,7 @@ let diffseldsparse ...@@ -79,7 +79,7 @@ let diffseldsparse
) )
] ]
let check_conv run_diffseldsparse : directory pworkflow = let check_conv run_diffseldsparse : [`diffseldsparse_check_conv] directory =
let script = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in let script = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
let trace = Workflow.select run_diffseldsparse ["myrun.trace"] in let trace = Workflow.select run_diffseldsparse ["myrun.trace"] in
let out = dest // "out.html" in let out = dest // "out.html" in
...@@ -123,7 +123,7 @@ let readdiffseldsparse run = ...@@ -123,7 +123,7 @@ let readdiffseldsparse run =
) )
] ]
let posterior_probabilities run_diffseldsparse : text_file pworkflow = let posterior_probabilities run_diffseldsparse : text file =
let tmp_tree = tmp // "myrun.tree" in let tmp_tree = tmp // "myrun.tree" in
let tmp_ali = tmp // "myrun.ali" in let tmp_ali = tmp // "myrun.ali" in
let dep_tree = (dep run_diffseldsparse) // "myrun.tree" in let dep_tree = (dep run_diffseldsparse) // "myrun.tree" in
......
...@@ -5,25 +5,25 @@ val diffseldsparse : ...@@ -5,25 +5,25 @@ val diffseldsparse :
?pi:float -> ?pi:float ->
?shiftprob:float