Commit 9c9ddd3b authored by Philippe Veber
Browse files

update wrt world

parent a3df223f
open Core
open Phylogenetics
open Bistro
let ok_exn err = function
| Ok x -> x
......@@ -8,7 +9,7 @@ let ok_exn err = function
(** [is_gc ?pos i c] tells whether character [c], found at index [i], counts
    as a G or C base (upper or lower case).

    When [pos] is given, only indices whose remainder modulo 3 equals [pos]
    are taken into account: any other index yields [false] whatever [c] is. *)
let is_gc ?pos i c =
  let gc_base =
    (* A pattern match needs no equality operator, so it is unaffected by the
       monomorphic [=] that [open Core] brings into scope. *)
    match c with
    | 'g' | 'G' | 'c' | 'C' -> true
    | _ -> false
  in
  match pos with
  | Some p -> i mod 3 = p && gc_base
  | None -> gc_base
let strings_from_fasta fa =
match Alignment.from_fasta fa with
......@@ -29,7 +30,7 @@ let seq_gc ?pos seqs =
)
in
let gc_counts = Array.map seqs ~f:(fun seq ->
let len = if pos <> None then String.length seq / 3 else String.length seq in
let len = if Option.is_some pos then String.length seq / 3 else String.length seq in
let sum = String.foldi ~init:0 ~f:(fun i a c -> a + if is_gc ?pos i c then 1 else 0) seq in
float len, float sum
)
......@@ -76,7 +77,7 @@ let command =
main ~alignment
]
let%pworkflow histogram (fa : #Bistro.fasta Bistro.pworkflow) =
let%pworkflow histogram (fa : #fasta file) =
let al = ok_exn Alignment.show_parsing_error @@ Alignment.from_fasta [%path fa] in
let float_array_of_int_list x =
Array.of_list x
......
......@@ -142,10 +142,10 @@ rate_distribution=Constant()
)
]
(** [alignment run] is the [seq.fa] entry selected from the output of a
    multi-profile bppseqgen run, typed as a nucleotide FASTA file. *)
let alignment run_bppseqgen_multi_profiles : nucleotide_fasta file =
  Workflow.select run_bppseqgen_multi_profiles ["seq.fa"]
(** [info run] is the [seq.fa.info] entry selected from the output of a
    multi-profile bppseqgen run, typed as a text file. *)
let info run_bppseqgen_multi_profiles : text file =
  Workflow.select run_bppseqgen_multi_profiles ["seq.fa.info"]
end
......@@ -166,7 +166,7 @@ let conf_file_bppseqman_fna2faa ~fna =
|}
]
let fna2faa (fna : nucleotide_fasta pworkflow) : aminoacid_fasta pworkflow =
let fna2faa (fna : nucleotide_fasta file) : aminoacid_fasta file =
Workflow.shell ~descr:"bppsuite.fna2faa" [
cmd "bppseqman" ~img [
assign "param" (file_dump (conf_file_bppseqman_fna2faa ~fna)) ;
......@@ -197,14 +197,14 @@ let conf_file_bppseqman_faa2phy ~faa =
|}
]
(** [fna2phy ~fna] is the shell workflow described as
    ["bppsuite.fna2phy_interleaved"]: it runs [bppseqman] with a [param] file
    dumped from [conf_file_bppseqman_fna2phy ~fna], and its result is typed as
    a nucleotide PHYLIP file. *)
let fna2phy ~(fna: nucleotide_fasta file) : nucleotide_phylip file =
  Workflow.shell ~descr:"bppsuite.fna2phy_interleaved" [
    cmd "bppseqman" ~img [
      assign "param" (file_dump (conf_file_bppseqman_fna2phy ~fna)) ;
    ]
  ]
let faa2phy ~(faa: aminoacid_fasta pworkflow) : aminoacid_phylip pworkflow =
let faa2phy ~(faa: aminoacid_fasta file) : aminoacid_phylip file =
Workflow.shell ~descr:"bppsuite.faa2phy_interleaved" [
cmd "bppseqman" ~img [
assign "param" (file_dump (conf_file_bppseqman_faa2phy ~faa)) ;
......@@ -212,7 +212,7 @@ let faa2phy ~(faa: aminoacid_fasta pworkflow) : aminoacid_phylip pworkflow =
]
let paste_fna ~(fna_l: nucleotide_fasta pworkflow list) : nucleotide_fasta pworkflow =
let paste_fna ~(fna_l: nucleotide_fasta file list) : nucleotide_fasta file =
Workflow.shell ~descr:"bppsuite.catfasta" [
cmd "catfasta2phyml.pl" ~stdout:dest ~img (List.concat [
[string "-f" ] ;
......
......@@ -4,34 +4,34 @@ open File_formats
(** Workflows around bppseqgen runs and accessors to their outputs. *)
module Bppseqgen : sig
  (** Builds a bppseqgen run directory from two profile files, an NHX tree,
      a convergence hypothesis and a seed.
      NOTE(review): the implementation is not visible from this interface;
      confirm the exact role of [profile_f] and [profile_c]. *)
  val multi_profiles :
    ?descr : string ->
    profile_f: text file ->
    profile_c: text file ->
    input_tree: nhx file ->
    hypothesis:Convergence_hypothesis.t ->
    seed:int ->
    [`bppseqgen] directory

  (** Nucleotide FASTA file taken from a bppseqgen run directory. *)
  val alignment :
    [`bppseqgen] directory ->
    nucleotide_fasta file

  (** Text file taken from a bppseqgen run directory. *)
  val info :
    [`bppseqgen] directory ->
    text file
end
(** [fna2faa fna] maps a nucleotide FASTA file to an amino-acid FASTA file. *)
val fna2faa :
  nucleotide_fasta file ->
  aminoacid_fasta file
(** [fna2phy ~fna] maps a nucleotide FASTA file to a nucleotide PHYLIP
    file. *)
val fna2phy :
  fna: nucleotide_fasta file ->
  nucleotide_phylip file
(** [faa2phy ~faa] maps an amino-acid FASTA file to an amino-acid PHYLIP
    file. *)
val faa2phy :
  faa: aminoacid_fasta file ->
  aminoacid_phylip file
(** [paste_fna ~fna_l] combines a list of nucleotide FASTA files into a single
    nucleotide FASTA file.
    NOTE(review): how the inputs are combined is decided by the
    implementation, which is not visible from this interface. *)
val paste_fna:
  fna_l: nucleotide_fasta file list ->
  nucleotide_fasta file
......@@ -3,17 +3,17 @@ open Bistro.Shell_dsl
open Bistro
(** Output of one detection tool. Each tag names the method that produced the
    payload; most payloads are the tool's output directory, [`Multinomial]
    carries a text file directly, and [`Msd] pairs its directory with a float
    (NOTE(review): the meaning of that float is not visible here). *)
type result = [
  | `Pcoc of [`pcoc] directory
  | `Pcoc_gamma of [`pcoc] directory
  | `Pcoc_C60 of [`pcoc] directory
  | `Diffsel of [`diffsel] directory
  | `Identical_LG of [`identical] directory
  | `Identical_WAG of [`identical] directory
  | `Topological_LG of [`topological] directory
  | `Topological_WAG of [`topological] directory
  | `Tdg09 of [`tdg09] directory
  | `Multinomial of text file
  | `Msd of [`msd] directory * float
]
let meth_string_of_result = function
......@@ -34,11 +34,11 @@ type dataset_res = {
tree_prefix : string ;
dataset : Dataset.t ;
res_by_tools: result list ;
merged_results : text_file pworkflow ;
plot_merged_results : svg pworkflow ;
merged_results : text file ;
plot_merged_results : svg file ;
}
let merge_results ?fna_infos ~(res_by_tools : result list) () : text_file pworkflow =
let merge_results ?fna_infos ~(res_by_tools : result list) () : text file =
let command = List.map res_by_tools ~f:(fun res ->
let w = match res with
| `Pcoc d -> Pcoc.results d
......@@ -78,7 +78,7 @@ let merge_results ?fna_infos ~(res_by_tools : result list) () : text_file pworkf
] ;
]
let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topological ?pcoc ?pcoc_v2 ?pcoc_pcp ?diffsel ?diffseldsparse () : text_file pworkflow =
let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topological ?pcoc ?pcoc_v2 ?pcoc_pcp ?diffsel ?diffseldsparse () : text file =
Workflow.shell ~descr:"convergence_detection.merge_results" [
cmd "python" ~img:Env.env_py [
file_dump (string Scripts.merge_det_results) ;
......@@ -97,7 +97,7 @@ let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topol
] ;
]
let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv (): svg pworkflow =
let plot_merge_results ?t_choices ~plot_all_sites ~(res_by_tools:result list) ~tree ~faa ~tsv (): svg file =
let img = Env.env_pcoc in
(* use of pcoc env due to its working X server for dra plot with ete3 *)
let meths = List.map res_by_tools ~f:(fun res ->
......
......@@ -2,17 +2,17 @@ open Bistro
open File_formats
(** Output of one detection tool. Each tag names the method that produced the
    payload; most payloads are the tool's output directory, [`Multinomial]
    carries a text file directly, and [`Msd] pairs its directory with a float
    (NOTE(review): the meaning of that float is not visible here). *)
type result = [
  | `Pcoc of [`pcoc] directory
  | `Pcoc_gamma of [`pcoc] directory
  | `Pcoc_C60 of [`pcoc] directory
  | `Diffsel of [`diffsel] directory
  | `Identical_LG of [`identical] directory
  | `Identical_WAG of [`identical] directory
  | `Topological_LG of [`topological] directory
  | `Topological_WAG of [`topological] directory
  | `Tdg09 of [`tdg09] directory
  | `Multinomial of text file
  | `Msd of [`msd] directory * float
]
val meth_string_of_result : result -> string
......@@ -22,58 +22,58 @@ type dataset_res = {
tree_prefix : string ;
dataset : Dataset.t ;
res_by_tools: result list ;
merged_results : text_file pworkflow ;
plot_merged_results : svg pworkflow
merged_results : text file ;
plot_merged_results : svg file
}
(** [merge_results ?fna_infos ~res_by_tools ()] builds a single text file from
    a list of per-tool results, optionally using an extra [fna_infos] text
    file. *)
val merge_results :
  ?fna_infos:text file ->
  res_by_tools : result list ->
  unit ->
  text file
(** [merge_result_tables ... ()] builds a single text file from any subset of
    per-method result tables; every table is optional and the trailing [unit]
    lets callers omit all of them. *)
val merge_result_tables :
  ?fna_infos:text file ->
  ?oracle:text file ->
  ?multinomial:text file ->
  ?tdg09:text file ->
  ?identical:text file ->
  ?topological:text file ->
  ?pcoc:text file ->
  ?pcoc_v2:text file ->
  ?pcoc_pcp:text file ->
  ?diffsel:text file ->
  ?diffseldsparse:text file ->
  unit ->
  text file
(** [plot_merge_results ?t_choices ~plot_all_sites ~res_by_tools ~tree ~faa
    ~tsv ()] produces an SVG file from a tree, an amino-acid alignment, a
    [tsv] text file and the list of per-tool results.
    NOTE(review): the roles of [t_choices] and [plot_all_sites] are defined
    by the implementation, not visible from this interface. *)
val plot_merge_results :
  ?t_choices : text file ->
  plot_all_sites: bool ->
  res_by_tools : result list ->
  tree:nhx file ->
  faa:aminoacid_fasta file ->
  tsv:text file ->
  unit ->
  svg file
(** [plot_convergent_sites ?plot_all_sites ~alignment ~detection_results
    ~tree ()] produces an SVG file from an amino-acid alignment, a detection
    results text file and an NHX tree. *)
val plot_convergent_sites :
  ?plot_all_sites:bool ->
  alignment:aminoacid_fasta file ->
  detection_results:text file ->
  tree:nhx file ->
  unit ->
  svg file
(** [recall_precision_curve table] produces an SVG file from a text file.
    NOTE(review): presumably a recall/precision plot of merged detection
    results — confirm against the implementation. *)
val recall_precision_curve :
  text file ->
  svg file
(** [oracle ~n_h0 ~n_ha] produces a text file from two integer counts.
    NOTE(review): presumably the numbers of sites simulated under H0 and Ha —
    confirm against the implementation. *)
val oracle :
  n_h0:int ->
  n_ha:int ->
  text file
(** [recall_precision_auc_table table] is a workflow computing, from a text
    file, an array of [(label, value)] pairs.
    NOTE(review): presumably one area-under-curve value per method — confirm
    against the implementation. *)
val recall_precision_auc_table :
  text file ->
  (string * float) array workflow
......@@ -14,7 +14,7 @@ type t = model
(** [string_of_float_without_dot f] prints [f] as an integer when it has no
    fractional part, and falls back to [string_of_float] otherwise. This
    avoids labels such as ["H0_NeG4._NeC2."]. *)
let string_of_float_without_dot f =
  let int_value = int_of_float f in
  let rounded = float_of_int int_value in
  (* The comparison is scoped to [Float] so it stays a float comparison even
     where [=] has been specialised to integers. *)
  if Float.(rounded -. f = 0.) then string_of_int int_value
  else string_of_float f
let string_of_nes nes = match nes with
| Fixed g -> "NeG" ^ (string_of_float_without_dot g)
| Variable (g, c) -> "NeG" ^ (string_of_float_without_dot g) ^ "_NeC_" ^ (string_of_float_without_dot c)
......
......@@ -24,26 +24,19 @@ let eval w =
(** [path w] runs [with_workflow] on [Workflow.eval_path w] and returns the
    resulting value unchanged. *)
let path w =
  let target = Workflow.eval_path w in
  with_workflow target ~f:(fun p -> p)
(** [command fmt ...] formats a shell command line printf-style, runs it with
    [Sys.command] and discards the exit code. *)
let command fmt =
  Printf.ksprintf (fun s -> ignore (Sys.command s : int)) fmt

(* Each viewer below opens the file obtained from [path w] in an external
   program. The path is shell-quoted so that spaces or shell metacharacters
   in it cannot split or alter the command line. *)

(** Opens the result of [w] in [less]. *)
let less w = command "less %s" (Filename.quote (path w))

(** Opens the result of [w] in [firefox]. *)
let firefox w = command "firefox %s" (Filename.quote (path w))

(** Opens the result of [w] in [seaview]. *)
let seaview w = command "seaview %s" (Filename.quote (path w))

(** Opens the result of [w] in [evince]. *)
let evince w = command "evince %s" (Filename.quote (path w))
(** [workflow_of_template t] is a shell workflow made of a single [cp]
    command, copying the dump of template [t] to the workflow destination. *)
let workflow_of_template t =
  let open Bistro.Shell_dsl in
  let copy_template = cmd "cp" [ file_dump t ; dest ] in
  Workflow.shell [ copy_template ]
......@@ -3,9 +3,9 @@ open File_formats
(** What a dataset must provide: an NHX tree and a nucleotide alignment. *)
module type Dataset = sig
  type t

  (** NHX tree file of the dataset. *)
  val tree : t -> nhx file

  (** Nucleotide FASTA alignment file of the dataset. *)
  val nucleotide_alignment : t -> nucleotide_fasta file
end
module Make(D : Dataset) = struct
......
......@@ -50,7 +50,7 @@ echo end_it=$end_it
|}
let diffsel ~(phy_n:nucleotide_phylip pworkflow) ~(tree: _ pworkflow) ~(w_every:int) ~(n_cycles: int) ?(descr = "") ?seed () : [`diffsel] dworkflow =
let diffsel ~(phy_n:nucleotide_phylip file) ~(tree: _ file) ~(w_every:int) ~(n_cycles: int) ?(descr = "") ?seed () : [`diffsel] directory =
let env = Env.env_diffsel in
let tmp_tree = tmp // "myrun.tree" in
let tmp_ali = tmp // "myrun.ali" in
......@@ -84,7 +84,7 @@ let diffsel ~(phy_n:nucleotide_phylip pworkflow) ~(tree: _ pworkflow) ~(w_every:
)
]
let check_conv run_diffsel : directory pworkflow =
let check_conv run_diffsel : [`diffsel_check_conv] directory =
let env = Env.env_r in
let script = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
let trace = Workflow.select run_diffsel ["myrun.trace"] in
......@@ -110,7 +110,7 @@ let check_conv run_diffsel : directory pworkflow =
)
]
let selector run_diffsel : text_file pworkflow =
let selector run_diffsel : text file =
let env = Env.env_diffsel in
let package = tmp // "diffsel_script_utils.py" in
let script = tmp // "diffsel_analyze_result.py" in
......
......@@ -2,19 +2,19 @@ open Bistro
open File_formats
(** [diffsel ~phy_n ~tree ~w_every ~n_cycles ?descr ?seed ()] builds a diffsel
    run directory from a nucleotide PHYLIP alignment and a tree file.
    NOTE(review): [w_every] and [n_cycles] presumably control the sampling
    frequency and length of the run — confirm against the implementation. *)
val diffsel :
  phy_n:nucleotide_phylip file ->
  tree:_ file ->
  w_every:int ->
  n_cycles:int ->
  ?descr:string ->
  ?seed:int ->
  unit ->
  [`diffsel] directory
(** [selector run] derives a text file from a diffsel run directory. *)
val selector :
  [`diffsel] directory ->
  text file
(** [check_conv run] derives, from a diffsel run directory, a directory tagged
    [`diffsel_check_conv].
    NOTE(review): presumably an MCMC convergence report — confirm against the
    implementation. *)
val check_conv :
  [`diffsel] directory ->
  [`diffsel_check_conv] directory
......@@ -43,8 +43,8 @@ echo end_it=$end_it
let diffseldsparse
?pi ?shiftprob ?eps
~(alignment:nucleotide_phylip pworkflow) ~(tree: _ pworkflow)
~(w_every:int) ~(n_cycles: int) () : [`diffseldsparse] dworkflow =
~(alignment:nucleotide_phylip file) ~(tree: _ file)
~(w_every:int) ~(n_cycles: int) () : [`diffseldsparse] directory =
let tmp_tree = tmp // "myrun.tree" in
let tmp_ali = tmp // "myrun.ali" in
let dest_tree = dest // "myrun.tree" in
......@@ -79,7 +79,7 @@ let diffseldsparse
)
]
let check_conv run_diffseldsparse : directory pworkflow =
let check_conv run_diffseldsparse : [`diffseldsparse_check_conv] directory =
let script = tmp // "DiffselMCMCConvergenceAnalysis.Rmd" in
let trace = Workflow.select run_diffseldsparse ["myrun.trace"] in
let out = dest // "out.html" in
......@@ -123,7 +123,7 @@ let readdiffseldsparse run =
)
]
let posterior_probabilities run_diffseldsparse : text_file pworkflow =
let posterior_probabilities run_diffseldsparse : text file =
let tmp_tree = tmp // "myrun.tree" in
let tmp_ali = tmp // "myrun.ali" in
let dep_tree = (dep run_diffseldsparse) // "myrun.tree" in
......
......@@ -5,25 +5,25 @@ val diffseldsparse :
?pi:float ->
?shiftprob:float * float ->
?eps:float ->
alignment:nucleotide_phylip pworkflow ->
tree:_ pworkflow ->
alignment:nucleotide_phylip file ->
tree:_ file ->
w_every:int ->
n_cycles:int ->
unit ->
[`diffseldsparse] dworkflow
[`diffseldsparse] directory
(** [posterior_probabilities run] derives a text file from a diffseldsparse
    run directory. *)
val posterior_probabilities :
  [`diffseldsparse] directory ->
  text file
(** [readdiffseldsparse run] derives, from a diffseldsparse run directory, a
    directory tagged [`readdiffseldsparse]. *)
val readdiffseldsparse :
  [`diffseldsparse] directory ->
  [`readdiffseldsparse] directory
(** [check_conv run] derives, from a diffseldsparse run directory, a directory
    tagged [`diffseldsparse_check_conv].
    NOTE(review): presumably an MCMC convergence report — confirm against the
    implementation. *)
val check_conv :
  [`diffseldsparse] directory ->
  [`diffseldsparse_check_conv] directory
(** [results dir] derives a text file from a [`readdiffseldsparse]
    directory. *)
val results :
  [`readdiffseldsparse] directory ->
  text file
; Main library of the project. A stanza takes a single [libraries] field, so
; only the current dependency list is kept.
(library
 (name reviewphiltrans)
 (libraries biotk biotope bistro.utils ocaml-r.graphics ocaml-r.grDevices
  phylogenetics.convergence reviewphiltrans_toolbox)
 (preprocess
  (pps ppx_jane ppx_csv_conv bistro.ppx ppx_here)))
......
open Bistro
(** Phantom type of text files whose format is tagged [`nhx]. *)
class type nhx = object
  inherit text
  method format : [`nhx]
end
(** Phantom type of text files whose format is tagged [`nw]. *)
class type nw = object
  inherit text
  method format : [`nw]
end
(** Phantom type of text files whose format is tagged [`diffsel_tree]. *)
class type diffsel_tree = object
  inherit text
  method format : [`diffsel_tree]
end
(** Phantom type of text files whose format is tagged
    [`topological_tree]. *)
class type topological_tree = object
  inherit text
  method format : [`topological_tree]
end
......@@ -31,10 +31,10 @@ class type aminoacid_fasta = object
end
(** Phantom type of text files whose format is tagged [`Nucleotide]; used for
    nucleotide PHYLIP alignments. *)
class type nucleotide_phylip = object
  inherit text
  method format : [`Nucleotide]
end
(** Phantom type of text files whose format is tagged [`Aminoacid]; used for
    amino-acid PHYLIP alignments. *)
class type aminoacid_phylip = object
  inherit text
  method format : [`Aminoacid]
end
......@@ -91,7 +91,7 @@ let bppancestor ?(descr="") ~faa ~tree ~config : _ workflow =
)
]
let identical ?(descr="") ~(tree_id:_ pworkflow) ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkflow) ~prot_model () : [`identical] dworkflow =
let identical ?(descr="") ~(tree_id:_ file) ~(tree_sc:_ file) ~(faa:aminoacid_fasta file) ~prot_model () : [`identical] directory =
let config = [assign "model" (string prot_model)] in
let out1 = dest // "out1.tsv" in
let out2 = dest // "out2.tsv" in
......@@ -109,5 +109,5 @@ let identical ?(descr="") ~(tree_id:_ pworkflow) ~(tree_sc:_ pworkflow) ~(faa:am
]
]
(** [results run] is the [out1.tsv] entry selected from the output directory
    of an [identical] run, typed as a text file. *)
let results run_identical : text file =
  Workflow.select run_identical ["out1.tsv"]
......@@ -5,7 +5,7 @@ open File_formats
let img = Env.env_msd
let msd ?(descr="") ~e ~(faa : aminoacid_fasta pworkflow) ~(tree_sc : _ pworkflow) : [`msd] dworkflow =
let msd ?(descr="") ~e ~(faa : aminoacid_fasta file) ~(tree_sc : _ file) : [`msd] directory =
let map_table = dest // "map.tsv" in
let tree_nw = dest // "tree.nw" in
let out = dest // "out.tsv" in
......@@ -42,7 +42,7 @@ let msd ?(descr="") ~e ~(faa : aminoacid_fasta pworkflow) ~(tree_sc : _ pworkflo
];
]
let results run_msd : text_file pworkflow =
let results run_msd : text file =
Workflow.shell ~descr:"convergence_detection.parse_msd" [
cmd "python" ~img [
file_dump (string Scripts.parse_output_msd) ;
......
......@@ -3,7 +3,7 @@ open Bistro
open Bistro.Shell_dsl
open File_formats
let multinomial ?(descr="") ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkflow) : text_file pworkflow =
let multinomial ?(descr="") ~(tree_sc:_ file) ~(faa:aminoacid_fasta file) : text file =
let img = Env.env_py in
Workflow.shell ~descr:("calc_multinomial."^descr) [
mkdir_p dest;
......@@ -15,7 +15,7 @@ let multinomial ?(descr="") ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkfl
]
]
let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ pworkflow) ~(faa:aminoacid_fasta pworkflow) (* : text_file pworkflow *) =
let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ file) ~(faa:aminoacid_fasta file) (* : text file *) =
let open Phylogenetics in
let open Phylogenetics_convergence in
let module MT = Multinomial_test in
......
......@@ -5,7 +5,7 @@ open File_formats
let img = Env.env_pcoc
let pcoc ?(descr = "") ?plot_complete ?gamma ?catx_est ?max_gap_per_pos ?max_gap_per_conv_leaf ~(faa:aminoacid_fasta pworkflow) ~(tree:_ workflow) (): [`pcoc] dworkflow =
let pcoc ?(descr = "") ?plot_complete ?gamma ?catx_est ?max_gap_per_pos ?max_gap_per_conv_leaf ~(faa:aminoacid_fasta file) ~(tree:_ workflow) (): [`pcoc] directory =
Workflow.shell ~descr:("convergence_detection.pcoc."^descr) [
cmd "pcoc_det.py" ~img [
opt "-t" dep tree;
......@@ -20,7 +20,7 @@ let pcoc ?(descr = "") ?plot_complete ?gamma ?catx_est ?max_gap_per_pos ?max_gap
]
]
let results run_pcoc : text_file pworkflow =
let results run_pcoc : text file =