Commit 8e7685ab authored by Philippe Veber
Browse files

More precise type for detection method outputs

parent 8210cff4
......@@ -51,7 +51,7 @@ let orthomam_echolocation = {
}
type detection_method = {
result : Pipeline.query -> text file ;
result : Pipeline.query -> cpt file ;
col : int ;
label : string ;
requires_rooted_tree : bool ;
......
open Core
open Bistro.Shell_dsl
open Bistro
open File_formats
type result = [
| `Pcoc of [`pcoc] directory
......@@ -12,7 +13,7 @@ type result = [
| `Topological_LG of [`topological] directory
| `Topological_WAG of [`topological] directory
| `Tdg09 of [`tdg09] directory
| `Multinomial of text file
| `Multinomial of cpt file
| `Msd of [`msd] directory * float
]
......@@ -34,11 +35,11 @@ type dataset_res = {
tree_prefix : string ;
dataset : Dataset.t ;
res_by_tools: result list ;
merged_results : text file ;
merged_results : cpt file ;
plot_merged_results : svg file ;
}
let merge_results ?fna_infos ~(res_by_tools : result list) () : text file =
let merge_results ?fna_infos ~(res_by_tools : result list) () : cpt file =
let command = List.map res_by_tools ~f:(fun res ->
let w = match res with
| `Pcoc d -> Pcoc.results d
......@@ -78,7 +79,7 @@ let merge_results ?fna_infos ~(res_by_tools : result list) () : text file =
] ;
]
let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topological ?pcoc ?pcoc_v2 ?pcoc_pcp ?diffsel ?diffseldsparse () : text file =
let merge_result_tables ?fna_infos ?oracle ?multinomial ?tdg09 ?identical ?topological ?pcoc ?pcoc_v2 ?pcoc_pcp ?diffsel ?diffseldsparse () : cpt file =
Workflow.shell ~descr:"convergence_detection.merge_results" ~img:Env.env_py [
cmd "python" [
file_dump (string Scripts.merge_det_results) ;
......
......@@ -11,7 +11,7 @@ type result = [
| `Topological_LG of [`topological] directory
| `Topological_WAG of [`topological] directory
| `Tdg09 of [`tdg09] directory
| `Multinomial of text file
| `Multinomial of cpt file
| `Msd of [`msd] directory * float
]
......@@ -22,7 +22,7 @@ type dataset_res = {
tree_prefix : string ;
dataset : Dataset.t ;
res_by_tools: result list ;
merged_results : text file ;
merged_results : cpt file ;
plot_merged_results : svg file
}
......@@ -30,22 +30,22 @@ val merge_results :
?fna_infos:text file ->
res_by_tools : result list ->
unit ->
text file
cpt file
val merge_result_tables :
?fna_infos:text file ->
?oracle:text file ->
?multinomial:text file ->
?tdg09:text file ->
?identical:text file ->
?topological:text file ->
?pcoc:text file ->
?pcoc_v2:text file ->
?pcoc_pcp:text file ->
?diffsel:text file ->
?diffseldsparse:text file ->
?oracle:cpt file ->
?multinomial:cpt file ->
?tdg09:cpt file ->
?identical:cpt file ->
?topological:cpt file ->
?pcoc:cpt file ->
?pcoc_v2:cpt file ->
?pcoc_pcp:cpt file ->
?diffsel:cpt file ->
?diffseldsparse:cpt file ->
unit ->
text file
cpt file
val plot_merge_results :
? t_choices : text file ->
......@@ -53,27 +53,27 @@ val plot_merge_results :
res_by_tools : result list ->
tree:nhx file ->
faa:aminoacid_fasta file ->
tsv:text file ->
tsv:cpt file ->
unit ->
svg file
val plot_convergent_sites :
?plot_all_sites:bool ->
alignment:aminoacid_fasta file ->
detection_results:text file ->
detection_results:cpt file ->
tree:nhx file ->
unit ->
svg file
val recall_precision_curve :
text file ->
cpt file ->
svg file
val oracle :
n_h0:int ->
n_ha:int ->
text file
cpt file
val recall_precision_auc_table :
text file ->
cpt file ->
(string * float) list workflow
......@@ -27,45 +27,45 @@ module type S = sig
val dnds_tree : query -> text file
val identical : query -> text file
val identical : query -> cpt file
val topological : query -> text file
val topological : query -> cpt file
val multinomial : query -> text file
val multinomial : query -> cpt file
val multinomial_simulation_lrt : query -> text file
val multinomial_simulation_lrt : query -> cpt file
val multinomial_simulation_sparse : query -> text file
val multinomial_simulation_sparse : query -> cpt file
val multinomial_asymptotic_lrt : query -> text file
val multinomial_asymptotic_lrt : query -> cpt file
val multinomial_asymptotic_sparse : query -> text file
val multinomial_asymptotic_sparse : query -> cpt file
val tdg09 : query -> text file
val tdg09 : query -> cpt file
val failsafe_tdg09 : query -> text file
val failsafe_tdg09 : query -> cpt file
val pcoc : ?gamma:bool -> ?ncat:int -> query -> text file
val pcoc : ?gamma:bool -> ?ncat:int -> query -> cpt file
val pcoc_v2 :
?gamma:bool -> ?aa_profiles:Pcoc.aa_profiles -> query -> text file
?gamma:bool -> ?aa_profiles:Pcoc.aa_profiles -> query -> cpt file
val gemma :
query ->
lmm_test:[ `All | `LRT | `Score | `Wald ] ->
relatedness_mode:[ `Centered | `Standardized ] ->
text file
cpt file
val inhouse_lmm : query -> text file
val inhouse_lmm : query -> cpt file
val diffsel : query -> text file
val diffsel : query -> cpt file
val diffseldsparse :
?pi:float ->
?shiftprob:float * float ->
?eps:float ->
query ->
text file
cpt file
val view_site :
query -> convergent_species:string list -> site_pos:int -> pdf file
......
......@@ -27,45 +27,45 @@ module type S = sig
val dnds_tree : query -> text file
val identical : query -> text file
val identical : query -> cpt file
val topological : query -> text file
val topological : query -> cpt file
val multinomial : query -> text file
val multinomial : query -> cpt file
val multinomial_simulation_lrt : query -> text file
val multinomial_simulation_lrt : query -> cpt file
val multinomial_simulation_sparse : query -> text file
val multinomial_simulation_sparse : query -> cpt file
val multinomial_asymptotic_lrt : query -> text file
val multinomial_asymptotic_lrt : query -> cpt file
val multinomial_asymptotic_sparse : query -> text file
val multinomial_asymptotic_sparse : query -> cpt file
val tdg09 : query -> text file
val tdg09 : query -> cpt file
val failsafe_tdg09 : query -> text file
val failsafe_tdg09 : query -> cpt file
val pcoc : ?gamma:bool -> ?ncat:int -> query -> text file
val pcoc : ?gamma:bool -> ?ncat:int -> query -> cpt file
val pcoc_v2 :
?gamma:bool -> ?aa_profiles:Pcoc.aa_profiles -> query -> text file
?gamma:bool -> ?aa_profiles:Pcoc.aa_profiles -> query -> cpt file
val gemma :
query ->
lmm_test:[ `All | `LRT | `Score | `Wald ] ->
relatedness_mode:[ `Centered | `Standardized ] ->
text file
cpt file
val inhouse_lmm : query -> text file
val inhouse_lmm : query -> cpt file
val diffsel : query -> text file
val diffsel : query -> cpt file
val diffseldsparse :
?pi:float ->
?shiftprob:float * float ->
?eps:float ->
query ->
text file
cpt file
val view_site :
query -> convergent_species:string list -> site_pos:int -> pdf file
......
......@@ -106,7 +106,7 @@ let check_conv run_diffsel : [`diffsel_check_conv] directory =
]
]
let selector run_diffsel : text file =
let selector run_diffsel : cpt file =
let env = Env.env_diffsel in
let package = tmp // "diffsel_script_utils.py" in
let script = tmp // "diffsel_analyze_result.py" in
......
......@@ -13,7 +13,7 @@ val diffsel :
val selector :
[`diffsel] directory ->
text file
cpt file
val check_conv :
[`diffsel] directory ->
......
......@@ -117,7 +117,7 @@ let readdiffseldsparse run =
]
]
let posterior_probabilities run_diffseldsparse : text file =
let posterior_probabilities run_diffseldsparse : cpt file =
let tmp_tree = tmp // "myrun.tree" in
let tmp_ali = tmp // "myrun.ali" in
let dep_tree = (dep run_diffseldsparse) // "myrun.tree" in
......
......@@ -14,7 +14,7 @@ val diffseldsparse :
val posterior_probabilities :
[`diffseldsparse] directory ->
text file
cpt file
val readdiffseldsparse :
[`diffseldsparse] directory ->
......@@ -26,4 +26,4 @@ val check_conv :
val results :
[`readdiffseldsparse] directory ->
text file
cpt file
......@@ -54,3 +54,10 @@ class type rds = object
inherit binary_file
method format : [`rds]
end
(** Convergence Prediction Table.

    File-format class type used as the parameter of [file] for the outputs
    of detection methods. It inherits from [tsv] and declares two type-level
    tags: [header], fixed to [`Yes], and [fields], fixed to
    [`Site_then_scores].

    NOTE(review): presumably this denotes a tab-separated file with a header
    line, whose first column identifies the site and whose remaining columns
    hold scores. That layout is inferred from the tag names only; confirm it
    against the workflows that produce [cpt file] values. *)
class type cpt = object
inherit tsv
(** Type-level tag; [`Yes] presumably means the table starts with a header
    line (TODO confirm). *)
method header : [`Yes]
(** Type-level tag naming the column layout of the table. *)
method fields : [`Site_then_scores]
end
......@@ -33,4 +33,4 @@ val univariate_lmm :
univariate_lmm_output file
val result_table_of_output :
aminoacid_fasta file -> univariate_lmm_output file -> text file
aminoacid_fasta file -> univariate_lmm_output file -> cpt file
......@@ -105,5 +105,5 @@ let identical ?(descr="") ~(tree_id:_ file) ~(tree_sc:_ file) ~(faa:aminoacid_fa
]
]
let results run_identical : text file =
let results run_identical : cpt file =
Workflow.select run_identical ["out1.tsv"]
......@@ -4,4 +4,4 @@ open File_formats
val test :
aminoacid_fasta file ->
nhx file ->
text file
cpt file
......@@ -42,7 +42,7 @@ let msd ?(descr="") ~e ~(faa : aminoacid_fasta file) ~(tree_sc : _ file) : [`msd
];
]
let results run_msd : text file =
let results run_msd : cpt file =
Workflow.shell ~descr:"convergence_detection.parse_msd" ~img [
cmd "python" [
file_dump (string Scripts.parse_output_msd) ;
......
......@@ -3,7 +3,7 @@ open Bistro
open Bistro.Shell_dsl
open File_formats
let multinomial ?(descr="") ~(tree_sc:_ file) ~(faa:aminoacid_fasta file) () : text file =
let multinomial ?(descr="") ~(tree_sc:_ file) ~(faa:aminoacid_fasta file) () : cpt file =
let img = Env.env_py in
Workflow.shell ~descr:("calc_multinomial."^descr) ~img [
cmd "python" [
......@@ -14,7 +14,7 @@ let multinomial ?(descr="") ~(tree_sc:_ file) ~(faa:aminoacid_fasta file) () : t
]
]
let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ file) ~(faa:aminoacid_fasta file) (* : text file *) =
let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ file) ~(faa:aminoacid_fasta file) (* : cpt file *) =
let open Phylogenetics in
let open Phylogenetics_convergence in
let module MT = Multinomial_test in
......
......@@ -20,7 +20,7 @@ let pcoc ?(descr = "") ?plot_complete ?gamma ?catx_est ?max_gap_per_pos ?max_gap
]
]
let results run_pcoc : text file =
let results run_pcoc : cpt file =
let out_pcoc = dep run_pcoc // "RUN*/*.results.tsv" in
Workflow.shell ~descr:"convergence_detection.selector_pcoc" [
cmd "cp" [out_pcoc; ident dest] ;
......
......@@ -15,7 +15,7 @@ val pcoc :
val results :
[`pcoc] directory ->
text file
cpt file
type aa_profiles =
[`C10 |
......
......@@ -27,7 +27,7 @@ val make :
val result_table :
meth list ->
t ->
(string * text file) list
(string * cpt file) list
val repo :
meth list ->
......
......@@ -50,10 +50,10 @@ module Mutsel : sig
val benchmark_statistics :
Codepitk.Simulator.Site_independent_mutsel.simulation workflow ->
labels:string list ->
results:(text file * int) list ->
results:(cpt file * int) list ->
binary_file file
(* val benchmark : t -> (t -> text file) list -> benchmark workflow
(* val benchmark : t -> (t -> cpt file) list -> benchmark workflow
*
* val rds_of_benchmark : benchmark workflow -> rds file *)
......@@ -103,7 +103,7 @@ module Bppseqgen : sig
include Detection_pipeline.S with type query := t
val oracle : t -> text file
val oracle : t -> cpt file
val alignment_plot : t -> svg file
......
......@@ -38,7 +38,7 @@ let tdg09 ?(descr="") ~(faa:aminoacid_fasta file) ~(tree:_ file) () : [`tdg09]
]
]
let results run_tdg09 : text file =
let results run_tdg09 : cpt file =
let tdg09_out = Workflow.select run_tdg09 [ "tdg09.yaml" ] in
Workflow.shell ~descr:"convergence_detection.parse_tdg09" ~img [
cmd "python" [
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment