Commit 4a026c95 authored by LANORE Vincent's avatar LANORE Vincent
Browse files

Big overhaul of hypothesis management.

parent 40136eae
......@@ -2,75 +2,35 @@ open Core
open Bistro.EDSL
open Bistro.Std
type t =
| H0_NeG1
| H0_NeG1_NeC_4
| H0_NeG1_NeC_5
| H0_NeG2
| H0_NeG2_NeC_4
| H0_NeG3
| H0_NeG4
| H0_NeG4_NeC_1
| H0_NeG4_NeC_2
| H0_NeG4_NeC_6
| H0_NeG4_NeC_8
| H0_NeG5
| H0_NeG5_NeC_1
| H0_NeG5_NeC_div2
| H0_NeG5_NeC_x2
| H0_NeG6
| HaPC_NeG1
| HaPC_NeG1_NeC_4
| HaPC_NeG1_NeC_5
| HaPC_NeG2
| HaPC_NeG2_NeC_4
| HaPC_NeG3
| HaPC_NeG4
| HaPC_NeG4_NeC_1
| HaPC_NeG4_NeC_2
| HaPC_NeG4_NeC_6
| HaPC_NeG4_NeC_8
| HaPC_NeG5
| HaPC_NeG5_NeC_1
| HaPC_NeG5_NeC_div2
| HaPC_NeG5_NeC_x2
| HaPC_NeG6
| HaPCOC
type nes_spec =
| Fixed of float
| Variable of float * float
type model =
| H0 of nes_spec
| HaPC of nes_spec
| HaPCOC of nes_spec
type t = model
let string_of_float_without_dot f = (* to avoid things like "H0_NeG4._NeC2." *)
let int_value = int_of_float f in
let rounded = int_value |> float_of_int in
if rounded -. f = 0. then string_of_int int_value else string_of_float f
let string_of_nes nes = match nes with
| Fixed g -> "NeG" ^ (string_of_float_without_dot g)
| Variable (g, c) -> "NeG" ^ (string_of_float_without_dot g) ^ "_NeC" ^ (string_of_float_without_dot c)
let string_of_model m = match m with
| H0_NeG1 -> "H0_NeG1"
| H0_NeG1_NeC_4 -> "H0_NeG1_NeC_4"
| H0_NeG1_NeC_5 -> "H0_NeG1_NeC_5"
| H0_NeG2 -> "H0_NeG2"
| H0_NeG2_NeC_4 -> "H0_NeG2_NeC_4"
| H0_NeG3 -> "H0_NeG3"
| H0_NeG4 -> "H0_NeG4"
| H0_NeG4_NeC_1 -> "H0_NeG4_NeC_1"
| H0_NeG4_NeC_2 -> "H0_NeG4_NeC_2"
| H0_NeG4_NeC_6 -> "H0_NeG4_NeC_6"
| H0_NeG4_NeC_8 -> "H0_NeG4_NeC_8"
| H0_NeG5 -> "H0_NeG5"
| H0_NeG5_NeC_1 -> "H0_NeG5_NeC_1"
| H0_NeG5_NeC_div2 -> "H0_NeG5_NeC_div2"
| H0_NeG5_NeC_x2 -> "H0_NeG5_NeC_x2"
| H0_NeG6 -> "H0_NeG6"
| HaPC_NeG1 -> "HaPC_NeG1"
| HaPC_NeG1_NeC_4 -> "HaPC_NeG1_NeC_4"
| HaPC_NeG1_NeC_5 -> "HaPC_NeG1_NeC_5"
| HaPC_NeG2 -> "HaPC_NeG2"
| HaPC_NeG2_NeC_4 -> "HaPC_NeG2_NeC_4"
| HaPC_NeG3 -> "HaPC_NeG3"
| HaPC_NeG4 -> "HaPC_NeG4"
| HaPC_NeG4_NeC_1 -> "HaPC_NeG4_NeC_1"
| HaPC_NeG4_NeC_2 -> "HaPC_NeG4_NeC_2"
| HaPC_NeG4_NeC_6 -> "HaPC_NeG4_NeC_6"
| HaPC_NeG4_NeC_8 -> "HaPC_NeG4_NeC_8"
| HaPC_NeG5 -> "HaPC_NeG5"
| HaPC_NeG5_NeC_1 -> "HaPC_NeG5_NeC_1"
| HaPC_NeG5_NeC_div2 -> "HaPC_NeG5_NeC_div2"
| HaPC_NeG5_NeC_x2 -> "HaPC_NeG5_NeC_x2"
| HaPC_NeG6 -> "HaPC_NeG6"
| HaPCOC -> "HaPCOC"
| H0 nes -> "H0_" ^ (string_of_nes nes)
| HaPC nes -> "HaPC_" ^ (string_of_nes nes)
| HaPCOC nes -> "HaPCOC_" ^ (string_of_nes nes)
let neg_of_model m = match m with
| H0 nes | HaPC nes | HaPCOC nes -> match nes with
| Fixed g | Variable (g, _) -> g
let nec_of_model m = match m with
| H0 nes | HaPC nes | HaPCOC nes -> match nes with
| Fixed c | Variable (_, c) -> c
let assign k v =
seq ~sep:"=" [ string k ; v ]
......@@ -84,7 +44,7 @@ nonhomogeneous = general
rate_distribution=Constant()
|}
let bpp_config_H0_F= seq ~sep:"\n" [
(* let bpp_config_H0_F= seq ~sep:"\n" [
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
......@@ -100,7 +60,7 @@ let bpp_config_HaPC_F = seq ~sep:"\n" [
seq [string "modelT=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))" ] ;
seq [string "modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
] *)
let bpp_config_H0_F_Ne = seq ~sep:"\n" [
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))" ] ;
......@@ -122,16 +82,14 @@ let bpp_config_HaPC_F_Ne = seq ~sep:"\n" [
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
let bpp_config_F nodes hyp = [
let bpp_config_of_model m = match m with
| H0 _ -> bpp_config_H0_F_Ne
| HaPC _ -> bpp_config_HaPC_F_Ne
| HaPCOC _ -> bpp_config_HaPCOC_F_Ne
let bpp_config_F nodes m = [
string bpp_config_base ;
insert nodes ;
match hyp with
| H0_NeG1 | H0_NeG1_NeC_4 | H0_NeG1_NeC_5 | H0_NeG2 | H0_NeG2_NeC_4 | H0_NeG3 | H0_NeG4
| H0_NeG4_NeC_1 | H0_NeG4_NeC_2 | H0_NeG4_NeC_6 | H0_NeG4_NeC_8 | H0_NeG5 | H0_NeG5_NeC_1
| H0_NeG5_NeC_div2 | H0_NeG5_NeC_x2 | H0_NeG6 -> bpp_config_H0_F_Ne
| HaPC_NeG1 | HaPC_NeG1_NeC_4 | HaPC_NeG1_NeC_5 | HaPC_NeG2 | HaPC_NeG2_NeC_4 | HaPC_NeG3
| HaPC_NeG4 | HaPC_NeG4_NeC_1 | HaPC_NeG4_NeC_2 | HaPC_NeG4_NeC_6 | HaPC_NeG4_NeC_8 | HaPC_NeG5
| HaPC_NeG5_NeC_1 | HaPC_NeG5_NeC_div2 | HaPC_NeG5_NeC_x2 | HaPC_NeG6 -> bpp_config_HaPC_F_Ne
| HaPCOC -> bpp_config_HaPCOC_F
bpp_config_of_model m
;
]
......@@ -5,6 +5,7 @@ open Bistro.Std
open File_formats
open Defs
open Convergence_detection
open Convergence_hypothesis
open Profile
let parse_input_data ~seed indir =
......@@ -62,43 +63,8 @@ let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~pr
*)
(* with several profiles or couples of profiles *)
let config_p = Convergence_hypothesis.bpp_config_F nodes model in
let ne_g = match model with
| H0_NeG1 | H0_NeG1_NeC_4 | H0_NeG1_NeC_5
| HaPC_NeG1_NeC_4 | HaPC_NeG1_NeC_5 | HaPC_NeG1 | HaPCOC -> 1.
| H0_NeG2 | H0_NeG2_NeC_4 | HaPC_NeG2_NeC_4 | HaPC_NeG2 -> 2.
| H0_NeG3 | HaPC_NeG3 -> 3.
| H0_NeG4 | H0_NeG4_NeC_1 | H0_NeG4_NeC_2 | H0_NeG4_NeC_6 | H0_NeG4_NeC_8
| HaPC_NeG4_NeC_1 | HaPC_NeG4_NeC_2 | HaPC_NeG4_NeC_6 | HaPC_NeG4_NeC_8 | HaPC_NeG4 -> 4.
| H0_NeG5 | H0_NeG5_NeC_1 | H0_NeG5_NeC_div2 | H0_NeG5_NeC_x2
| HaPC_NeG5_NeC_1 | HaPC_NeG5_NeC_div2| HaPC_NeG5_NeC_x2 | HaPC_NeG5 -> 5.
| H0_NeG6 | HaPC_NeG6 -> 6.
in
let ne_c = match model with
| H0_NeG1 | H0_NeG2 | H0_NeG3 | H0_NeG4
| H0_NeG5 | H0_NeG6 | HaPCOC
| HaPC_NeG1 | HaPC_NeG2 | HaPC_NeG3
| HaPC_NeG4 | HaPC_NeG5 | HaPC_NeG6 -> ne_g
| HaPC_NeG5_NeC_div2 -> ne_g /. 2.
| HaPC_NeG5_NeC_x2 -> ne_g *. 2.
| H0_NeG5_NeC_div2 -> ne_g /. 2.
| H0_NeG5_NeC_x2 -> ne_g *. 2.
| H0_NeG1_NeC_4 -> 4.
| H0_NeG1_NeC_5 -> 5.
| H0_NeG2_NeC_4 -> 4.
| H0_NeG4_NeC_1 -> 1.
| H0_NeG4_NeC_2 -> 2.
| H0_NeG4_NeC_6 -> 6.
| H0_NeG4_NeC_8 -> 8.
| H0_NeG5_NeC_1 -> 1.
| HaPC_NeG1_NeC_4 -> 4.
| HaPC_NeG1_NeC_5 -> 5.
| HaPC_NeG2_NeC_4 -> 4.
| HaPC_NeG4_NeC_1 -> 1.
| HaPC_NeG4_NeC_2 -> 2.
| HaPC_NeG4_NeC_6 -> 6.
| HaPC_NeG4_NeC_8 -> 8.
| HaPC_NeG5_NeC_1 -> 1.
in
let ne_g = neg_of_model model in
let ne_c = nec_of_model model in
let ne_a = ne_g in
let profile_f = profile.profile_f in
let profile_c = profile.profile_c in
......@@ -116,38 +82,38 @@ let derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~n
let tree_prefix = Filename.chop_extension tree in
let input_tree = input (Filename.concat tree_dir tree) in
let tree_dataset = Tree_dataset.prepare ~descr:("simulated_data." ^ tree_prefix) input_tree in
let dataset_H0_NeG5 = derive_from_model ~model:H0_NeG5 ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
let dataset_HaPCOC = derive_from_model ~model:HaPCOC ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
let dataset_HaPC_NeG5 = derive_from_model ~model:HaPC_NeG5 ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
let dataset_H0_NeG5 = derive_from_model ~model:(H0 (Fixed 5.)) ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
let dataset_HaPCOC = derive_from_model ~model:(HaPCOC (Fixed 1.)) ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
let dataset_HaPC_NeG5 = derive_from_model ~model:(HaPC (Fixed 5.)) ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
let indel_H0_NeG5 = Dataset.add_indels_to_dataset dataset_H0_NeG5 ~seed:(Random.int Int.max_value) in
let indel_HaPC_NeG5 = Dataset.add_indels_to_dataset dataset_HaPC_NeG5 ~seed:(Random.int Int.max_value) in
(* let dataset_basis_hyps = [dataset_H0_NeG5; dataset_HaPCOC; dataset_HaPC_NeG5] in *)
let models = Convergence_hypothesis.[
[ H0_NeG4 ;
HaPC_NeG4;
[ H0 (Fixed 4.) ;
HaPC (Fixed 4.);
];
if preview then
[]
else
[
H0_NeG6;
HaPC_NeG6;
H0_NeG1;
HaPC_NeG1;
H0_NeG2;
HaPC_NeG2;
HaPC_NeG4_NeC_6 ;
HaPC_NeG4_NeC_2 ;
HaPC_NeG2_NeC_4 ;
HaPC_NeG1_NeC_4 ;
HaPC_NeG4_NeC_8 ;
HaPC_NeG4_NeC_1 ;
H0_NeG4_NeC_6 ;
H0_NeG4_NeC_2 ;
H0_NeG2_NeC_4 ;
H0_NeG1_NeC_4 ;
H0_NeG4_NeC_8 ;
H0_NeG4_NeC_1 ;
H0 (Fixed 1.);
H0 (Fixed 2.);
H0 (Fixed 6.);
HaPC (Fixed 1.);
HaPC (Fixed 2.);
HaPC (Fixed 6.);
H0 (Variable (1., 4.)) ;
H0 (Variable (2., 4.)) ;
H0 (Variable (4., 1.)) ;
H0 (Variable (4., 2.)) ;
H0 (Variable (4., 6.)) ;
H0 (Variable (4., 8.)) ;
HaPC (Variable (1., 4.)) ;
HaPC (Variable (2., 4.)) ;
HaPC (Variable (4., 1.)) ;
HaPC (Variable (4., 2.)) ;
HaPC (Variable (4., 6.)) ;
HaPC (Variable (4., 8.)) ;
]
] |> List.concat
in
......@@ -392,7 +358,7 @@ let validation_command =
flag "--preview-mode" no_arg ~doc:" Preview mode"
and use_diffsel =
flag "--diffsel" no_arg ~doc:" use the diffsel method (very slow)."
and use_c60 =
and use_c60 =
flag "--c60" no_arg ~doc:" use the pcoc c60 method (slow)."
and ne_test =
flag "--ne-test" no_arg ~doc:" mode with hypothesis in test including different Ne"
......@@ -407,7 +373,7 @@ let validation_command =
and add_indels =
flag "--add-indels" no_arg ~doc:" add indels in H*NeG5"
and ns =
flag "--ns" (optional int) ~doc:"INT Number of sites to simulate (WARNING: will be multiplicated per 3)"
flag "--ns" (optional int) ~doc:"INT Number of sites to simulate (WARNING: will be multiplied per 3)"
and np =
flag "--np" (optional int) ~doc:"INT Number of available processors"
and mem =
......
......@@ -31,8 +31,8 @@ let repo rd =
item ["input_tree.nhx"] rd.input_tree ;
item ["recalculated_tree_nt.nw"] (Phyml.phyml_tree ~model:GTR ~tree:rd.input_tree phy_nt );
item ["recalculated_tree_aa.nw"] (Phyml.phyml_tree ~model:LG ~tree:rd.input_tree phy_aa );
item ["tree.H0.node_ids" ] (Tree_dataset.nodes rd.tree_dataset H0_NeG5) ;
item ["tree.Ha.node_ids" ] (Tree_dataset.nodes rd.tree_dataset HaPCOC) ;
item ["tree.H0.node_ids" ] (Tree_dataset.nodes rd.tree_dataset (H0 (Fixed 5.))) ;
item ["tree.Ha.node_ids" ] (Tree_dataset.nodes rd.tree_dataset (HaPCOC (Fixed 1.))) ;
item ["tree.only_convergent_tags.nhx" ] (Tree_dataset.tree rd.tree_dataset `Detection) ;
item ["tree.only_node_ids.nhx" ] (Tree_dataset.tree rd.tree_dataset `Simulation) ;
item ["tree.diffsel" ] (Tree_dataset.diffsel_tree rd.tree_dataset) ;
......
......@@ -17,45 +17,15 @@ let prepare ?(descr="") tree =
let nodes dataset (model : Convergence_hypothesis.t) =
dataset / selector (
match model with
|H0_NeG1 -> [ "tree.H0.node_ids" ]
|H0_NeG2 -> [ "tree.H0.node_ids" ]
|H0_NeG3 -> [ "tree.H0.node_ids" ]
|H0_NeG4 -> [ "tree.H0.node_ids" ]
|H0_NeG5 -> [ "tree.H0.node_ids" ]
|H0_NeG6 -> [ "tree.H0.node_ids" ]
|HaPC_NeG1-> [ "tree.Ha.node_ids" ]
|HaPC_NeG2-> [ "tree.Ha.node_ids" ]
|HaPC_NeG3-> [ "tree.Ha.node_ids" ]
|HaPC_NeG4-> [ "tree.Ha.node_ids" ]
|HaPC_NeG5-> [ "tree.Ha.node_ids" ]
|HaPC_NeG6-> [ "tree.Ha.node_ids" ]
|HaPC_NeG5_NeC_div2 -> [ "tree.Ha.node_ids" ]
|HaPC_NeG5_NeC_x2 -> [ "tree.Ha.node_ids" ]
|H0_NeG5_NeC_div2 -> [ "tree.H0_a.node_ids" ]
|H0_NeG5_NeC_x2 -> [ "tree.H0_a.node_ids" ]
|HaPCOC -> [ "tree.Ha.node_ids" ]
|HaPC_NeG1_NeC_5 -> [ "tree.Ha.node_ids" ]
|HaPC_NeG5_NeC_1 -> [ "tree.Ha.node_ids" ]
|H0_NeG1_NeC_5 -> [ "tree.H0_a.node_ids" ]
|H0_NeG5_NeC_1 -> [ "tree.H0_a.node_ids" ]
|HaPC_NeG1_NeC_4 -> [ "tree.Ha.node_ids" ]
|HaPC_NeG4_NeC_1 -> [ "tree.Ha.node_ids" ]
|HaPC_NeG4_NeC_8 -> [ "tree.Ha.node_ids" ]
|H0_NeG1_NeC_4 -> [ "tree.H0_a.node_ids" ]
|H0_NeG4_NeC_1 -> [ "tree.H0_a.node_ids" ]
|H0_NeG4_NeC_8 -> [ "tree.H0_a.node_ids" ]
|HaPC_NeG4_NeC_6 -> [ "tree.Ha.node_ids" ]
|HaPC_NeG4_NeC_2 -> [ "tree.Ha.node_ids" ]
|HaPC_NeG2_NeC_4 -> [ "tree.Ha.node_ids" ]
|H0_NeG4_NeC_6 -> [ "tree.H0_a.node_ids" ]
|H0_NeG4_NeC_2 -> [ "tree.H0_a.node_ids" ]
|H0_NeG2_NeC_4 -> [ "tree.H0_a.node_ids" ]
| H0 Fixed _ -> [ "tree.H0.node_ids" ]
| H0 Variable _ -> [ "tree.H0_a.node_ids" ]
| HaPC _ | HaPCOC _ -> [ "tree.Ha.node_ids" ]
)
let tree dataset mode =
dataset / selector (
match mode with
`Detection -> [ "tree.only_convergent_tags.nhx" ]
| `Detection -> [ "tree.only_convergent_tags.nhx" ]
| `Simulation -> [ "tree.only_node_ids.nhx" ]
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment