Commit 155bd5df authored by Carine Rey
Browse files

add HaPC

parent e599f322
......@@ -4,19 +4,26 @@ open Bistro.Std
(** Simulation hypotheses. Each constructor is turned into its name by
    [string_of_model] and selects a model section in [bpp_config_F].
    NOTE(review): this listing comes from a diff view and appears to mix the
    constructors removed by the commit ([Ha], [H0NeSmall], [HaNeSmall],
    [H0NeBig], [HaNeBig]) with the ones it introduces ([HaPC], [HaPCOC],
    and the [_NeSmall] / [_NeBig] variants) — confirm against the real file. *)
type t =
| H0
| Ha
| H0NeSmall
| HaNeSmall
| H0NeBig
| HaNeBig
| HaPC
| HaPCOC
| H0_NeSmall
| HaPCOC_NeSmall
| HaPC_NeSmall
| H0_NeBig
| HaPCOC_NeBig
| HaPC_NeBig
(** [string_of_model m] returns the name of the constructor [m] as a string
    (the string is spelled exactly like the constructor). *)
let string_of_model m = match m with
| H0 -> "H0"
| Ha -> "Ha"
| H0NeSmall -> "H0NeSmall"
| HaNeSmall -> "HaNeSmall"
| H0NeBig -> "H0NeBig"
| HaNeBig -> "HaNeBig"
(* NOTE(review): [H0] is matched a second time here, so this arm can never be
   reached; it looks like the diff view interleaving old and new arms. *)
| H0 -> "H0"
| HaPC -> "HaPC"
| HaPCOC -> "HaPCOC"
| H0_NeSmall -> "H0_NeSmall"
| HaPCOC_NeSmall -> "HaPCOC_NeSmall"
| HaPC_NeSmall -> "HaPC_NeSmall"
| H0_NeBig -> "H0_NeBig"
| HaPCOC_NeBig -> "HaPCOC_NeBig"
| HaPC_NeBig -> "HaPC_NeBig"
(** [assign k v] builds a [key=value] template fragment: the literal key [k]
    and the already-built fragment [v], joined with ["="]. *)
let assign key value =
  let lhs = string key in
  seq ~sep:"=" [ lhs; value ]
......@@ -30,56 +37,24 @@ nonhomogeneous = general
rate_distribution=Constant()
|}
(** Configuration text selected by [bpp_config] for [H0]: it declares a single
    model, [model1], and takes the root frequencies from that model. *)
let bpp_config_H0 = {|
model1=Codon_AAFit(model=K80, fitness=FromModel(model=LGL08_CAT_C1(nbCat=10)))
nonhomogeneous.root_freq=FromModel(model=$(model1))
|}
(** Configuration text selected by [bpp_config] for [Ha]: it declares
    [model1], [modelT] (a [OneChange] wrapper around a [Codon_AAFit] model)
    and [modelC], and takes the root frequencies from [model1]. *)
let bpp_config_Ha = {|
model1=Codon_AAFit(model=K80, fitness=FromModel(model=LGL08_CAT_C2(nbCat=60)))
modelT=OneChange(model=Codon_AAFit(model=K80, fitness=FromModel(model=LGL08_CAT_C7(nbCat=10))),register=DnDs, numReg=2)
modelC=Codon_AAFit(model=K80, fitness=FromModel(model=LGL08_CAT_C7(nbCat=10)))
nonhomogeneous.root_freq=FromModel(model=$(model1))
|}
(** Configuration text selected by [bpp_config] for [H0NeSmall] and
    [H0NeBig]: it declares [model1] and [modelC], whose [Ns] parameters refer
    to the [$(NE_1)] and [$(NE_C)] variables (defined outside this text). *)
let bpp_config_H0_Ne = {|
model1=Codon_AAFit(model=K80, fitness=FromModel(model=LGL08_CAT_C2(nbCat=60)), Ns=$(NE_1))
modelC=Codon_AAFit(model=K80, fitness=FromModel(model=LGL08_CAT_C2(nbCat=60)), Ns=$(NE_C))
nonhomogeneous.root_freq=FromModel(model=$(model1))
|}
(** Configuration text selected by [bpp_config] for [HaNeSmall] and
    [HaNeBig]: same three models as [bpp_config_Ha], each with an [Ns]
    parameter referring to [$(NE_1)], [$(NE_T)] or [$(NE_C)] (variables
    defined outside this text). *)
let bpp_config_Ha_Ne = {|
model1=Codon_AAFit(model=K80, fitness=FromModel(model=LGL08_CAT_C2(nbCat=60)), Ns=$(NE_1))
modelT=OneChange(model=Codon_AAFit(model=K80, fitness=FromModel(model=LGL08_CAT_C7(nbCat=10))),register=DnDs, numReg=2, Ns=$(NE_T))
modelC=Codon_AAFit(model=K80, fitness=FromModel(model=LGL08_CAT_C7(nbCat=10)), Ns=$(NE_C))
nonhomogeneous.root_freq=FromModel(model=$(model1))
|}
(** [bpp_config nodes hyp] returns the list of template fragments making up
    the configuration for [hyp]: the shared [bpp_config_base] text, the
    [insert nodes] fragment, then the model section chosen by [hyp]. *)
let bpp_config nodes hyp =
  (* The "small" and "big" Ne variants share one model section. *)
  let hyp_section =
    match hyp with
    | H0 -> bpp_config_H0
    | Ha -> bpp_config_Ha
    | H0NeSmall | H0NeBig -> bpp_config_H0_Ne
    | HaNeSmall | HaNeBig -> bpp_config_Ha_Ne
  in
  [ string bpp_config_base; insert nodes; string hyp_section ]
(** Profile-based model section used by [bpp_config_F] for [H0]: a single
    model, [model1], whose fitness is read from column [$(COL_M1)] of
    [$(PROFILE_F)], also used for the root frequencies. Lines are joined
    with a newline. *)
let bpp_config_H0_F =
  let line text = seq [ string text ] in
  seq ~sep:"\n" [
    line "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))";
    line "nonhomogeneous.root_freq=FromModel(model=$(model1))";
  ]
(** Profile-based model section with three models: [model1] (column
    [$(COL_M1)]), [modelT] (a [OneChange] wrapper) and [modelC] (both on
    column [$(COL_M2)]); [bpp_config_F] selects it for [HaPCOC].
    NOTE(review): two [let] headers follow because the diff view shows both
    the old name ([bpp_config_Ha_F]) and the new one ([bpp_config_HaPCOC_F]);
    presumably only the second remains after the commit — confirm. *)
let bpp_config_Ha_F = seq ~sep:"\n" [
let bpp_config_HaPCOC_F = seq ~sep:"\n" [
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))" ] ;
seq [string "modelT=OneChange(model=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2))), register=DnDs, numReg=2)" ] ;
seq [string "modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
(** Profile-based model section used by [bpp_config_F] for [HaPC]: [model1]
    reads column [$(COL_M1)], while [modelT] and [modelC] are the same plain
    [Codon_AAFit] model on column [$(COL_M2)] (no [OneChange] wrapper).
    Lines are joined with a newline. *)
let bpp_config_HaPC_F =
  let line text = seq [ string text ] in
  seq ~sep:"\n" [
    line "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)))";
    line "modelT=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))";
    line "modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))";
    line "nonhomogeneous.root_freq=FromModel(model=$(model1))";
  ]
let bpp_config_H0_F_Ne = seq ~sep:"\n" [
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))" ] ;
......@@ -87,22 +62,32 @@ seq [string "model2=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F),
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
(** Profile-based model section with [Ns] parameters: [model1] uses
    [$(NE_1)], [modelT] (a [OneChange] wrapper) uses [$(NE_T)] and [modelC]
    uses [$(NE_C)]; [bpp_config_F] selects it for the [HaPCOC_Ne*] hypotheses.
    NOTE(review): two [let] headers follow because the diff view shows both
    the old name ([bpp_config_Ha_F_Ne]) and the new one
    ([bpp_config_HaPCOC_F_Ne]); presumably only the second remains after the
    commit — confirm. *)
let bpp_config_Ha_F_Ne = seq ~sep:"\n" [
let bpp_config_HaPCOC_F_Ne = seq ~sep:"\n" [
seq [string "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))" ] ;
seq [string "modelT=OneChange(model=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2))), register=DnDs, numReg=2, Ns=$(NE_T))" ] ;
seq [string "modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)), Ns=$(NE_C))" ] ;
seq [string "nonhomogeneous.root_freq=FromModel(model=$(model1))" ] ;
]
(** Profile-based model section selected by [bpp_config_F] for
    [HaPC_NeSmall]: [model1] (column [$(COL_M1)], [Ns=$(NE_1)]), [modelT] and
    [modelC] (column [$(COL_M2)]). Lines are joined with a newline.
    NOTE(review): [modelT] carries no [Ns] parameter here, whereas
    [bpp_config_HaPCOC_F_Ne] passes [Ns=$(NE_T)] — confirm this is intended. *)
let bpp_config_HaPC_F_Ne =
  let line text = seq [ string text ] in
  seq ~sep:"\n" [
    line "model1=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M1)), Ns=$(NE_1))";
    line "modelT=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)))";
    line "modelC=Codon_AAFit(model=K80, fitness=Empirical(file=$(PROFILE_F), col=$(COL_M2)), Ns=$(NE_C))";
    line "nonhomogeneous.root_freq=FromModel(model=$(model1))";
  ]
(** [bpp_config_F nodes hyp] returns the list of template fragments making up
    the profile-based configuration for [hyp]: the shared [bpp_config_base]
    text, the [insert nodes] fragment, then the model section chosen by [hyp].

    The "small" and "big" Ne variants of a hypothesis share one model
    section.

    NOTE(review): the first six arms ([H0] .. [HaNeBig]) look like the
    pre-commit arms kept by the diff view, which makes the second [H0] arm
    unreachable — confirm against the real file. *)
let bpp_config_F nodes hyp = [
string bpp_config_base ;
insert nodes ;
match hyp with
| H0 -> bpp_config_H0_F
| Ha -> bpp_config_Ha_F
| H0NeSmall -> bpp_config_H0_F_Ne
| HaNeSmall -> bpp_config_Ha_F_Ne
| H0NeBig -> bpp_config_H0_F_Ne
| HaNeBig -> bpp_config_Ha_F_Ne
| H0 -> bpp_config_H0_F
| HaPC -> bpp_config_HaPC_F
| HaPCOC -> bpp_config_HaPCOC_F
| H0_NeSmall -> bpp_config_H0_F_Ne
| HaPC_NeSmall -> bpp_config_HaPC_F_Ne
| HaPCOC_NeSmall -> bpp_config_HaPCOC_F_Ne
| H0_NeBig -> bpp_config_H0_F_Ne
(* Fixed: this arm used the HaPCOC section, so HaPC_NeBig was simulated with
   the OneChange model; it now matches HaPC_NeSmall. *)
| HaPC_NeBig -> bpp_config_HaPC_F_Ne
| HaPCOC_NeBig -> bpp_config_HaPCOC_F_Ne
;
]
......@@ -55,10 +55,12 @@ let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile_f ~
(* with several profiles or couples of profiles *)
let config_p = Convergence_hypothesis.bpp_config_F nodes model in
let ne_c = match model with
| H0NeSmall -> 0.5
| HaNeSmall -> 0.5
| H0NeBig -> 6.
| HaNeBig -> 6.
| H0_NeSmall -> 0.5
| HaPCOC_NeSmall -> 0.5
| HaPC_NeSmall -> 0.5
| H0_NeBig -> 6.
| HaPC_NeBig -> 6.
| HaPCOC_NeBig -> 6.
| _ -> 1.
in
let fna = Bppsuite.bppseqgen_multi_profiles ~descr ~nb_sites ~tree ~config:config_p ~profile_f ~ne_c in
......@@ -72,19 +74,22 @@ let derive_from_tree ~tree_dir ~tree ~profile_f ~preview =
let input_tree = input (Filename.concat tree_dir tree) in
let tree_dataset = Tree_dataset.prepare input_tree in
let models = Convergence_hypothesis.[
H0;
Ha;
H0NeSmall;
HaNeSmall;
H0NeBig ;
HaNeBig ;
H0 ;
HaPC ;
HaPCOC ;
H0_NeSmall ;
HaPCOC_NeSmall ;
HaPC_NeSmall ;
H0_NeBig ;
HaPCOC_NeBig ;
HaPC_NeBig ;
] in
let dataset_per_hypo = List.map models ~f:(fun model ->
derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile_f ~preview
) in
let ready_dataset_H0 = (derive_from_model ~model:H0 ~input_tree ~tree_dataset ~tree_prefix ~profile_f ~preview).dataset in
let ready_dataset_Ha = (derive_from_model ~model:Ha ~input_tree ~tree_dataset ~tree_prefix ~profile_f ~preview).dataset in
let concat_H0Ha = {Dataset.model_prefix="H0+Ha"; tree_prefix; dataset = Ready_dataset.paste ready_dataset_H0 ready_dataset_Ha} in
let ready_dataset_HaPCOC = (derive_from_model ~model:HaPCOC ~input_tree ~tree_dataset ~tree_prefix ~profile_f ~preview).dataset in
let concat_H0Ha = {Dataset.model_prefix="H0+HaPCOC"; tree_prefix; dataset = Ready_dataset.paste ready_dataset_H0 ready_dataset_HaPCOC} in
let dataset_concat_hypos = [concat_H0Ha;] in
List.concat [ dataset_per_hypo ; dataset_concat_hypos ]
......
......@@ -21,7 +21,7 @@ let repo rd =
Repo.[
item ["input_tree.nhx"] rd.input_tree ;
item ["tree.H0.node_ids" ] (Tree_dataset.nodes rd.tree_dataset H0) ;
item ["tree.Ha.node_ids" ] (Tree_dataset.nodes rd.tree_dataset Ha) ;
item ["tree.Ha.node_ids" ] (Tree_dataset.nodes rd.tree_dataset HaPCOC) ;
item ["tree.only_convergent_tags.nhx" ] (Tree_dataset.tree rd.tree_dataset `Detection) ;
item ["tree.only_node_ids.nhx" ] (Tree_dataset.tree rd.tree_dataset `Simulation) ;
item ["tree.diffsel" ] (Tree_dataset.diffsel_tree rd.tree_dataset) ;
......
......@@ -17,12 +17,15 @@ let prepare tree =
(** [nodes dataset model] selects, inside [dataset], the node-id file to use
    for [model]: ["tree.H0.node_ids"] for [H0], ["tree.H0_a.node_ids"] for
    the Ne variants of [H0], and ["tree.Ha.node_ids"] for every alternative
    hypothesis.
    NOTE(review): the first six arms look like the pre-commit arms kept by
    the diff view (the second [H0] arm is then unreachable) — confirm against
    the real file. *)
let nodes dataset (model : Convergence_hypothesis.t) =
dataset / selector (
match model with
| H0 -> [ "tree.H0.node_ids" ]
| Ha -> [ "tree.Ha.node_ids" ]
| H0NeSmall -> [ "tree.H0_a.node_ids" ]
| HaNeSmall -> [ "tree.Ha.node_ids" ]
| H0NeBig -> [ "tree.H0_a.node_ids" ]
| HaNeBig -> [ "tree.Ha.node_ids" ]
|H0 -> [ "tree.H0.node_ids" ]
|HaPC -> [ "tree.Ha.node_ids" ]
|HaPCOC -> [ "tree.Ha.node_ids" ]
|HaPCOC_NeSmall -> [ "tree.Ha.node_ids" ]
|HaPC_NeSmall -> [ "tree.Ha.node_ids" ]
|HaPCOC_NeBig -> [ "tree.Ha.node_ids" ]
|HaPC_NeBig -> [ "tree.Ha.node_ids" ]
|H0_NeSmall -> [ "tree.H0_a.node_ids" ]
|H0_NeBig -> [ "tree.H0_a.node_ids" ]
)
let tree dataset mode =
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment