Commit 66dbf3dc authored by LANORE Vincent
Browse files

Pipeline now passes seed to add_indel script (currently a random seed per fna; to be fixed).

parent 5b8de38b
......@@ -25,10 +25,10 @@ let repo ~preview dataset_l =
)
|> List.concat
(* Build an indel-augmented variant of the dataset record [d].
   The wrapped ready dataset is passed through
   [Ready_dataset.add_indels_to_ready_dataset] with p = 0.33, the model
   prefix gets the suffix _0.33_i, and [tree_prefix] and [is_real] are
   copied from [d] unchanged.
   NOTE(review): this span is a diff hunk rendered without +/- markers.
   The header directly below and the first [let dataset] line look like
   the pre-commit versions of the lines that follow them (the later ones
   add [~seed]) -- confirm against the repository. *)
let add_indels_to_dataset d =
let add_indels_to_dataset d ~seed =
let p = 0.33 in
(* The 0.33 in the prefix is a literal and is not derived from [p];
   both must be updated together if the probability changes. *)
let model_prefix = sprintf "%s_0.33_i" d.model_prefix in
let tree_prefix = d.tree_prefix in
let is_real = d.is_real in
let dataset = Ready_dataset.add_indels_to_ready_dataset ~p d.dataset in
let dataset = Ready_dataset.add_indels_to_ready_dataset ~p ~seed d.dataset in
(* The seed stored in the returned record is a hash of the seed of [d],
   not the [~seed] argument that is handed to the indel step above. *)
{model_prefix; tree_prefix; is_real; dataset; seed = Hashtbl.hash d.seed}
......@@ -132,8 +132,8 @@ let derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~n
let dataset_H0_NeG5 = derive_from_model ~model:H0_NeG5 ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
let dataset_HaPCOC = derive_from_model ~model:HaPCOC ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
let dataset_HaPC_NeG5 = derive_from_model ~model:HaPC_NeG5 ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
let indel_H0_NeG5 = Dataset.add_indels_to_dataset dataset_H0_NeG5 in
let indel_HaPC_NeG5 = Dataset.add_indels_to_dataset dataset_HaPC_NeG5 in
let indel_H0_NeG5 = Dataset.add_indels_to_dataset dataset_H0_NeG5 ~seed:(Random.int Int.max_value) in
let indel_HaPC_NeG5 = Dataset.add_indels_to_dataset dataset_HaPC_NeG5 ~seed:(Random.int Int.max_value) in
let dataset_basis_hyps = [dataset_H0_NeG5; dataset_HaPCOC; dataset_HaPC_NeG5] in
let models = Convergence_hypothesis.[
[
......
......@@ -68,24 +68,25 @@ let paste d1 d2 =
let fna_infos = Some (paste_fna_infos ~fna_infos_l) in
let ready_dataset = of_raw {Raw_dataset.input_tree=r_d1.input_tree ; fna; fna_infos} in
ready_dataset
(* Build a workflow, described as add_indels, that runs the
   [Scripts.add_indels] script with python inside the
   carinerey/python_basics docker image (tag 07252018), using [fna] as
   its input and the workflow destination as its output.
   NOTE(review): this span is a diff hunk rendered without +/- markers.
   The first three lines below look like the pre-commit header; the three
   lines after them repeat it with the added [~seed] argument -- confirm
   against the repository. *)
let add_indels_to_fna ~(p:float) (fna:nucleotide_fasta workflow) : nucleotide_fasta workflow =
let env = docker_image ~account:"carinerey" ~name:"python_basics" ~tag:"07252018" () in
workflow ~descr:("add_indels") [
let add_indels_to_fna ~(p:float) ~(seed:int) (fna:nucleotide_fasta workflow) : nucleotide_fasta workflow =
let env = docker_image ~account:"carinerey" ~name:"python_basics" ~tag:"07252018" () in
workflow ~descr:("add_indels") [
cmd "python" ~env [
(* Script text comes from [Scripts.add_indels]; presumably [file_dump]
   writes it to a file whose path becomes the first python argument --
   TODO confirm against the bistro shell DSL. *)
file_dump (string Scripts.add_indels) ;
(* -p receives [p]. *)
opt "-p" float p;
(* -a receives the input fasta as a workflow dependency. *)
opt "-a" dep fna;
(* -o receives the workflow destination path. *)
opt "-o" ident dest;
(* -r receives [seed]; the commit message describes this as passing the
   seed to the script. *)
opt "-r" int seed;
(* Bare -c flag; its meaning is defined by the script and is not visible
   from this hunk. *)
string "-c";
]
]
(* Return a ready dataset equal to [d] except that its fna has gone
   through [add_indels_to_fna]; [input_tree] and [fna_infos] are carried
   over unchanged from the raw form of [d].
   NOTE(review): this span is a diff hunk rendered without +/- markers.
   The header directly below and the first [let fna] line look like the
   pre-commit versions of the lines that follow them -- confirm against
   the repository. *)
let add_indels_to_ready_dataset ~p d =
let add_indels_to_ready_dataset ~p ~seed d =
let r_d = to_raw d in
let fna = add_indels_to_fna ~p r_d.fna in
(* VL: one seed per fna. FIXME: should be computed from global seed and task
   info instead.
   NOTE(review): the [~seed] argument of this function is never used. A
   fresh value is drawn from [Random] on every call, so the seed given to
   the script does not depend on the arguments and will differ between
   runs unless the global [Random] state is fixed elsewhere. *)
let fna = add_indels_to_fna ~p ~seed:(Random.int Int.max_value) r_d.fna in
let fna_infos = r_d.fna_infos in
let ready_dataset = of_raw {Raw_dataset.input_tree=r_d.input_tree ; fna; fna_infos} in
ready_dataset
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment