Commit 705914d1 authored by Carine Rey
Browse files

create a fixed seed by dataset

parent 110b7fe9
......@@ -3,5 +3,5 @@
(executable
((name reviewphiltrans_app)
(public_name reviewphiltrans)
(libraries (reviewphiltrans))
(libraries (str reviewphiltrans))
))
......@@ -6,6 +6,7 @@ type t = {
tree_prefix : string ;
is_real: bool ;
dataset : Ready_dataset.t ;
seed : int ;
}
let repo ~preview dataset_l =
......
......@@ -7,7 +7,7 @@ open Defs
open Convergence_detection
open Profile
let parse_input_data indir =
let parse_input_data ~seed indir =
let datasets = Array.to_list @@ Sys.readdir indir in
List.map datasets ~f:(fun dataset_prefix ->
let files = Array.to_list @@ Sys.readdir (Filename.concat indir dataset_prefix ) in
......@@ -37,7 +37,8 @@ let parse_input_data indir =
let dataset = {Dataset.model_prefix = tree_prefix;
is_real = true;
tree_prefix = dataset_prefix;
dataset = Ready_dataset.of_raw ~descr:("real_data." ^ tree_prefix) raw_dataset
dataset = Ready_dataset.of_raw ~descr:("real_data." ^ tree_prefix) raw_dataset;
seed;
} in
[dataset]
else
......@@ -45,7 +46,18 @@ let parse_input_data indir =
)
|> List.concat
let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns =
(** [calc_fixed_seed ~str seed] derives a deterministic seed from the global
    [seed] and the description string [str].

    The MD5 hex digest of [str] is stripped of every non-decimal character,
    prefixed with ["1"] (so the identifier is never empty and never starts
    with [0]) and truncated to at most 10 characters. Dividing that
    identifier by [10 ^ length] gives a factor in \[0.1, 0.2); the result is
    [int_of_float (abs (seed * factor + 1))].

    NOTE(review): since the factor is below 0.2, small global seeds collapse
    to the same derived seed whatever [str] is (any seed in 0..5 yields 1).
    Confirm that callers are expected to pass large seeds. *)
let calc_fixed_seed ~(str:string) (seed:int) : int =
  let str_digest = Md5.to_hex (Md5.digest_string str) in
  let id = "1" ^ (Str.global_replace (Str.regexp "[^0-9]+") "" str_digest) in
  (* A digest can contain fewer than 9 decimal digits, in which case [id] is
     shorter than 10 characters; clamp the length so that [String.sub] cannot
     raise [Invalid_argument]. Inputs that already worked give the same
     result as before. *)
  let id = String.sub id 0 (min 10 (String.length id)) in
  let id_int = float_of_int (int_of_string id) in
  let id_len = float_of_int (String.length id) in
  let seed_f = float_of_int seed in
  (* Normalising by 10^len keeps the scaling factor in [0.1, 0.2) whatever
     the number of digits actually kept. *)
  let size_str_id = 10. ** id_len in
  let res = Float.abs (Pervasives.(seed_f *. id_int /. size_str_id +. 1.)) in
  int_of_float(res)
let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed =
let model_prefix = Convergence_hypothesis.string_of_model model in
let nb_sites = ns in
let nodes = Tree_dataset.nodes tree_dataset model in
......@@ -110,7 +122,8 @@ let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~pr
in
let profile_f = profile.profile_f in
let profile_c = profile.profile_c in
let seed = Random.int Int.max_value in
(*let seed = Random.int Int.max_value in*)
let seed = calc_fixed_seed ~str:descr seed in
printf "Bppseqgen seed: %s %s %i\n" model_prefix tree_prefix seed;
let run_fna = Bppsuite.bppseqgen_multi_profiles ~descr ~nb_sites ~tree ~config:config_p ~profile_f ~profile_c ~ne_c ~ne_a ~seed in
let fna = Bppsuite.bppseqgen_multi_profiles_get_fa run_fna in
......@@ -118,15 +131,15 @@ let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~pr
let faa = Bppsuite.fna2faa ~fna in
let ready_dataset = { Ready_dataset.input_tree = input_tree ; tree_dataset ; fna; faa; fna_infos} in
{ Dataset.model_prefix; is_real= false; tree_prefix; dataset = ready_dataset }
{ Dataset.model_prefix; is_real= false; tree_prefix; dataset = ready_dataset; seed }
let derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test =
let derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test ~seed =
let tree_prefix = Filename.chop_extension tree in
let input_tree = input (Filename.concat tree_dir tree) in
let tree_dataset = Tree_dataset.prepare ~descr:("simulated_data." ^ tree_prefix) input_tree in
let ready_dataset_H0_NeG5 = derive_from_model ~model:H0_NeG5 ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns in
let ready_dataset_HaPCOC = derive_from_model ~model:HaPCOC ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns in
let ready_dataset_HaPC_NeG5 = derive_from_model ~model:HaPC_NeG5 ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns in
let ready_dataset_H0_NeG5 = derive_from_model ~model:H0_NeG5 ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
let ready_dataset_HaPCOC = derive_from_model ~model:HaPCOC ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
let ready_dataset_HaPC_NeG5 = derive_from_model ~model:HaPC_NeG5 ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed in
let ready_dataset_basis_hyps = [ready_dataset_H0_NeG5; ready_dataset_HaPCOC; ready_dataset_HaPC_NeG5] in
let models = Convergence_hypothesis.[
[
......@@ -165,16 +178,16 @@ let derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~n
] |> List.concat
in
let dataset_per_hypo = List.map models ~f:(fun model ->
derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns
derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile ~preview ~ns ~seed
) in
let _concat_H0HaPCOC = {Dataset.model_prefix="H0_NeG5+HaPCOC"; tree_prefix; is_real = false; dataset = Ready_dataset.paste ready_dataset_H0_NeG5.dataset ready_dataset_HaPCOC.dataset} in
let concat_H0HaPC = {Dataset.model_prefix="H0_NeG5+HaPC_NeG5"; tree_prefix; is_real = false; dataset = Ready_dataset.paste ready_dataset_H0_NeG5.dataset ready_dataset_HaPC_NeG5.dataset} in
let _concat_H0HaPCOC = {Dataset.model_prefix="H0_NeG5+HaPCOC"; tree_prefix; is_real = false; dataset = Ready_dataset.paste ready_dataset_H0_NeG5.dataset ready_dataset_HaPCOC.dataset; seed} in
let concat_H0HaPC = {Dataset.model_prefix="H0_NeG5+HaPC_NeG5"; tree_prefix; is_real = false; dataset = Ready_dataset.paste ready_dataset_H0_NeG5.dataset ready_dataset_HaPC_NeG5.dataset; seed} in
let dataset_concat_hypos = if use_concat then [concat_H0HaPC;] else [] in
List.concat [ ready_dataset_basis_hyps ; dataset_per_hypo ; dataset_concat_hypos ]
let derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test =
let derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test ~seed =
List.map trees ~f:(fun tree ->
derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test)
derive_from_tree ~tree_dir ~tree ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test ~seed)
|> List.concat
......@@ -244,12 +257,13 @@ let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview =
let tree_conv = Tree_dataset.topological_tree dataset.dataset.tree_dataset in
let w_every = if preview then 1 else 1 in
let n_cycles = if preview then 10 else 2000 in
let seed = dataset.seed + 10 in
match det_meth with
| `Pcoc -> `Pcoc (Pcoc.pcoc ~catx_est:10 ~plot_complete:true ~gamma:false ~faa ~tree:tree_sc)
| `Pcoc_gamma -> `Pcoc_gamma (Pcoc.pcoc ~catx_est:10 ~plot_complete: true ~gamma:true ~faa ~tree:tree_sc)
| `Pcoc_C60 -> `Pcoc_C60 (Pcoc.pcoc ~catx_est:60 ~plot_complete: true ~gamma:false ~faa ~tree:tree_sc)
| `Tdg09 -> `Tdg09 (Tamuri.tdg09 ~faa ~tree:tree_sc)
| `Diffsel -> `Diffsel (Diffsel.diffsel ~phy_n ~tree:diffsel_tree ~w_every ~n_cycles ~id:1 ~tag:"master_a4b5" ~seed:(Random.int Int.max_value) ())
| `Diffsel -> `Diffsel (Diffsel.diffsel ~phy_n ~tree:diffsel_tree ~w_every ~n_cycles ~id:1 ~tag:"master_a4b5" ~seed ())
| `Identical_LG -> `Identical_LG (Identical.identical ~faa ~tree_id ~tree_sc ~prot_model:"LG08")
| `Identical_WAG -> `Identical_WAG (Identical.identical ~faa ~tree_id ~tree_sc ~prot_model:"WAG01")
| `Topological_LG -> `Topological_LG (Topological.topological ~faa ~tree:tree_id ~tree_conv ~prot_model:"LG08")
......@@ -305,7 +319,7 @@ let derive_profile ?(indir = "") ?(ns = 0) ~preview ~fast_mode ~no_Ne ~ne_test ~
let trees = [tree] in
let tree_prefix = Filename.chop_extension tree in
let dataset_l =
derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test in
derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test ~seed in
let dataset_results_l =
if only_simu then
[]
......@@ -357,17 +371,17 @@ let logger =
time_logger#logger ;
]
let detection_main ~outdir ~indir ?(np = 2) ?(mem = 2) ~preview ~fast_mode () =
let dataset_l = parse_input_data indir in
let detection_main ~outdir ~indir ?(np = 2) ?(mem = 2) ~preview ~fast_mode ?(seed = Random.int Int.max_value) () =
let dataset_l = parse_input_data ~seed indir in
let dataset_results_l = derive_det ~dataset_l ~preview ~fast_mode in
let repo = repo_of_dataset_results_l ~dataset_results_l in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
let simulation_main ~outdir ?(ns = 0) ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn ~preview ~use_concat ~no_Ne ~no_HaPC ~seed () =
let simulation_main ~outdir ?(ns = 0) ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn ~preview ~use_concat ~no_Ne ~no_HaPC ?(seed = Random.int Int.max_value) () =
let nb_sites = if ns = 0 then (if preview then 20 else 50) else ns in
let profile = Profile.profile_l_of_splitted_profile ~nb_cat:1 ~nb_sites profile_fn in
let trees = Array.to_list @@ Sys.readdir tree_dir in
let dataset_l = derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test:false in
let dataset_l = derive_sim ~tree_dir ~trees ~profile ~preview ~use_concat ~ns ~no_Ne ~no_HaPC ~ne_test:false ~seed in
let repo = Dataset.repo dataset_l ~preview in
Repo.build ~outdir ~np ~mem:(`GB mem) ~logger repo
......@@ -379,7 +393,7 @@ let validation_main ~outdir ?(indir = "") ?(ns = 0) ?(np = 2) ?(mem = 2) ~previe
let profile = Profile.profile_l_of_splitted_profile ~nb_cat:3 ~nb_sites profile_fn in
let sim_repo_l = derive_profile ~indir ~ns ~preview ~fast_mode ~no_Ne ~ne_test ~no_HaPC ~tree_dir ~profile ~use_concat ~only_simu ~seed () in
(* real trees *)
let indir_dataset_l = if indir = "" then [] else parse_input_data indir in
let indir_dataset_l = if indir = "" then [] else parse_input_data ~seed indir in
let dataset_l = indir_dataset_l in
let dataset_results_l =
if only_simu then
......@@ -424,7 +438,7 @@ let simulation_command =
and seed =
flag "--seed" (optional int) ~doc:"INT Global seed"
in
simulation_main ~outdir ?ns ?np ?mem ~no_Ne ~no_HaPC ~tree_dir ~profile_fn ~preview ~use_concat ~seed
simulation_main ~outdir ?ns ?np ?mem ~no_Ne ~no_HaPC ~tree_dir ~profile_fn ~preview ~use_concat ?seed
]
let detection_command =
......@@ -444,8 +458,10 @@ let detection_command =
flag "--np" (optional int) ~doc:"INT Number of available processors"
and mem =
flag "--mem" (optional int) ~doc:"INT Available memory (in GB)"
and seed =
flag "--seed" (optional int) ~doc:"INT Global seed"
in
detection_main ~outdir ~indir ?np ?mem ~preview ~fast_mode
detection_main ~outdir ~indir ?np ?mem ~preview ~fast_mode ?seed
]
let validation_command =
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment