Commit e599f322 authored by Carine Rey's avatar Carine Rey
Browse files

Merge branch 'real_input_data'

parents f8512d1e a2a02222
......@@ -33,6 +33,16 @@ test:
mv dag.dot dagtest.dot && \
dot -Tsvg dagtest.dot -o dagtest.svg
# Preview run of the pipeline on the real data set, followed by a PDF
# rendering of the dot file it leaves behind.
# Every command is chained with `&& \` so the whole recipe runs in a single
# shell: `mv` and `dot` must execute inside example/ (presumably where the
# pipeline writes dag.dot -- confirm), and must not run if the pipeline fails.
test_real:
	cd example && \
	reviewphiltrans pipeline --outdir outdir_real_data --indir real_data --tree-dir trees --profile-fn aa_fitness/tiragesBloomK30.tsv --preview --np 4 && \
	mv dag.dot dagtest_real.dot && \
	dot -Tpdf dagtest_real.dot -o dagtest_real.pdf
# Full (non-preview) pipeline run on the real data set, executed from example/.
real:
	cd example && reviewphiltrans pipeline --outdir outdir_real_data --indir real_data --tree-dir trees --profile-fn aa_fitness/tiragesBloomK30.tsv --np 4
analyses_test:
cd example && \
reviewphiltrans pipeline --outdir outdir_analyses_test --tree-dir trees4analyses --profile-fn aa_fitness/tiragesBloomK30.tsv --preview --np 4 && \
......
This diff is collapsed.
((((((((((((((((((Ele.bald:1[&&NHX:Condition=1],Ele.bal2:1[&&NHX:Condition=1]):1[&&NHX:Condition=1],Ele.bal4:1[&&NHX:Condition=1]):1[&&NHX:Condition=1],(Ele.vivi:1[&&NHX:Condition=1],Ele.vivA:1[&&NHX:Condition=1]):1[&&NHX:Condition=1]):1[&&NHX:Condition=1:Transition=1],Ele.bal3:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],Ele.viv2:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],Ele.fici:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],(Ele.grac:1[&&NHX:Condition=0],Ele.lim2:1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0],(Ele.rost:1[&&NHX:Condition=0],((Ele.limo:1[&&NHX:Condition=0],Ele.pal2:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],(Ele.acut:1[&&NHX:Condition=0],(Ele.palu:1[&&NHX:Condition=0],(Ele.gra2:1[&&NHX:Condition=0],Ele.lim3:1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0],Ele.geni:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],Ele.quan:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],((Abildgaar:1[&&NHX:Condition=0],Bulbostyl:1[&&NHX:Condition=1:Transition=1]):1[&&NHX:Condition=0],(Actinosch:1[&&NHX:Condition=0],(Fimb.lit:1[&&NHX:Condition=0],((Fimb.dic:1[&&NHX:Condition=0],Fimb.fe2:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],(Fimb.li2:1[&&NHX:Condition=1],(Fimb.di2:1[&&NHX:Condition=1],Fimb.fer:1[&&NHX:Condition=1]):1[&&NHX:Condition=1]):1[&&NHX:Condition=1:Transition=1]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0],(Bolboscho:1[&&NHX:Condition=0],((Fuir.abn:1[&&NHX:Condition=0],Fuir.umb:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],(((Scho.lac:1[&&NHX:Condition=0],Scho.val:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],Scho.muc:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],((((Hellmut1:1[&&NHX:Condition=0],Isolepis:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],Hellmut2:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],Scirpoid:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],(Cyp.spha:1[&&NHX:Condition=0],(Cyp.alt3:1[&&NH
X:Condition=0],(Cyp.era6:1[&&NHX:Condition=0],((Cyp.era1:1[&&NHX:Condition=0],Cyp.fusc:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],(Cyp.pulc:1[&&NHX:Condition=0],(Cyp.capi:1[&&NHX:Condition=1],(Volkiell:1[&&NHX:Condition=1],(Cyp.ust2:1[&&NHX:Condition=1],(Remirea:1[&&NHX:Condition=1],(Cyp.iria:1[&&NHX:Condition=1],((Killinga:1[&&NHX:Condition=1],Pycreus:1[&&NHX:Condition=1]):1[&&NHX:Condition=1],((Cyp.long:1[&&NHX:Condition=1],Cyp.rotu:1[&&NHX:Condition=1]):1[&&NHX:Condition=1],(Cyp.papy:1[&&NHX:Condition=1],Cyp.ustu:1[&&NHX:Condition=1]):1[&&NHX:Condition=1]):1[&&NHX:Condition=1]):1[&&NHX:Condition=1]):1[&&NHX:Condition=1]):1[&&NHX:Condition=1]):1[&&NHX:Condition=1]):1[&&NHX:Condition=1]):1[&&NHX:Condition=1:Transition=1]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0],(Blysmus:1[&&NHX:Condition=0],((Eriophor:1[&&NHX:Condition=0],Scirpus:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],(((Schoenox:1[&&NHX:Condition=0],Uncin.un:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],Uncin.ph:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],(Carex.com:1[&&NHX:Condition=0],(Carex.hal:1[&&NHX:Condition=0],(Carex.ber:1[&&NHX:Condition=0],Carex.pen:1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0],((Rhy.alba:1[&&NHX:Condition=0],Rhy.grac:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],(Rhy.albi:1[&&NHX:Condition=0],(Rhy.rubr:1[&&NHX:Condition=1],(Rhy.glob:1[&&NHX:Condition=1],Rhy.glo2:1[&&NHX:Condition=1]):1[&&NHX:Condition=1]):1[&&NHX:Condition=1:Transition=1]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0],Carpha:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],(Schoenus:1[&&NHX:Condition=0],(Baumea:1[&&NHX:Condition=0],Machaeri:1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[&&NHX:Condition=0]):1[
&&NHX:Condition=0],Cladium:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],(Coleochlo:1[&&NHX:Condition=0],Microdra:1[&&NHX:Condition=0]):1[&&NHX:Condition=0],Chrysithr:1[&&NHX:Condition=0]);
\ No newline at end of file
......@@ -118,6 +118,8 @@ let conf_file_bppseqman_fna2faa ~fna =
assign "output.sequence.file" dest ;
string {|alphabet=Codon(letter=DNA)
genetic_code = Standard
input.sequence.remove_stop_codons = no
input.sequence.sites_to_use = all
input.alignment = true
sequence.manip = Translate
|}
......@@ -136,6 +138,8 @@ let conf_file_bppseqman_fa2phy ~fna =
assign "output.sequence.file" dest ;
assign "output.sequence.format" (string "Phylip") ;
string {| input.alignment = true
input.sequence.remove_stop_codons = no
input.sequence.sites_to_use = all
sequence.manip =
|}
]
......
......@@ -12,15 +12,17 @@ let assign k v =
let conf_file_bppml ~tree ~faa ~out ~config =
seq ~sep:"\n" (
[
[
assign "OUT" (out) ;
assign "input.sequence.file" (dep faa) ;
assign "alphabet" (string "Protein") ;
assign "input.sequence.remove_stop_codons" (string "no") ;
assign "input.sequence.sites_to_use" (string "all") ;
assign "input.tree.file" (dep tree) ;
assign "init.tree" (string "user") ;
assign "input.tree.format" (string "Nhx") ;
assign "optimization.topology" (string "false") ;
assign "output.tree.file" (string "$(OUT)/tree.nhx") ;
......@@ -48,11 +50,11 @@ let bppml ?(descr="") ~faa ~tree ~config : _ workflow =
]
)
]
let conf_file_bppancestor ~tree ~faa ~out ~config =
seq ~sep:"\n" (
[
[
assign "OUT" (out) ;
assign "input.sequence.file" (dep faa) ;
assign "alphabet" (string "Protein") ;
......@@ -60,7 +62,7 @@ let conf_file_bppancestor ~tree ~faa ~out ~config =
assign "input.tree.file" (dep tree) ;
assign "init.tree" (string "user") ;
assign "input.tree.format" (string "Nhx") ;
assign "optimization.topology" (string "false") ;
assign "output.sequence.file" (string "$(OUT)/output_anc.fa") ;
......
......@@ -6,6 +6,42 @@ open File_formats
open Defs
open Convergence_detection
(* Build the list of datasets found under [indir].

   Every entry of [indir] is read as a dataset directory. A directory that
   holds exactly two files, one with extension "fna" and one with extension
   "nhx" (in either order), yields one dataset:
   - [model_prefix] is the tree file name without its extension;
   - [tree_prefix] is the name of the dataset directory;
   - [dataset] is built from the two input files via [Ready_dataset.of_raw].

   A directory that does not hold exactly two entries is skipped silently.
   Raises [Failure] when a two-entry directory does not contain one "fna"
   file and one "nhx" file. *)
let parse_input_data indir =
  let datasets = Array.to_list @@ Sys.readdir indir in
  List.concat_map datasets ~f:(fun dataset_prefix ->
      let dataset_dir = Filename.concat indir dataset_prefix in
      match Array.to_list @@ Sys.readdir dataset_dir with
      | [first; second] ->
        (* The two files may be listed in either order: sort them out by
           extension rather than by position. *)
        let fna, input_tree =
          match Filename.split_extension first, Filename.split_extension second with
          | (_, Some "fna"), (_, Some "nhx") -> first, second
          | (_, Some "nhx"), (_, Some "fna") -> second, first
          | _ ->
            (* Report both file names without guessing which one was meant to
               be the tree and which one the alignment. *)
            failwith ({|Syntax error: extension errors in |} ^ dataset_dir
                      ^ " (expected one .fna and one .nhx file, found: "
                      ^ first ^ ", " ^ second ^ ")")
        in
        let tree_prefix = Filename.chop_extension input_tree in
        let input_tree = input (Filename.concat indir (Filename.concat dataset_prefix input_tree)) in
        let fna = input (Filename.concat indir (Filename.concat dataset_prefix fna)) in
        let raw_dataset = Raw_dataset.{input_tree; fna} in
        [ { Dataset.model_prefix = tree_prefix ;
            tree_prefix = dataset_prefix ;
            dataset = Ready_dataset.of_raw raw_dataset } ]
      | _ -> []
    )
let derive_from_model ~model ~input_tree ~tree_dataset ~tree_prefix ~profile_f ~preview =
let model_prefix = Convergence_hypothesis.string_of_model model in
let nb_sites = if preview then 20 else 50 in
......@@ -152,14 +188,16 @@ let derive_det ~dataset_l ~profile_fn ~preview=
List.map dataset_l ~f:(fun dataset ->
derive_from_dataset ~preview ~dataset)
let main ~outdir ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn ~preview () =
let main ~outdir ?(indir = "") ?(np = 2) ?(mem = 2) ~tree_dir ~profile_fn ~preview () =
let logger =
Logger.tee [
Console_logger.create () ;
Dot_output.create "dag.dot" (*dot -Tpdf example/dag.dot -o dag.pdf*)
] in
let trees = Array.to_list @@ Sys.readdir tree_dir in
let dataset_l = derive_sim ~tree_dir ~trees ~profile_fn ~preview in
let dataset_real = if indir = "" then [] else parse_input_data indir in
let dataset_simu = derive_sim ~tree_dir ~trees ~profile_fn ~preview in
let dataset_l = List.concat [dataset_real; if indir = "" then dataset_simu else [] ] in
let dataset_results_l = derive_det ~dataset_l ~profile_fn ~preview in
let repo = [
Dataset.repo dataset_l ~preview ;
......@@ -176,6 +214,8 @@ let command =
[%map_open
let outdir =
flag "--outdir" (required string) ~doc:"PATH Output directory"
and indir =
flag "--indir" (optional string) ~doc:"PATH Input directory"
and preview =
flag "--preview-mode" no_arg ~doc:" Preview mode"
and np =
......@@ -187,5 +227,5 @@ let command =
and profile_fn =
flag "--profile-fn" (required string) ~doc:"PATH Path to profile file"
in
main ~outdir ?np ?mem ~tree_dir ~profile_fn ~preview
main ~outdir ?indir ?np ?mem ~tree_dir ~profile_fn ~preview
]
......@@ -110,6 +110,7 @@ df_final["Sites"] = pd.to_numeric(df_final["Sites"].str.replace('[','').str.repl
df_final["Topological"] = map(prob_ap, df_final["lnL_conv"],df_final["lnL_noconv"])
df_final = df_final[["Sites","Topological"]]
#===================================================================================================
# Create output files
#===================================================================================================
......
......@@ -132,9 +132,9 @@ df_list = [df for df in [df_pcoc, df_pcoc_gamma,
] if not df.empty ]
df_list_len = [df.shape[0] for df in df_list]
if len(set(df_list_len)) != 1:
print("ERROR: all files have not the same number of rows")
sys.exit(1)
print("WARNING: all files have not the same number of rows")
df_final = reduce(lambda x, y: pd.merge(x, y, on = 'Sites', how='outer'), df_list)
......
......@@ -16,6 +16,8 @@ let conf_file_bppml ~tree ~faa ~out ~config =
assign "OUT" (out) ;
assign "input.sequence.file" (dep faa) ;
assign "alphabet" (string "Protein") ;
assign "input.sequence.remove_stop_codons" (string "no") ;
assign "input.sequence.sites_to_use" (string "all") ;
assign "input.tree.file" (dep tree) ;
assign "init.tree" (string "user") ;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment