Commit fcc90e34 authored by Carine Rey
Browse files

start pipeline with several models (very beginning)

parent 3e20c213
......@@ -9,6 +9,7 @@ let env = docker_image ~account:"carinerey" ~name:"bppsuite:06182018" ()
(** [assign key value] renders a [key=value] line: the literal string [key]
    and the fragment [value], joined with ["="] as separator. *)
let assign key value =
  seq ~sep:"=" [ string key; value ]
let conf_file_bppseqgen ~tree ~nb_sites =
seq ~sep:"\n" [
assign "input.tree.file" (dep tree) ;
......@@ -18,13 +19,14 @@ let conf_file_bppseqgen ~tree ~nb_sites =
string {|alphabet=Codon(letter=DNA)
genetic_code = Standard
input.tree.format=Nhx
nonhomogeneous = general
output.internal.sequences=no
|} ;
string {|nonhomogeneous = general
nonhomogeneous.number_of_models = 1
model1.nodes_id=0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
nonhomogeneous.root_freq=Fixed()
rate_distribution=Constant()
output.internal.sequences=no
|}
|};
]
let bppseqgen ~nb_sites ~tree : nucleotide_fasta workflow =
......
......@@ -32,6 +32,7 @@ type raw_dataset = {
}
type ready_dataset = {
input_tree: nhx workflow ;
parsed_tree : parsed_input_tree directory workflow ;
fna: nucleotide_fasta workflow ;
faa: aminoacid_fasta workflow ;
......
......@@ -23,14 +23,15 @@ let select_out parsed_tree t = match t with
| Tree_diffsel -> parsed_tree / selector [ "tree.diffsel" ]
(** [ready_dataset_of_raw_dataset raw_dataset] builds a [ready_dataset] from
    [raw_dataset]: the input tree and the nucleotide alignment are carried
    over unchanged, the tree is passed through [parse_input_tree], and the
    amino-acid alignment is derived with [Bppsuite.fna2faa].

    The parameter is annotated because [raw_dataset] and [ready_dataset]
    share the [input_tree] and [fna] field names; without the annotation the
    field accesses would be resolved against the most recently defined
    record type rather than [raw_dataset]. *)
let ready_dataset_of_raw_dataset (raw_dataset : raw_dataset) =
  let input_tree = raw_dataset.input_tree in
  let fna = raw_dataset.fna in
  let parsed_tree = parse_input_tree ~tree:input_tree in
  let faa = Bppsuite.fna2faa ~fna in
  { input_tree; parsed_tree; fna; faa }
let repo_of_ready_dataset ready_dataset =
let repo_of_ready_dataset (ready_dataset:ready_dataset) =
Repo.[
item ["ready_dataset/input_tree.nhx"] ready_dataset.input_tree ;
item ["ready_dataset/tree.H0.node_ids" ] (ready_dataset.parsed_tree / selector [ "tree.H0.node_ids" ]) ;
item ["ready_dataset/tree.Ha.node_ids" ] (ready_dataset.parsed_tree / selector [ "tree.Ha.node_ids" ]) ;
item ["ready_dataset/tree.only_convergent_tags.nhx" ] (ready_dataset.parsed_tree / selector [ "tree.only_convergent_tags.nhx" ]) ;
......@@ -40,25 +41,32 @@ let repo_of_ready_dataset ready_dataset =
item ["ready_dataset/simulated_sequences.faa"] ready_dataset.faa ;
]
(** [repo_of_raw_dataset raw_dataset] lists the repository items exported for
    a raw dataset: its input tree and its nucleotide sequences, both placed
    under [raw_dataset/].

    The parameter carries an explicit type so that the [input_tree] and [fna]
    field accesses are resolved against [raw_dataset] and not against another
    record type declaring the same field names. *)
let repo_of_raw_dataset (raw_dataset : raw_dataset) =
  Repo.[
    item ["raw_dataset/input_tree.nhx"] raw_dataset.input_tree ;
    item ["raw_dataset/simulated_sequences.fna"] raw_dataset.fna ;
  ]
(** [derive_from_model ~model ~tree ~parsed_tree ~preview] simulates sequences
    for one model and returns the repository items to export.

    - [model]: accepted for the per-model interface but not consulted yet;
      it is bound to [_] so the unused argument is explicit.
    - [tree]: the input tree, stored as [input_tree] in both datasets.
    - [parsed_tree]: output of [parse_input_tree]; its [Tree4simu] selection
      is the tree handed to [Bppsuite.bppseqgen].
    - [preview]: when [true], 10 sites are simulated and the ready dataset is
      exported; otherwise 100 sites are simulated and the raw dataset is
      exported. *)
let derive_from_model ~model:_ ~tree ~parsed_tree ~preview =
  let nb_sites = if preview then 10 else 100 in
  let simulation_tree = select_out parsed_tree Tree4simu in
  let fna = Bppsuite.bppseqgen ~nb_sites ~tree:simulation_tree in
  let faa = Bppsuite.fna2faa ~fna in
  (* Both record types declare [input_tree] and [fna]; the annotations state
     which one each literal is meant to build. *)
  let raw_dataset : raw_dataset = { input_tree = tree; fna } in
  let ready_dataset : ready_dataset =
    { input_tree = tree; parsed_tree; fna; faa }
  in
  if preview then repo_of_ready_dataset ready_dataset
  else repo_of_raw_dataset raw_dataset
(** [derive_from_tree ~tree_dir ~tree ~preview] loads the tree file [tree]
    found in [tree_dir], parses it once with [parse_input_tree], and
    concatenates the repository items produced by [derive_from_model] for
    each model, each set being shifted under a directory named after its
    model. The model list currently contains only ["H0"]. *)
let derive_from_tree ~tree_dir ~tree ~preview =
  let tree = input (Filename.concat tree_dir tree) in
  let parsed_tree = parse_input_tree ~tree in
  let repo_for_model model =
    derive_from_model ~model ~tree ~parsed_tree ~preview
    |> Repo.shift model
  in
  List.concat_map [ "H0" ] ~f:repo_for_model
let derive ~tree_dir ~trees ~preview =
List.map trees ~f:(fun tree ->
let id = Filename.chop_extension tree in
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment