open Core
open Bistro.Std
open Bistro_utils
open Bistro.EDSL
open Bistro_bioinfo.Std
open File_formats
open Bppsuite

(** A dataset together with the workflows derived from it.

    [input_tree], [fna] and [fna_infos] are copied from a [Raw_dataset.t];
    [tree_dataset] and [faa] are computed from them by {!of_raw}. *)
type t = {
  input_tree : nhx workflow ;
  tree_dataset : [`tree_dataset] directory workflow ;
  fna : nucleotide_fasta workflow ;
  fna_infos : text_file workflow option ;
  faa : aminoacid_fasta workflow ;
}

(** [of_raw raw_dataset] completes a raw dataset with its derived workflows:
    [tree_dataset] is [Tree_dataset.prepare input_tree] and [faa] is
    [Bppsuite.fna2faa ~fna]. *)
let of_raw (raw_dataset : Raw_dataset.t) =
  let { Raw_dataset.input_tree ; fna ; fna_infos } = raw_dataset in
  let tree_dataset = Tree_dataset.prepare input_tree in
  let faa = Bppsuite.fna2faa ~fna in
  { input_tree ; tree_dataset ; fna ; faa ; fna_infos }

(** [repo rd] lists the repository items exported for [rd].

    The [simulated_sequences.fna_infos] item is only emitted when
    [rd.fna_infos] is present. *)
let repo rd =
  let phy = Bppsuite.fa2phy rd.fna in
  Repo.[
    [
      item ["input_tree.nhx"] rd.input_tree ;
      item ["recalculated_tree.nw"] (Phyml.phyml_tree ~tree:rd.input_tree phy) ;
      item ["tree.H0.node_ids"] (Tree_dataset.nodes rd.tree_dataset H0) ;
      item ["tree.Ha.node_ids"] (Tree_dataset.nodes rd.tree_dataset HaPCOC) ;
      item ["tree.only_convergent_tags.nhx"] (Tree_dataset.tree rd.tree_dataset `Detection) ;
      item ["tree.only_node_ids.nhx"] (Tree_dataset.tree rd.tree_dataset `Simulation) ;
      item ["tree.diffsel"] (Tree_dataset.diffsel_tree rd.tree_dataset) ;
      item ["tree.convergent_topology"] (Tree_dataset.topological_tree rd.tree_dataset) ;
      item ["simulated_sequences.fna"] rd.fna ;
      item ["simulated_sequences.phy"] phy ;
      item ["simulated_sequences.faa"] rd.faa ;
    ] ;
    (* Parenthesised so the match cannot swallow any element that might later
       be added after it in this list. *)
    (match rd.fna_infos with
     | Some w -> [ item ["simulated_sequences.fna_infos"] w ]
     | None -> []) ;
  ]
  |> List.concat

(** [to_raw d] keeps only the fields of [d] that make up a [Raw_dataset.t];
    the derived [tree_dataset] and [faa] fields are dropped. *)
let to_raw { input_tree ; fna ; fna_infos ; _ } =
  { Raw_dataset.input_tree ; fna ; fna_infos }

(** [paste_fna_infos ~fna_infos_l] is a workflow running [cat] on the files of
    [fna_infos_l], in list order, with standard output sent to the workflow
    destination.

    With an empty [fna_infos_l] the command is [cat] without any file
    operand. *)
let paste_fna_infos ~(fna_infos_l: text_file workflow list) : text_file workflow =
  workflow ~descr:"cat" [
    cmd "cat" ~stdout:dest
      (List.map fna_infos_l ~f:(fun fna_infos -> dep fna_infos))
  ]

(** [paste d1 d2] builds a new dataset from [d1] and [d2].

    - The input tree is the one of [d1]; the tree of [d2] is not used.
    - The nucleotide sequences are [Bppsuite.paste_fna] applied to both [fna].
    - The info files that are present are concatenated with
      {!paste_fna_infos}. When neither dataset carries one, the result has
      [fna_infos = None] rather than a workflow that would run [cat] with no
      input file.

    Derived fields are recomputed through {!of_raw}. *)
let paste d1 d2 =
  let r_d1 = to_raw d1 in
  let r_d2 = to_raw d2 in
  let fna = Bppsuite.paste_fna [ r_d1.fna ; r_d2.fna ] in
  let fna_infos =
    match List.filter_opt [ r_d1.fna_infos ; r_d2.fna_infos ] with
    | [] -> None
    | fna_infos_l -> Some (paste_fna_infos ~fna_infos_l)
  in
  of_raw { Raw_dataset.input_tree = r_d1.input_tree ; fna ; fna_infos }