Commit 2b1cde4a authored by Philippe Veber
Browse files

new Rubisco_dataset module

parent db76c9da
open Core_kernel
open Bistro
module Tk = Codepitk
(* One row of the species table. Rows are built from tab-separated lines
   through [Species_data_entry.t_of_row] (see [parse_species_data]);
   [t_of_row] is presumably generated by the [csv] deriver below. *)
module Species_data_entry = struct
type t = {
(* Species name; this is the value returned by [convergent_species]. *)
species : string ;
(* Condition label of the species; [convergent_species] keeps the entries
   whose label is exactly "C4". *)
condition : string ;
}
[@@deriving fields, csv]
end
(* A dataset query. The string carried by [Path] is used as a directory in
   which the files "rubisco.nhx", "rubisco.fa" and "rubisco_species" are
   looked up (see [nhx_path], [alignment_path], [species_data_path]). *)
type query = Path of string
(** [nhx_path q] is the path of the ["rubisco.nhx"] file inside the
    directory carried by [q]. *)
let nhx_path = function
  | Path dir -> Filename.concat dir "rubisco.nhx"
(** [alignment_path q] is the path of the ["rubisco.fa"] file inside the
    directory carried by [q]. *)
let alignment_path (Path dataset_dir) =
  let file_name = "rubisco.fa" in
  Filename.concat dataset_dir file_name
(** [species_data_path q] is the path of the ["rubisco_species"] file inside
    the directory carried by [q]. *)
let species_data_path q =
  match q with
  | Path dir -> Filename.concat dir "rubisco_species"
(** [parse_species_data fn] reads every line of file [fn], splits each one on
    tab characters and converts the resulting fields into a
    [Species_data_entry.t] with [Species_data_entry.t_of_row].

    Every line is converted, including the first one, so the file is expected
    to contain data rows only. Any failure of [t_of_row] on a malformed row is
    propagated to the caller. *)
let parse_species_data fn =
  let entry_of_line line =
    Species_data_entry.t_of_row (String.split line ~on:'\t')
  in
  List.map (In_channel.read_lines fn) ~f:entry_of_line
(** [convergent_species q] loads the species table of [q] and returns, in
    file order, the [species] field of every entry whose [condition] is
    exactly ["C4"]. *)
let convergent_species q =
  let entries = parse_species_data (species_data_path q) in
  List.filter_map entries ~f:(fun { Species_data_entry.species; condition } ->
      match condition with
      | "C4" -> Some species
      | _ -> None)
(* Query implementation for the dataset; it is the argument given to
   [Detection_pipeline.Make] further down in this file. *)
module Query = struct
  type t = query

  (* Wraps the alignment file of [q] as a workflow input. *)
  let nucleotide_alignment q =
    alignment_path q |> Workflow.input

  (* Workflow producing the annotated tree of [q]:
     - collects the convergent species of [q] into a string set,
     - loads the Newick/NHX tree found at [nhx_path q],
     - on the inner tree, infers the binary condition on branches from the
       convergent leaves and then resets transitions,
     - writes the result to the workflow destination.
     The [~branch_length_unit] argument is ignored. *)
  let%pworkflow tree ~branch_length_unit:_ q =
    let open Phylogenetics in
    let q = [%param q] in
    let convergent_leaves = String.Set.of_list (convergent_species q) in
    let annotate inner_tree =
      Tk.Convergence_tree.reset_transitions
        (Tk.Convergence_tree.infer_binary_condition_on_branches
           inner_tree ~convergent_leaves)
    in
    let annotated_tree =
      Newick.map_inner_tree (Newick.from_file (nhx_path q)) ~f:annotate
    in
    Newick.to_file annotated_tree [%dest]
end
(* Brings the definitions produced by [Detection_pipeline.Make] applied to
   [Query] into this module; the [view_site] defined right after shadows the
   included one. *)
include Detection_pipeline.Make(Query)
(** [view_site q] calls the previously defined [view_site] (the one in scope
    before this definition shadows it) on [q], supplying
    [~convergent_species] as the list computed by [convergent_species q]. *)
let view_site q =
  let convergent_species = convergent_species q in
  view_site q ~convergent_species
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment