Commit a3605253 authored by Philippe Veber
Browse files

renamed project to codepi

parent 485292e8
......@@ -3,108 +3,15 @@ open Top
open Reviewphiltrans
(** [njplot t] runs the external [njplot] command on the Newick file [t]
    and returns the integer status given back by [Sys.command].

    The file path is passed through [Filename.quote] before being spliced
    into the command line, so a path containing spaces or shell
    metacharacters reaches [njplot] as a single argument instead of being
    re-interpreted by the shell. *)
let njplot (t : Biotope.Formats.newick Bistro.file) =
  Sys.command (Printf.sprintf "njplot %s" (Filename.quote (path t)))
(*
open Pipeline2
module Dbg = Reviewphiltrans.Debug
let auc ?mode d =
let a =
result_table ?mode d
|> Convergence_detection.recall_precision_auc_table
|> Dbg.eval
in Array.sort (fun (_, x) -> fun (_, y) -> compare y x) a ; a
let gc_contents ?pos d =
d |> nucleotide_alignment |> Dbg.path |> Alistats.nucleotide_fasta_gc ?pos
let ac_gc_stats ?pos d =
let al = d |> nucleotide_alignment |> Dbg.path in
let tree = tree d |> Dbg.path in
Alistats.nucleotide_fasta_gc_ac ?pos tree al
let count_py w =
Sys.command (Core.sprintf "python3 lib/scripts/count_detected.py %s" (Dbg.path w))
|> ignore
let h0 =
bppseqgen_simulation
~hyp:Convergence_hypothesis.(H0 (Fixed 5.))
~tree:(`NHX "example/trees_analyses/cyp_coding.Chrysithr_root.nhx")
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
~nb_sites:10
~seed:42
let ha =
bppseqgen_simulation
~hyp:Convergence_hypothesis.(HaPC (Fixed 5.))
~tree:(`NHX "example/trees_analyses/cyp_coding.Chrysithr_root.nhx")
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
~nb_sites:10
~seed:42
let sim =
convdet_simulation
~tree:(`NHX "example/trees_test/tree_small_bl.nhx")
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
~n_h0:10
~n_ha:5
~ne_s:(8., 8.)
~seed:42
()
let sim2 =
convdet_simulation
~tree:(`NHX "example/trees_analyses/cyp_coding.Chrysithr_root.nhx")
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
~n_h0:40
~n_ha:10
~ne_s:(8., 8.)
~seed:428
()
let convdet_cyp_coding =
convdet_simulation
~tree:(`NHX "example/trees_analyses/cyp_coding.Chrysithr_root.nhx")
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
let convdet_amaranth =
convdet_simulation
~tree:(`NHX "example/trees_analyses/C4AmaranthaceaePolyroot.nhx")
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
let convdet_orthomam_aridity =
convdet_simulation
~tree:(`NHX "example/trees_analyses/orthomam_aridity.nhx")
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
let convdet_orthomam_handpicked =
convdet_simulation
~tree:(`NHX "example/trees_analyses/orthomam_handpicked.nhx")
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
let convdet_pairs ~bl n =
convdet_simulation
~tree:(`Pair_tree (bl /. 10., bl, n))
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
let bppseqgen_cyp_coding =
bppseqgen_mixed_simulation
~tree:(`NHX "example/trees_analyses/cyp_coding.Chrysithr_root.nhx")
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
let bppseqgen_pairs ~bl n =
bppseqgen_mixed_simulation
~tree:(`Pair_tree (bl /. 10., bl, n))
~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
let sim3 =
Bppseqgen_mixed {
tree = `NHX "example/trees_analyses/cyp_coding.Chrysithr_root.nhx" ;
profiles = "example/aa_fitness/263SelectedProfiles.tsv" ;
n_h0 = 400 ;
n_ha = 0 ;
ne_s = 4. ;
seed = 42 ;
}
*)
(* Settings forwarded unchanged to [Simulation_dataset.convdet_simulation]
   and [Simulation_dataset.Pair_tree] below. *)
let n_h0 = 100
let n_ha = 3
let npairs = 10

(* Pair tree built from [npairs], with both branch lengths set to 3. *)
let tree =
  Simulation_dataset.Pair_tree
    { branch_length1 = 3.; branch_length2 = 3.; npairs }

(* Simulated dataset on [tree], using the bundled fitness-profile table. *)
let sim =
  Simulation_dataset.convdet_simulation
    ~tree
    ~profiles:"example/aa_fitness/263SelectedProfiles.tsv"
    ~n_h0
    ~n_ha
    ~ne_s:(8., 8.)
    ()

(* Names "C0" .. "C<npairs - 1>", one per index produced by [List.init]. *)
let convergent_species =
  List.init npairs (fun i -> Printf.sprintf "C%d" i)

(* Gemma inputs derived from the simulated dataset. *)
let genotype =
  sim
  |> Simulation_dataset.amino_acid_alignment
  |> Gemma.genotype_of_fasta

let phenotype =
  sim
  |> Simulation_dataset.tree ~branch_length_unit:`Amino_acid
  |> Gemma.phenotype_of_tree

let relatedness_matrix =
  Gemma.calculate_relatedness_matrix
    ~mode:`Standardized
    ~genotype
    ~phenotype

(* Univariate LMM fit requested with the [`LRT] setting. *)
let fit =
  Gemma.univariate_lmm
    ~lmm:`LRT
    ~genotype
    ~phenotype
    ~relatedness_matrix
open Core
open Reviewphiltrans
open Codepi
open Bistro_utils
......
open Core
open Reviewphiltrans
open Codepi
let main ~n_h0 ~n_ha ~seed:i () =
let open Simulation_dataset in
......
(executable
(name reviewphiltrans_app)
(public_name reviewphiltrans)
(modules reviewphiltrans_app)
(libraries reviewphiltrans)
(name codepi_app)
(public_name codepi)
(modules codepi_app)
(libraries codepi)
(preprocess
(pps ppx_jane)))
......@@ -10,7 +10,7 @@
(name diffseldsparse_benchmark)
(public_name diffseldsparse_benchmark)
(modules diffseldsparse_benchmark)
(libraries reviewphiltrans)
(libraries codepi)
(preprocess
(pps ppx_jane)))
......@@ -18,6 +18,12 @@
(name orthomam_app)
(public_name orthomam_convergence)
(modules orthomam_app)
(libraries reviewphiltrans)
(libraries codepi)
(preprocess
(pps ppx_jane)))
; Executable `lmm_benchmark`, built from the single module `lmm_benchmark`,
; linked against the `codepi` library and preprocessed with `ppx_jane`.
; Unlike the sibling stanzas, no `public_name` is declared here.
(executable
(name lmm_benchmark)
(modules lmm_benchmark)
(libraries codepi)
(preprocess (pps ppx_jane)))
open Reviewphiltrans
open Codepi
module Top = Bistro_utils.Toplevel_eval.Make(struct let np = 3 let mem = 10 end)()
let () =
try
Reviewphiltrans_toolbox.Orthomam_db.make "/disk/data/omm"
Codepitk.Orthomam_db.make "/disk/data/omm"
|> Orthomam.(
site_ranking
~convergent_species:species_with_echolocation
......@@ -12,6 +12,6 @@ let () =
)
|> Top.eval
|> Core.(Fn.flip List.take 10)
|> List.iter Reviewphiltrans_toolbox.Candidate_site.(fun x -> Option.iter print_endline x.alignment_id)
|> List.iter Codepitk.Candidate_site.(fun x -> Option.iter print_endline x.alignment_id)
with
| Failure _ -> ()
......@@ -9,8 +9,8 @@ or to benchmark various tools.
maintainer: ["philippe.veber@gmail.com"]
authors: ["Bastien Boussau" "Carine Rey" "Philippe Veber" "Vincent Lanore"]
license: "CeCILL-B"
homepage: "https://gitlab.in2p3.fr/pveber/reviewphiltrans"
bug-reports: "https://gitlab.in2p3.fr/pveber/reviewphiltrans/issues"
homepage: "https://gitlab.in2p3.fr/pveber/codepi"
bug-reports: "https://gitlab.in2p3.fr/pveber/codepi/issues"
depends: [
"dune" {>= "1.11"}
"biocaml"
......@@ -34,4 +34,4 @@ build: [
"@doc" {with-doc}
]
]
dev-repo: "git+https://gitlab.in2p3.fr/pveber/reviewphiltrans.git"
dev-repo: "git+https://gitlab.in2p3.fr/pveber/codepi.git"
(lang dune 1.11)
(generate_opam_files true)
(name reviewphiltrans)
(source (uri git+https://gitlab.in2p3.fr/pveber/reviewphiltrans.git))
(homepage "https://gitlab.in2p3.fr/pveber/reviewphiltrans")
(bug_reports "https://gitlab.in2p3.fr/pveber/reviewphiltrans/issues")
(name codepi)
(source (uri git+https://gitlab.in2p3.fr/pveber/codepi.git))
(homepage "https://gitlab.in2p3.fr/pveber/codepi")
(bug_reports "https://gitlab.in2p3.fr/pveber/codepi/issues")
(license CeCILL-B)
(authors
"Bastien Boussau"
......@@ -14,7 +14,7 @@
(maintainers "philippe.veber@gmail.com")
(package
(name reviewphiltrans)
(name codepi)
(synopsis "A convergent evolution detection pipeline")
(description "
This pipeline can be used to detect convergent evolution in a dataset
......
......@@ -47,7 +47,7 @@ let nucleotide_fasta_gc ?pos fa =
let nucleotide_fasta_gc_ac ?pos tree fa =
let module BI = Phylogenetics_convergence.Simulator.Branch_info in
let tree = Reviewphiltrans_toolbox.Utils.tree_from_file tree in
let tree = Codepitk.Utils.tree_from_file tree in
let seqs = strings_from_fasta fa in
let root = { BI.length = 0. ; condition = `Ancestral } in
let leaf_state =
......
......@@ -188,7 +188,7 @@ let recall_precision_curve table =
]
let%workflow recall_precision_auc_table table =
let module RT = Reviewphiltrans_toolbox.Result_table in
let module RT = Codepitk.Result_table in
let { RT.oracle ; scores_per_meth } = RT.of_file [%path table] in
let labels = Option.value_exn oracle in
List.map scores_per_meth ~f:(fun (meth, scores) ->
......
......@@ -87,7 +87,7 @@ module Make (Q : Query) = struct
let tree =
Newick.map_inner_tree tree
~f:
Reviewphiltrans_toolbox.Convergence_tree
Codepitk.Convergence_tree
.remove_nodes_with_single_child
in
Newick.to_file tree [%dest]
......@@ -214,7 +214,7 @@ module Make (Q : Query) = struct
let%pworkflow view_site query ~convergent_species ~site_pos =
let tree_path = [%path tree ~branch_length_unit:`Amino_acid query] in
let alignment_path = [%path amino_acid_alignment query] in
let module CS = Reviewphiltrans_toolbox.Candidate_site in
let module CS = Codepitk.Candidate_site in
let convergent_species = [%param convergent_species] in
let site_pos = [%param site_pos] in
let condition s =
......
(library
(name reviewphiltrans)
(libraries core biotk biotope bistro.utils containers gsl ocaml-r.graphics ocaml-r.grDevices phylogenetics.convergence reviewphiltrans_toolbox)
(name codepi)
(libraries core biotk biotope bistro.utils codepitk containers gsl ocaml-r.graphics ocaml-r.grDevices phylogenetics.convergence)
(preprocess
(pps ppx_jane ppx_csv_conv bistro.ppx ppx_here)))
......
......@@ -10,12 +10,12 @@ class type relatedness_matrix =
end
let%pworkflow genotype_of_fasta fasta =
let module G = Reviewphiltrans_toolbox.Gemma in
let module G = Codepitk.Gemma in
G.write_genotypes ~alignment:[%path fasta] ~output:[%dest]
let%pworkflow phenotype_of_tree nhx =
let open Phylogenetics in
let module U = Reviewphiltrans_toolbox.Utils in
let module U = Codepitk.Utils in
let collect_leaves t =
let rec node condition t acc =
match t with
......@@ -81,7 +81,7 @@ let calculate_relatedness_matrix ~mode ~genotype ~phenotype =
]
let%pworkflow[@version 4] result_table_of_output alignment gemma_output =
let module R = Reviewphiltrans_toolbox.Gemma.Result_file in
let module R = Codepitk.Gemma.Result_file in
match R.of_file [%path gemma_output] with
| Error msg -> failwith msg
| Ok result_file ->
......@@ -89,4 +89,4 @@ let%pworkflow[@version 4] result_table_of_output alignment gemma_output =
~site_aggregator:R.min_pvalue_aggregator
|> Result.map_error ~f:Phylogenetics.Alignment.show_parsing_error
|> Result.ok_or_failwith
|> Reviewphiltrans_toolbox.Result_table.to_file ~output:[%dest]
|> Codepitk.Result_table.to_file ~output:[%dest]
......@@ -42,7 +42,7 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ file) ~(faa:ami
Alignment.from_fasta [%path faa]
|> Rresult.R.get_ok
in
let tree = Reviewphiltrans_toolbox.Utils.tree_from_file [%path tree_sc] in
let tree = Codepitk.Utils.tree_from_file [%path tree_sc] in
let leaves = fold_leaves tree ~init:[] ~f:(fun acc bi ni ->
let cond = Phylogenetics_convergence.Simulator.Branch_info.condition bi in
match ni.name with
......
open Core_kernel
open Bistro
open File_formats
open Reviewphiltrans_toolbox
open Codepitk
let ensembl_tree : nhx file =
Bistro_unix.wget "ftp://ftp.ensembl.org/pub/release-99/compara/species_trees/vertebrates_species-tree_Ensembl.nh"
......@@ -78,7 +78,7 @@ let annotate_convergent_species_in_tree (tree : newick file) species : newick fi
let ensembl_tree = Newick.from_file omm_tree in
let tagged_tree =
Newick.map_inner_tree ensembl_tree ~f:(fun t ->
Reviewphiltrans_toolbox.Convergence_tree.infer_binary_condition_on_branches
Codepitk.Convergence_tree.infer_binary_condition_on_branches
~convergent_leaves:(String.Set.of_list species)
t
)
......@@ -168,7 +168,7 @@ let%pworkflow [@mem Workflow.int 4096] concatenate ?(nmissing = 0) ?seed db n :
in
let full_sites_of_alignment (alignment : Phylip.t) =
if alignment.sequence_length mod 3 = 0 then
Reviewphiltrans_toolbox.Utils.int_fold 0 (alignment.sequence_length / 3) ~init:[] ~f:(fun acc j ->
Codepitk.Utils.int_fold 0 (alignment.sequence_length / 3) ~init:[] ~f:(fun acc j ->
let column_is_quasi_full =
at_most_n_failures alignment.items ~n:nmissing ~f:(fun it ->
Option.is_some (aa_at_pos it.sequence j)
......@@ -276,7 +276,7 @@ let%pworkflow phylip_aa_of_nuc (ali : phylip file) : phylip file =
let items = List.map input_ali.items ~f:(fun it ->
let sequence =
it.sequence
|> Reviewphiltrans_toolbox.Utils.translate_nucleotide_sequence_whatever_it_takes
|> Codepitk.Utils.translate_nucleotide_sequence_whatever_it_takes
in
{ it with Phylip.sequence }
)
......@@ -446,7 +446,7 @@ let%pworkflow convergence_species_tree_pdf ~convergent_species db =
let tree_or_branch =
Newick.from_file tree_path
|> Newick.map_inner_tree ~f:(fun t ->
Reviewphiltrans_toolbox.Convergence_tree.infer_binary_condition_on_branches
Codepitk.Convergence_tree.infer_binary_condition_on_branches
t ~convergent_leaves:convergent_species)
in
render_tree tree_or_branch
......@@ -461,7 +461,7 @@ let%workflow ranking_of_results ~alignment_ids ~convergent_species (alignments :
let alignments = [%eval Workflow.path_list alignments] in
let result_files = [%eval Workflow.path_list result_files] in
let convergent_species = String.Set.of_list [%param convergent_species] in
let module Result_table = Reviewphiltrans_toolbox.Result_table in
let module Result_table = Codepitk.Result_table in
let lazy_load xs ~f = List.map xs ~f:(fun x -> lazy (f x)) |> Array.of_list in
let results = lazy_load result_files ~f:Result_table.of_file in
let trees = lazy_load trees ~f:Phylogenetics.Newick.from_file in
......@@ -520,7 +520,7 @@ let%workflow ranking_of_results ~alignment_ids ~convergent_species (alignments :
if String.Set.mem convergent_species species then `Convergent
else `Ancestral
in
{ Reviewphiltrans_toolbox.Candidate_site.species ; state ; condition }
{ Codepitk.Candidate_site.species ; state ; condition }
)
)
in
......@@ -551,7 +551,7 @@ let%pworkflow draw_site q pos =
let tree_fn = [%path tree ~branch_length_unit:`Amino_acid q] in
let convergent_species = [%eval q.convergent_species] in
let pos = [%param pos] in
let open Reviewphiltrans_toolbox in
let open Codepitk in
let open Biotk_croquis in
let tree = Phylogenetics.Newick.from_file tree_fn in
let condition n =
......
open Reviewphiltrans_toolbox
open Codepitk
open Bistro
open File_formats
......
......@@ -11,7 +11,7 @@ let%pworkflow simulator ?branch_factor ?seed ~n_h0 ~n_ha ~ne_s:(ne_s0, ne_s1) ~g
let gBGC0 = [%param gBGC0] in
let gBGC1 = [%param gBGC1] in
let branch_factor = [%param branch_factor] in
let tree = Reviewphiltrans_toolbox.Utils.tree_from_file ?alpha:branch_factor [%path tree] in
let tree = Codepitk.Utils.tree_from_file ?alpha:branch_factor [%path tree] in
let fitness_profiles = Phylogenetics_convergence.Profile_tsv.(read [%path fitness_profiles] |> to_fitness) in
let rescale_fitness beta = Amino_acid.Vector.map ~f:(( *. ) beta) in
let base_param =
......
(library
(name reviewphiltrans_toolbox)
(name codepitk)
(libraries biotk biocaml.ez ocaml-r.graphics ocaml-r.grDevices phylogenetics
phylogenetics.convergence)
(inline_tests
......
......@@ -77,7 +77,7 @@ let%pworkflow amino_acid_fasta_of_nucleotide_fasta (fa : nucleotide_fasta file)
List.map items ~f:(fun it ->
let sequence =
it.sequence
|> Reviewphiltrans_toolbox.Utils.translate_nucleotide_sequence_whatever_it_takes
|> Codepitk.Utils.translate_nucleotide_sequence_whatever_it_takes
in
{ it with Biotk.Fasta.sequence }
)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment