detection_pipeline.ml 7.43 KB
Newer Older
1 2 3
open Bistro
open File_formats

4
module type Query = sig
5
  type t
6

7
  val tree :
8 9 10 11
    branch_length_unit:[ `Nucleotide | `Amino_acid | `Codon ] ->
    t ->
    nhx file

Philippe Veber's avatar
Philippe Veber committed
12
  val nucleotide_alignment : t -> nucleotide_fasta file
13 14
end

15 16 17
module type S = sig
  type query

18 19
  include Query with type t := query

20
  val amino_acid_alignment : query -> aminoacid_fasta file
21

22
  val gene_tree : query -> newick file
23

24
  val dn_tree : query -> text file
25

26
  val ds_tree : query -> text file
27

28 29
  val dnds_tree : query -> text file

30
  val identical : query -> cpt file
31

32
  val topological : query -> cpt file
33

34
  val multinomial_simulation_lrt : query -> cpt file
35

36
  val multinomial_simulation_sparse : query -> cpt file
37

38
  val multinomial_asymptotic_lrt : query -> cpt file
39

40
  val multinomial_asymptotic_sparse : query -> cpt file
41

42
  val tdg09 : query -> cpt file
43

44
  val failsafe_tdg09 : query -> cpt file
45

46 47
  val inhouse_tdg09 : query -> cpt file

48
  val pcoc : ?gamma:bool -> ?ncat:int -> query -> cpt file
49

50
  val pcoc_v2 :
51
    ?gamma:bool -> ?aa_profiles:Pcoc.aa_profiles -> query -> cpt file
52 53 54 55 56

  val gemma :
    query ->
    lmm_test:[ `All | `LRT | `Score | `Wald ] ->
    relatedness_mode:[ `Centered | `Standardized ] ->
57
    cpt file
58

59
  val inhouse_lmm : query -> cpt file
60

61
  val diffsel : query -> cpt file
62

63 64 65 66 67
  val diffseldsparse :
    ?pi:float ->
    ?shiftprob:float * float ->
    ?eps:float ->
    query ->
68
    cpt file
69 70 71

  val view_site :
    query -> convergent_species:string list -> site_pos:int -> pdf file
72 73
end

74
module Make (Q : Query) = struct
75
  include Q
76 77

  let amino_acid_alignment d =
78
    Utils.amino_acid_fasta_of_nucleotide_fasta (nucleotide_alignment d)
79 80 81 82

  let phylip_nucleotide_alignment d =
    Bppsuite.fna2phy ~fna:(nucleotide_alignment d)

83 84 85
  let gene_tree d =
    Tree_dataset.raxmlng_fna ~fna:(nucleotide_alignment d) ()

Philippe Veber's avatar
Philippe Veber committed
86 87 88 89 90 91 92 93 94 95 96 97
  let tree_with_no_single_child ~branch_length_unit d : newick file =
    let f = fun%workflow dest ->
      let tree_file = [%path tree ~branch_length_unit d] in
      let open Phylogenetics in
      let tree = Newick.from_file tree_file in
      let tree =
        Newick.map_inner_tree tree
          ~f:
            Codepitk.Convergence_tree
            .remove_nodes_with_single_child
      in
      Newick.to_file tree dest
98
    in
Philippe Veber's avatar
Philippe Veber committed
99
    Workflow.path_plugin ~descr:"detection_pipeline.tree_with_no_single_child" f
100

101
  let identical d =
102 103 104 105 106 107 108 109 110
    let tree_sc =
      Tree_dataset.prepare_sc_tree (tree ~branch_length_unit:`Amino_acid d)
    in
    let tree_id =
      Tree_dataset.prepare_tree_with_node_id
        (tree ~branch_length_unit:`Amino_acid d)
    in
    Identical.identical ~tree_id ~tree_sc ~prot_model:"LG08"
      ~faa:(amino_acid_alignment d) ()
111 112 113 114
    |> Identical.results

  let topological d =
    let faa = amino_acid_alignment d in
115 116 117 118 119 120 121 122
    let tree_conv =
      Tree_dataset.prepare_topological_tree
        (tree ~branch_length_unit:`Amino_acid d)
    in
    let tree =
      Tree_dataset.prepare_tree_with_node_id
        (tree ~branch_length_unit:`Amino_acid d)
    in
123 124 125 126 127
    Topological.topological ~faa ~tree ~tree_conv ~prot_model:"LG08" ()
    |> Topological.results

  let multinomial_asymptotic_lrt d =
    Multinomial.multinomial_asymptotic_lrt
128
      ~tree_sc:(tree ~branch_length_unit:`Amino_acid d)
129 130 131 132
      ~faa:(amino_acid_alignment d)

  let multinomial_asymptotic_sparse d =
    Multinomial.multinomial_asymptotic_sparse
133
      ~tree_sc:(tree ~branch_length_unit:`Amino_acid d)
134 135 136 137
      ~faa:(amino_acid_alignment d)

  let multinomial_simulation_lrt d =
    Multinomial.multinomial_simulation_lrt
138
      ~tree_sc:(tree ~branch_length_unit:`Amino_acid d)
139 140 141 142
      ~faa:(amino_acid_alignment d)

  let multinomial_simulation_sparse d =
    Multinomial.multinomial_simulation_sparse
143
      ~tree_sc:(tree ~branch_length_unit:`Amino_acid d)
144 145 146 147
      ~faa:(amino_acid_alignment d)

  let tdg09 d =
    Tamuri.tdg09
148
      ~tree:(tree_with_no_single_child ~branch_length_unit:`Amino_acid d)
149
      ~faa:(amino_acid_alignment d) ()
150 151
    |> Tamuri.results

Philippe Veber's avatar
Philippe Veber committed
152 153 154 155
  let mock_tdg09 d =
    let f = fun%workflow dest ->
      match Biotk.Fasta.from_file [%path amino_acid_alignment d] with
      | Ok (_, item :: _) ->
156 157 158 159
        let open Core_kernel in
        let n = String.length item.sequence in
        "Sites\tTdg09_1MinusFDR\tTdg09_1MinusLRT\tTdg09_prob_post"
        :: List.init n ~f:(fun i -> sprintf "%d\t0.0\t0.0\tNA" (i + 1))
Philippe Veber's avatar
Philippe Veber committed
160 161 162 163
        |> Out_channel.write_lines dest
      | _ -> failwith "couldn't read an item in fasta"
    in
    Workflow.path_plugin ~descr:"detection_pipeline.mock_tdg09" f
164 165 166

  let failsafe_tdg09 d = Workflow.trywith (tdg09 d) (mock_tdg09 d)

167 168 169 170 171
  let inhouse_tdg09 d =
    Tamuri.inhouse_tdg09
      (tree ~branch_length_unit:`Amino_acid d)
      (amino_acid_alignment d)

172 173 174 175 176 177 178 179 180 181 182 183 184
  let gemma q ~lmm_test ~relatedness_mode =
    let alignment = amino_acid_alignment q in
    let tree = tree ~branch_length_unit:`Amino_acid q in
    let genotype = Gemma.genotype_of_fasta alignment in
    let phenotype = Gemma.phenotype_of_tree tree in
    let relatedness_matrix =
      Gemma.calculate_relatedness_matrix ~mode:relatedness_mode ~genotype
        ~phenotype
    in
    Gemma.univariate_lmm ~lmm:lmm_test ~genotype ~phenotype
      ~relatedness_matrix
    |> Gemma.result_table_of_output alignment

185 186 187 188 189
  let inhouse_lmm q =
    let alignment = amino_acid_alignment q in
    let tree = tree ~branch_length_unit:`Amino_acid q in
    Inhouse_lmm.test alignment tree

190
  let diffseltree d =
191 192
    Tree_dataset.prepare_diffsel_tree
      (tree ~branch_length_unit:`Amino_acid d)
193 194 195 196

  let diffsel d =
    Diffsel.diffsel
      ~phy_n:(phylip_nucleotide_alignment d)
197
      ~tree:(diffseltree d) ~w_every:1 ~n_cycles:50 ()
198 199 200
    |> Diffsel.selector

  let diffseldsparse ?pi ?shiftprob ?eps d =
201
    Diffseldsparse.diffseldsparse ?pi ?shiftprob ?eps
202
      ~alignment:(phylip_nucleotide_alignment d)
203 204
      ~tree:(diffseltree d) ~w_every:1 ~n_cycles:50 ()
    |> Diffseldsparse.readdiffseldsparse |> Diffseldsparse.results
205

206
  let pcoc ?(gamma = true) ?(ncat = 10) d =
207
    let faa = amino_acid_alignment d in
208
    let tree = tree ~branch_length_unit:`Amino_acid d in
209 210
    Pcoc.pcoc ~catx_est:ncat ~plot_complete:false ~gamma ~faa ~tree ()
    |> Pcoc.results
211 212 213

  let pcoc_v2 ?(gamma = true) ?(aa_profiles = `C10) d =
    let faa = amino_acid_alignment d in
214
    let tree = tree ~branch_length_unit:`Amino_acid d in
215
    Pcoc.pcoc_v2 ~aa_profiles ~gamma ~faa ~tree () |> Pcoc.results
216

217
  let dn_ds_dnds_trees d =
218 219 220 221 222 223 224
    Testnh.dn_ds_trees_real_data ~fna:(nucleotide_alignment d)
      ~tree:(tree ~branch_length_unit:`Nucleotide d)
      ()

  let dn_tree d = (dn_ds_dnds_trees d).dn_tsv

  let ds_tree d = (dn_ds_dnds_trees d).ds_tsv
225

226
  let dnds_tree d = (dn_ds_dnds_trees d).dnds_tsv
227

Philippe Veber's avatar
Philippe Veber committed
228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245
  let view_site query ~convergent_species ~site_pos =
    let f = fun%workflow dest ->
      let tree_path = [%path tree ~branch_length_unit:`Amino_acid query] in
      let alignment_path = [%path amino_acid_alignment query] in
      let module CS = Codepitk.Candidate_site in
      let convergent_species = [%param convergent_species] in
      let site_pos = [%param site_pos] in
      let condition s =
        if List.mem s convergent_species then `Convergent else `Ancestral
      in
      let tree = Phylogenetics.Newick.from_file tree_path in
      let alignment =
        Biotk.Fasta.from_file alignment_path
        |> Base.Result.ok_or_failwith |> snd
      in
      CS.make ~condition tree alignment site_pos
      |> CS.draw |> Biotk_croquis.Croquis.Layout.simple
      |> Base.Fn.flip Biotk_croquis.Croquis.Layout.render_pdf dest
246
    in
Philippe Veber's avatar
Philippe Veber committed
247
    Workflow.path_plugin ~descr:"detection_pipeline.view_site" f
248
end