orthomam.ml 19.7 KB
Newer Older
1 2 3
open Core_kernel
open Bistro
open File_formats
Philippe Veber's avatar
Philippe Veber committed
4
open Codepitk
5

6
let ensembl_tree : newick file =
7
  Bistro_unix.wget "ftp://ftp.ensembl.org/pub/release-99/compara/species_trees/vertebrates_species-tree_Ensembl.nh"
8

9
type alignment = Alignment of Orthomam_db.t * Orthomam_db.alignment
10

11
let alignments_of_db db =
12
  List.map (Orthomam_db.list_alignments db) ~f:(fun ali -> Alignment (db, ali))
13

14 15 16 17 18
let family_id_of_alignment (alignment : Orthomam_db.alignment) =
  Filename.basename (alignment :> string)
  |> Fn.flip Filename.chop_suffix "_NT.fasta.ali"
  |> String.chop_prefix_exn ~prefix:"all"

19 20 21 22 23 24 25 26 27 28
let%pworkflow fasta_of_phylip ali =
  let ali =
    [%path ali]
    |> Phylogenetics.Phylip.read_exn ~strict:false
  in
  List.map ali.items ~f:(fun it ->
      Biotk.Fasta.{ description = it.name ; sequence = it.sequence }
    )
  |> Biotk.Fasta.to_file [%dest]

29
type query = {
30
  db : Orthomam_db.t ;
31
  alignment : Orthomam_db.alignment ;
32 33 34
  convergent_species : string list workflow ;
}

35
let query (Alignment (db, alignment)) ~convergent_species =
36
  { db ; alignment ; convergent_species }
37

38
let family_id q = family_id_of_alignment q.alignment
39

40 41 42 43 44 45 46 47 48 49 50 51
let%pworkflow remove_unobserved_sequences_from_alignment phylip : phylip file =
  let phylip = [%path phylip] in
  let module Phylip = Phylogenetics.Phylip in
  let data = Phylip.read_exn ~strict:false phylip in
  let threshold = data.sequence_length * 90 / 100 in
  let items = List.filter data.items ~f:(fun it ->
      String.count it.sequence ~f:(Char.equal '?') < threshold
    )
  in
  let filtered_data = Phylip.make_exn items in
  Phylip.write ~strict:false filtered_data [%dest]

52 53 54 55 56 57
let%pworkflow clip_tree_on_alignment tree ali =
  let open Phylogenetics in
  let tree = Newick.from_file [%path tree] in
  let ali = Phylip.read_exn ~strict:false [%path ali] in
  let ali_species = List.map ali.items ~f:(fun it -> it.name) in
  let clipped_tree =
Philippe Veber's avatar
Philippe Veber committed
58
    Newick.map_inner_tree tree ~f:(fun tree ->
59 60 61 62 63
        match
          Tree.leafset_generated_subtree tree
            (fun bi -> bi.Newick.name) ali_species
        with
        | None -> failwith "Tree has no leaf in alignment"
64
        | Some filtered_tree -> filtered_tree
65 66 67 68
      )
  in
  Newick.to_file clipped_tree [%dest]

69 70 71
let omm_tree_of_db db =
  Workflow.input (Orthomam_db.tree db)

72
let annotate_convergent_species_in_tree (tree : newick file) species : nhx file =
73
  Workflow.path_plugin ~version:3 ~descr:"tree_of_convergent_species" (
74 75
    let open Phylogenetics in
    let%pdeps species = species
76
    and omm_tree = path tree in
77

78
    let ensembl_tree = Newick.from_file omm_tree in
79
    let tagged_tree =
Philippe Veber's avatar
Philippe Veber committed
80
      Newick.map_inner_tree ensembl_tree ~f:(fun t ->
Philippe Veber's avatar
Philippe Veber committed
81
          Codepitk.Convergence_tree.infer_binary_condition_on_branches
82 83
            ~convergent_leaves:(String.Set.of_list species)
            t
84 85
        )
    in
86 87 88
    Newick.to_file tagged_tree [%dest]
  )

89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141
let%pworkflow compare_tree_branch_lengths t1 t2 =
  let open Phylogenetics in
  let t1 = Newick.from_file [%path t1] in
  let t2 = Newick.from_file [%path t2] in
  let rec reorder_branches (branches : _ Tree.branch List1.t) =
    List1.map branches ~f:(fun (Branch b) ->
        let tip, label = reorder_tree_aux b.tip in
        Tree.branch b.data tip, label
      )
  and reorder_tree_aux (t : Newick.tree) =
    match t with
    | Leaf l -> t, Option.value_exn l.name
    | Node n ->
      let branches_and_labels = reorder_branches n.branches in
      let branches, labels =
        branches_and_labels
        |> List1.sort ~compare:(fun (_, x) (_, y) ->
            String.compare x y
          )
        |> List1.unzip
      in
      let label = List1.hd labels in
      Tree.node n.data branches, label
  and reorder_tree t = fst (reorder_tree_aux t)
  in
  let rec collect_branch_lengths (t1 : Newick.tree) (t2 : Newick.tree) =
    match (t1, t2) with
    | Leaf _, Leaf _ -> []
    | Node n1, Node n2 ->
      List1.map2_exn n1.branches n2.branches ~f:(fun (Branch b1) (Branch b2) ->
          let acc = collect_branch_lengths b1.tip b2.tip in
          match b1.data.length, b2.data.length with
          | Some l1, Some l2 -> (l1, l2) :: acc
          | _ -> acc
        )
      |> List1.to_list
      |> List.concat
    | Leaf _, Node _
    | Node _, Leaf _ -> invalid_arg "trees with different topologies"
  in
  let t1 = Newick.with_inner_tree ~f:reorder_tree t1 in
  let t2 = Newick.with_inner_tree ~f:reorder_tree t2 in
  let l1, l2 =
    collect_branch_lengths t1 t2
    |> List.unzip
  in
  OCamlR_grDevices.pdf [%dest] ;
  OCamlR_graphics.plot
    ~main:"Comparison of branch lengths"
    ~xlab:"Branch lengths in tree 1"
    ~ylab:"Branch lengths in tree 2"
    ~x:(Array.of_list l1) ~y:(Array.of_list l2) () ;
  OCamlR_grDevices.dev_off ()
142

143
let%pworkflow [@version 6] number_of_missing_sequences_repartition db =
144 145
  Orthomam_db.missing_sequences_cdf [%param db] [%dest]

146
let%pworkflow [@mem Workflow.int 4096] concatenate ?(nmissing = 0) ?seed db n : phylip file =
147
  let db = [%param db] in
148
  let seed = [%param seed] in
149
  let n = [%param n] in
150 151 152 153
  let open Phylogenetics in
  let compare_items (x : Phylip.item) (y : Phylip.item) =
    String.compare x.name y.name
  in
154 155 156 157 158 159 160 161
  let rec at_most_n_failures xs ~n ~f =
    if n <= 0 then List.for_all xs ~f
    else
      match xs with
      | [] -> true
      | h :: t ->
        let n = if f h then n else n - 1 in
        at_most_n_failures t ~n ~f
162
  in
163 164 165 166 167 168 169 170
  let aa_at_pos seq i =
    let seq = String.sub seq ~pos:(3 * i) ~len:3 in
    let open Phylogenetics in
    Codon.of_string seq
    |> Option.bind ~f:Codon.Universal_genetic_code.aa_of_codon
  in
  let full_sites_of_alignment (alignment : Phylip.t) =
    if alignment.sequence_length mod 3 = 0 then
Philippe Veber's avatar
Philippe Veber committed
171
      Codepitk.Utils.int_fold 0 (alignment.sequence_length / 3) ~init:[] ~f:(fun acc j ->
172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
          let column_is_quasi_full =
            at_most_n_failures alignment.items ~n:nmissing ~f:(fun it ->
                Option.is_some (aa_at_pos it.sequence j)
              )
          in
          if column_is_quasi_full then (alignment, j) :: acc else acc
        )
    else []
  in
  let all_full_sites alignments =
    Array.concat_map (Array.of_list alignments) ~f:(fun al ->
        Array.of_list (full_sites_of_alignment al)
      )
  in
  let build_alignment_from_sites (sites : (Phylip.t * int) array) =
    assert (Array.length sites > n) ;
    let al0, _ = sites.(0) in
    let nb_sequences = al0.number_of_sequences in
    let sequences = Array.init nb_sequences ~f:(fun _ ->
191
        Bytes.create (3 * n)
192 193 194
      )
    in
    for i = 0 to n - 1 do
195
      let al, p = sites.(i) in
196
      List.iteri al.items ~f:(fun j it ->
197 198
          match aa_at_pos it.sequence p with
          | Some _ ->
199 200 201
            Bytes.set sequences.(j) (3 * i) it.sequence.[3 * p] ;
            Bytes.set sequences.(j) (3 * i + 1) it.sequence.[3 * p + 1] ;
            Bytes.set sequences.(j) (3 * i + 2) it.sequence.[3 * p + 2]
202
          | None ->
203 204 205
            Bytes.set sequences.(j) (3 * i) '-' ;
            Bytes.set sequences.(j) (3 * i + 1) '-' ;
            Bytes.set sequences.(j) (3 * i + 2) '-' ;
206 207
        )
    done ;
208
    List.mapi al0.items ~f:(fun i it ->
209 210 211
        { Phylip.sequence = Bytes.to_string sequences.(i) ; name = it.name }
      )
  in
212 213 214
  let all_alignments =
    Orthomam_db.list_alignments db
    |> List.filter_map ~f:(fun ali_fn ->
215
        let ali = Phylip.read_exn ~strict:false (ali_fn :> string) in
216 217 218 219
        if ali.sequence_length mod 3 = 0 then
          Phylip.make_exn (List.sort ali.items ~compare:compare_items)
          |> Option.some
        else None
220 221
      )
  in
222 223 224 225 226
  let all_full_sites = all_full_sites all_alignments in
  if Array.length all_full_sites < n then failwith "not enough sites" ;
  let seed = match seed with
    | None -> Gsl.Rng.default_seed ()
    | Some i -> Nativeint.of_int i
227
  in
228 229 230 231
  let rng = Gsl.Rng.(make (default ())) in
  Gsl.Rng.set rng seed ;
  Gsl.Randist.shuffle rng all_full_sites ;
  let items = build_alignment_from_sites all_full_sites in
232
  Phylip.write ~strict:false (Phylip.make_exn items) [%dest]
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269

let%pworkflow concatenate_calibration_figure ~nsites ~trees =
  let open Phylogenetics in
  let nsites = [%param nsites] in
  let tree_paths = [%eval
    List.map trees ~f:Workflow.path_list
    |> Workflow.list
  ]
  in
  let tree_lengths =
    List.map tree_paths ~f:(
      List.map ~f:(fun tree_path ->
          Newick.from_file tree_path
          |> Newick.with_inner_tree ~f:(
            Tree.pre ~init:0.
              ~node:(fun acc _ -> acc) ~leaf:(fun acc _ -> acc)
              ~branch:(fun acc bi ->
                  match bi.Newick.length with
                  | None -> failwith "expected tree with branch length"
                  | Some l -> acc +. l
                )
          )
        )
    )
  in
  let x =
    List.map2_exn nsites trees ~f:(fun n trees ->
        List.init (List.length trees) ~f:(fun _ -> float n)
      )
    |> List.concat
    |> Array.of_list
  in
  let y =
    List.concat tree_lengths
    |> Array.of_list
  in
  OCamlR_grDevices.pdf [%dest] ;
270
  OCamlR_graphics.plot ~ylab:"Total branch length" ~xlab:"Concatenate size" ~log:`XY ~x ~y () ;
271 272
  OCamlR_grDevices.dev_off ()

273 274 275 276
let%pworkflow phylip_aa_of_nuc (ali : phylip file) : phylip file =
  let open Phylogenetics in
  let input_ali = Phylip.read_exn ~strict:false [%path ali] in
  let items = List.map input_ali.items ~f:(fun it ->
277 278
      let sequence =
        it.sequence
Philippe Veber's avatar
Philippe Veber committed
279
        |> Codepitk.Utils.translate_nucleotide_sequence_whatever_it_takes
280 281
      in
      { it with Phylip.sequence }
282 283
    )
  in
284
  Phylip.write ~strict:false (Phylip.make_exn items) [%dest]
285

286 287 288 289 290 291 292 293 294 295 296 297 298 299
let%pworkflow iqtree_nexus_partition_file_of_alignment ali : Biotope.Formats.nexus file =
  let maybe_nb_columns =
    let open Option in
    In_channel.with_file [%path ali] ~f:In_channel.input_line >>= fun ali_first_line ->
    String.lsplit2 ali_first_line ~on:'\t' >>= fun (_, n) ->
    return (Int.of_string n)
  in
  let nb_columns = Option.value_exn ~message:"Could not parse phylip alignment first line" maybe_nb_columns in
  Out_channel.with_file [%dest] ~f:(fun oc ->
      fprintf oc
        {|#nexus begin sets; charset part1 = 1-%d\3; charset part2 = 2-%d\3; charset part3 = 3-%d\3; charpartition mine = GTR+R:part1, GTR+R:part2, GTR+R:part3; end;|}
        nb_columns nb_columns nb_columns
    )

300
let branch_length_estimation_concatenate db =
301
  concatenate ~nmissing:2 ~seed:42 db 20_000
302 303 304

let nucleotide_tree_estimatimation db ali =
  let open Biotope in
305
  let spp = iqtree_nexus_partition_file_of_alignment ali in
306 307 308 309 310 311
  Iqtree.iqtree ~st:`DNA ~te:(omm_tree_of_db db) ~spp (`phylip ali)
  |> Iqtree.treefile

let estimated_nucleotide_tree db =
  let ali = branch_length_estimation_concatenate db in
  nucleotide_tree_estimatimation db ali
312 313

let estimated_amino_acid_tree db =
314 315 316 317 318 319 320 321
  let open Biotope in
  let ali =
    branch_length_estimation_concatenate db
    |> phylip_aa_of_nuc
  in
  let model = Iqtree.model_spec ~freq_type:`F ~rate_type:(`R None) `LG in
  Iqtree.iqtree ~te:(omm_tree_of_db db) ~m:model (`phylip ali)
  |> Iqtree.treefile
322 323

let estimated_codon_tree db =
324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343
  let open Biotope in
  let ali = branch_length_estimation_concatenate db in
  let model = Iqtree.model_spec ~freq_type:`F `ECMK07 in
  Iqtree.iqtree ~te:(omm_tree_of_db db) ~m:model ~st:(`CODON 1) (`phylip ali)
  |> Iqtree.treefile

let concatenate_calibration ~seed db =
  let nrep = 5 in
  let nsites = [ 50 ; 100 ; 200 ; 500 ; 1000 ; 2000 ; 5000 ; 10000 ; 20000 ; 50000 ] in
  let seeds =
    let rng = Random.State.make [| seed |] in
    List.map nsites ~f:(fun _ ->
        List.init nrep ~f:(fun _ ->
            Random.State.int rng (1 lsl 30 - 1)
          )
      )
  in
  let concatenates =
    List.map2_exn nsites seeds ~f:(fun n seeds ->
        List.map seeds ~f:(fun seed ->
344
            concatenate ~nmissing:2 ~seed db n
345 346 347 348 349 350 351 352 353
          )
      )
  in
  let trees =
    List.map concatenates ~f:(
      List.map ~f:(nucleotide_tree_estimatimation db)
    )
  in
  concatenate_calibration_figure ~nsites ~trees
354 355 356 357 358

module Q = struct
  type t = query

  let alignment q : phylip file =
359
    Workflow.input (q.alignment :> string)
360 361
    |> remove_unobserved_sequences_from_alignment

362 363 364 365 366
  let omm_tree_with_branch_lengths ~branch_length_unit = match branch_length_unit with
    | `Nucleotide -> estimated_nucleotide_tree
    | `Amino_acid -> estimated_amino_acid_tree
    | `Codon -> estimated_codon_tree

367 368
  let tree ~branch_length_unit q =
    clip_tree_on_alignment
369 370 371
      (annotate_convergent_species_in_tree
         (omm_tree_with_branch_lengths ~branch_length_unit q.db)
         q.convergent_species)
372 373 374 375 376 377 378 379
      (alignment q)

  let nucleotide_alignment q =
    fasta_of_phylip (alignment q)
end

include Q

380 381 382 383 384
let tree_of_db db ~branch_length_unit ~convergent_species =
  annotate_convergent_species_in_tree
    (omm_tree_with_branch_lengths ~branch_length_unit db)
    (Workflow.data convergent_species)

385 386 387
include Detection_pipeline.Make(Q)

let species_with_echolocation = [
388
  "Orcinus_orca" ;
389
  "Myotis_lucifugus" ;
390 391 392 393 394
  "Eptesicus_fuscus" ;
  "Tursiops_truncatus" ;
  "Miniopterus_natalensis" ;
  (* "Pteropus_vampyrus" ; unsure, i'd say no *)
  (* "Pteropus alecto" ; unsure, i'd say no *)
395
  "Rhinolophus_sinicus" ;
396 397
  "Lipotes_vexillifer" ;
  "Hipposideros_armiger" ;
398
  "Physeter_catodon" ;
399 400 401 402 403 404
  "Rousettus_aegyptiacus" ; (* unsure, i'd say yes *)
  "Myotis_brandtii" ;
  "Balaenoptera_acutorostrata_scammoni" ;
  "Delphinapterus_leucas" ;
  "Myotis_davidii" ;
  (* "Rhinolophus ferrumequinum" ; in ensembl but not in orthomam *)
405
]
406

407 408 409 410 411 412 413 414 415
let prestin db =
  let alignments = alignments_of_db db in
  let prestin_alignment = List.find_exn alignments ~f:(fun (Alignment (_, al)) ->
      String.equal (family_id_of_alignment al) "ENSG00000170615_SLC26A5"
    )
  in
  let convergent_species = Workflow.data species_with_echolocation in
  query ~convergent_species prestin_alignment

416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434
let%pworkflow convergence_species_tree_pdf ~convergent_species db =
  let convergent_species = [%param convergent_species] in
  let tree_path = [%path estimated_amino_acid_tree db] in
  let open Phylogenetics in
  let convergent_species = String.Set.of_list convergent_species in
  let render_tree (tree_or_branch : Newick.t) =
    let module P = Biotk.Phylo_tree_draw in
    let rec of_tree : Newick.tree -> P.tree = function
      | Leaf ni ->
        let col, text =
          match ni.name with
          | Some name ->
            let col =
              if String.Set.mem convergent_species name
              then Gg.Color.red else Gg.Color.black
            in
            col, String.tr ~target:'_' ~replacement:' ' name
          | None -> Gg.Color.black, ""
        in
Philippe Veber's avatar
Philippe Veber committed
435
        P.leaf ~col ~style:`italic text
436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
      | Node n ->
        Phylogenetics.List1.map n.branches ~f:of_branch
        |> Phylogenetics.List1.to_list
        |> P.node
    and of_branch : Newick.branch -> P.branch = fun (Branch b) ->
      let len = Option.value_exn b.data.length in
      let col =
        match List.Assoc.find b.data.tags "Condition" ~equal:String.equal with
        | Some "1" -> Gg.Color.red
        | _ -> Gg.Color.black
      in
      P.branch ~col len (of_tree b.tip)
    in
    match tree_or_branch with
    | Tree t -> P.draw_tree (of_tree t)
    | Branch b -> P.draw_branch (of_branch b)
  in
  let tree_or_branch =
    Newick.from_file tree_path
455
    |> Newick.map_inner_tree ~f:(fun t ->
Philippe Veber's avatar
Philippe Veber committed
456
        Codepitk.Convergence_tree.infer_binary_condition_on_branches
457
          t ~convergent_leaves:convergent_species)
458 459 460 461
  in
  render_tree tree_or_branch
  |> Biotk_croquis.Croquis.Layout.simple
  |> Fn.flip Biotk_croquis.Croquis.Layout.render_pdf [%dest]
462

463
let%workflow ranking_of_results ~alignment_ids ~convergent_species (alignments : aminoacid_fasta file list) (trees : #newick file list) result_files ~column_label =
464 465 466 467 468 469 470
  let ranking_size = 1_000 in
  let column_label = [%param column_label] in
  let alignment_ids = Array.of_list [%param alignment_ids] in
  let trees = [%eval Workflow.path_list trees] in
  let alignments = [%eval Workflow.path_list alignments] in
  let result_files = [%eval Workflow.path_list result_files] in
  let convergent_species = String.Set.of_list [%param convergent_species] in
Philippe Veber's avatar
Philippe Veber committed
471
  let module Result_table = Codepitk.Result_table in
472 473 474 475 476 477 478 479 480 481 482 483 484 485
  let lazy_load xs ~f = List.map xs ~f:(fun x -> lazy (f x)) |> Array.of_list in
  let results = lazy_load result_files ~f:Result_table.of_file in
  let trees = lazy_load trees ~f:Phylogenetics.Newick.from_file in
  let alignments = lazy_load alignments ~f:Biotk.Fasta.from_file in
  let module S = struct
    type t = {
      alignment : int ;
      pos : int ;
      score : float ;
    }
    let leq s1 s2 = Float.(s1.score <= s2.score)
  end in
  let module H = CCHeap.Make(S) in
  let add_result_line acc ~alignment ~pos ~score =
486 487 488 489 490 491 492
    match score with
    | None -> acc
    | Some score ->
      let candidate = { S.alignment ; pos ; score } in
      let acc = H.add acc candidate in
      if H.size acc > ranking_size then H.take_exn acc |> fst
      else acc
493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523
  in
  let add_result_file alignment acc (rf : Result_table.t Lazy.t) =
    let col = List.Assoc.find_exn (Lazy.force rf).scores_per_meth column_label ~equal:String.equal in
    Array.foldi col ~init:acc ~f:(fun pos acc score ->
        add_result_line acc ~alignment ~pos ~score
      )
  in
  let top_sites =
    Array.foldi results ~init:H.empty ~f:add_result_file
    |> H.to_list_sorted
    |> List.rev
  in
  let annotate_site (s : S.t) =
    let id = alignment_ids.(s.alignment) in
    let tree = Lazy.force trees.(s.alignment) in
    let alignment =
      match Lazy.force alignments.(s.alignment) with
      | Ok (_, fa) -> fa
      | Error _ -> assert false
    in
    let leaf_values = List.map alignment ~f:(fun it ->
        it.description, it.sequence.[s.pos]
      )
    in
    let contents =
      let open Phylogenetics in
      Newick.with_inner_tree tree ~f:(fun t ->
          Tree.map t
            ~node:(fun _ -> ())
            ~branch:(fun bi -> Option.value_exn bi.length)
            ~leaf:(fun ni ->
524 525 526 527 528 529
                let species = Option.value_exn ni.name in
                let state = List.Assoc.find_exn leaf_values species ~equal:String.equal in
                let condition = 
                  if String.Set.mem convergent_species species then `Convergent
                  else `Ancestral
                in
Philippe Veber's avatar
Philippe Veber committed
530
                { Codepitk.Candidate_site.species ; state ; condition }
531 532 533
              )
        )
    in
534
    { Candidate_site.alignment_id = Some id ; contents ; pos = Some s.pos ; score = Some s.score }
535 536 537
  in
  List.map top_sites ~f:annotate_site

Philippe Veber's avatar
Philippe Veber committed
538
let site_ranking ?subset ~meth ~convergent_species db =
539 540 541
  let detection_method, column_label = match meth with
    | `multinomial_asymptotic_lrt ->
      multinomial_asymptotic_lrt, "1MinusLRT"
Philippe Veber's avatar
Philippe Veber committed
542
    | `tdg09 ->
543
      failsafe_tdg09, "Tdg09_1MinusFDR"
544 545 546
  in
  let queries =
    alignments_of_db db
Philippe Veber's avatar
Philippe Veber committed
547
    |> (match subset with None -> Fn.id | Some n -> Fn.flip List.take n)
548 549 550 551 552 553 554
    |> List.map ~f:(query ~convergent_species:(Workflow.data convergent_species))
  in
  let alignment_ids = List.map queries ~f:family_id in
  let aa_alignments = List.map queries ~f:amino_acid_alignment in
  let result_files = List.map queries ~f:detection_method in
  let trees = List.map queries ~f:(tree ~branch_length_unit:`Amino_acid) in
  ranking_of_results ~alignment_ids ~convergent_species aa_alignments trees result_files ~column_label
Philippe Veber's avatar
Philippe Veber committed
555 556 557 558 559 560

let%pworkflow draw_site q pos =
  let fasta_fn = [%path amino_acid_alignment q] in
  let tree_fn = [%path tree ~branch_length_unit:`Amino_acid q] in
  let convergent_species = [%eval q.convergent_species] in
  let pos = [%param pos] in
Philippe Veber's avatar
Philippe Veber committed
561
  let open Codepitk in
Philippe Veber's avatar
Philippe Veber committed
562 563
  let open Biotk_croquis in
  let tree = Phylogenetics.Newick.from_file tree_fn in
564 565 566 567 568
  let condition n = 
    if List.mem convergent_species n ~equal:String.equal 
    then `Convergent
    else `Ancestral
  in
Philippe Veber's avatar
Philippe Veber committed
569 570 571 572 573 574 575 576 577
  match Biotk.Fasta.from_file fasta_fn with
  | Ok (_, items) ->
    let picture =
      Candidate_site.make ~condition tree items pos
      |> Candidate_site.draw
      |> Croquis.Layout.simple
    in
    Croquis.Layout.render_pdf picture [%dest]
  | Error msg -> failwith msg