Commit 22663c89 authored by Philippe Veber's avatar Philippe Veber
Browse files

orthomam: fixed nucleotide sequence translation

'N' is a valid amino acid, patate
parent a7061f14
......@@ -242,9 +242,9 @@ let%pworkflow [@mem Workflow.int 4096] concatenate ?(nmissing = 0) ?seed db n :
Bytes.set sequences.(j) (3 * i + 1) it.sequence.[3 * p + 1] ;
Bytes.set sequences.(j) (3 * i + 2) it.sequence.[3 * p + 2]
| None ->
Bytes.set sequences.(j) (3 * i) 'N' ;
Bytes.set sequences.(j) (3 * i + 1) 'N' ;
Bytes.set sequences.(j) (3 * i + 2) 'N' ;
Bytes.set sequences.(j) (3 * i) '-' ;
Bytes.set sequences.(j) (3 * i + 1) '-' ;
Bytes.set sequences.(j) (3 * i + 2) '-' ;
)
done ;
List.mapi al0.items ~f:(fun i it ->
......@@ -271,7 +271,6 @@ let%pworkflow [@mem Workflow.int 4096] concatenate ?(nmissing = 0) ?seed db n :
Gsl.Rng.set rng seed ;
Gsl.Randist.shuffle rng all_full_sites ;
let items = build_alignment_from_sites all_full_sites in
printf "nfull sites = %d\n%!" (Array.length all_full_sites) ;
Phylip.write ~strict:false (Phylip.make_exn items) [%dest]
let%pworkflow concatenate_calibration_figure ~nsites ~trees =
......@@ -315,29 +314,13 @@ let%pworkflow concatenate_calibration_figure ~nsites ~trees =
let%pworkflow phylip_aa_of_nuc (ali : phylip file) : phylip file =
let open Phylogenetics in
let has_some_gaps s = String.exists s ~f:(function '-' | '?' -> true | _ -> false) in
let convert_seq seq =
let fail subseq =
failwithf "Sequence %s cannot be converted to aminoacid sequence because it contains %s" seq subseq ()
in
let n = String.length seq in
String.init (n / 3) ~f:(fun i ->
let subseq = String.sub seq ~pos:(i * 3) ~len:3 in
match Codon.of_string subseq with
| Some codon -> (
match Codon.Universal_genetic_code.aa_of_codon codon with
| Some aa -> Amino_acid.to_char aa
| None -> 'N'
)
| None ->
if has_some_gaps subseq then '-'
else if String.exists subseq ~f:(Char.( = ) 'N') then 'N'
else fail subseq
)
in
let input_ali = Phylip.read_exn ~strict:false [%path ali] in
let items = List.map input_ali.items ~f:(fun it ->
{ it with Phylip.sequence = convert_seq it.sequence }
let sequence =
it.sequence
|> Reviewphiltrans_toolbox.Utils.translate_nucleotide_sequence_whatever_it_takes
in
{ it with Phylip.sequence }
)
in
Phylip.write ~strict:false (Phylip.make_exn items) [%dest]
......
......@@ -25,3 +25,22 @@ let int_for_all a b ~f =
i >= b && f i && loop (i + 1)
in
loop a
let translate_nucleotide_sequence_whatever_it_takes ?(unknown_char = 'X') seq =
let open Phylogenetics in
let has_some_gaps s = String.exists s ~f:(Char.( = ) '-') in
let n = String.length seq in
String.init (n / 3) ~f:(fun i ->
let subseq = String.sub seq ~pos:(i * 3) ~len:3 in
match Codon.of_string subseq with
| Some codon -> (
match Codon.Universal_genetic_code.aa_of_codon codon with
| Some aa -> Amino_acid.to_char aa
| None -> unknown_char
)
| None ->
if has_some_gaps subseq then '-'
else if String.exists subseq ~f:(Char.( = ) 'N') then unknown_char
else
failwithf "Sequence %s cannot be converted to aminoacid sequence because it contains %s" seq subseq ()
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment