Commit 3699c7ed authored by Philippe Veber
Browse files

toolbox/Orthomam_db: refactoring

parent efbb1ae4
......@@ -33,43 +33,43 @@ type orf_defect = [
| `Stop_codon_at_last_position
]
let check_orf seq =
(** [check_codon seq] classifies the codon string [seq].

    The result is one of:
    - [`Coding_codon c] when [Codon.of_string seq] yields a codon [c] that
      [Codon.Universal_genetic_code.is_stop_codon] does not flag;
    - [`Stop_codon] when it yields a codon that is flagged as a stop;
    - [`Ambiguous] when [seq] is not a codon but its first three characters
      are all IUPAC nucleotides and at least one of them is ambiguous;
    - [`Invalid_character_in_codon] when one of the first three characters
      is not recognised by [Iupac_nucleotide.of_char].

    NOTE(review): the fallback indexes [seq.[0]] to [seq.[2]], so it
    presumably expects [seq] to be exactly three characters long — confirm
    against callers. *)
let check_codon seq =
  let open Phylogenetics in
  (* Only evaluated when [seq] could not be read as a plain codon: look at
     the three characters individually to tell apart ambiguity from junk. *)
  let classify_non_codon () =
    let parsed_nucleotides =
      Option.all
        (List.init 3 ~f:(fun pos -> Iupac_nucleotide.of_char seq.[pos]))
    in
    match parsed_nucleotides with
    | None -> `Invalid_character_in_codon
    | Some nucleotides ->
      if List.exists nucleotides ~f:Iupac_nucleotide.is_ambiguous then
        `Ambiguous
      else
        (* Three unambiguous nucleotides that still failed
           [Codon.of_string]: treated as impossible by the code. *)
        assert false
  in
  match Codon.of_string seq with
  | Some codon ->
    if Codon.Universal_genetic_code.is_stop_codon codon then `Stop_codon
    else `Coding_codon codon
  | None -> classify_non_codon ()
let check_orf seq : (unit, orf_defect) result =
let l = String.length seq in
if l mod 3 <> 0 then Error `Length_not_multiple_of_3
else
let maybe_codons =
List.init (String.length seq / 3) ~f:(fun i ->
Codon.of_string (String.sub seq ~pos:(i * 3) ~len:3)
)
in
List.find_mapi maybe_codons ~f:(fun i maybe_c ->
match maybe_c with
| None -> (
let codon_seq = String.sub seq ~pos:(i * 3) ~len:3 in
let maybe_iupac_nucleotides =
List.init 3 ~f:(fun i -> Iupac_nucleotide.of_char codon_seq.[i])
|> Option.all
in
match maybe_iupac_nucleotides with
| None ->
Some (Error (`Invalid_character_in_codon (i, codon_seq)))
| Some iupac_nucleotides ->
if List.exists iupac_nucleotides ~f:Iupac_nucleotide.is_ambiguous then
None
else
Some (Error (`Invalid_codon (i, codon_seq)))
)
| Some c ->
if Codon.Universal_genetic_code.is_stop_codon c then
if i = String.length seq / 3 - 1 then
Some (Error `Stop_codon_at_last_position)
else
Some (Error (`Stop_codon i))
else
None
Range.find_map (0, String.length seq / 3) ~f:(fun i ->
let seq_pos = i * 3 in
let codon_seq = String.sub seq ~pos:seq_pos ~len:3 in
match check_codon codon_seq with
| `Coding_codon _ -> None
| `Invalid_character_in_codon -> Some (`Invalid_character_in_codon (seq_pos, codon_seq))
| `Ambiguous -> None
| `Stop_codon ->
if i = String.length seq / 3 - 1 then Some `Stop_codon_at_last_position
else Some (`Stop_codon (i * 3))
)
|> Option.value ~default:(Ok ())
|> Option.value_map ~f:Result.fail ~default:(Ok ())
type alignment_defect = [
| `Length_not_multiple_of_3
......@@ -136,15 +136,16 @@ let missing_sequences_cdf db dest =
in
let filtered_alignment =
List.filter all_alignments ~f:(fun ali ->
List.for_all ali.items ~f:(fun it ->
match check_orf (remove_gaps it.sequence) with
| Ok ()
| Error `Stop_codon_at_last_position -> true
| Error ( `Stop_codon _
| `Invalid_character_in_codon _
| `Invalid_codon _
| `Length_not_multiple_of_3) -> false
)
match check_alignment ali with
| None -> true
| Some `Length_not_multiple_of_3 -> false
| Some (`Invalid_sequences xs) ->
List.for_all xs ~f:(fun (_, defect) ->
match defect with
| `Stop_codon_at_last_position -> true
| `Stop_codon _
| `Invalid_codon _ -> false
)
)
in
let supports alignments =
......
type t = int * int
(** [find_map (lo, hi) ~f] applies [f] to [lo], [lo + 1], ..., [hi - 1] in
    increasing order and returns the first [Some _] result, or [None] if [f]
    returns [None] for every index.

    The range is half-open: [hi] itself is never passed to [f]. An empty or
    inverted range ([lo >= hi]) yields [None] without calling [f]. *)
let find_map (lo, hi) ~f =
  let rec loop i =
    (* [>=] rather than [=]: with [lo > hi] an equality test would never
       succeed and the loop would run far past the intended bound. *)
    if i >= hi then None
    else
      match f i with
      | None -> loop (i + 1)
      | Some _ as found -> found
  in
  loop lo
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment