Commit 282afe90 authored by LANORE Vincent's avatar LANORE Vincent
Browse files

Added code to compute gc content at thrid codon position

parent 55fd862c
......@@ -5,37 +5,23 @@ let ok_exn err = function
| Ok x -> x
| Error e -> failwith (err e)
let%pworkflow histogram (fa : #Bistro_bioinfo.fasta Bistro.pworkflow) =
let al = ok_exn Alignment.show_parsing_error @@ Alignment.from_fasta [%path fa] in
let float_array_of_int_list x =
Array.of_list x
|> Array.map ~f:float
in
let values, counts =
Alignment.number_of_residues_per_column_stats al
|> List.unzip
in
OCamlR_grDevices.pdf [%dest] ;
OCamlR_graphics.plot
~plot_type:`Histogram
~xlab:"Number of different residues in column"
~ylab:"Counts"
~x:(float_array_of_int_list values)
~y:(float_array_of_int_list counts) () ;
OCamlR_grDevices.dev_off ()
let is_gc ?thirdpos:(tp=false) i c =
match tp with
| true when not ((i mod 3) = 2) -> false
| _ -> c = 'g' || c = 'G' || c = 'c' || c = 'C'
let nucleotide_fasta_gc fa =
let string_from_fasta fa =
match Alignment.from_fasta fa with
| Ok al ->
let comp = Alignment.composition al in
let f c = match List.Assoc.find comp c ~equal:Char.equal with
| Some x -> x
| None -> 0.
in
f 'G' +. f 'g' +. f 'c' +. f 'C'
| Ok al -> al.sequences |> Array.fold ~init:"" ~f:(fun acc s -> String.concat [acc ; s])
| Error `Fasta_parser_error _ -> failwith "parsing error"
| Error `Msg msg -> failwith msg
let nucleotide_fasta_gc ?thirdpos:(tp=false) fa =
let als = string_from_fasta fa in
let len = if tp then String.length als / 3 else String.length als in
let sum = String.foldi ~init:0 ~f:(fun i a c -> a + if is_gc ~thirdpos:tp i c then 1 else 0) als in
(float_of_int sum) /. (float_of_int len)
let main ~alignment () =
match Alignment.from_fasta alignment with
| Ok al ->
......@@ -55,3 +41,22 @@ let command =
in
main ~alignment
]
let%pworkflow histogram (fa : #Bistro_bioinfo.fasta Bistro.pworkflow) =
let al = ok_exn Alignment.show_parsing_error @@ Alignment.from_fasta [%path fa] in
let float_array_of_int_list x =
Array.of_list x
|> Array.map ~f:float
in
let values, counts =
Alignment.number_of_residues_per_column_stats al
|> List.unzip
in
OCamlR_grDevices.pdf [%dest] ;
OCamlR_graphics.plot
~plot_type:`Histogram
~xlab:"Number of different residues in column"
~ylab:"Counts"
~x:(float_array_of_int_list values)
~y:(float_array_of_int_list counts) () ;
OCamlR_grDevices.dev_off ()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment