Commit 9589905d authored by LANORE Vincent
Browse files

Added new realistic filter with gc content mean quantiles

parent db4d126c
......@@ -281,9 +281,9 @@ let branch_factor_range = [ 1.; 2.; 3.; 4.; 5. ]
(* gBGC setting for a simulation: either a single value ([Global]) or a pair
   of values ([Convergent]). NOTE(review): the meaning of the two floats in
   [Convergent] is not visible here; [gBGC_range] builds them as [(0., x)] --
   confirm which component is which against the simulator. *)
type gBGC_t = Global of float | Convergent of float * float
let gBGC_range =
let range = [ 0.; 2.; 4.; 8.; 16. ] in
let range = [ 0.; 2.; 4.; 8.; 16.; 32. ] in
List.concat [
(* List.map ~f:(fun x -> Global x) range ; *)
List.map ~f:(fun x -> Global x) range ;
List.map ~f:(fun x -> Convergent (0., x)) range ;
]
......@@ -312,13 +312,57 @@ let filter_results ~(f: _ -> bool) (results: (param_t * _) list) =
List.filter results ~f:(fun (_, x) -> f x)
type record_t = {
gc_stat: Alistats.gc_stat
gc_means: ([`first | `second | `third] * float) list
}
let record_of_param ?n_h0:(n_h0=50) s =
let simu = simu_of_param ~n_h0 s in
{ gc_stat = simu |> nucleotide_alignment |> Debug.path |> Alistats.nucleotide_fasta_gc }
let gc_mean_from_simu ~pos s =
(s |> nucleotide_alignment |> Debug.path |> Alistats.nucleotide_fasta_gc ~pos).gc_mean in
{
gc_means = [
(`first, (gc_mean_from_simu simu ~pos:`first)) ;
(`second, gc_mean_from_simu simu ~pos:`second) ;
(`third, gc_mean_from_simu simu ~pos:`third)
]
}
let realistic_result (g: record_t) =
let v = g.gc_stat.gc_variance_among_sequences in
Float.(v >= 8.388e-05 && v <= 5.262e-02)
\ No newline at end of file
(* Reference GC-content summaries, keyed by position tag ([`first], [`second],
   [`third] -- presumably codon positions, TODO confirm).

   Each tuple is laid out in the order [quartile] destructures it:
   (minimum, first-quartile bound, mean, third-quartile bound, maximum).
   The origin of these figures is not visible in this file. *)
let expected_gc =
  [ `first,  (0.3326, 0.5157, 0.5639, 0.6080, 0.8621)
  ; `second, (0.2102, 0.3784, 0.4231, 0.4626, 0.7499)
  ; `third,  (0.2242, 0.4852, 0.6099, 0.7358, 0.9575)
  ]
(* medians = 0.5589 0.4160 0.6274 *)
(* [quartile (lo, q1, mid, q3, hi) x] returns the bin that [x] falls into
   relative to the five cut points, testing them from left to right with a
   strict [<]:

   - [`below_min] when [x < lo]
   - [`first]     when [x < q1]
   - [`second]    when [x < mid]
   - [`third]     when [x < q3]
   - [`fourth]    when [x < hi]
   - [`over_max]  otherwise (this includes [x = hi], and any [x] for which
     every comparison is false).

   The first cut point that [x] is strictly below decides the bin; the cut
   points are not checked for being sorted. *)
let quartile (lo, q1, mid, q3, hi) x =
  if Float.(x < lo) then `below_min
  else if Float.(x < q1) then `first
  else if Float.(x < mid) then `second
  else if Float.(x < q3) then `third
  else if Float.(x < hi) then `fourth
  else `over_max
(* [adjacent q1 q2] is [true] when the two bins are the same quartile or
   neighbouring quartiles ([`first]/[`second], [`second]/[`third],
   [`third]/[`fourth], in either order).

   Any other pair is [false]. In particular the out-of-range tags
   [`below_min] and [`over_max] are never adjacent to anything, not even to
   themselves.

   Fix: the same-bin case for the third quartile was spelled [`thrid], so
   [adjacent `third `third] fell through to the catch-all and returned
   [false]. *)
let adjacent q1 q2 =
  match q1, q2 with
  (* same quartile *)
  | `first, `first | `second, `second
  | `third, `third | `fourth, `fourth
  (* neighbouring quartiles, both orders *)
  | `first, `second | `second, `first
  | `second, `third | `third, `second
  | `third, `fourth | `fourth, `third -> true
  | _ -> false
(* [quartile_of_record r] maps every [(position, gc_mean)] entry of
   [r.gc_means] to the bin returned by [quartile], using the cut points that
   [expected_gc] stores for that position. The output keeps the order of
   [r.gc_means].

   Raises (via [List.Assoc.find_exn]) when a position tag has no entry in
   [expected_gc]. *)
let quartile_of_record (r: record_t) =
  let classify (position, gc_mean) =
    let cut_points =
      List.Assoc.find_exn expected_gc position
        ~equal:(fun a b -> Caml.(a = b))
    in
    quartile cut_points gc_mean
  in
  List.map r.gc_means ~f:classify
(* [realistic_result r] accepts a record when the bins of its three GC means
   (as computed by [quartile_of_record]) are pairwise [adjacent]: first vs
   second, second vs third, and first vs third.

   Raises [Failure "oh no"] when the record does not yield exactly three
   bins. *)
let realistic_result (r: record_t) =
  match quartile_of_record r with
  | [bin_a ; bin_b ; bin_c] ->
    if not (adjacent bin_a bin_b) then false
    else if not (adjacent bin_b bin_c) then false
    else adjacent bin_a bin_c
  | _ -> failwith "oh no"
(* Previous criterion, kept for reference:
   let v = g.gc_stat.gc_variance_among_sequences in
   Float.(v >= 8.388e-05 && v <= 5.262e-02) *)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment