Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
5d16d54d
Commit
5d16d54d
authored
Jul 17, 2019
by
Philippe Veber
Browse files
Alistats: added convergent/ancestral differentiated GC computation
parent
dbaefa57
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
23 additions
and
3 deletions
+23
-3
.ocamlinit
.ocamlinit
+6
-1
lib/alistats.ml
lib/alistats.ml
+17
-2
No files found.
.ocamlinit
View file @
5d16d54d
...
...
@@ -4,7 +4,7 @@ open Pipeline2
module Dbg = Reviewphiltrans.Debug
let auc ?mode d =
let a =
let a =
result_table ?mode d
|> Convergence_detection.recall_precision_auc_table
|> Dbg.eval
...
...
@@ -13,6 +13,11 @@ let auc ?mode d =
let gc_contents ?pos d =
d |> nucleotide_alignment |> Dbg.path |> Alistats.nucleotide_fasta_gc ?pos
let ac_gc_stats ?pos d =
let al = d |> nucleotide_alignment |> Dbg.path in
let tree = tree d |> Dbg.path in
Alistats.nucleotide_fasta_gc_ac ?pos tree al
let count_py w =
Sys.command (Core.sprintf "python lib/scripts/count_detected.py %s" (Dbg.path w))
|> ignore
...
...
lib/alistats.ml
View file @
5d16d54d
...
...
@@ -21,14 +21,13 @@ type gc_stat = {
gc_variance_among_sequences
:
float
;
}
let
nucleotide_fasta
_gc
?
pos
fa
=
let
seq
_gc
?
pos
seqs
=
let
pos
=
Option
.
map
pos
~
f
:
(
function
|
`first
->
0
|
`second
->
1
|
`third
->
2
)
in
let
seqs
=
strings_from_fasta
fa
in
let
gc_counts
=
Array
.
map
seqs
~
f
:
(
fun
seq
->
let
len
=
if
pos
<>
None
then
String
.
length
seq
/
3
else
String
.
length
seq
in
let
sum
=
String
.
foldi
~
init
:
0
~
f
:
(
fun
i
a
c
->
a
+
if
is_gc
?
pos
i
c
then
1
else
0
)
seq
in
...
...
@@ -41,6 +40,22 @@ let nucleotide_fasta_gc ?pos fa =
let
gc_mean
=
Array
.
fold
gc_counts
~
init
:
0
.
~
f
:
(
fun
acc
(
_
,
k
)
->
acc
+.
k
/.
total_len
)
in
{
gc_mean
;
gc_variance_among_sequences
}
let
nucleotide_fasta_gc
?
pos
fa
=
let
seqs
=
strings_from_fasta
fa
in
seq_gc
?
pos
seqs
let
nucleotide_fasta_gc_ac
?
pos
tree
fa
=
let
tree
=
Convdet
.
Simulator
.
tree_from_file
tree
in
let
seqs
=
strings_from_fasta
fa
in
let
leaf_state
=
Phylogenetics
.
Tree
.
map_leaves
tree
~
root
:
(
0
.,
0
)
~
f
:
(
fun
_
b
->
snd
b
=
0
)
|>
Array
.
of_list
in
let
ancestral_seqs
,
convergent_seqs
=
Array
.
partitioni_tf
seqs
~
f
:
(
fun
i
_
->
leaf_state
.
(
i
))
in
seq_gc
?
pos
ancestral_seqs
,
seq_gc
?
pos
convergent_seqs
let
main
~
alignment
()
=
match
Alignment
.
from_fasta
alignment
with
|
Ok
al
->
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment