Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Docker-in-Docker (DinD) capabilities of public runners deactivated.
More info
Open sidebar
VEBER Philippe
codepi
Commits
8f0cb4f6
Commit
8f0cb4f6
authored
Nov 19, 2020
by
Philippe Veber
Browse files
included multinomial test from phylogenetics_convergence
parent
f9f89f6e
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
205 additions
and
27 deletions
+205
-27
lib/multinomial.ml
lib/multinomial.ml
+3
-1
lib/tk/convergence_tree.ml
lib/tk/convergence_tree.ml
+25
-0
lib/tk/convergence_tree.mli
lib/tk/convergence_tree.mli
+6
-0
lib/tk/multinomial.ml
lib/tk/multinomial.ml
+137
-26
lib/tk/multinomial.mli
lib/tk/multinomial.mli
+34
-0
No files found.
lib/multinomial.ml
View file @
8f0cb4f6
...
...
@@ -22,11 +22,13 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ file) ~(faa:ami
|>
Rresult
.
R
.
failwith_error_msg
in
let
site
c0
c1
=
let
c0
=
(
c0
:
int
Amino_acid
.
table
:>
int
array
)
in
let
c1
=
(
c1
:
int
Amino_acid
.
table
:>
int
array
)
in
let
d
=
MT
.
data
~
x1
:
c0
~
x2
:
c1
in
let
r
=
test
d
in
r
._
T_
,
r
.
pvalue
in
let
res
=
Multinomial
.
site_loo
p
tree
alignment
site
in
let
res
=
Convergence_tree
.
alignment_counts_ma
p
tree
alignment
site
in
let
res_lines
=
List
.
mapi
res
~
f
:
(
fun
i
(
lr
,
pval
)
->
sprintf
"%d
\t
%f
\t
%f"
(
i
+
1
)
(
1
.
-.
pval
)
lr
...
...
lib/tk/convergence_tree.ml
View file @
8f0cb4f6
...
...
@@ -207,3 +207,28 @@ let infer_binary_condition_on_branches ?(gain_relative_cost = 2.) t
Fitch
.
fitch
~
cost
~
n
:
2
~
category
t
|>
Tree
.
map
~
node
:
convert_node
~
leaf
:
convert_node
~
branch
:
Fn
.
id
|>
transfer_condition_to_branches
|>
reset_transitions
let
alignment_counts_map
tree
alignment
f
=
let
leaves
=
leaves
tree
|>
List
.
map
~
f
:
(
fun
(
n
,
cond
)
->
match
Alignment
.
find_sequence
alignment
n
with
|
None
->
failwithf
"Could not find %s in alignment"
n
()
|
Some
seq
->
seq
,
cond
)
in
let
seqs0
,
seqs1
=
List
.
partition_map
leaves
~
f
:
Either
.(
function
|
(
aa
,
`Ancestral
)
->
First
aa
|
(
aa
,
`Convergent
)
->
Second
aa
)
in
let
counts
seqs
i
=
Amino_acid
.
Table
.
init
(
fun
aa
->
let
aa
=
Amino_acid
.
to_char
aa
in
List
.
count
seqs
~
f
:
(
fun
s
->
Char
.
equal
s
.
[
i
]
aa
)
)
in
let
site
i
=
f
(
counts
seqs0
i
)
(
counts
seqs1
i
)
in
let
n
=
Alignment
.
ncols
alignment
in
List
.
init
n
~
f
:
site
lib/tk/convergence_tree.mli
View file @
8f0cb4f6
...
...
@@ -27,3 +27,9 @@ val infer_binary_condition_on_branches :
val
reset_transitions
:
Newick
.
tree
->
Newick
.
tree
val
remove_nodes_with_single_child
:
Newick
.
tree
->
Newick
.
tree
val
alignment_counts_map
:
t
->
Alignment
.
t
->
(
int
Amino_acid
.
table
->
int
Amino_acid
.
table
->
'
a
)
->
'
a
list
lib/tk/multinomial.ml
View file @
8f0cb4f6
open
Core_kernel
open
Phylogenetics
let
site_loop
tree
alignment
f
=
let
leaves
=
Convergence_tree
.
leaves
tree
|>
List
.
map
~
f
:
(
fun
(
n
,
cond
)
->
match
Alignment
.
find_sequence
alignment
n
with
|
None
->
failwithf
"Could not find %s in alignment"
n
()
|
Some
seq
->
seq
,
cond
)
in
let
seqs0
,
seqs1
=
List
.
partition_map
leaves
~
f
:
Either
.(
function
|
(
aa
,
`Ancestral
)
->
First
aa
|
(
aa
,
`Convergent
)
->
Second
aa
)
type
data
=
{
k
:
int
;
x1
:
int
array
;
x2
:
int
array
;
}
let
data
~
x1
~
x2
=
let
k1
=
Array
.
length
x1
in
let
k2
=
Array
.
length
x2
in
if
k1
<>
k2
then
raise
(
Invalid_argument
"Multinomial_test.data: the two arrays should have the same size"
)
;
{
k
=
k1
;
x1
;
x2
}
type
result
=
{
_T_
:
float
;
pvalue
:
float
;
}
let
isum
=
Array
.
reduce_exn
~
f
:
(
+
)
let
sum
a
b
f
=
let
rec
loop
acc
a
b
=
if
a
>=
b
then
acc
else
loop
(
acc
+.
f
a
)
(
a
+
1
)
b
in
let
counts
seqs
i
=
Amino_acid
.
Table
.
init
(
fun
aa
->
let
aa
=
Amino_acid
.
to_char
aa
in
List
.
count
seqs
~
f
:
(
fun
s
->
Char
.
equal
s
.
[
i
]
aa
)
loop
0
.
a
b
let
%
test
_
=
Float
.(
sum
0
4
(
fun
i
->
[
|
1
.
;
2
.
;
3
.
;
4
.
|
]
.
(
i
))
=
10
.
)
let
frequencies
c
=
let
n
=
float
@@
Array
.
reduce_exn
~
f
:
(
+
)
c
in
Array
.
map
c
~
f
:
(
fun
c
->
float
c
/.
n
)
let
summed_counts
{
k
;
x1
;
x2
}
=
Array
.
init
k
~
f
:
(
fun
i
->
x1
.
(
i
)
+
x2
.
(
i
))
let
simulation_test
?
(
sample_size
=
10_000
)
~
likelihood_log_ratio
d
=
let
freq
=
frequencies
(
summed_counts
d
)
in
let
t_obs
=
likelihood_log_ratio
d
in
let
c
=
ref
0
in
let
n1
=
isum
d
.
x1
in
let
n2
=
isum
d
.
x2
in
for
_
=
1
to
sample_size
do
let
d
=
{
k
=
d
.
k
;
x1
=
Owl
.
Stats
.
multinomial_rvs
~
p
:
freq
n1
;
x2
=
Owl
.
Stats
.
multinomial_rvs
~
p
:
freq
n2
;
}
in
let
_T_
=
likelihood_log_ratio
d
in
if
Float
.(
_T_
>=
t_obs
)
then
incr
c
done
;
let
pvalue
=
float
!
c
/.
float
sample_size
in
{
_T_
=
t_obs
;
pvalue
}
module
LRT
=
struct
let
likelihood_log_ratio
({
x1
;
x2
;
_
}
as
d
)
=
let
logpdf
=
Owl
.
Stats
.
multinomial_logpdf
in
let
log_prod0
=
let
p
=
frequencies
(
summed_counts
d
)
in
logpdf
x1
~
p
+.
logpdf
x2
~
p
in
let
log_prod1
=
logpdf
x1
~
p
:
(
frequencies
x1
)
+.
logpdf
x2
~
p
:
(
frequencies
x2
)
in
-
2
.
*.
(
log_prod0
-.
log_prod1
)
let
asymptotic_test
d
=
let
_T_
=
likelihood_log_ratio
d
in
let
pvalue
=
1
.
-.
Owl
.
Stats
.
chi2_cdf
_T_
~
df
:
(
float
(
d
.
k
-
1
))
in
{
_T_
;
pvalue
}
let
simulation_test
=
simulation_test
~
likelihood_log_ratio
end
(**
Implementation of
Two-Sample Test for Sparse High Dimensional Multinomial Distributions
Amanda Plunkett and Junyong Park
*)
module
Sparse
=
struct
let
f_star
~
n1
~
n2
~
x1
~
x2
=
(
x1
/.
n1
-.
x2
/.
n2
)
**
2
.
-.
x1
/.
(
n1
**
2
.
)
-.
x2
/.
(
n2
**
2
.
)
let
likelihood_log_ratio
{
k
;
x1
;
x2
}
=
let
n1
=
float
(
isum
x1
)
in
let
n2
=
float
(
isum
x2
)
in
let
p1_hat
=
Array
.
map
x1
~
f
:
(
fun
x
->
float
x
/.
n1
)
in
let
p2_hat
=
Array
.
map
x2
~
f
:
(
fun
x
->
float
x
/.
n2
)
in
let
sigma_k_hat_squared
=
let
f
n
p_hat
=
sum
0
k
(
fun
i
->
2
.
/.
(
n
**
2
.
)
*.
(
p_hat
.
(
i
)
**
2
.
-.
p_hat
.
(
i
)
/.
n
))
in
f
n1
p1_hat
+.
f
n2
p2_hat
+.
4
.
/.
n1
/.
n2
*.
sum
0
k
(
fun
i
->
p1_hat
.
(
i
)
*.
p2_hat
.
(
i
))
in
let
sigma_k_hat
=
sqrt
sigma_k_hat_squared
in
sum
0
k
(
fun
i
->
f_star
~
n1
~
n2
~
x1
:
(
float
x1
.
(
i
))
~
x2
:
(
float
x2
.
(
i
)))
/.
sigma_k_hat
let
asymptotic_test
d
=
let
_T_
=
likelihood_log_ratio
d
in
let
pvalue
=
1
.
-.
Owl
.
Stats
.
gaussian_cdf
_T_
~
mu
:
0
.
~
sigma
:
1
.
in
{
_T_
;
pvalue
}
let
simulation_test
=
simulation_test
~
likelihood_log_ratio
end
let
random_discrete_probability
k
~
alpha
=
let
x
=
Array
.
init
k
~
f
:
(
fun
_
->
Owl
.
Stats
.
gamma_rvs
~
shape
:
alpha
~
scale
:
1
.
)
in
let
s
=
Array
.
reduce_exn
x
~
f
:
(
+.
)
in
Array
.
map
x
~
f
:
(
fun
x
->
x
/.
s
)
let
counts
n
p
=
let
r
=
Array
.
map
p
~
f
:
(
fun
_
->
0
)
in
for
_
=
1
to
n
do
let
i
=
Owl
.
Stats
.
categorical_rvs
p
in
r
.
(
i
)
<-
r
.
(
i
)
+
1
done
;
r
let
h0_sample
~
n1
~
n2
p
=
counts
n1
p
,
counts
n2
p
let
uniformity_test
~
k
~
n1
~
n2
test
=
let
p_values
=
Array
.
init
1_000
~
f
:
(
fun
_
->
let
p
=
random_discrete_probability
k
~
alpha
:
10
.
in
let
x1
,
x2
=
h0_sample
~
n1
~
n2
p
in
(
test
{
k
;
x1
;
x2
})
.
pvalue
)
in
let
site
i
=
let
c0
=
(
counts
seqs0
i
:>
int
array
)
in
let
c1
=
(
counts
seqs1
i
:>
int
array
)
in
f
c0
c1
in
let
n
=
Alignment
.
ncols
alignment
in
List
.
init
n
~
f
:
site
ignore
(
OCamlR_graphics
.
hist
~
breaks
:
(
`n
20
)
p_values
:
OCamlR_graphics
.
hist
)
let
example
=
{
k
=
20
;
x1
=
[
|
5
;
3
;
1
;
1
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
|
]
;
x2
=
[
|
0
;
0
;
0
;
0
;
7
;
2
;
1
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
|
]
}
lib/tk/multinomial.mli
0 → 100644
View file @
8f0cb4f6
(** Two-sample multinomial tests *)
type
data
val
data
:
x1
:
int
array
->
x2
:
int
array
->
data
type
result
=
{
_T_
:
float
;
pvalue
:
float
;
}
module
LRT
:
sig
val
likelihood_log_ratio
:
data
->
float
val
asymptotic_test
:
data
->
result
val
simulation_test
:
?
sample_size
:
int
->
data
->
result
end
module
Sparse
:
sig
val
likelihood_log_ratio
:
data
->
float
val
asymptotic_test
:
data
->
result
val
simulation_test
:
?
sample_size
:
int
->
data
->
result
end
val
uniformity_test
:
k
:
int
->
n1
:
int
->
n2
:
int
->
(
data
->
result
)
->
unit
val
example
:
data
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment