Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
8f0cb4f6
Commit
8f0cb4f6
authored
Nov 19, 2020
by
Philippe Veber
Browse files
included multinomial test from phylogenetics_convergence
parent
f9f89f6e
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
205 additions
and
27 deletions
+205
-27
lib/multinomial.ml
lib/multinomial.ml
+3
-1
lib/tk/convergence_tree.ml
lib/tk/convergence_tree.ml
+25
-0
lib/tk/convergence_tree.mli
lib/tk/convergence_tree.mli
+6
-0
lib/tk/multinomial.ml
lib/tk/multinomial.ml
+137
-26
lib/tk/multinomial.mli
lib/tk/multinomial.mli
+34
-0
No files found.
lib/multinomial.ml
View file @
8f0cb4f6
...
@@ -22,11 +22,13 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ file) ~(faa:ami
...
@@ -22,11 +22,13 @@ let%pworkflow multinomial_ocaml_implementation ~meth ~(tree_sc:_ file) ~(faa:ami
|>
Rresult
.
R
.
failwith_error_msg
|>
Rresult
.
R
.
failwith_error_msg
in
in
let
site
c0
c1
=
let
site
c0
c1
=
let
c0
=
(
c0
:
int
Amino_acid
.
table
:>
int
array
)
in
let
c1
=
(
c1
:
int
Amino_acid
.
table
:>
int
array
)
in
let
d
=
MT
.
data
~
x1
:
c0
~
x2
:
c1
in
let
d
=
MT
.
data
~
x1
:
c0
~
x2
:
c1
in
let
r
=
test
d
in
let
r
=
test
d
in
r
._
T_
,
r
.
pvalue
r
._
T_
,
r
.
pvalue
in
in
let
res
=
Multinomial
.
site_loo
p
tree
alignment
site
in
let
res
=
Convergence_tree
.
alignment_counts_ma
p
tree
alignment
site
in
let
res_lines
=
let
res_lines
=
List
.
mapi
res
~
f
:
(
fun
i
(
lr
,
pval
)
->
List
.
mapi
res
~
f
:
(
fun
i
(
lr
,
pval
)
->
sprintf
"%d
\t
%f
\t
%f"
(
i
+
1
)
(
1
.
-.
pval
)
lr
sprintf
"%d
\t
%f
\t
%f"
(
i
+
1
)
(
1
.
-.
pval
)
lr
...
...
lib/tk/convergence_tree.ml
View file @
8f0cb4f6
...
@@ -207,3 +207,28 @@ let infer_binary_condition_on_branches ?(gain_relative_cost = 2.) t
...
@@ -207,3 +207,28 @@ let infer_binary_condition_on_branches ?(gain_relative_cost = 2.) t
Fitch
.
fitch
~
cost
~
n
:
2
~
category
t
Fitch
.
fitch
~
cost
~
n
:
2
~
category
t
|>
Tree
.
map
~
node
:
convert_node
~
leaf
:
convert_node
~
branch
:
Fn
.
id
|>
Tree
.
map
~
node
:
convert_node
~
leaf
:
convert_node
~
branch
:
Fn
.
id
|>
transfer_condition_to_branches
|>
reset_transitions
|>
transfer_condition_to_branches
|>
reset_transitions
let
alignment_counts_map
tree
alignment
f
=
let
leaves
=
leaves
tree
|>
List
.
map
~
f
:
(
fun
(
n
,
cond
)
->
match
Alignment
.
find_sequence
alignment
n
with
|
None
->
failwithf
"Could not find %s in alignment"
n
()
|
Some
seq
->
seq
,
cond
)
in
let
seqs0
,
seqs1
=
List
.
partition_map
leaves
~
f
:
Either
.(
function
|
(
aa
,
`Ancestral
)
->
First
aa
|
(
aa
,
`Convergent
)
->
Second
aa
)
in
let
counts
seqs
i
=
Amino_acid
.
Table
.
init
(
fun
aa
->
let
aa
=
Amino_acid
.
to_char
aa
in
List
.
count
seqs
~
f
:
(
fun
s
->
Char
.
equal
s
.
[
i
]
aa
)
)
in
let
site
i
=
f
(
counts
seqs0
i
)
(
counts
seqs1
i
)
in
let
n
=
Alignment
.
ncols
alignment
in
List
.
init
n
~
f
:
site
lib/tk/convergence_tree.mli
View file @
8f0cb4f6
...
@@ -27,3 +27,9 @@ val infer_binary_condition_on_branches :
...
@@ -27,3 +27,9 @@ val infer_binary_condition_on_branches :
val
reset_transitions
:
Newick
.
tree
->
Newick
.
tree
val
reset_transitions
:
Newick
.
tree
->
Newick
.
tree
val
remove_nodes_with_single_child
:
Newick
.
tree
->
Newick
.
tree
val
remove_nodes_with_single_child
:
Newick
.
tree
->
Newick
.
tree
val
alignment_counts_map
:
t
->
Alignment
.
t
->
(
int
Amino_acid
.
table
->
int
Amino_acid
.
table
->
'
a
)
->
'
a
list
lib/tk/multinomial.ml
View file @
8f0cb4f6
open
Core_kernel
open
Core_kernel
open
Phylogenetics
type
data
=
{
let
site_loop
tree
alignment
f
=
k
:
int
;
let
leaves
=
x1
:
int
array
;
Convergence_tree
.
leaves
tree
x2
:
int
array
;
|>
List
.
map
~
f
:
(
fun
(
n
,
cond
)
->
}
match
Alignment
.
find_sequence
alignment
n
with
|
None
->
failwithf
"Could not find %s in alignment"
n
()
let
data
~
x1
~
x2
=
|
Some
seq
->
seq
,
cond
let
k1
=
Array
.
length
x1
in
)
let
k2
=
Array
.
length
x2
in
in
if
k1
<>
k2
then
raise
(
Invalid_argument
"Multinomial_test.data: the two arrays should have the same size"
)
;
let
seqs0
,
seqs1
=
List
.
partition_map
leaves
~
f
:
Either
.(
function
{
k
=
k1
;
x1
;
x2
}
|
(
aa
,
`Ancestral
)
->
First
aa
|
(
aa
,
`Convergent
)
->
Second
aa
type
result
=
{
)
_T_
:
float
;
pvalue
:
float
;
}
let
isum
=
Array
.
reduce_exn
~
f
:
(
+
)
let
sum
a
b
f
=
let
rec
loop
acc
a
b
=
if
a
>=
b
then
acc
else
loop
(
acc
+.
f
a
)
(
a
+
1
)
b
in
in
let
counts
seqs
i
=
loop
0
.
a
b
Amino_acid
.
Table
.
init
(
fun
aa
->
let
aa
=
Amino_acid
.
to_char
aa
in
let
%
test
_
=
List
.
count
seqs
~
f
:
(
fun
s
->
Char
.
equal
s
.
[
i
]
aa
)
Float
.(
sum
0
4
(
fun
i
->
[
|
1
.
;
2
.
;
3
.
;
4
.
|
]
.
(
i
))
=
10
.
)
let
frequencies
c
=
let
n
=
float
@@
Array
.
reduce_exn
~
f
:
(
+
)
c
in
Array
.
map
c
~
f
:
(
fun
c
->
float
c
/.
n
)
let
summed_counts
{
k
;
x1
;
x2
}
=
Array
.
init
k
~
f
:
(
fun
i
->
x1
.
(
i
)
+
x2
.
(
i
))
let
simulation_test
?
(
sample_size
=
10_000
)
~
likelihood_log_ratio
d
=
let
freq
=
frequencies
(
summed_counts
d
)
in
let
t_obs
=
likelihood_log_ratio
d
in
let
c
=
ref
0
in
let
n1
=
isum
d
.
x1
in
let
n2
=
isum
d
.
x2
in
for
_
=
1
to
sample_size
do
let
d
=
{
k
=
d
.
k
;
x1
=
Owl
.
Stats
.
multinomial_rvs
~
p
:
freq
n1
;
x2
=
Owl
.
Stats
.
multinomial_rvs
~
p
:
freq
n2
;
}
in
let
_T_
=
likelihood_log_ratio
d
in
if
Float
.(
_T_
>=
t_obs
)
then
incr
c
done
;
let
pvalue
=
float
!
c
/.
float
sample_size
in
{
_T_
=
t_obs
;
pvalue
}
module
LRT
=
struct
let
likelihood_log_ratio
({
x1
;
x2
;
_
}
as
d
)
=
let
logpdf
=
Owl
.
Stats
.
multinomial_logpdf
in
let
log_prod0
=
let
p
=
frequencies
(
summed_counts
d
)
in
logpdf
x1
~
p
+.
logpdf
x2
~
p
in
let
log_prod1
=
logpdf
x1
~
p
:
(
frequencies
x1
)
+.
logpdf
x2
~
p
:
(
frequencies
x2
)
in
-
2
.
*.
(
log_prod0
-.
log_prod1
)
let
asymptotic_test
d
=
let
_T_
=
likelihood_log_ratio
d
in
let
pvalue
=
1
.
-.
Owl
.
Stats
.
chi2_cdf
_T_
~
df
:
(
float
(
d
.
k
-
1
))
in
{
_T_
;
pvalue
}
let
simulation_test
=
simulation_test
~
likelihood_log_ratio
end
(**
Implementation of
Two-Sample Test for Sparse High Dimensional Multinomial Distributions
Amanda Plunkett and Junyong Park
*)
module
Sparse
=
struct
let
f_star
~
n1
~
n2
~
x1
~
x2
=
(
x1
/.
n1
-.
x2
/.
n2
)
**
2
.
-.
x1
/.
(
n1
**
2
.
)
-.
x2
/.
(
n2
**
2
.
)
let
likelihood_log_ratio
{
k
;
x1
;
x2
}
=
let
n1
=
float
(
isum
x1
)
in
let
n2
=
float
(
isum
x2
)
in
let
p1_hat
=
Array
.
map
x1
~
f
:
(
fun
x
->
float
x
/.
n1
)
in
let
p2_hat
=
Array
.
map
x2
~
f
:
(
fun
x
->
float
x
/.
n2
)
in
let
sigma_k_hat_squared
=
let
f
n
p_hat
=
sum
0
k
(
fun
i
->
2
.
/.
(
n
**
2
.
)
*.
(
p_hat
.
(
i
)
**
2
.
-.
p_hat
.
(
i
)
/.
n
))
in
f
n1
p1_hat
+.
f
n2
p2_hat
+.
4
.
/.
n1
/.
n2
*.
sum
0
k
(
fun
i
->
p1_hat
.
(
i
)
*.
p2_hat
.
(
i
))
in
let
sigma_k_hat
=
sqrt
sigma_k_hat_squared
in
sum
0
k
(
fun
i
->
f_star
~
n1
~
n2
~
x1
:
(
float
x1
.
(
i
))
~
x2
:
(
float
x2
.
(
i
)))
/.
sigma_k_hat
let
asymptotic_test
d
=
let
_T_
=
likelihood_log_ratio
d
in
let
pvalue
=
1
.
-.
Owl
.
Stats
.
gaussian_cdf
_T_
~
mu
:
0
.
~
sigma
:
1
.
in
{
_T_
;
pvalue
}
let
simulation_test
=
simulation_test
~
likelihood_log_ratio
end
let
random_discrete_probability
k
~
alpha
=
let
x
=
Array
.
init
k
~
f
:
(
fun
_
->
Owl
.
Stats
.
gamma_rvs
~
shape
:
alpha
~
scale
:
1
.
)
in
let
s
=
Array
.
reduce_exn
x
~
f
:
(
+.
)
in
Array
.
map
x
~
f
:
(
fun
x
->
x
/.
s
)
let
counts
n
p
=
let
r
=
Array
.
map
p
~
f
:
(
fun
_
->
0
)
in
for
_
=
1
to
n
do
let
i
=
Owl
.
Stats
.
categorical_rvs
p
in
r
.
(
i
)
<-
r
.
(
i
)
+
1
done
;
r
let
h0_sample
~
n1
~
n2
p
=
counts
n1
p
,
counts
n2
p
let
uniformity_test
~
k
~
n1
~
n2
test
=
let
p_values
=
Array
.
init
1_000
~
f
:
(
fun
_
->
let
p
=
random_discrete_probability
k
~
alpha
:
10
.
in
let
x1
,
x2
=
h0_sample
~
n1
~
n2
p
in
(
test
{
k
;
x1
;
x2
})
.
pvalue
)
)
in
in
let
site
i
=
ignore
(
OCamlR_graphics
.
hist
~
breaks
:
(
`n
20
)
p_values
:
OCamlR_graphics
.
hist
)
let
c0
=
(
counts
seqs0
i
:>
int
array
)
in
let
c1
=
(
counts
seqs1
i
:>
int
array
)
in
let
example
=
{
f
c0
c1
k
=
20
;
in
x1
=
[
|
5
;
3
;
1
;
1
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
|
]
;
let
n
=
Alignment
.
ncols
alignment
in
x2
=
[
|
0
;
0
;
0
;
0
;
7
;
2
;
1
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
;
0
|
]
List
.
init
n
~
f
:
site
}
lib/tk/multinomial.mli
0 → 100644
View file @
8f0cb4f6
(** Two-sample multinomial tests *)
type
data
val
data
:
x1
:
int
array
->
x2
:
int
array
->
data
type
result
=
{
_T_
:
float
;
pvalue
:
float
;
}
module
LRT
:
sig
val
likelihood_log_ratio
:
data
->
float
val
asymptotic_test
:
data
->
result
val
simulation_test
:
?
sample_size
:
int
->
data
->
result
end
module
Sparse
:
sig
val
likelihood_log_ratio
:
data
->
float
val
asymptotic_test
:
data
->
result
val
simulation_test
:
?
sample_size
:
int
->
data
->
result
end
val
uniformity_test
:
k
:
int
->
n1
:
int
->
n2
:
int
->
(
data
->
result
)
->
unit
val
example
:
data
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment