Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Open sidebar
VEBER Philippe
codepi
Commits
3f225416
Commit
3f225416
authored
Jul 04, 2018
by
Carine Rey
Browse files
add Topological and Tdg09 detection meth (not working yet)
parent
8ee06d5a
Changes
15
Hide whitespace changes
Inline
Side-by-side
Showing
15 changed files
with
649 additions
and
7 deletions
+649
-7
etc/docker/tamuri/Dockerfile
etc/docker/tamuri/Dockerfile
+33
-0
etc/docker/tamuri/build_docker.sh
etc/docker/tamuri/build_docker.sh
+14
-0
lib/convergence_detection.ml
lib/convergence_detection.ml
+18
-0
lib/convergence_detection.mli
lib/convergence_detection.mli
+3
-0
lib/pipeline.ml
lib/pipeline.ml
+13
-3
lib/ready_dataset.ml
lib/ready_dataset.ml
+1
-0
lib/scripts/calc_topological.py
lib/scripts/calc_topological.py
+116
-0
lib/scripts/merge_det_results.py
lib/scripts/merge_det_results.py
+24
-1
lib/scripts/parse_input_tree.py
lib/scripts/parse_input_tree.py
+69
-2
lib/scripts/parse_results_tdg09.py
lib/scripts/parse_results_tdg09.py
+76
-0
lib/scripts/rename_input_tree_ali_tdg09.py
lib/scripts/rename_input_tree_ali_tdg09.py
+152
-0
lib/tamuri.ml
lib/tamuri.ml
+43
-0
lib/topological.ml
lib/topological.ml
+79
-0
lib/tree_dataset.ml
lib/tree_dataset.ml
+4
-1
lib/tree_dataset.mli
lib/tree_dataset.mli
+4
-0
No files found.
etc/docker/tamuri/Dockerfile
0 → 100644
View file @
3f225416
FROM
debian:stretch
RUN
apt-get update
&&
\
apt-get
install
--no-install-recommends
-qy
\
python-pip
\
python-dev
\
build-essential
\
python2.7-minimal
\
python-numpy
\
python-pandas
\
python-setuptools
\
python-dev
\
pyqt4-dev-tools
\
xauth
\
libcurl4-openssl-dev
\
libxml2-dev
\
libssl-dev
\
libcairo2-dev
\
openjdk-8-jre
\
wget
RUN
pip
install
--upgrade
pip
RUN
pip
install
ete3
==
3.0.0b35
RUN
pip
install
biopython
==
1.72
# getting tdg09
WORKDIR
/tdg09
RUN
wget https://github.com/tamuri/tdg09/archive/v1.1.2.tar.gz
&&
\
tar
xzf v1.1.2.tar.gz
&&
\
rm
v1.1.2.tar.gz
&&
\
printf
"#!/bin/bash
\
\n
java -cp /tdg09/tdg09-1.1.2/dist/tdg09.jar tdg09.Analyse
\$
@
\
\n
"
>
/usr/bin/tdg09.sh
&&
chmod
+x /usr/bin/tdg09.sh
etc/docker/tamuri/build_docker.sh
0 → 100644
View file @
3f225416
#! /bin/bash
set
-e
IMAGE_NAME
=
tdg09
DOCKERFILE_DIR
=
.
TAG
=
v1.1.2
REPO
=
carinerey/
$IMAGE_NAME
:
$TAG
docker build
-t
$REPO
-f
./Dockerfile
$DOCKERFILE_DIR
if
[[
$1
==
"push_yes"
]]
then
docker push
$REPO
fi
lib/convergence_detection.ml
View file @
3f225416
...
...
@@ -12,6 +12,9 @@ type result = [
|
`Diffsel_bis
of
[
`diffsel
]
directory
workflow
|
`Identical_LG
of
[
`identical
]
directory
workflow
|
`Identical_WAG
of
[
`identical
]
directory
workflow
|
`Topological_LG
of
[
`topological
]
directory
workflow
|
`Topological_WAG
of
[
`topological
]
directory
workflow
|
`Tdg09
of
[
`tdg09
]
directory
workflow
]
let
meth_string_of_result
=
function
...
...
@@ -21,6 +24,9 @@ let meth_string_of_result = function
|
`Diffsel_bis
_
->
"diffsel_bis"
|
`Identical_LG
_
->
"identical_LG"
|
`Identical_WAG
_
->
"identical_WAG"
|
`Topological_LG
_
->
"topological_LG"
|
`Topological_WAG
_
->
"topological_WAG"
|
`Tdg09
_
->
"Tdg09"
type
dataset_res
=
{
model_prefix
:
string
;
...
...
@@ -40,6 +46,9 @@ let merge_results ~res_by_tools : text_file workflow =
|
`Diffsel_bis
d
->
Diffsel
.
selector
d
|
`Identical_LG
d
->
Identical
.
results
d
|
`Identical_WAG
d
->
Identical
.
results
d
|
`Topological_LG
d
->
Topological
.
results
d
|
`Topological_WAG
d
->
Topological
.
results
d
|
`Tdg09
d
->
Tamuri
.
results
d
in
let
opt
=
match
res
with
|
`Pcoc
_
->
string
"--pcoc"
...
...
@@ -48,6 +57,9 @@ let merge_results ~res_by_tools : text_file workflow =
|
`Diffsel_bis
_
->
string
"--diffsel_bis"
|
`Identical_LG
_
->
string
"--identical_LG"
|
`Identical_WAG
_
->
string
"--identical_WAG"
|
`Topological_LG
_
->
string
"--topological_LG"
|
`Topological_WAG
_
->
string
"--topological_WAG"
|
`Tdg09
_
->
string
"--tdg09"
in
seq
~
sep
:
" "
[
opt
;
dep
w
]
)
...
...
@@ -74,6 +86,9 @@ let plot_merge_results ~(res_by_tools:result list) ~tree ~faa ~tsv : svg workflo
|
`Diffsel_bis
_
->
"Diffsel_bis"
|
`Identical_LG
_
->
"Identical_LG08"
|
`Identical_WAG
_
->
"Identical_WAG01"
|
`Topological_LG
_
->
"Topological_LG08"
|
`Topological_WAG
_
->
"Topological_WAG01"
|
`Tdg09
_
->
"Tdg09"
in
string
opt
)
|>
seq
~
sep
:
","
...
...
@@ -86,6 +101,9 @@ let plot_merge_results ~(res_by_tools:result list) ~tree ~faa ~tsv : svg workflo
|
`Diffsel_bis
_
->
"Diffsel_bis:0.9"
|
`Identical_LG
_
->
"Identical_LG08:0.9"
|
`Identical_WAG
_
->
"Identical_WAG01:0.9"
|
`Topological_LG
_
->
"Topological_LG08:0.9"
|
`Topological_WAG
_
->
"Topological_WAG01:0.9"
|
`Tdg09
_
->
"Tdg09:0.9"
in
string
opt
)
|>
seq
~
sep
:
","
...
...
lib/convergence_detection.mli
View file @
3f225416
...
...
@@ -13,6 +13,9 @@ type result = [
|
`Diffsel_bis
of
[
`diffsel
]
directory
workflow
|
`Identical_LG
of
[
`identical
]
directory
workflow
|
`Identical_WAG
of
[
`identical
]
directory
workflow
|
`Topological_LG
of
[
`topological
]
directory
workflow
|
`Topological_WAG
of
[
`topological
]
directory
workflow
|
`Tdg09
of
[
`tdg09
]
directory
workflow
]
val
meth_string_of_result
:
result
->
string
...
...
lib/pipeline.ml
View file @
3f225416
...
...
@@ -51,6 +51,9 @@ let repo_of_detection_result res =
|
`Diffsel_bis
w
->
item
[
"diffsel_bis.results.tsv"
]
(
Diffsel
.
selector
w
)
|
`Identical_LG
w
->
item
[
"Identical_LG.results.tsv"
]
(
Identical
.
results
w
)
|
`Identical_WAG
w
->
item
[
"Identical_WAG.results.tsv"
]
(
Identical
.
results
w
)
|
`Topological_LG
w
->
item
[
"Topological_LG.results.tsv"
]
(
Topological
.
results
w
)
|
`Topological_WAG
w
->
item
[
"Topological_WAG.results.tsv"
]
(
Topological
.
results
w
)
|
`Tdg09
w
->
item
[
"Tdg09.results.tsv"
]
(
Tamuri
.
results
w
)
)
;
(
match
res
with
...
...
@@ -60,6 +63,9 @@ let repo_of_detection_result res =
|
`Diffsel_bis
w
->
item
[
"raw_results"
]
w
|
`Identical_LG
w
->
item
[
"raw_results"
]
w
|
`Identical_WAG
w
->
item
[
"raw_results"
]
w
|
`Topological_LG
w
->
item
[
"raw_results"
]
w
|
`Topological_WAG
w
->
item
[
"raw_results"
]
w
|
`Tdg09
w
->
item
[
"raw_results"
]
w
)
;
]
|>
Repo
.
shift
det_meth_prefix
...
...
@@ -90,23 +96,27 @@ let derive_from_det_meth ~det_meth ~(dataset : Dataset.t) ~preview =
let
tree_sc
=
Tree_dataset
.
tree
dataset
.
dataset
.
tree_dataset
`Detection
in
let
tree_id
=
Tree_dataset
.
tree
dataset
.
dataset
.
tree_dataset
`Simulation
in
let
diffsel_tree
=
Tree_dataset
.
diffsel_tree
dataset
.
dataset
.
tree_dataset
in
let
tree_conv
=
Tree_dataset
.
topological_tree
dataset
.
dataset
.
tree_dataset
in
let
w_every
=
if
preview
then
1
else
1
in
let
n_cycles
=
if
preview
then
10
else
2000
in
match
det_meth
with
|
`Pcoc
->
`Pcoc
(
Pcoc
.
pcoc
~
plot_complete
:
true
~
gamma
:
false
~
faa
~
tree
:
tree_sc
)
|
`Pcoc_gamma
->
`Pcoc_gamma
(
Pcoc
.
pcoc
~
plot_complete
:
true
~
gamma
:
true
~
faa
~
tree
:
tree_sc
)
|
`Tdg09
->
`Tdg09
(
Tamuri
.
tdg09
~
faa
~
tree
:
tree_sc
)
|
`Diffsel
->
`Diffsel
(
Diffsel
.
diffsel
~
phy_n
~
tree
:
diffsel_tree
~
w_every
~
n_cycles
)
|
`Diffsel_bis
->
`Diffsel_bis
(
Diffsel
.
diffsel
~
phy_n
~
tree
:
diffsel_tree
~
w_every
~
n_cycles
)
|
`Identical_LG
->
`Identical_LG
(
Identical
.
identical
~
faa
~
tree_id
~
tree_sc
~
prot_model
:
"LG08"
)
|
`Identical_WAG
->
`Identical_WAG
(
Identical
.
identical
~
faa
~
tree_id
~
tree_sc
~
prot_model
:
"WAG01"
)
|
`Topological_LG
->
`Topological_LG
(
Topological
.
topological
~
faa
~
tree
:
tree_id
~
tree_conv
~
prot_model
:
"LG08"
)
|
`Topological_WAG
->
`Topological_WAG
(
Topological
.
topological
~
faa
~
tree
:
tree_id
~
tree_conv
~
prot_model
:
"WAG01"
)
let
derive_from_dataset
~
dataset
~
preview
=
let
det_meths
=
[
`Pcoc
;
`Pcoc_gamma
;
`Tdg09
;
`Diffsel
;
`Diffsel_bis
;
`Identical_LG
;
`Identical_WAG
]
in
`Identical_LG
;
`Identical_WAG
;
`Topological_LG
;
`Topological_WAG
]
in
let
res_by_tools
=
List
.
map
det_meths
~
f
:
(
fun
det_meth
->
derive_from_det_meth
~
det_meth
~
dataset
~
preview
)
in
...
...
lib/ready_dataset.ml
View file @
3f225416
...
...
@@ -25,6 +25,7 @@ let repo rd =
item
[
"tree.only_convergent_tags.nhx"
]
(
Tree_dataset
.
tree
rd
.
tree_dataset
`Detection
)
;
item
[
"tree.only_node_ids.nhx"
]
(
Tree_dataset
.
tree
rd
.
tree_dataset
`Simulation
)
;
item
[
"tree.diffsel"
]
(
Tree_dataset
.
diffsel_tree
rd
.
tree_dataset
)
;
item
[
"tree.convergent_topology"
]
(
Tree_dataset
.
topological_tree
rd
.
tree_dataset
)
;
item
[
"simulated_sequences.fna"
]
rd
.
fna
;
item
[
"simulated_sequences.faa"
]
rd
.
faa
;
]
...
...
lib/scripts/calc_topological.py
0 → 100644
View file @
3f225416
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright or Copr. Centre National de la Recherche Scientifique (CNRS) (2018)
# Contributors:
# - Carine Rey <carine.rey@ens-lyon.org>
# This software is a computer program whose purpose is to provide a set of scripts for pre and post processing of data for
# convergence detection programs.
# This software is governed by the CeCILL-C license under French law and abiding by the rules of distribution of free software.
# You can use, modify and/ or redistribute the software under the terms of the CeCILL-C license as circulated by CEA, CNRS and
# INRIA at the following URL "http://www.cecill.info".
# As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users
# are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive
# licensors have only limited liability.
# In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or
# reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated
# to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth
# computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements
# in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in
# the same conditions as regards security.
# The fact that you are presently reading this means that you have had knowledge of the CeCILL-C license and that you accept
# its terms.
import
argparse
import
sys
import
os
,
re
import
logging
import
pandas
as
pd
from
ete3
import
Tree
,
NodeStyle
,
TreeStyle
,
TextFace
from
Bio
import
AlignIO
#===================================================================================================
# inputs
#===================================================================================================
### Option defining
parser
=
argparse
.
ArgumentParser
(
prog
=
"calc_topological.py"
,
description
=
''
)
parser
.
add_argument
(
'--version'
,
action
=
'version'
,
version
=
'%(prog)s 1.0'
)
parser
.
add_argument
(
'--debug'
,
action
=
"store_true"
,
help
=
"debug mode"
,
default
=
False
)
##############
requiredOptions
=
parser
.
add_argument_group
(
'REQUIRED OPTIONS'
)
requiredOptions
.
add_argument
(
'-bppml_non_conv'
,
"--bppml_non_conv"
,
type
=
argparse
.
FileType
(
'r'
),
help
=
'bppml_non_conv filename'
,
required
=
True
)
requiredOptions
.
add_argument
(
'-bppml_conv'
,
"--bppml_conv"
,
type
=
argparse
.
FileType
(
'r'
),
help
=
'bppml_conv filename'
,
required
=
True
)
requiredOptions
.
add_argument
(
'-o'
,
'--output'
,
type
=
str
,
help
=
"Output name"
,
required
=
True
)
##############
### Option parsing
args
=
parser
.
parse_args
()
Bppml_non_conv
=
args
.
bppml_non_conv
Bppml_conv
=
args
.
bppml_conv
OutFile
=
args
.
output
#===================================================================================================
# Set up output directory and logger
#===================================================================================================
### Set up the logger
# create logger
logger
=
logging
.
getLogger
(
"calc_identical"
)
logger
.
setLevel
(
logging
.
DEBUG
)
ch
=
logging
.
StreamHandler
()
if
args
.
debug
:
ch
.
setLevel
(
logging
.
DEBUG
)
else
:
ch
.
setLevel
(
logging
.
INFO
)
# create formatter and add it to the handlers
formatter_ch
=
logging
.
Formatter
(
'%(asctime)s - %(levelname)s - %(message)s'
)
ch
.
setFormatter
(
formatter_ch
)
logger
.
addHandler
(
ch
)
logger
.
debug
(
sys
.
argv
)
#===================================================================================================
#
#===================================================================================================
#proba_df_melt["Sites"] = pd.to_numeric(proba_df_melt["Sites"].str.replace('[','').str.replace(']',''))
n_sites
=
20
Sites
=
[
i
+
1
for
i
in
range
(
n_sites
)]
df_final
=
pd
.
DataFrame
({
'Sites'
:
Sites
,
'Topological'
:
0
})
df_final
=
df_final
[[
"Sites"
,
"Topological"
]]
#===================================================================================================
# Create output files
#===================================================================================================
if
df_final
.
shape
[
0
]
!=
n_sites
:
logger
.
error
(
"Format error!"
)
sys
.
exit
(
42
)
df_final
.
to_csv
(
OutFile
,
sep
=
"
\t
"
,
index
=
False
)
logger
.
info
(
"Write summary output in %s"
,
OutFile
)
lib/scripts/merge_det_results.py
View file @
3f225416
...
...
@@ -62,6 +62,12 @@ availableOptions.add_argument('--identical_LG', type=str,
help
=
"Identical_LG output name"
,
default
=
None
)
availableOptions
.
add_argument
(
'--identical_WAG'
,
type
=
str
,
help
=
"Identical_WAG output name"
,
default
=
None
)
availableOptions
.
add_argument
(
'--topological_LG'
,
type
=
str
,
help
=
"topological_LG output name"
,
default
=
None
)
availableOptions
.
add_argument
(
'--topological_WAG'
,
type
=
str
,
help
=
"topological_WAG output name"
,
default
=
None
)
availableOptions
.
add_argument
(
'--tdg09'
,
type
=
str
,
help
=
"tdg09 output name"
,
default
=
None
)
##############
...
...
@@ -75,6 +81,9 @@ df_diffsel = pd.DataFrame()
df_diffsel_bis
=
pd
.
DataFrame
()
df_identical_LG
=
pd
.
DataFrame
()
df_identical_WAG
=
pd
.
DataFrame
()
df_topological_LG
=
pd
.
DataFrame
()
df_topological_WAG
=
pd
.
DataFrame
()
df_tdg09
=
pd
.
DataFrame
()
OutName
=
args
.
output
if
args
.
pcoc
:
...
...
@@ -103,10 +112,24 @@ if args.identical_WAG :
df_identical_WAG
=
pd
.
read_csv
(
args
.
identical_WAG
,
sep
=
"
\t
"
)
df_identical_WAG
.
rename
(
columns
=
{
'Identical'
:
'Identical_WAG01'
},
inplace
=
True
)
if
args
.
topological_LG
:
df_topological_LG
=
pd
.
read_csv
(
args
.
topological_LG
,
sep
=
"
\t
"
)
df_topological_LG
.
rename
(
columns
=
{
'Topological'
:
'Topological_LG08'
},
inplace
=
True
)
if
args
.
topological_WAG
:
df_topological_WAG
=
pd
.
read_csv
(
args
.
topological_WAG
,
sep
=
"
\t
"
)
df_topological_WAG
.
rename
(
columns
=
{
'Topological'
:
'Topological_WAG01'
},
inplace
=
True
)
if
args
.
tdg09
:
df_tdg09
=
pd
.
read_csv
(
args
.
tdg09
,
sep
=
"
\t
"
)
df_list
=
[
df
for
df
in
[
df_pcoc
,
df_pcoc_gamma
,
df_diffsel
,
df_diffsel_bis
,
df_identical_LG
,
df_identical_WAG
]
if
not
df
.
empty
]
df_identical_LG
,
df_identical_WAG
,
df_topological_LG
,
df_topological_WAG
,
df_tdg09
]
if
not
df
.
empty
]
df_list_len
=
[
df
.
shape
[
0
]
for
df
in
df_list
]
if
len
(
set
(
df_list_len
))
!=
1
:
...
...
lib/scripts/parse_input_tree.py
View file @
3f225416
...
...
@@ -205,9 +205,76 @@ with open("%s/tree.Ha.node_ids" %(OutDirName), "w") as output_Ha_node_ids:
t
.
write
(
format
=
1
,
features
=
[
"Condition"
,
"Transition"
],
outfile
=
"%s/tree.only_convergent_tags.nhx"
%
(
OutDirName
))
### tree.diffsel: a diffsel input formated tree
for
n
in
t
.
traverse
():
tdiffsel
=
t
.
copy
(
method
=
"deepcopy"
)
for
n
in
tdiffsel
.
traverse
():
if
n
.
ND
in
conv_node_ids
+
trans_node_ids
:
n
.
dist
=
2
elif
n
.
ND
in
not_conv_node_ids
:
n
.
dist
=
1
t
.
write
(
format
=
1
,
outfile
=
"%s/tree.diffsel"
%
(
OutDirName
))
tdiffsel
.
write
(
format
=
1
,
outfile
=
"%s/tree.diffsel"
%
(
OutDirName
))
### tree.topological : a diffsel input formated tree
def
build_conv_topo
(
t
):
conv_nodes_ids
=
[
n
.
ND
for
n
in
t
.
search_nodes
(
Condition
=
"1"
)]
tconv
=
t
.
copy
(
method
=
"deepcopy"
)
for
n
in
tconv
.
iter_leaves
():
n
.
add_features
(
L
=
1
)
for
n
in
tconv
.
traverse
():
n
.
add_features
(
COPY
=
0
)
# get the most recent ancestral node of all the convergent clades
l_convergent_clades
=
tconv
.
search_nodes
(
Condition
=
"1"
)
common_anc_conv
=
tconv
.
get_common_ancestor
(
l_convergent_clades
)
# duplicate it at its same location (branch lenght = 0). we get
# a duplicated subtree with subtrees A and B (A == B)
dist_dup
=
common_anc_conv
.
dist
if
not
common_anc_conv
.
is_root
():
dup_point
=
common_anc_conv
.
add_sister
(
name
=
"dup_point"
,
dist
=
0.000001
)
dup_point_root
=
False
else
:
dup_point
=
Tree
()
dup_point_root
=
True
dup_point
.
dist
=
0.000001
dup_point
.
add_features
(
ND
=
0
,
Transition
=
"0"
,
Condition
=
"0"
)
common_anc_conv
.
detach
()
common_anc_conv_copy
=
common_anc_conv
.
copy
(
method
=
"deepcopy"
)
# tag duplicated nodes:
for
n
in
common_anc_conv_copy
.
traverse
():
n
.
COPY
=
1
if
n
.
ND
not
in
conv_nodes_ids
and
not
n
.
is_root
():
n
.
dist
=
0.000001
# pruned A from all branches not leading to any convergent clade
l_leaves_to_keep_A
=
common_anc_conv
.
search_nodes
(
COPY
=
0
,
Condition
=
"0"
,
L
=
1
)
#logger.debug("A: %s",l_leaves_to_keep_A)
common_anc_conv
.
prune
(
l_leaves_to_keep_A
,
preserve_branch_length
=
True
)
# pruned B from all branches not leading to any non-convergent clade
l_leaves_to_keep_B
=
common_anc_conv_copy
.
search_nodes
(
COPY
=
1
,
Condition
=
"1"
,
L
=
1
)
#logger.debug("B : %s", l_leaves_to_keep_B)
common_anc_conv_copy
.
prune
(
l_leaves_to_keep_B
,
preserve_branch_length
=
True
)
dup_point
.
add_child
(
common_anc_conv_copy
)
dup_point
.
add_child
(
common_anc_conv
)
tconv
=
dup_point
.
get_tree_root
()
nodeId
=
0
for
node
in
tconv
.
traverse
(
"postorder"
):
node
.
ND
=
nodeId
nodeId
+=
1
return
tconv
tconv
=
build_conv_topo
(
t
)
tconv
.
write
(
format
=
1
,
outfile
=
"%s/tree.topological"
%
(
OutDirName
))
lib/scripts/parse_results_tdg09.py
0 → 100644
View file @
3f225416
#!/usr/bin/python
# -*- coding: utf-8 -*-
import
argparse
import
pandas
as
pd
import
sys
# LIGNE ESSAI
# python parse_results_tdg09.py -ftdg09 toto -o out
### Option defining
parser
=
argparse
.
ArgumentParser
(
prog
=
"parse_results_tdg09.py"
,
description
=
''
)
parser
.
add_argument
(
'--version'
,
action
=
'version'
,
version
=
'%(prog)s 1.0'
)
parser
.
add_argument
(
'--debug'
,
action
=
"store_true"
,
help
=
"debug mode"
,
default
=
False
)
##############
requiredOptions
=
parser
.
add_argument_group
(
'REQUIRED OPTIONS'
)
requiredOptions
.
add_argument
(
'-tdg09'
,
"--file"
,
type
=
argparse
.
FileType
(
'r'
),
help
=
'Input filename'
,
required
=
True
)
requiredOptions
.
add_argument
(
'-o'
,
'--output'
,
type
=
str
,
help
=
"Output name"
,
required
=
True
)
### Option parsing
args
=
parser
.
parse_args
()
Tdg09File
=
args
.
file
OutFile
=
args
.
output
searchlines
=
Tdg09File
.
readlines
()
Tdg09File
.
close
()
n_sites
=
int
(
searchlines
[
9
].
split
()[
1
])
fdr
=
[
"NA"
]
*
(
n_sites
)
for
i
,
line
in
enumerate
(
searchlines
):
if
"LrtResults:
\n
"
in
line
:
for
l
in
searchlines
[(
i
+
2
):(
i
+
n_sites
+
1
)]:
if
not
'FullResults:
\n
'
in
l
:
ll
=
l
.
split
()
if
ll
:
index
=
int
(
ll
[
2
][:
-
1
])
value
=
float
(
ll
[
6
].
replace
(
','
,
'.'
))
#print ll
#print index
#print value
fdr
[
index
-
1
]
=
1
-
value
else
:
break
# Sites Tamuri
Sites
=
[
i
+
1
for
i
in
range
(
n_sites
)]
df_final
=
pd
.
DataFrame
({
'Sites'
:
Sites
,
'Tdg09'
:
fdr
})
df_final
=
df_final
[[
"Sites"
,
"Tdg09"
]]
#===================================================================================================
# Create output files
#===================================================================================================
if
df_final
.
shape
[
0
]
!=
n_sites
:
print
(
"Format error!"
)
sys
.
exit
(
42
)
df_final
.
to_csv
(
OutFile
,
sep
=
"
\t
"
,
index
=
False
)
print
(
"Write summary output in %s"
%
OutFile
)
lib/scripts/rename_input_tree_ali_tdg09.py
0 → 100644
View file @
3f225416
#!/usr/bin/python
# -*- coding: utf-8 -*-
import
argparse
import
sys
import
os
import
logging
import
ete3
import
Bio
from
ete3
import
Tree
,
NodeStyle
,
TreeStyle
,
TextFace
from
Bio
import
AlignIO
# LIGNE ESSAI
# python rename_input_tree_ali_tdg09.py -t tree.only_convergent_tags.nhx -a simulated_sequences.faa -o out
#===================================================================================================
# inputs
#===================================================================================================
### Option defining
parser
=
argparse
.
ArgumentParser
(
prog
=
"rename_input_tree_ali_tdg09.py"
,
description
=
''
)
parser
.
add_argument
(
'--version'
,
action
=
'version'
,
version
=
'%(prog)s 1.0'
)
parser
.
add_argument
(
'--debug'
,
action
=
"store_true"
,
help
=
"debug mode"
,
default
=
False
)
##############
requiredOptions
=
parser
.
add_argument_group
(
'REQUIRED OPTIONS'
)
requiredOptions
.
add_argument
(
'-t'
,
"--tree"
,
type
=
argparse
.
FileType
(
'r'
),
help
=
'Input tree filename'
,
required
=
True
)
requiredOptions
.
add_argument
(
'-a'
,
"--alignment"
,
type
=
argparse
.
FileType
(
'r'
),
help
=
'Input alignment'
,
required
=
True
)
requiredOptions
.
add_argument
(
'-out_a'
,
'--output_ali'
,
type
=
str
,
help
=
"Output ali name"
,
required
=
True
)
requiredOptions
.
add_argument
(
'-out_t'
,
'--output_tree'
,
type
=
str
,
help
=
"Output tree name"
,
required
=
True
)
##############
### Option parsing
args
=
parser
.
parse_args
()
TreeFile
=
args
.
tree
AliFile
=
args
.
alignment
OutAli
=
args
.
output_ali
OutTree
=
args
.
output_tree
#===================================================================================================
# Set up output directory and logger
#===================================================================================================
### Set up the logger
# create logger
logger
=
logging
.
getLogger
(
"parse_input_tree"
)
logger
.
setLevel
(
logging
.
DEBUG
)
ch
=
logging
.
StreamHandler
()
if
args
.
debug
:
ch
.
setLevel
(
logging
.
DEBUG
)
else
:
ch
.
setLevel
(
logging
.
INFO
)
# create formatter and add it to the handlers
formatter_ch
=
logging
.
Formatter
(
'%(asctime)s - %(levelname)s - %(message)s'
)
ch
.
setFormatter
(
formatter_ch
)
logger
.
addHandler
(
ch
)
logger
.
debug
(
sys
.
argv
)
#===================================================================================================
# Read input tree
#===================================================================================================
try
:
t
=
Tree
(
TreeFile
.
name
)
except
Exception
as
exc
:
logger
.
error
(
str
(
exc
))
sys
.
exit
(
1
)
logger
.
info
(
"Tree (%s) ok after checking"
,
TreeFile
.
name
)
#===================================================================================================
# Read input alignment
#===================================================================================================
try
:
alignment
=
AlignIO
.
read
(
AliFile
.
name
,
"fasta"
)
except
Exception
as
exc
:
logger
.
error
(
str
(
exc
))
sys
.
exit
(
1
)
logger
.
info
(
"Ali (%s) ok after checking"
,
AliFile
.
name
)
#===================================================================================================
# Create output files
#===================================================================================================
######## RENAME TREE LEAVES AND SEQUENCES FOR TDG09
nodeId
=
0