Commit 7d058f3c authored by Carine Rey's avatar Carine Rey
Browse files

fix error compatibility py2/py3 + clean up

parent 37368a9e
......@@ -58,9 +58,6 @@ requiredOptions.add_argument('-a', "--ali", type=argparse.FileType('r'),
help='Alignment filename', required=True)
requiredOptions.add_argument('-o', '--output', type=str,
help="Output name", required=True)
requiredOptions.add_argument('-o2', '--output2', type=str,
help="Output name (bilan)", required=False)
##############
......@@ -70,8 +67,6 @@ args = parser.parse_args()
TreeFile = args.tree
AliFile = args.ali
OutFile = args.output
OutFile2 = args.output2
#===================================================================================================
# Set up output directory and logger
#===================================================================================================
......@@ -132,14 +127,9 @@ cond_1_node_ids = [n for n in t.search_nodes(Condition = "1") if n.is_leaf()]
leaf_node_ids = [n for n in t.search_nodes() if n.is_leaf()]
#trans_node_ids = [n.ND for n in t.search_nodes(Transition = "1")]
#anc_trans_node_ids = [n.up.ND for n in t.search_nodes(Transition = "1")]
#leaves_trans_node_ids = [n for n in t.search_nodes(Transition = "1") if n.is_leaf()]
logger.info("cond 0 nodes: %s",[n.ND for n in cond_0_node_ids])
logger.info("cond 0 nodes: %s",[n.ND for n in cond_1_node_ids])
logger.info("cond 1 nodes: %s",[n.ND for n in cond_1_node_ids])
logger.info("leaves_node_ids nodes: %s",[n.ND for n in leaf_node_ids])
logger.info("Tree (%s) ok after checking", TreeFile.name)
......@@ -148,9 +138,6 @@ logger.info("Tree (%s) ok after checking", TreeFile.name)
#===================================================================================================
# Detect tips in convergent clades
#===================================================================================================
df_T_l = []
conditions = dict()
for n in t.iter_leaves():
......@@ -202,6 +189,7 @@ else:
df_leaves = pd.DataFrame()
logger.info(df_leaves)
#===================================================================================================
......@@ -268,15 +256,33 @@ def simplexFromCounts (vector):
if v2[i] == 0:
v2[i] = 1
s = sum(v2)
logger.debug("s")
logger.debug(s)
logger.debug("v2")
logger.debug(v2)
for i in range(len(v2)):
v2[i] = v2[i]/s
v2[i] = v2[i]/float(s)
logger.debug("v2 2")
logger.debug(v2)
return v2
# Likelihood ratio test betweem multinomial vectors
def multinomial_lrt (vector1, vector2, vectorAll):
logger.debug("vector0")
logger.debug(vector1)
logger.debug("vector1")
logger.debug(vector2)
logger.debug("vectorAll")
logger.debug(vectorAll)
param1 = simplexFromCounts(vector1)
param2 = simplexFromCounts(vector2)
paramAll = simplexFromCounts(vectorAll)
logger.debug("param1")
logger.debug(param1)
logger.debug("param2")
logger.debug(param2)
logger.debug("paramAll")
logger.debug(paramAll)
rv1 = multinomial(sum(vector1), param1)
rv2 = multinomial(sum(vector2), param2)
rvAll = multinomial(sum(vectorAll), paramAll)
......@@ -288,18 +294,28 @@ def multinomial_lrt (vector1, vector2, vectorAll):
return lr, lrt
def applyMultinomialLRT(x, orderedAA):
logger.info(x)
cond1 = x['AA'][x['condition']==1]
cond0 = x['AA'][x['condition']==0]
vec0 = [0]*20
vec1 = [0]*20
logger.debug("cond0")
logger.debug(cond0)
logger.debug("cond1")
logger.debug(cond1)
for i in range (len (cond0)):
#print(cond0.iloc[i])
vec0[orderedAA[cond0.iloc[i]]] = vec0[orderedAA[cond0.iloc[i]]] + 1
for i in range (len (cond1)):
#print(cond1.iloc[i])
vec1[orderedAA[cond1.iloc[i]]] = vec1[orderedAA[cond1.iloc[i]]] + 1
#print(vec1)
logger.debug("vec0")
logger.debug(vec0)
logger.debug("vec1")
logger.debug(vec1)
vecAll = [sum(x) for x in zip(vec0, vec1)]
logger.debug("vecAll")
logger.debug(vecAll)
return multinomial_lrt(vec0, vec1, vecAll)
df_leaves_grouped = df_leaves.groupby(['Sites']).apply(lambda x: (applyMultinomialLRT(x, orderedAA)))
......@@ -328,7 +344,3 @@ if df_final.shape[0] != n_sites:
df_final.to_csv(OutFile, sep = "\t", index = False)
logger.info("Write summary output in %s", OutFile)
if OutFile2:
df_T.to_csv(OutFile2, sep = "\t", index = False)
logger.info("Write detailed output in %s", OutFile2)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment