📣 An issue occurred with the embedded container registry on October 25 2021, between 10:30 and 12:10 (UTC+2). Any persisting issues should be reported to CC-IN2P3 Support. 🐛

Commit df593f00 authored by Carine Rey's avatar Carine Rey
Browse files

add exe

parent 2bcb3e92
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright or Copr. Centre National de la Recherche Scientifique (CNRS) (2018)
# Contributors:
# - Carine Rey <carine.rey@ens-lyon.org>
# This software is a computer program whose purpose is to provide a set of scripts for pre and post processing of data for
# convergence detection programs.
# This software is governed by the CeCILL-C license under French law and abiding by the rules of distribution of free software.
# You can use, modify and/ or redistribute the software under the terms of the CeCILL-C license as circulated by CEA, CNRS and
# INRIA at the following URL "http://www.cecill.info".
# As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users
# are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive
# licensors have only limited liability.
# In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or
# reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated
# to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth
# computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements
# in conditions enabling the security of their systems and/or data to be ensured and, more generally, to use and operate it in
# the same conditions as regards security.
# The fact that you are presently reading this means that you have had knowledge of the CeCILL-C license and that you accept
# its terms.
import argparse
import sys
import os, random
import logging
from Bio import AlignIO
from Bio.Alphabet import generic_dna
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment
#===================================================================================================
# inputs
#===================================================================================================
### Option defining
# Command-line interface: an input alignment, an output name, the proportion of
# positions to turn into gaps, and a flag selecting codon-wise replacement.
parser = argparse.ArgumentParser(prog="add_indels.py",
                                 description='')
parser.add_argument('--version', action='version', version='%(prog)s 1.0')
parser.add_argument('--debug', action="store_true",
                    help="debug mode",
                    default=False)
##############
requiredOptions = parser.add_argument_group('REQUIRED OPTIONS')
# The input is opened by argparse itself (FileType), so args.ali is a file object.
# (The original call ended with a stray comma that built a throw-away tuple.)
requiredOptions.add_argument('-a', "--ali", type=argparse.FileType('r'),
                             help='Alignment filename', required=True)
requiredOptions.add_argument('-o', '--output', type=str,
                             help="Output name", required=True)
requiredOptions.add_argument('-p', '--indel_p', type=float,
                             help="indel proportion", required=True)
# NOTE: listed under the "REQUIRED OPTIONS" group but it is an optional
# store_true flag (False when omitted).
requiredOptions.add_argument('-c', '--codon', action="store_true",
                             help="is codon data")
##############
### Option parsing
args = parser.parse_args()
AliFile = args.ali      # open file object for the input alignment
OutFile = args.output   # output file name
IndelP = args.indel_p   # proportion of sites (or codons) replaced by gaps
IsCodon = args.codon    # True: gaps are inserted codon by codon
#===================================================================================================
# Set up output directory and logger
#===================================================================================================
### Set up the logger
# The logger itself lets every record through; the console handler below
# decides what is actually displayed.
logger = logging.getLogger("add_indels")
logger.setLevel(logging.DEBUG)
# Console handler: DEBUG records are shown only in --debug mode, INFO otherwise
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG if args.debug else logging.INFO)
# Attach the message layout to the handler, then the handler to the logger
formatter_ch = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
ch.setFormatter(formatter_ch)
logger.addHandler(ch)
# Trace the exact command line (visible in debug mode only)
logger.debug(sys.argv)
#===================================================================================================
# Read input alignment
#===================================================================================================
try:
    # Parse the whole input as a single FASTA alignment
    ali = AlignIO.read(AliFile, "fasta")
except Exception as exc:
    # Top-level boundary of the script: report the problem and stop with a
    # non-zero exit status.
    logger.error(str(exc))
    sys.exit(1)
finally:
    # The handle was opened by argparse.FileType and is not needed any more;
    # close it explicitly (its .name attribute stays readable afterwards).
    AliFile.close()
logger.info("Ali (%s) ok after checking", AliFile.name)
#===================================================================================================
# Add indels in the ali
#===================================================================================================
def add_indels(string, p, is_codon=None):
    """Replace a random proportion of a sequence with gap characters ("-").

    Args:
        string: the sequence; anything whose ``str()`` is the sequence text
            (a plain string or the ``.seq`` of a record).
        p: proportion of units to replace. Values that are not > 0 leave the
            sequence untouched; values above 1 replace every unit.
        is_codon: if true, the replaced units are whole codons (three
            consecutive characters starting at a multiple of 3); otherwise
            single characters. When None (default), the module-level
            ``IsCodon`` flag is used.

    Returns:
        The sequence as a ``str`` in which ``int(n_units * p)`` randomly
        chosen units are replaced by "-". Trailing characters that do not
        form a complete codon are never replaced in codon mode.
    """
    # Always hand back a plain string, whatever was received: the caller wraps
    # the result in Seq(), which expects string data.
    string = str(string)
    if not p > 0:
        return string
    if is_codon is None:
        # Fall back on the command-line flag parsed at module level
        is_codon = IsCodon
    unit = 3 if is_codon else 1
    nb_units = len(string) // unit
    # Never ask for more units than exist (p > 1)
    nb_indels = min(int(nb_units * p), nb_units)
    starts = random.sample(range(nb_units), nb_indels)
    # Diagnostics go to the script logger (shown in debug mode only) instead
    # of being printed on stdout.
    logging.getLogger("add_indels").debug(
        "units: %d, indels: %d, first positions: %s",
        nb_units, nb_indels, [start * unit for start in starts])
    chars = list(string)
    for start in starts:
        # Overwrite the whole unit (1 or 3 characters) with gaps
        chars[start * unit:(start + 1) * unit] = "-" * unit
    return "".join(chars)
# Build one gapped record per input sequence: same identifier, empty
# description, sequence returned by add_indels().
new_ali = [
    SeqRecord(Seq(add_indels(rec.seq, IndelP), generic_dna),
              id=rec.id, description="")
    for rec in ali
]
logger.info("Indel replacement ok")
#===================================================================================================
# Create output files
#===================================================================================================
# Write the gapped records as a FASTA alignment under the requested name
AlignIO.write(MultipleSeqAlignment(new_ali), OutFile, "fasta")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment