# decoy.py (2.08 KiB)
# Authored by Léo Schneider, commit 6ac8b84f
import torch.distributions as dist
import random
import numpy as np
# Integer code of each unmodified amino acid, keyed by its one-letter symbol.
# Codes start at 1 and follow the alphabetical order of the symbols.
ALPHABET_UNMOD = {
    residue: code
    for code, residue in enumerate("ACDEFGHIKLMNPQRSTVWY", start=1)
}
def reverse_protein(seq):
    """Return ``seq`` read from its last element to its first.

    The reversal is a negative-step slice, so the result has whatever type
    slicing ``seq`` produces (``str`` for a ``str``, ``list`` for a ``list``).

    Args:
        seq: Any object supporting extended slicing.

    Returns:
        The reversed sequence.
    """
    back_to_front = slice(None, None, -1)
    return seq[back_to_front]
def reverse_peptide(seq, format='numerical'):
    """Reverse a peptide in place, leaving every K and R at its position.

    All residues other than lysine (K) and arginine (R) are put in reverse
    order among themselves; K and R keep their original indices. For example
    ``P E P T I D E K`` becomes ``E D I T P E P K``.

    The previous implementation swapped ``seq[i]`` with ``seq[-i]``, which is
    off by one (``seq[-0]`` is ``seq[0]``), and only tested the left-hand
    residue, so a K/R on the right-hand side could still be moved.

    Args:
        seq: Mutable, index-assignable sequence of residues (e.g. a list).
            It is modified in place.
        format: ``'numerical'`` when residues are integer codes (K is 9 and
            R is 15, as in ``ALPHABET_UNMOD``) or ``'alphabetical'`` when
            they are one-letter symbols. Any other value leaves ``seq``
            untouched, as before.

    Returns:
        The same ``seq`` object, after the in-place reversal.
    """
    if format == 'numerical':
        fixed = (9, 15)  # integer codes of K and R
    elif format == 'alphabetical':
        fixed = ('K', 'R')
    else:
        # Unknown format: keep the historical behaviour of doing nothing.
        return seq

    # Two cursors walk towards each other; each one steps over K/R so that
    # only the remaining residues are exchanged.
    left, right = 0, len(seq) - 1
    while left < right:
        if seq[left] in fixed:
            left += 1
        elif seq[right] in fixed:
            right -= 1
        else:
            seq[left], seq[right] = seq[right], seq[left]
            left += 1
            right -= 1
    return seq
def shuffle_protein(seq):
    """Return a randomly permuted copy of ``seq``; the input is not modified.

    Uses the module-level ``random`` generator, so results are reproducible
    with ``random.seed``.

    Args:
        seq: Sequence of residues. Objects providing ``copy()`` (lists,
            arrays) are copied with it, exactly as before. Strings and other
            immutable sequences, which have no ``copy()`` and cannot be
            shuffled in place, are now accepted as well.

    Returns:
        A shuffled ``str`` when ``seq`` is a ``str``; otherwise the shuffled
        copy (a ``list`` when ``seq`` has no ``copy()`` method).
    """
    if isinstance(seq, str):
        # str is immutable: shuffle its characters and join them back.
        letters = list(seq)
        random.shuffle(letters)
        return "".join(letters)

    shuffled = seq.copy() if hasattr(seq, "copy") else list(seq)
    random.shuffle(shuffled)
    return shuffled
def shuffle_peptide(seq, format='numerical'):
    """Return a shuffled copy of a peptide with every K and R kept in place.

    Only the residues that are neither lysine (K) nor arginine (R) are
    permuted; K and R stay at their original indices. The input is not
    modified. Shuffling uses the module-level ``random`` generator.

    This replaces the previous draft (marked "to be repaired"), which could
    not run: it called ``np.where`` with a Python ``or`` of two comparisons,
    deleted with an index collection, and never reinserted K/R in the
    numerical branch.

    Args:
        seq: Sequence of residues providing ``copy()`` and item assignment
            (e.g. a list).
        format: ``'numerical'`` for integer codes (K is 9, R is 15) or
            ``'alphabetical'`` for one-letter symbols.

    Returns:
        A copy of ``seq`` whose non-K/R residues are randomly permuted.

    Raises:
        ValueError: If ``format`` is neither ``'numerical'`` nor
            ``'alphabetical'``.
    """
    if format == 'numerical':
        fixed = (9, 15)  # integer codes of K and R
    elif format == 'alphabetical':
        fixed = ('K', 'R')
    else:
        raise ValueError(
            "format must be 'numerical' or 'alphabetical', got %r" % (format,)
        )

    # Positions that are allowed to move, and the residues sitting there.
    movable_idx = [i for i, residue in enumerate(seq) if residue not in fixed]
    movable = [seq[i] for i in movable_idx]
    random.shuffle(movable)

    # Write the permuted residues back into the free positions of a copy;
    # K/R positions are never touched.
    final_seq = seq.copy()
    for i, residue in zip(movable_idx, movable):
        final_seq[i] = residue
    return final_seq
def random_aa(database, format='numerical'):
    """Sample a random sequence from the database's residue distribution.

    The database is flattened with ``database.unroll()``, a frequency object
    is obtained from ``total_seq.count()``, and as many category indices as
    ``len(total_seq)`` are drawn from a ``Categorical`` built on it.

    Args:
        database: Object exposing ``unroll()``. The value it returns must
            support ``count()`` and ``len()``.
            NOTE(review): presumably ``count()`` yields a tensor of
            per-residue counts usable as ``Categorical`` probabilities —
            confirm against the database class.
        format: Not used by this function.

    Returns:
        The tensor of sampled category indices, with ``len(total_seq)``
        entries. Previously the sample was computed and then discarded.
    """
    total_seq = database.unroll()
    freq = total_seq.count()
    # NOTE(review): the return value of normalize() is ignored, so this only
    # has an effect if it works in place. Categorical rescales its
    # probabilities itself, so sampling does not depend on it.
    freq.normalize()
    d = dist.Categorical(freq)
    # sample() expects a shape (tuple / torch.Size); a bare int is rejected.
    new_seq = d.sample((len(total_seq),))
    # TODO (original note "similarcutting"): presumably the sampled sequence
    # still has to be cut into pieces like the source database — confirm.
    # NOTE(review): Categorical indices start at 0; check how they map to the
    # residue codes used elsewhere in this module.
    return new_seq
def random_aa_trypsin(database, format='numerical'):
    """Sample a random sequence after removing K and R from the database.

    Same procedure as a plain frequency-based sampling, except that lysine
    (K) and arginine (R) are first removed from the flattened database via
    ``total_seq.remove(...)``, so the frequencies and the sample length are
    taken from what remains.

    Args:
        database: Object exposing ``unroll()``. The value it returns must
            support ``remove(x)``, ``count()`` and ``len()``.
            NOTE(review): presumably ``count()`` yields a tensor of
            per-residue counts usable as ``Categorical`` probabilities —
            confirm against the database class.
        format: ``'numerical'`` removes codes 9 and 15; ``'alphabetical'``
            removes ``'R'`` and ``'K'``. Any other value removes nothing.

    Returns:
        The tensor of sampled category indices, with ``len(total_seq)``
        entries (length measured after the removal). Previously the sample
        was computed and then discarded.
    """
    total_seq = database.unroll()
    # NOTE(review): if remove() behaves like list.remove it drops only the
    # first occurrence of each residue, not all of them — confirm that the
    # type returned by unroll() removes every K/R.
    if format == 'numerical':
        total_seq.remove(9)
        total_seq.remove(15)
    elif format == 'alphabetical':
        total_seq.remove('R')
        total_seq.remove('K')
    freq = total_seq.count()
    # NOTE(review): the return value of normalize() is ignored, so this only
    # has an effect if it works in place. Categorical rescales its
    # probabilities itself, so sampling does not depend on it.
    freq.normalize()
    d = dist.Categorical(freq)
    # sample() expects a shape (tuple / torch.Size); a bare int is rejected.
    new_seq = d.sample((len(total_seq),))
    # TODO (original note "similarcutting"): presumably the sampled sequence
    # still has to be cut into pieces like the source database — confirm.
    return new_seq