# mass_prediction.py
# Authored by Léo Schneider (commit 6ac8b84f).
# Mass constant dictionaries
import numpy as np
# Monoisotopic masses keyed by one-letter residue code.
# NOTE(review): values look like amino-acid residue masses in daltons — confirm.
MASSES_MONO = {
    "A": 71.03711,
    "C": 103.00919,
    "D": 115.02694,
    "E": 129.04259,
    "F": 147.06841,
    "G": 57.02146,
    "H": 137.05891,
    "I": 113.08406,
    "K": 128.09496,
    "L": 113.08406,
    "M": 131.04049,
    "N": 114.04293,
    "P": 97.05276,
    "Q": 128.05858,
    # Fixed: was 156.1875, which is the average mass duplicated from the
    # average-mass table rather than the monoisotopic value.
    "R": 156.10111,
    "S": 87.03203,
    "T": 101.04768,
    "V": 99.06841,
    "W": 186.07931,
    "Y": 163.06333,
}
# Average masses keyed by one-letter residue code.
# NOTE(review): values look like amino-acid residue masses in daltons — confirm.
MASSES_AVG = {
    "A": 71.0788,
    "C": 103.1388,
    "D": 115.0886,
    "E": 129.1155,
    "F": 147.1766,
    "G": 57.0519,
    "H": 137.1411,
    "I": 113.1594,
    "K": 128.1741,
    "L": 113.1594,
    "M": 131.1926,
    "N": 114.1038,
    "P": 97.1167,
    "Q": 128.1307,
    "R": 156.1875,
    "S": 87.0782,
    "T": 101.1051,
    "V": 99.1326,
    "W": 186.2132,
    "Y": 163.1760,
}
# Monoisotopic masses for modifications, modified residues and small chemical
# groups, keyed by display name.
# NOTE(review): the cysteine/methionine/tryptophan entries look like full
# modified-residue masses rather than mass deltas — confirm before use.
PTMs_MON0 = {
    'Alkylation': 14.01564,
    'Carbamylation': 43.00581,
    'Carboxymethyl cysteine (Cys_CM)': 161.01466,
    'Carboxyamidomethyl cysteine (Cys_CAM)': 160.03065,
    'Pyridyl-ethyl cysteine (Cys_PE)': 208.067039,
    'Propionamide cysteine (Cys_PAM)': 174.04631,
    'Methionine sulfoxide (MSO)': 147.0354,
    'Oxydized tryptophan (TPO)': 202.0742,
    'Homoserine Lactone (HSL)': 100.03985,
    'H': 1.00783,
    'H+': 1.00728,
    'O': 15.9949146,
    'H2O': 18.01056,
}
# The historical name above ends with the digit zero, not the letter "O".
# It is kept for existing callers; this alias provides the intended spelling.
PTMs_MONO = PTMs_MON0
# Average-mass counterpart of the monoisotopic modification table: same keys,
# average masses as values.
PTMs_AVG = {
    # Generic modifications
    "Alkylation": 14.02688,
    "Carbamylation": 43.02502,
    # Cysteine variants
    "Carboxymethyl cysteine (Cys_CM)": 161.1755,
    "Carboxyamidomethyl cysteine (Cys_CAM)": 160.1908,
    "Pyridyl-ethyl cysteine (Cys_PE)": 208.284,
    "Propionamide cysteine (Cys_PAM)": 174.2176,
    # Other modified residues
    "Methionine sulfoxide (MSO)": 147.1920,
    "Oxydized tryptophan (TPO)": 202.2126,
    "Homoserine Lactone (HSL)": 100.09714,
    # Atoms, ions and small molecules
    "H": 1.00794,
    "H+": 1.00739,
    "O": 15.9994,
    "H2O": 18.01524,
}
def compute_mass(seq, isotop, mod=False):
    """Sum the table masses of every known residue found in ``seq``.

    Args:
        seq: String of one-letter residue codes. Characters that are not keys
            of the selected mass table contribute nothing to the total.
        isotop: ``'mono'`` to use ``MASSES_MONO`` or ``'avg'`` to use
            ``MASSES_AVG``.
        mod: Accepted for API compatibility. Modification handling is not
            implemented yet, so this flag does not affect the result.

    Returns:
        The summed mass; ``0.0`` for an empty sequence.

    Raises:
        ValueError: If ``isotop`` is neither ``'mono'`` nor ``'avg'``
            (previously this silently returned 0).
    """
    if isotop == 'mono':
        residue_masses = MASSES_MONO
    elif isotop == 'avg':
        residue_masses = MASSES_AVG
    else:
        raise ValueError(
            f"isotop must be 'mono' or 'avg', got {isotop!r}"
        )

    # TODO mod: the modified and unmodified paths were identical in the
    # original code, so a single computation serves both until modifications
    # are actually implemented.
    return sum(
        mass * seq.count(residue) for residue, mass in residue_masses.items()
    )
def compute_frag_mz_ration(seq, isotop, mod=False):
    """Build a fixed-length vector of cumulative fragment masses for ``seq``.

    For each cut position ``i`` in ``0 .. len(seq) - 2`` two running sums are
    maintained: ``acc_b`` over the first ``i + 1`` residues and ``acc_y`` over
    the last ``i + 1`` residues. Six consecutive slots are written per
    position::

        [acc_y, acc_y / 2, acc_y / 3, acc_b, acc_b / 2, acc_b / 3]

    NOTE(review): the values are raw residue sums divided by 1, 2 and 3; no
    water or proton mass is added. Confirm this is the intended definition.

    Args:
        seq: String of one-letter residue codes; every character must be a
            key of the selected mass table.
        isotop: ``'mono'`` to use ``MASSES_MONO`` or ``'avg'`` to use
            ``MASSES_AVG``.
        mod: Accepted for API compatibility; modifications are not
            implemented yet (see TODO below).

    Returns:
        A float ``numpy`` array of length 174 (29 positions x 6 slots).
        Slots beyond ``6 * (len(seq) - 1)`` keep the filler value ``-1``.

    Raises:
        ValueError: If ``isotop`` is neither ``'mono'`` nor ``'avg'``.
        KeyError: If ``seq`` contains a character missing from the table.
        IndexError: If ``seq`` is longer than 30 residues, because the output
            only has room for 29 positions.
    """
    if isotop == 'avg':
        residue_masses = MASSES_AVG
    elif isotop == 'mono':
        residue_masses = MASSES_MONO
    else:
        raise ValueError(
            f"isotop must be 'mono' or 'avg', got {isotop!r}"
        )

    # A float array is required: the previous integer array silently
    # truncated every mass that was stored into it.
    masses = np.full(174, -1.0)
    acc_b = 0.0
    acc_y = 0.0
    n = len(seq)
    # TODO mod
    for i in range(n - 1):
        # Prefix sum starts at the first residue; the former ``seq[i - 1]``
        # wrapped around to the last residue on the first iteration.
        acc_b += residue_masses[seq[i]]
        # Suffix sum starts at the last residue and walks backwards.
        acc_y += residue_masses[seq[n - 1 - i]]

        base = 6 * i
        masses[base] = acc_y
        masses[base + 1] = acc_y / 2
        masses[base + 2] = acc_y / 3
        masses[base + 3] = acc_b
        masses[base + 4] = acc_b / 2
        masses[base + 5] = acc_b / 3
    return masses