# mass_prediction.py
# Mass constant dictionaries (amino-acid residues and modifications)
import numpy as np

# Monoisotopic residue masses of the 20 standard amino acids, keyed by
# one-letter code (values presumably in Da -- units are not stated in this
# file).  "I" and "L" are isomers and share the same mass.
MASSES_MONO = {
    "A": 71.03711,
    "C": 103.00919,
    "D": 115.02694,
    "E": 129.04259,
    "F": 147.06841,
    "G": 57.02146,
    "H": 137.05891,
    "I": 113.08406,
    "K": 128.09496,
    "L": 113.08406,
    "M": 131.04049,
    "N": 114.04293,
    "P": 97.05276,
    "Q": 128.05858,
    # Arginine: 156.10111 is the monoisotopic value.  The previous entry,
    # 156.1875, was the average mass (the value stored in MASSES_AVG).
    "R": 156.10111,
    "S": 87.03203,
    "T": 101.04768,
    "V": 99.06841,
    "W": 186.07931,
    "Y": 163.06333,
}

# Average (natural isotope abundance) residue masses of the 20 standard
# amino acids, keyed by one-letter code.  Same key set and ordering as the
# monoisotopic table defined earlier in this module.
MASSES_AVG = dict(
    A=71.0788,
    C=103.1388,
    D=115.0886,
    E=129.1155,
    F=147.1766,
    G=57.0519,
    H=137.1411,
    I=113.1594,
    K=128.1741,
    L=113.1594,
    M=131.1926,
    N=114.1038,
    P=97.1167,
    Q=128.1307,
    R=156.1875,
    S=87.0782,
    T=101.1051,
    V=99.1326,
    W=186.2132,
    Y=163.1760,
)

# Monoisotopic masses for modifications, modified residues and a few small
# species (H, H+, O, H2O), keyed by a human-readable label.
# NOTE(review): entries mix mass deltas (e.g. 'Alkylation') with what look
# like full modified-residue masses (e.g. the cysteine variants) -- confirm
# how each is meant to be applied before using them in compute_mass.
PTMs_MONO = {
    'Alkylation': 14.01564,
    'Carbamylation': 43.00581,
    'Carboxymethyl cysteine (Cys_CM)': 161.01466,
    'Carboxyamidomethyl cysteine (Cys_CAM)': 160.03065,
    'Pyridyl-ethyl cysteine (Cys_PE)': 208.067039,
    'Propionamide cysteine (Cys_PAM)': 174.04631,
    'Methionine sulfoxide (MSO)': 147.0354,
    'Oxydized tryptophan (TPO)': 202.0742,
    'Homoserine Lactone (HSL)': 100.03985,
    'H': 1.00783,
    'H+': 1.00728,
    'O': 15.9949146,
    'H2O': 18.01056,
}

# Backward-compatible alias: the table was originally published under this
# name, spelled with a trailing digit zero instead of the letter "O".
PTMs_MON0 = PTMs_MONO

# Average-mass counterpart of the monoisotopic modification table: the same
# labels, in the same order, mapped to average masses.
PTMs_AVG = dict(
    [
        ("Alkylation", 14.02688),
        ("Carbamylation", 43.02502),
        ("Carboxymethyl cysteine (Cys_CM)", 161.1755),
        ("Carboxyamidomethyl cysteine (Cys_CAM)", 160.1908),
        ("Pyridyl-ethyl cysteine (Cys_PE)", 208.284),
        ("Propionamide cysteine (Cys_PAM)", 174.2176),
        ("Methionine sulfoxide (MSO)", 147.1920),
        ("Oxydized tryptophan (TPO)", 202.2126),
        ("Homoserine Lactone (HSL)", 100.09714),
        ("H", 1.00794),
        ("H+", 1.00739),
        ("O", 15.9994),
        ("H2O", 18.01524),
    ]
)


def compute_mass(seq, isotop, mod=False):
    """Return the summed residue mass of an amino-acid sequence.

    Every one-letter code found in the selected mass table contributes its
    residue mass once per occurrence in ``seq``.  Characters of ``seq`` that
    are not keys of the table are ignored, and nothing is added for the
    peptide termini (no H2O).

    Args:
        seq: Sequence of one-letter amino-acid codes (a ``str``, or any
            object exposing a compatible ``count`` method).
        isotop: ``'mono'`` to use ``MASSES_MONO``, ``'avg'`` to use
            ``MASSES_AVG``.
        mod: Reserved for modification support, which is not implemented
            yet; the value currently has no effect on the result.

    Returns:
        The total mass as a float.

    Raises:
        ValueError: If ``isotop`` is neither ``'mono'`` nor ``'avg'``
            (previously this silently produced a mass of 0).
    """
    if isotop == 'mono':
        residue_masses = MASSES_MONO
    elif isotop == 'avg':
        residue_masses = MASSES_AVG
    else:
        raise ValueError(
            "isotop must be 'mono' or 'avg', got {!r}".format(isotop)
        )

    # TODO mod: modified residues are not handled yet, so `mod` is ignored.
    # Accumulate in table order with a plain loop so the floating-point
    # result is bit-for-bit the same as the historical implementation.
    total = 0
    for residue, mass in residue_masses.items():
        total += mass * seq.count(residue)
    return total


def compute_frag_mz_ration(seq, isotop, mod=False):
    """Return a fixed-length vector of fragment masses for ``seq``.

    For each cleavage position ``i`` (``0 <= i < len(seq) - 1``) the function
    keeps two running sums of residue masses: a prefix sum over
    ``seq[0..i]`` (b-type fragment) and a suffix sum over the last ``i + 1``
    residues (y-type fragment).  Six values are stored per position::

        out[6*i + 0] = suffix_sum        out[6*i + 3] = prefix_sum
        out[6*i + 1] = suffix_sum / 2    out[6*i + 4] = prefix_sum / 2
        out[6*i + 2] = suffix_sum / 3    out[6*i + 5] = prefix_sum / 3

    Slots that are not reached keep the filler value ``-1``.

    NOTE(review): the stored values are plain residue-mass sums divided by
    1, 2 and 3; no proton or water mass is added.  Confirm whether that is
    the intended definition of the fragment m/z.

    Args:
        seq: Sequence of one-letter amino-acid codes, at most 30 residues
            long (the output holds 174 = 6 * 29 values).
        isotop: ``'mono'`` to use ``MASSES_MONO``, ``'avg'`` to use
            ``MASSES_AVG``.
        mod: Reserved for modification support, which is not implemented
            yet; the value currently has no effect on the result.

    Returns:
        A float ``numpy.ndarray`` of shape ``(174,)``.

    Raises:
        ValueError: If ``isotop`` is neither ``'mono'`` nor ``'avg'``
            (previously the sums silently stayed at 0).
        IndexError: If ``seq`` has more than 30 residues.
        KeyError: If a residue used in a sum is missing from the mass table.
    """
    if isotop == 'mono':
        residue_masses = MASSES_MONO
    elif isotop == 'avg':
        residue_masses = MASSES_AVG
    else:
        raise ValueError(
            "isotop must be 'mono' or 'avg', got {!r}".format(isotop)
        )

    # Float buffer: an integer array (as built from ``[-1] * 174``) would
    # silently truncate every fragment mass to a whole number on assignment.
    masses = np.full(174, -1.0)
    n = len(seq)

    # Same exception type the out-of-range write used to raise, but reported
    # up front with an explicit message.
    max_positions = masses.size // 6
    if n - 1 > max_positions:
        raise IndexError(
            "sequence of length {} does not fit: at most {} residues are "
            "supported".format(n, max_positions + 1)
        )

    acc_b = 0.0
    acc_y = 0.0

    # TODO mod
    for i in range(n - 1):
        # Prefix grows from the first residue.  The former ``seq[i - 1]``
        # wrapped around to the last residue at i == 0 and lagged by one
        # afterwards, so every prefix sum was wrong.
        acc_b += residue_masses[seq[i]]
        # Suffix grows from the last residue backwards.
        acc_y += residue_masses[seq[n - 1 - i]]
        masses[6 * i:6 * i + 6] = (
            acc_y, acc_y / 2, acc_y / 3,
            acc_b, acc_b / 2, acc_b / 3,
        )
    return masses