Skip to content
Snippets Groups Projects
Commit ac013450 authored by Schneider Leo's avatar Schneider Leo
Browse files

fix vocab + update local integration

parent 695fef29
No related branches found
No related tags found
No related merge requests found
......@@ -95,7 +95,7 @@ def alphabetical_to_numerical(seq, vocab):
else :
for i in range(len(seq) - 2 * seq.count('-')):
if seq[i + dec] != '-':
num.append(IUPAC_VOCAB[seq[i + dec]])
num.append(ALPHABET_UNMOD[seq[i + dec]])
else:
if seq[i + dec + 1:i + dec + 4] == 'CaC':
num.append(21)
......
......@@ -229,9 +229,9 @@ def add_length(dataframe):
dataframe['length']=dataframe['seq'].map(fonc)
df = pd.read_csv('output/out_ISA_no_tape.csv')
df = pd.read_csv('output/out_prosit_common.csv')
add_length(df)
df['abs_error'] = np.abs(df['rt pred']-df['true rt'])
histo_abs_error(df, display=False, save=True, path='temp.png')
# histo_abs_error(df, display=False, save=True, path='temp.png')
# scatter_rt(df, display=False, save=True, path='temp.png')
# histo_length_by_error(df, 10, save=True, path='temp.png')
\ No newline at end of file
histo_length_by_error(df, 10, save=True, path='temp.png')
\ No newline at end of file
import pyopenms as oms
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
def compute_chromatograms(rt, mz, intensity, start_c, end_c):
value=[]
......@@ -12,29 +13,74 @@ def compute_chromatograms(rt, mz, intensity, start_c, end_c):
return value
def _precursor_mz(spec):
    """Return the m/z of the first precursor of an MS2 spectrum, else None."""
    if spec.getMSLevel() != 2:
        return None
    precursors = spec.getPrecursors()
    # An MS2 spectrum may carry no precursor entry; report it as missing
    # instead of failing with IndexError on precursors[0].
    if len(precursors) == 0:
        return None
    return precursors[0].getMZ()


def get_df(expe, long: bool = False):
    """Generate a pandas DataFrame with all peaks in the MSExperiment.

    Parameters:
        expe: the experiment to convert. With ``long=False`` it only needs to
            be iterable and yield spectra exposing ``getRT()``, ``get_peaks()``,
            ``getMSLevel()`` and ``getPrecursors()``.
        long: set to True if you want a long/expanded/melted dataframe with
            one row per peak. Faster but replicated RT information. If False,
            returns one row per spectrum in the style:
            rt, mz array, intensity array, MS level, precursor m/z.

    Returns:
        pandas.DataFrame: with ``long=True`` the columns are taken from
        ``["RT", "mz", "inty", "MSlevel"]``, one per array returned by
        ``get2DPeakDataLong``. Otherwise the columns are ``RT``, ``mzarray``,
        ``intarray``, ``MSlevel`` and ``MS1 MZ``; ``MS1 MZ`` is the m/z of the
        first precursor for MS level 2 spectra and missing for everything else
        (including MS2 spectra that list no precursor).
    """
    if long:
        cols = ["RT", "mz", "inty", 'MSlevel']
        expe.updateRanges()
        spectraarrs2d = expe.get2DPeakDataLong(expe.getMinRT(), expe.getMaxRT(), expe.getMinMZ(), expe.getMaxMZ())
        # zip() stops at the shorter input, so a name in cols that has no
        # matching array is silently left out of the frame.
        return pd.DataFrame(dict(zip(cols, spectraarrs2d)))  # TODO: add MSlevel

    cols = ["RT", "mzarray", "intarray", 'MSlevel', 'MS1 MZ']
    # get_peaks() is unpacked into the mzarray / intarray columns
    # (assumes it returns exactly two arrays - TODO confirm).
    rows = [
        (spec.getRT(), *spec.get_peaks(), spec.getMSLevel(), _precursor_mz(spec))
        for spec in expe
    ]
    return pd.DataFrame(data=rows, columns=cols)
def generate_RT_int_imgs(exp, star_mz, stop_mz):
    """Save one retention-time / intensity plot per m/z window.

    The MS level 1 spectra of ``exp`` are collected, the interval
    ``[star_mz, stop_mz]`` is cut into 999 windows (1000 evenly spaced edges)
    and, for each window, the values returned by ``compute_chromatograms`` are
    plotted against retention time and written to ``fig/rt_local/``.

    Parameters:
        exp: experiment to read; it is iterated and must yield spectra
            exposing ``getMSLevel()``, ``getRT()`` and ``get_peaks()``.
        star_mz: lower bound of the m/z range.
        stop_mz: upper bound of the m/z range.

    Returns:
        None. The function only prints progress and writes PNG files.
    """
    exp.updateRanges()
    rt = []
    mz = []
    intensity = []
    for s in exp:
        if s.getMSLevel() != 1:
            continue
        # Fetch the peak arrays once instead of once per column.
        peaks = s.get_peaks()
        rt.append(s.getRT())
        mz.append(peaks[0])
        intensity.append(peaks[1])

    mz_range = np.linspace(star_mz, stop_mz, 1000)
    for low, high in zip(mz_range[:-1], mz_range[1:]):
        print(low, '/{}'.format(stop_mz))
        val = compute_chromatograms(rt, mz, intensity, low, high)
        fig, ax = plt.subplots()
        ax.plot(rt, val)
        ax.set_xlabel('Retention time')
        ax.set_ylabel('Intensity')
        ax.set_title('mz : {} to {}'.format(low, high))
        fig.savefig('fig/rt_local/{}_to_{}.png'.format(low, high))
        # plt.clf() only clears the figure; close it so the figures created
        # by plt.subplots() do not pile up in memory across the loop.
        plt.close(fig)
df = e.get_df()
def integrate_ms_ms(time_start, time_end, df):
    """Select the MS level 2 rows of ``df`` inside a retention-time window.

    Parameters:
        time_start: exclusive lower bound compared against ``df['MS1 RT']``.
        time_end: exclusive upper bound compared against ``df['MS1 RT']``.
        df: DataFrame with at least the columns ``'MS1 RT'`` and ``'MSlevel'``.

    Returns:
        pandas.DataFrame: a re-indexed copy (0..n-1) of the rows with
        ``MSlevel == 2`` and ``time_start < 'MS1 RT' < time_end``. ``df``
        itself is not modified.

    Raises:
        KeyError: if ``df`` lacks the ``'MS1 RT'`` or ``'MSlevel'`` column.
    """
    # NOTE(review): the frame built by get_df has an 'RT' column but no
    # 'MS1 RT' column - confirm which frame is meant to be passed here.
    in_window = (df['MS1 RT'] > time_start) & (df['MS1 RT'] < time_end)
    is_ms2 = df['MSlevel'] == 2
    # reset_index(inplace=True) returns None, so keep the re-indexed copy
    # that the non-inplace form returns.
    df_useful = df[in_window & is_ms2].reset_index(drop=True)
    # NOTE(review): no aggregation over the selected spectra is implemented
    # yet; the selection itself is returned.
    return df_useful
if __name__ == "__main__":
    # Load the run into an in-memory experiment.
    e = oms.MSExperiment()
    oms.MzMLFile().load("data/Staph140.mzML", e)
    # generate_RT_int_imgs(e, 350, 1250)

    # Keep the MS level 1 spectra only, renumbered from 0 so that the
    # integer labels used below match the row positions.
    df = get_df(e)
    df1 = df[df['MSlevel'] == 1].reset_index(drop=True)

    # One intensity-vs-m/z image per MS1 spectrum, named after its RT.
    for row in range(len(df1)):
        rt_value = df1['RT'][row]
        fig, ax = plt.subplots()
        ax.plot(df1['mzarray'][row], df1['intarray'][row], linewidth=0.1)
        ax.set_xlim(350, 750)
        ax.set_xlabel('mz')
        ax.set_ylabel('Intensity')
        ax.set_title(f'RT : {rt_value}')
        plt.savefig(f'fig/rt_local/RT{rt_value}.png')
        plt.close()
    #358.1 358.32
\ No newline at end of file
......@@ -301,5 +301,5 @@ if __name__ == "__main__":
# b = [np.pad(array, (0, max_len - len(array)), mode='constant', constant_values=default_value) for array in res]
s = oms.MSSpectrum
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment