Skip to content
Snippets Groups Projects
Commit ac013450 authored by Schneider Leo's avatar Schneider Leo
Browse files

fix vocab + update local integration

parent 695fef29
No related branches found
No related tags found
No related merge requests found
...@@ -95,7 +95,7 @@ def alphabetical_to_numerical(seq, vocab): ...@@ -95,7 +95,7 @@ def alphabetical_to_numerical(seq, vocab):
else : else :
for i in range(len(seq) - 2 * seq.count('-')): for i in range(len(seq) - 2 * seq.count('-')):
if seq[i + dec] != '-': if seq[i + dec] != '-':
num.append(IUPAC_VOCAB[seq[i + dec]]) num.append(ALPHABET_UNMOD[seq[i + dec]])
else: else:
if seq[i + dec + 1:i + dec + 4] == 'CaC': if seq[i + dec + 1:i + dec + 4] == 'CaC':
num.append(21) num.append(21)
......
...@@ -229,9 +229,9 @@ def add_length(dataframe): ...@@ -229,9 +229,9 @@ def add_length(dataframe):
dataframe['length']=dataframe['seq'].map(fonc) dataframe['length']=dataframe['seq'].map(fonc)
df = pd.read_csv('output/out_ISA_no_tape.csv') df = pd.read_csv('output/out_prosit_common.csv')
add_length(df) add_length(df)
df['abs_error'] = np.abs(df['rt pred']-df['true rt']) df['abs_error'] = np.abs(df['rt pred']-df['true rt'])
histo_abs_error(df, display=False, save=True, path='temp.png') # histo_abs_error(df, display=False, save=True, path='temp.png')
# scatter_rt(df, display=False, save=True, path='temp.png') # scatter_rt(df, display=False, save=True, path='temp.png')
# histo_length_by_error(df, 10, save=True, path='temp.png') histo_length_by_error(df, 10, save=True, path='temp.png')
\ No newline at end of file \ No newline at end of file
import pyopenms as oms import pyopenms as oms
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import pandas as pd
def compute_chromatograms(rt, mz, intensity, start_c, end_c): def compute_chromatograms(rt, mz, intensity, start_c, end_c):
value=[] value=[]
...@@ -12,29 +13,74 @@ def compute_chromatograms(rt, mz, intensity, start_c, end_c): ...@@ -12,29 +13,74 @@ def compute_chromatograms(rt, mz, intensity, start_c, end_c):
return value return value
def get_df(expe, long: bool = False):
    """Build a pandas DataFrame holding the peaks of an experiment.

    Parameters:
        expe: experiment to export. It is iterated to obtain spectra and, in
            long mode, its ``updateRanges``, ``getMinRT``/``getMaxRT``,
            ``getMinMZ``/``getMaxMZ`` and ``get2DPeakDataLong`` methods are
            called (presumably a pyopenms ``MSExperiment`` -- confirm against
            the callers).
        long: when True, return a long/melted frame built from the arrays
            returned by ``get2DPeakDataLong`` (one row per peak, so the
            retention time is repeated for every peak of a spectrum). When
            False, return one row per spectrum with the m/z and intensity
            arrays stored in single cells.

    Returns:
        pandas.DataFrame: in long mode, columns taken in order from
        ``RT, mz, inty, MSlevel`` (one per returned array); otherwise the
        columns ``RT, mzarray, intarray, MSlevel, MS1 MZ``. ``MS1 MZ`` is the
        m/z of the first precursor for MS level 2 spectra and is empty for
        every other spectrum, including level 2 spectra without a precursor.
    """
    if long:
        cols = ["RT", "mz", "inty", "MSlevel"]
        expe.updateRanges()
        spectraarrs2d = expe.get2DPeakDataLong(
            expe.getMinRT(), expe.getMaxRT(), expe.getMinMZ(), expe.getMaxMZ()
        )
        # zip() stops at the shorter input, so a column name with no matching
        # array is silently dropped.
        # TODO: add MSlevel -- NOTE(review): get2DPeakDataLong appears to
        # return only (rt, mz, intensity); confirm and fill the column.
        return pd.DataFrame(dict(zip(cols, spectraarrs2d)))

    cols = ["RT", "mzarray", "intarray", "MSlevel", "MS1 MZ"]
    rows = []
    for spec in expe:
        ms_level = spec.getMSLevel()
        precursor_mz = None
        if ms_level == 2:
            # Guard the lookup: an MS2 spectrum with no precursor entry used
            # to raise IndexError and abort the whole export.
            precursors = spec.getPrecursors()
            if precursors:
                precursor_mz = precursors[0].getMZ()
        rows.append((spec.getRT(), *spec.get_peaks(), ms_level, precursor_mz))
    return pd.DataFrame(data=rows, columns=cols)
def generate_RT_int_imgs(exp, star_mz, stop_mz):
    """Save one intensity-vs-retention-time plot per m/z window.

    The range ``[star_mz, stop_mz]`` is cut into 999 consecutive windows
    (1000 evenly spaced edges). For each window the chromatogram computed by
    ``compute_chromatograms`` over the MS level 1 spectra is plotted against
    retention time and written to ``fig/rt_local/<low>_to_<high>.png``.

    Parameters:
        exp: experiment to plot. ``updateRanges`` is called on it and it is
            iterated to obtain spectra (presumably a pyopenms
            ``MSExperiment`` -- confirm against the callers).
        star_mz: lower bound of the m/z range.
        stop_mz: upper bound of the m/z range.

    Returns:
        None. The figures are written to disk; the ``fig/rt_local``
        directory is not created here.
    """
    exp.updateRanges()
    rt = []
    mz = []
    intensity = []
    for s in exp:
        if s.getMSLevel() == 1:
            # Fetch the peak arrays once per spectrum instead of twice.
            peaks = s.get_peaks()
            rt.append(s.getRT())
            mz.append(peaks[0])
            intensity.append(peaks[1])

    mz_range = np.linspace(star_mz, stop_mz, 1000)
    for low, high in zip(mz_range[:-1], mz_range[1:]):
        # Progress trace: lower edge of the current window / upper bound.
        print(low, '/{}'.format(stop_mz))
        # NOTE(review): assumes compute_chromatograms returns one value per
        # entry of rt, as required by ax.plot(rt, val) -- confirm.
        val = compute_chromatograms(rt, mz, intensity, low, high)
        fig, ax = plt.subplots()
        ax.plot(rt, val)
        ax.set_xlabel('Retention time')
        ax.set_ylabel('Intensity')
        ax.set_title('mz : {} to {}'.format(low, high))
        fig.savefig('fig/rt_local/{}_to_{}.png'.format(low, high))
        # Close the figure rather than only clearing it; otherwise every
        # window leaves an open figure behind.
        plt.close(fig)
df = e.get_df()
def integrate_ms_ms(time_start, time_end, df):
    """Select the MS level 2 rows whose 'MS1 RT' lies inside a time window.

    Parameters:
        time_start: exclusive lower bound on the 'MS1 RT' column.
        time_end: exclusive upper bound on the 'MS1 RT' column.
        df: DataFrame with at least the columns 'MS1 RT' and 'MSlevel'.
            NOTE(review): get_df in this file emits 'RT' and 'MS1 MZ' but no
            'MS1 RT' column -- confirm which column is intended.

    Returns:
        pandas.DataFrame: the matching rows, renumbered from 0, with the
        original row labels kept in an 'index' column.

    NOTE(review): the previous body returned an undefined name (``value``),
    so every call raised NameError, and ``reset_index(inplace=True)`` bound
    None to the selection. Returning the selected rows is the conservative
    completion; the actual integration over these spectra is still to be
    written.
    """
    in_window = (df['MS1 RT'] > time_start) & (df['MS1 RT'] < time_end)
    is_ms2 = df['MSlevel'] == 2
    # reset_index() without inplace returns the new frame instead of None.
    df_useful = df[in_window & is_ms2].reset_index()
    return df_useful
if __name__ == "__main__":
    # Load the experiment from the mzML file.
    e = oms.MSExperiment()
    oms.MzMLFile().load("data/Staph140.mzML", e)
    # generate_RT_int_imgs(e, 350, 1250)

    # Keep only the MS level 1 spectra, renumbered from 0.
    df = get_df(e)
    df1 = df[df['MSlevel'] == 1].reset_index(drop=True)

    rt_column = df1['RT']
    mz_column = df1['mzarray']
    int_column = df1['intarray']

    # One spectrum plot (intensity against m/z) per retention time.
    for row in df1.index:
        rt_value = rt_column[row]
        fig, ax = plt.subplots()
        ax.plot(mz_column[row], int_column[row], linewidth=0.1)
        ax.set_xlabel('mz')
        ax.set_xlim(350, 750)
        ax.set_ylabel('Intensity')
        ax.set_title('RT : {}'.format(rt_value))
        plt.savefig('fig/rt_local/RT{}.png'.format(rt_value))
        plt.close()
#358.1 358.32 #358.1 358.32
\ No newline at end of file
...@@ -301,5 +301,5 @@ if __name__ == "__main__": ...@@ -301,5 +301,5 @@ if __name__ == "__main__":
# b = [np.pad(array, (0, max_len - len(array)), mode='constant', constant_values=default_value) for array in res] # b = [np.pad(array, (0, max_len - len(array)), mode='constant', constant_values=default_value) for array in res]
s = oms.MSSpectrum
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment