diff --git a/data_viz.py b/data_viz.py index de2373da73dd6c09664588d71603657617218da6..385a19f86da782372a70c6f01c6e56d874f015a0 100644 --- a/data_viz.py +++ b/data_viz.py @@ -275,9 +275,11 @@ def add_length(dataframe): # scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_prosit_ISA_eval.png', color=True) # histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_prosit_ISA_eval.png') -df = pd.read_csv('output/out_common_ISA_ISA_eval_2.csv') -add_length(df) -df['abs_error'] = np.abs(df['rt pred']-df['true rt']) -histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_ISA_ISA_eval_2.png') -scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_ISA_ISA_eval_2_seq.png', color=True, col = 'seq') -histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_ISA_ISA_eval_2.png') \ No newline at end of file +# df = pd.read_csv('output/out_common_ISA_ISA_eval_2.csv') +# add_length(df) +# df['abs_error'] = np.abs(df['rt pred']-df['true rt']) +# histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_ISA_ISA_eval_2.png') +# scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_ISA_ISA_eval_2_seq.png', color=True, col = 'seq') +# histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_ISA_ISA_eval_2.png') + + diff --git a/local_integration_msms.py b/local_integration_msms.py index 984e4c2ce76d1e3a43a9f38f47308d0625144b8b..2e5723fc368314283f450ed6d4e5b13bcd2c45a6 100644 --- a/local_integration_msms.py +++ b/local_integration_msms.py @@ -3,6 +3,8 @@ import numpy as np import matplotlib.pyplot as plt import pandas as pd + + def compute_chromatograms(rt, mz, intensity, start_c, end_c): value=[] @@ -23,15 +25,29 @@ def get_df(expe, long: bool = False): Returns: pandas.DataFrame: feature information stored in a DataFrame """ - if long: - cols = ["RT", "mz", "inty", 'MSlevel'] - expe.updateRanges() - spectraarrs2d = expe.get2DPeakDataLong(expe.getMinRT(), expe.getMaxRT(), expe.getMinMZ(), expe.getMaxMZ()) - return pd.DataFrame(dict(zip(cols, spectraarrs2d))) #TODO ajouter MSlevel + cols = ["RT", "mzarray", "intarray", 'MSlevel','MS1 MZ'] - return pd.DataFrame(data=((spec.getRT(), *spec.get_peaks(), spec.getMSLevel(), spec.getPrecursors()[0].getMZ() if spec.getMSLevel() ==2 else None) for spec in expe), columns=cols) + df = pd.DataFrame(data=((spec.getRT(), *spec.get_peaks(), spec.getMSLevel(), spec.getPrecursors()[0].getMZ() if spec.getMSLevel() ==2 else None) for spec in expe), columns=cols) + + if long: + RT = [] + mz = [] + inty = [] + ms_lv = [] + ms1_mz = [] + for index, row in df.iterrows(): + mz.extend(row['mzarray']) + inty.extend(row['intarray']) + RT.extend([row['RT']]*len(row['intarray'])) + ms_lv.extend([row['MSlevel']] * len(row['intarray'])) + ms1_mz.extend([row['MS1 MZ']] * len(row['intarray'])) + dico = {"RT": RT, "mz": mz, "inty": inty, "MSlevel": ms_lv, "MS1 MZ": ms1_mz} + return pd.DataFrame(dico) + + else : + return df def generate_RT_int_imgs(exp,star_mz,stop_mz): exp.updateRanges() @@ -58,7 +74,7 @@ def generate_RT_int_imgs(exp,star_mz,stop_mz): def integrate_ms_ms(time_start, time_end, df): df_useful = df[(df['MS1 RT']>time_start) & (df['MS1 RT']<time_end) & (df['MSlevel']==2)].reset_index(inplace=True) - + value = 0 return value @@ -68,19 +84,17 @@ if __name__ == "__main__": oms.MzMLFile().load("data/Staph140.mzML", e) # generate_RT_int_imgs(e, 350, 1250) - df = get_df(e) + df = get_df(e, long=True) df1 = df[df['MSlevel'] == 1] - df1.reset_index(inplace=True, drop=True) - for i in range(len(df1)): - fig, ax = plt.subplots() - ax.plot(df1['mzarray'][i], df1['intarray'][i],linewidth=0.1) - ax.set_xlabel('mz') - ax.set_xlim(350,750) - ax.set_ylabel('Intensity') - ax.set_title('RT : {}'.format(df1['RT'][i])) - plt.savefig('fig/rt_local/RT{}.png'.format(df1['RT'][i])) - plt.close() - + df_slide = df1[750.1< df1['mz']] + df_slide = df_slide[750.15 > df_slide['mz']] + inty_sorted = [x for y, x in sorted(zip(df_slide['RT'], df_slide['inty']))] + mz_sorted = sorted(df_slide['RT']) + plt.clf() + fig, ax = plt.subplots() + ax.set_xlim(400,500) + ax.plot(mz_sorted,inty_sorted) + plt.savefig('temp.png') #358.1 358.32 \ No newline at end of file