diff --git a/data/COLI-189-AER_100vW_100SPD.mzML b/data/COLI-189-AER_100vW_100SPD.mzML deleted file mode 100644 index f50213c788d5e3968ec0d2d29faed5e138a71faf..0000000000000000000000000000000000000000 Binary files a/data/COLI-189-AER_100vW_100SPD.mzML and /dev/null differ diff --git a/data/COLI-189-ANA_100vW_100SPD.mzML b/data/COLI-189-ANA_100vW_100SPD.mzML deleted file mode 100644 index 050304ec58776ad6614abed9d1670a1e8ad75890..0000000000000000000000000000000000000000 Binary files a/data/COLI-189-ANA_100vW_100SPD.mzML and /dev/null differ diff --git a/data/Staph140.mzML b/data/Staph140.mzML deleted file mode 100644 index 232d55fb5fdfc9c9b368457e6d269c1c94fb5c1e..0000000000000000000000000000000000000000 Binary files a/data/Staph140.mzML and /dev/null differ diff --git a/local_integration_msms.py b/local_integration_msms.py index bba47338e9a3cfe188798bff67ebc6ecb904726a..b2f2c1b4cd7f8aa915d8ebed5e0c1b80711f896b 100644 --- a/local_integration_msms.py +++ b/local_integration_msms.py @@ -94,51 +94,45 @@ def integrate_ms_ms(df, mz_bin, output='temp.png', display = False): if __name__ == "__main__": e = oms.MSExperiment() - oms.MzMLFile().load("data/Staph140.mzML", e) + oms.MzMLFile().load("data/echantillons données DIA/CITAMA-5-AER-d200.mzML", e) # generate_RT_int_imgs(e, 350, 1250) df = get_df(e, long=True) - df1 = df[df['MSlevel'] == 1] - df_slide = df1[750.1< df1['mz']] - df_slide = df_slide[750.15 > df_slide['mz']] - - inty_sorted = [x for y, x in sorted(zip(df_slide['RT'], df_slide['inty']))] - mz_sorted = sorted(df_slide['RT']) - plt.clf() - fig, ax = plt.subplots() - ax.set_xlim(400,500) - ax.plot(mz_sorted,inty_sorted) - - plt.savefig('temp.png') - - #pic d'étude 462-468 - #mz 750.13 + # df1 = df[df['MSlevel'] == 1] + # df_slide = df1[411.5< df1['mz']] + # df_slide = df_slide[418.5 > df_slide['mz']] + # + # + # inty_sorted = [x for y, x in sorted(zip(df_slide['RT'], df_slide['inty']))] + # mz_sorted = sorted(df_slide['RT']) + # plt.clf() + # fig, ax = plt.subplots() + # ax.set_xlim(280,310) + # ax.plot(mz_sorted,inty_sorted) + # + # plt.savefig('temp.png') + # + # #pic d'étude 280-310 + # #mz 411.5 418.5 df_peak = df[df['MSlevel'] == 2] - df_peak = df_peak[750.1 < df_peak['MS1_mz_max']] - df_peak = df_peak[750.1 > df_peak['MS1_mz_min']] - df_peak = df_peak[465 < df_peak['RT']] - df_peak = df_peak[466 > df_peak['RT']] + df_peak = df_peak[411 < df_peak['MS1_mz_max']] + df_peak = df_peak[419 > df_peak['MS1_mz_min']] + df_peak = df_peak[280 < df_peak['RT']] + df_peak = df_peak[310 > df_peak['RT']] # -# df_peak2 = df[df['MSlevel'] == 2] -# df_peak2 = df_peak2[750.1 < df_peak2['MS1_mz_max']] -# df_peak2 = df_peak2[750.1 > df_peak2['MS1_mz_min']] -# df_peak2 = df_peak2[463 < df_peak2['RT']] -# df_peak2 = df_peak2[467 > df_peak2['RT']] -# -# mz1, inty1 = integrate_ms_ms(df_peak, 1) -# mz2, inty2 = integrate_ms_ms(df_peak2, 1) -# plt.clf() -# fig, ax = plt.subplots() -# ax.plot(mz1, inty1, linewidth=0.3) -# ax.plot(mz2, inty2, linewidth=0.3) -# ax.set_xlim(200, 1800) -# plt.savefig('spec_combined.png') -# plt.clf() - - df = pd.read_csv('data/staph140_maxquant.csv') - df['Retention time'] = df['Retention time']*60 - df_filtered = df[df['Retention time']>463 ] - df_filtered = df_filtered[df_filtered['Retention time']<467 ] - -#358.1 358.32 \ No newline at end of file + df_peak2 = df[df['MSlevel'] == 2] + df_peak2 = df_peak2[411 < df_peak2['MS1_mz_max']] + df_peak2 = df_peak2[419> df_peak2['MS1_mz_min']] + df_peak2 = df_peak2[296 < df_peak2['RT']] + df_peak2 = df_peak2[297 > df_peak2['RT']] + + mz1, inty1 = integrate_ms_ms(df_peak, 1) + mz2, inty2 = integrate_ms_ms(df_peak2, 1) + plt.clf() + fig, ax = plt.subplots() + ax.plot(mz1, inty1, linewidth=0.3) + ax.plot(mz2, inty2, linewidth=0.3) + ax.set_xlim(200, 1200) + plt.savefig('spec_combined.png') + plt.clf() diff --git a/prosit_rt_ori.py b/prosit_rt_ori.py index 950c7c5f7bd6e3ca8889f5de097b26c35b9784c9..336a323af68d0d78ff8b483616d1f6441c5c0add 100644 --- a/prosit_rt_ori.py +++ b/prosit_rt_ori.py @@ -59,19 +59,16 @@ history = model.fit( predictions = model.predict(test_sequences) predictions = predictions.ravel() -print(test_sequences[:5]) -print(test_targets[:5]) -print(predictions[:5]) report = RetentionTimeReport(output_path="./output", history=history) print("R2: ", report.calculate_r2(test_targets, predictions)) -pd.DataFrame( - { - "sequence": d["test"]["_parsed_sequence"], - "irt": test_targets, - "predicted_irt": predictions, - } -).to_csv("./predictions_prosit_fullrun.csv", index=False) \ No newline at end of file +# pd.DataFrame( +# { +# "sequence": d["test"]["_parsed_sequence"], +# "irt": test_targets, +# "predicted_irt": predictions, +# } +# ).to_csv("./predictions_prosit_fullrun.csv", index=False) \ No newline at end of file