diff --git a/.idea/LC-MS-RT-prediction.iml b/.idea/LC-MS-RT-prediction.iml index 81c1f054c8f6682138a3e8066d2e19396c22b814..2a5a0ab367bde49e5adbca0d5136f94cf65e5682 100644 --- a/.idea/LC-MS-RT-prediction.iml +++ b/.idea/LC-MS-RT-prediction.iml @@ -8,8 +8,9 @@ <excludeFolder url="file://$MODULE_DIR$/database" /> <excludeFolder url="file://$MODULE_DIR$/.venv" /> <excludeFolder url="file://$MODULE_DIR$/.venv2" /> + <excludeFolder url="file://$MODULE_DIR$/.venv3_10" /> </content> - <orderEntry type="jdk" jdkName="Python 3.11 (LC-MS-RT-prediction)" jdkType="Python SDK" /> + <orderEntry type="jdk" jdkName="Python 3.9 (LC-MS-RT-prediction)" jdkType="Python SDK" /> <orderEntry type="sourceFolder" forTests="false" /> </component> </module> \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index cde01ce4f1c28496b5ffbb181b9aa0e7b9db0fda..1b5f6f736536803396a9f042bfdc1981e5153ac8 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,5 +3,5 @@ <component name="Black"> <option name="sdkName" value="Python 3.9 (LC-MS-RT-prediction)" /> </component> - <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.11 (LC-MS-RT-prediction)" project-jdk-type="Python SDK" /> + <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.9 (LC-MS-RT-prediction)" project-jdk-type="Python SDK" /> </project> \ No newline at end of file diff --git a/alignement.py b/alignement.py index 58c5969325cab3a60f86e4793584a4ba14f2ed37..1c11a0737517221c5edf939c0991ada89efe594c 100644 --- a/alignement.py +++ b/alignement.py @@ -3,9 +3,10 @@ import pandas as pd from loess.loess_1d import loess_1d from dataloader import RT_Dataset -from common_dataset import Common_Dataset +from msms_processing import load_data import matplotlib.pyplot as plt + ALPHABET_UNMOD = { "": 0, "A": 1, @@ -70,27 +71,38 @@ def align(dataset, reference): data_ori = RT_Dataset(None, 'database/data_train.csv', 'train', 25).data data_ori['sequence'] = data_ori['sequence'].map(numerical_to_alphabetical) -data_train = pd.read_pickle('database/data_DIA_16_01.pkl').reset_index(drop=True) +data_train = load_data('msms/msms16_01.txt').reset_index(drop=True) +# data_train = pd.read_pickle('database/data_DIA_16_01.pkl').reset_index(drop=True) data_align = align(data_train, data_ori) data_align.to_pickle('database/data_DIA_16_01_aligned.pkl') -data_train = pd.read_pickle('database/data_DIA_17_01.pkl').reset_index(drop=True) + +data_train = load_data('msms/msms17_01.txt').reset_index(drop=True) +# data_train = pd.read_pickle('database/data_DIA_17_01.pkl').reset_index(drop=True) data_align = align(data_train, data_ori) data_align.to_pickle('database/data_DIA_17_01_aligned.pkl') -data_train = pd.read_pickle('database/data_DIA_20_01.pkl').reset_index(drop=True) + +data_train = load_data('msms/msms20_01.txt').reset_index(drop=True) +# data_train = pd.read_pickle('database/data_DIA_20_01.pkl').reset_index(drop=True) data_align = align(data_train, data_ori) data_align.to_pickle('database/data_DIA_20_01_aligned.pkl') -data_train = pd.read_pickle('database/data_DIA_23_01.pkl').reset_index(drop=True) + +data_train = load_data('msms/msms23_01.txt').reset_index(drop=True) +# data_train = pd.read_pickle('database/data_DIA_23_01.pkl').reset_index(drop=True) data_align = align(data_train, data_ori) data_align.to_pickle('database/data_DIA_23_01_aligned.pkl') -data_train = pd.read_pickle('database/data_DIA_24_01.pkl').reset_index(drop=True) + +data_train = load_data('msms/msms24_01.txt').reset_index(drop=True) +# data_train = pd.read_pickle('database/data_DIA_24_01.pkl').reset_index(drop=True) data_align = align(data_train, data_ori) data_align.to_pickle('database/data_DIA_24_01_aligned.pkl') -data_train = pd.read_pickle('database/data_DIA_30_01.pkl').reset_index(drop=True) + +data_train = load_data('msms/msms30_01.txt').reset_index(drop=True) +# data_train = pd.read_pickle('database/data_DIA_30_01.pkl').reset_index(drop=True) data_align = align(data_train, data_ori) data_align.to_pickle('database/data_DIA_30_01_aligned.pkl') # -plt.scatter(data_train['Retention time'], data_align['Retention time'], s=1) -plt.savefig('test_align_2.png') +# plt.scatter(data_train['Retention time'], data_align['Retention time'], s=1) +# plt.savefig('test_align_2.png') # # # dataset_ref = pd.read_pickle('database/data_01_16_DIA_ISA_55.pkl') diff --git a/data_exploration.py b/data_exploration.py index 019c47127d7b437253c4af8ecf9726a9bc887f62..f74f487de2937bbc31503220c5cdb740671a29a6 100644 --- a/data_exploration.py +++ b/data_exploration.py @@ -1,7 +1,6 @@ import numpy as np import matplotlib.pyplot as plt import matplotlib -import pandas as pd matplotlib.use('agg') length = 30 diff --git a/database/data_DIA_ISA_55_test.pkl b/database/data_DIA_ISA_55_test.pkl index b7186f96f9507a7ce52296e19e8c4b7b6da6a15a..acd3f16f3327a11b7add15c1c0a5521e28a44c46 100644 Binary files a/database/data_DIA_ISA_55_test.pkl and b/database/data_DIA_ISA_55_test.pkl differ diff --git a/database/data_DIA_ISA_55_train.pkl b/database/data_DIA_ISA_55_train.pkl index 435b980676ffb348a88160fa92b574236aad8f75..bcdd924040a7188f2cb1a2ca2a0ec99ee2cda6c1 100644 Binary files a/database/data_DIA_ISA_55_train.pkl and b/database/data_DIA_ISA_55_train.pkl differ diff --git a/layers.py b/layers.py index 6c3f94f79bfad68a28449e155ff80652bb51287c..b36e1991121556554089d42a4397da7059b09ed1 100644 --- a/layers.py +++ b/layers.py @@ -1,5 +1,3 @@ -import math - import torch from torch import nn diff --git a/msms_processing.py b/msms_processing.py index bde575fa0ed43c3f5a374af6d4cbb63c7ddc21ee..5705fc43b573026f3e058d664e12b41bbf6e3213 100644 --- a/msms_processing.py +++ b/msms_processing.py @@ -109,6 +109,8 @@ if __name__ == '__main__': dataset_train = pd.concat(train_set).reset_index(drop=True) dataset_test = pd.concat(test_set).reset_index(drop=True) + dataset_train.to_pickle('database/data_DIA_ISA_55_train.pkl') + dataset_test.to_pickle('database/data_DIA_ISA_55_test.pkl')