Commit 75aefa8a authored by Schneider Leo

dataset train ISA

parent 02f95633
@@ -7,7 +7,7 @@ from common_dataset import Common_Dataset
import matplotlib.pyplot as plt
ALPHABET_UNMOD = {
"_": 0,
"": 0,
"A": 1,
"C": 2,
"D": 3,
@@ -67,14 +67,27 @@ def align(dataset, reference):
return dataset
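# Load the reference retention-time dataset and decode the integer-encoded
# sequences back to amino-acid strings before aligning against it.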
data_ori = RT_Dataset(None, 'database/data.csv', 'train', 25).data
data_ori['sequence'] = data_ori['sequence'].map(numerical_to_alphabetical)
# data_ori = pd.read_pickle('database/data_01_16_DIA_ISA_55.pkl')
# data_ori = Common_Dataset(data_ori, 30).data
data_train = pd.read_pickle('database/data_DIA_ISA_55_train.pkl')
# data_train = Common_Dataset(data_train, 30).data
#
data_ori = RT_Dataset(None, 'database/data_train.csv', 'train', 25).data
data_ori['sequence'] = data_ori['sequence'].map(numerical_to_alphabetical)
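# Align each DIA run (one pickle per acquisition date) to the reference
# retention times and cache the aligned table next to the raw one.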
for date in ('16_01', '17_01', '20_01', '23_01', '24_01', '30_01'):
    data_train = pd.read_pickle(f'database/data_DIA_{date}.pkl').reset_index(drop=True)
    data_align = align(data_train, data_ori)
    data_align.to_pickle(f'database/data_DIA_{date}_aligned.pkl')
#
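# Quick visual check: raw vs. aligned retention times for the last run processed.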
plt.scatter(data_train['Retention time'], data_align['Retention time'], s=1)
plt.savefig('test_align_2.png')
@@ -3,6 +3,7 @@ import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
import json
import random
def load_data(msms_filet_path='data/msms.txt', score_treshold=70):
@@ -76,13 +77,42 @@ def mscatter(x,y, ax=None, m=None, **kw):
            paths.append(path)
        sc.set_paths(paths)
    return sc
# data, gradient 10: 16/01, 20/01, 30/01
# data, gradient 3:  17/01, 23/01, 24/01
if __name__ == '__main__':
    pass
    # data_2 = load_data('data/Custom_dataset/msmsHBM_UCGTs.txt', i)
    # data_1 = load_data('data/Custom_dataset/msmsHBM_P450s.txt', i)
    # data_3 = load_data('data/Custom_dataset/msmsMkBM_P450s.txt', i)
    # data = pd.concat([data_1, data_2, data_3], ignore_index=True)
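    # Load the six retention-time-aligned DIA runs produced above and merge them.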
    data_1 = pd.read_pickle('database/data_DIA_16_01_aligned.pkl')
    data_2 = pd.read_pickle('database/data_DIA_17_01_aligned.pkl')
    data_3 = pd.read_pickle('database/data_DIA_20_01_aligned.pkl')
    data_4 = pd.read_pickle('database/data_DIA_23_01_aligned.pkl')
    data_5 = pd.read_pickle('database/data_DIA_24_01_aligned.pkl')
    data_6 = pd.read_pickle('database/data_DIA_30_01_aligned.pkl')
    data = pd.concat([data_1, data_2, data_3, data_4, data_5, data_6], ignore_index=True)
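    # 80/20 train/test split at the peptide-sequence level: every observation of
    # a given sequence lands in the same split, so the two sets share no sequences.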
    num_total = len(data)
    train_size = np.floor(0.8 * num_total)
    list_gr = []
    train_set = []
    test_set = []
    s = 0
    groups = data.groupby('Sequence')
    for seq, gr in groups:
        list_gr.append(gr)
    random.shuffle(list_gr)
    for gr in list_gr:
        if s < train_size:
            train_set.append(gr)
            s += len(gr)
        else:
            test_set.append(gr)
    dataset_train = pd.concat(train_set).reset_index(drop=True)
    dataset_test = pd.concat(test_set).reset_index(drop=True)
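    # A minimal alternative sketch of the same sequence-level split using
    # scikit-learn's GroupShuffleSplit; note it draws ~80% of the sequence
    # groups rather than ~80% of the rows, so proportions can differ slightly:
    #   from sklearn.model_selection import GroupShuffleSplit
    #   splitter = GroupShuffleSplit(n_splits=1, train_size=0.8, random_state=0)
    #   train_idx, test_idx = next(splitter.split(data, groups=data['Sequence']))
    #   dataset_train = data.iloc[train_idx].reset_index(drop=True)
    #   dataset_test = data.iloc[test_idx].reset_index(drop=True)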
    # err_rt = []
    # err_spec = []
    # nb_data = []