Commit 45eea11f authored by Schneider Leo

track file source

parent a21ba97b
@@ -119,9 +119,10 @@ def zero_to_minus(arr):
 class Common_Dataset(Dataset):
-    def __init__(self, dataframe, length, pad=True, convert=True, vocab='unmod'):
+    def __init__(self, dataframe, length, pad=True, convert=True, vocab='unmod', file=False):
         print('Data loader Initialisation')
         self.data = dataframe.reset_index()
+        self.file_mode = file
         if pad :
             print('Padding')
             padding(self.data, 'Sequence', length)
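A minimal construction sketch, assuming a dataframe that already carries the columns the class reads ('Sequence', 'Retention time', 'Spectra', 'Charge') plus the new 'file' column; the pickle path is taken from the main script below, and the length value is illustrative:

import pandas as pd

# Hypothetical input: one aligned run, tagged with its source-file id.
df = pd.read_pickle('database/data_DIA_16_01_aligned.pkl')
df['file'] = 1

# file=True enables file_mode at construction time; it can also be toggled
# later via set_file_mode (see the next hunk).
dataset = Common_Dataset(df, length=30, file=True)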
@@ -135,10 +136,16 @@ class Common_Dataset(Dataset):
         seq = self.data['Sequence'][index]
         rt = self.data['Retention time'][index]
         intensity = self.data['Spectra'][index]
         charge = self.data['Charge'][index]
+        file = self.data['file'][index]
 
-        return torch.tensor(seq), torch.tensor(charge), torch.tensor(rt).float(), torch.tensor(intensity)
+        if self.file_mode :
+            return torch.tensor(seq), torch.tensor(charge), torch.tensor(rt).float(), torch.tensor(intensity), torch.tensor(file)
+        else :
+            return torch.tensor(seq), torch.tensor(charge), torch.tensor(rt).float(), torch.tensor(intensity)
+
+    def set_file_mode(self,b):
+        self.file_mode=b
 
     def __len__(self) -> int:
         return self.data.shape[0]
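A sketch of consuming the extra element once file_mode is on; wrapping the dataset in a torch DataLoader is an assumption here, and the batch size is arbitrary:

from torch.utils.data import DataLoader

dataset.set_file_mode(True)   # __getitem__ now returns a 5-tuple
loader = DataLoader(dataset, batch_size=64, shuffle=False)

for seq, charge, rt, intensity, file in loader:
    # 'file' is a tensor of per-sample source-file ids alongside the usual fields
    pass

dataset.set_file_mode(False)  # revert to the original 4-tuple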
...
@@ -253,23 +253,23 @@ def add_length(dataframe):
     dataframe['length']=dataframe['seq'].map(fonc)
 
-df = pd.read_csv('output/output_common_data_ISA.csv')
-add_length(df)
-df['abs_error'] = np.abs(df['rt pred']-df['true rt'])
-histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_ISA_ISA.png')
-scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_ISA_ISA.png', color=True)
-histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_ISA_ISA.png')
-
-df = pd.read_csv('output/out_prosit_common.csv')
-add_length(df)
-df['abs_error'] = np.abs(df['rt pred']-df['true rt'])
-histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_prosit_prosit.png')
-scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_prosit_prosit.png', color=True)
-histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_prosit_prosit.png')
-
-df = pd.read_csv('output/out_common_transfer.csv')
-add_length(df)
-df['abs_error'] = np.abs(df['rt pred']-df['true rt'])
-histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_prosit_ISA.png')
-scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_prosit_ISA.png', color=True)
-histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_prosit_ISA.png')
\ No newline at end of file
+# df = pd.read_csv('output/output_common_data_ISA.csv')
+# add_length(df)
+# df['abs_error'] = np.abs(df['rt pred']-df['true rt'])
+# histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_ISA_ISA.png')
+# scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_ISA_ISA.png', color=True)
+# histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_ISA_ISA.png')
+#
+# df = pd.read_csv('output/out_prosit_common.csv')
+# add_length(df)
+# df['abs_error'] = np.abs(df['rt pred']-df['true rt'])
+# histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_prosit_prosit.png')
+# scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_prosit_prosit.png', color=True)
+# histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_prosit_prosit.png')
+
+df = pd.read_csv('output/out_common_transfereval.csv')
+add_length(df)
+df['abs_error'] = np.abs(df['rt pred']-df['true rt'])
+histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_prosit_ISA_eval.png')
+scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_prosit_ISA_eval.png', color=True)
+histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_prosit_ISA_eval.png')
\ No newline at end of file
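The three plotting calls now repeat for every results CSV that gets evaluated; a small helper along these lines (a sketch reusing the functions already defined in this file, with an illustrative tag argument for the figure names) would avoid duplicating the block:

def plot_results(csv_path, tag):
    # Load one prediction CSV and regenerate the three standard figures.
    df = pd.read_csv(csv_path)
    add_length(df)
    df['abs_error'] = np.abs(df['rt pred'] - df['true rt'])
    histo_abs_error(df, display=False, save=True,
                    path=f'fig/custom model res/histo_{tag}.png')
    scatter_rt(df, display=False, save=True,
               path=f'fig/custom model res/RT_pred_{tag}.png', color=True)
    histo_length_by_error(df, bins=10, display=False, save=True,
                          path=f'fig/custom model res/histo_length_{tag}.png')

plot_results('output/out_common_transfereval.csv', 'prosit_ISA_eval')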
@@ -278,8 +278,9 @@ def save_pred(model, data_val, forward, output_path):
     for param in model.parameters():
         param.requires_grad = False
     if forward == 'both':
-        pred_rt, pred_int, seqs, charges, true_rt, true_int = [], [], [], [], [], []
-        for seq, charge, rt, intensity in data_val:
+        pred_rt, pred_int, seqs, charges, true_rt, true_int, file_list = [], [], [], [], [], [], []
+        data_val.data.set_file_mode(True)
+        for seq, charge, rt, intensity, file in data_val:
             rt, intensity = rt.float(), intensity.float()
             if torch.cuda.is_available():
                 seq, charge, rt, intensity = seq.cuda(), charge.cuda(), rt.cuda(), intensity.cuda()
@@ -290,12 +291,14 @@ def save_pred(model, data_val, forward, output_path):
             charges.extend(charge.data.cpu().tolist())
             true_rt.extend(rt.data.cpu().tolist())
             true_int.extend(intensity.data.cpu().tolist())
+            file_list.extend([file])
 
         data_frame['rt pred'] = pred_rt
         data_frame['seq'] = seqs
         data_frame['pred int'] = pred_int
         data_frame['true rt'] = true_rt
         data_frame['true int'] = true_int
         data_frame['charge'] = charges
+        data_frame['file'] = file_list
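One caveat: the other accumulators grow by one entry per sample (.extend(...tolist())), whereas file_list.extend([file]) appends one batch object per iteration, so data_frame['file'] can end up shorter than data_frame['rt pred'] when data_val yields batches. A per-sample variant, mirroring the neighbouring lines and assuming 'file' is a 1-D batch tensor of source-file ids, would be:

# inside the same loop, instead of file_list.extend([file])
file_list.extend(file.data.cpu().tolist())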
...
@@ -83,11 +83,17 @@ def mscatter(x,y, ax=None, m=None, **kw):
 # 17/01 23/01 24/01
 if __name__ == '__main__':
     data_1 = pd.read_pickle('database/data_DIA_16_01_aligned.pkl')
+    data_1['file']= 1
     data_2 = pd.read_pickle('database/data_DIA_17_01_aligned.pkl')
+    data_2['file'] = 2
     data_3 = pd.read_pickle('database/data_DIA_20_01_aligned.pkl')
+    data_3['file'] = 3
     data_4 = pd.read_pickle('database/data_DIA_23_01_aligned.pkl')
+    data_4['file'] = 4
     data_5 = pd.read_pickle('database/data_DIA_24_01_aligned.pkl')
+    data_5['file'] = 5
     data_6 = pd.read_pickle('database/data_DIA_30_01_aligned.pkl')
+    data_6['file'] = 6
     data = pd.concat([data_1, data_2, data_3, data_4, data_5, data_6], ignore_index=True)
     num_total = len(data)
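The per-run tagging can also be written as a loop over the pickle paths; a sketch assuming the same six aligned files and the same 1-based ids as the explicit assignments above:

paths = [
    'database/data_DIA_16_01_aligned.pkl',
    'database/data_DIA_17_01_aligned.pkl',
    'database/data_DIA_20_01_aligned.pkl',
    'database/data_DIA_23_01_aligned.pkl',
    'database/data_DIA_24_01_aligned.pkl',
    'database/data_DIA_30_01_aligned.pkl',
]
frames = []
for file_id, path in enumerate(paths, start=1):
    frame = pd.read_pickle(path)
    frame['file'] = file_id   # same 1..6 ids as the explicit version
    frames.append(frame)
data = pd.concat(frames, ignore_index=True)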
...