Commit 45eea11f authored by Schneider Leo

track file source

parent a21ba97b
@@ -119,9 +119,10 @@ def zero_to_minus(arr):
 class Common_Dataset(Dataset):
-    def __init__(self, dataframe, length, pad=True, convert=True, vocab='unmod'):
+    def __init__(self, dataframe, length, pad=True, convert=True, vocab='unmod', file=False):
         print('Data loader Initialisation')
         self.data = dataframe.reset_index()
+        self.file_mode = file
         if pad :
             print('Padding')
             padding(self.data, 'Sequence', length)
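For reference, a minimal usage sketch of the new constructor flag (not part of the commit). `train_df` is a placeholder for any dataframe that already carries the columns the dataset reads, including the new 'file' column; the length value is arbitrary.

    dataset = Common_Dataset(train_df, length=30, pad=True, convert=True, vocab='unmod', file=True)
    seq, charge, rt, intensity, file_id = dataset[0]   # 5-tuple, since file_mode is enabled (see next hunk)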
@@ -135,10 +136,16 @@ class Common_Dataset(Dataset):
         seq = self.data['Sequence'][index]
         rt = self.data['Retention time'][index]
         intensity = self.data['Spectra'][index]
         charge = self.data['Charge'][index]
-        return torch.tensor(seq), torch.tensor(charge), torch.tensor(rt).float(), torch.tensor(intensity)
+        file = self.data['file'][index]
+        if self.file_mode :
+            return torch.tensor(seq), torch.tensor(charge), torch.tensor(rt).float(), torch.tensor(intensity), torch.tensor(file)
+        else :
+            return torch.tensor(seq), torch.tensor(charge), torch.tensor(rt).float(), torch.tensor(intensity)
+    def set_file_mode(self,b):
+        self.file_mode=b
     def __len__(self) -> int:
         return self.data.shape[0]
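Note that __getitem__ looks up self.data['file'][index] before checking file_mode, so the dataframe must contain a 'file' column even when the flag is off. Under that assumption, a hedged sketch of batch iteration with a standard PyTorch DataLoader (batch size and variable names are illustrative):

    from torch.utils.data import DataLoader

    dataset.set_file_mode(True)                  # same effect as passing file=True at construction
    loader = DataLoader(dataset, batch_size=16, shuffle=False)
    for seq, charge, rt, intensity, file_id in loader:
        # file_id is a 1-D integer tensor holding the per-sample source-file ids
        pass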
@@ -253,23 +253,23 @@ def add_length(dataframe):
     dataframe['length']=dataframe['seq'].map(fonc)
-df = pd.read_csv('output/output_common_data_ISA.csv')
+# df = pd.read_csv('output/output_common_data_ISA.csv')
+# add_length(df)
+# df['abs_error'] = np.abs(df['rt pred']-df['true rt'])
+# histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_ISA_ISA.png')
+# scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_ISA_ISA.png', color=True)
+# histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_ISA_ISA.png')
+#
+# df = pd.read_csv('output/out_prosit_common.csv')
+# add_length(df)
+# df['abs_error'] = np.abs(df['rt pred']-df['true rt'])
+# histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_prosit_prosit.png')
+# scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_prosit_prosit.png', color=True)
+# histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_prosit_prosit.png')
+df = pd.read_csv('output/out_common_transfereval.csv')
 add_length(df)
 df['abs_error'] = np.abs(df['rt pred']-df['true rt'])
-histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_ISA_ISA.png')
-scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_ISA_ISA.png', color=True)
-histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_ISA_ISA.png')
-df = pd.read_csv('output/out_prosit_common.csv')
-add_length(df)
-df['abs_error'] = np.abs(df['rt pred']-df['true rt'])
-histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_prosit_prosit.png')
-scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_prosit_prosit.png', color=True)
-histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_prosit_prosit.png')
-df = pd.read_csv('output/out_common_transfer.csv')
-add_length(df)
-df['abs_error'] = np.abs(df['rt pred']-df['true rt'])
-histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_prosit_ISA.png')
-scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_prosit_ISA.png', color=True)
-histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_prosit_ISA.png')
\ No newline at end of file
+histo_abs_error(df, display=False, save=True, path='fig/custom model res/histo_prosit_ISA_eval.png')
+scatter_rt(df, display=False, save=True, path='fig/custom model res/RT_pred_prosit_ISA_eval.png', color=True)
+histo_length_by_error(df, bins=10, display=False, save=True, path='fig/custom model res/histo_length_prosit_ISA_eval.png')
\ No newline at end of file
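Since save_pred now also writes a 'file' column (next hunk), the same evaluation script could break the absolute error down per source file. A hedged sketch, assuming the CSV layout produced by save_pred:

    import numpy as np
    import pandas as pd

    df = pd.read_csv('output/out_common_transfereval.csv')
    df['abs_error'] = np.abs(df['rt pred'] - df['true rt'])
    print(df.groupby('file')['abs_error'].agg(['mean', 'median', 'count']))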
@@ -278,8 +278,9 @@ def save_pred(model, data_val, forward, output_path):
     for param in model.parameters():
         param.requires_grad = False
     if forward == 'both':
-        pred_rt, pred_int, seqs, charges, true_rt, true_int = [], [], [], [], [], []
-        for seq, charge, rt, intensity in data_val:
+        pred_rt, pred_int, seqs, charges, true_rt, true_int, file_list = [], [], [], [], [], [], []
+        data_val.data.set_file_mode(True)
+        for seq, charge, rt, intensity, file in data_val:
            rt, intensity = rt.float(), intensity.float()
            if torch.cuda.is_available():
                seq, charge, rt, intensity = seq.cuda(), charge.cuda(), rt.cuda(), intensity.cuda()
@@ -290,12 +291,14 @@ def save_pred(model, data_val, forward, output_path):
            charges.extend(charge.data.cpu().tolist())
            true_rt.extend(rt.data.cpu().tolist())
            true_int.extend(intensity.data.cpu().tolist())
+           file_list.extend([file])
    data_frame['rt pred'] = pred_rt
    data_frame['seq'] = seqs
    data_frame['pred int'] = pred_int
    data_frame['true rt'] = true_rt
    data_frame['true int'] = true_int
    data_frame['charge'] = charges
+   data_frame['file'] = file_list
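Two assumptions in this hunk are worth flagging: data_val.data.set_file_mode(True) presumes the object passed as data_val exposes the Common_Dataset through a .data attribute, and file_list.extend([file]) appends one batch-level tensor per iteration rather than one id per sample, unlike the neighbouring .extend(x.data.cpu().tolist()) calls. A hedged sketch of a per-sample variant:

    # Hypothetical variant (not what the commit does): one file id per sample, so that
    # data_frame['file'] ends up the same length as 'rt pred', 'seq', etc.
    file_list = []
    for seq, charge, rt, intensity, file in data_val:
        file_list.extend(file.cpu().tolist())    # file is a 1-D tensor of per-sample ids
    data_frame['file'] = file_list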
@@ -83,11 +83,17 @@ def mscatter(x,y, ax=None, m=None, **kw):
 # 17/01 23/01 24/01
 if __name__ == '__main__':
     data_1 = pd.read_pickle('database/data_DIA_16_01_aligned.pkl')
+    data_1['file']= 1
     data_2 = pd.read_pickle('database/data_DIA_17_01_aligned.pkl')
+    data_2['file'] = 2
     data_3 = pd.read_pickle('database/data_DIA_20_01_aligned.pkl')
+    data_3['file'] = 3
     data_4 = pd.read_pickle('database/data_DIA_23_01_aligned.pkl')
+    data_4['file'] = 4
     data_5 = pd.read_pickle('database/data_DIA_24_01_aligned.pkl')
+    data_5['file'] = 5
     data_6 = pd.read_pickle('database/data_DIA_30_01_aligned.pkl')
+    data_6['file'] = 6
     data = pd.concat([data_1, data_2, data_3, data_4, data_5, data_6], ignore_index=True)
     num_total = len(data)
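The six read_pickle/tag pairs follow the same pattern, so the id assignment could also be written as a loop. A minimal sketch, assuming the same paths and 1-based file ids as above:

    import pandas as pd

    dates = ['16_01', '17_01', '20_01', '23_01', '24_01', '30_01']
    frames = []
    for file_id, date in enumerate(dates, start=1):
        frame = pd.read_pickle(f'database/data_DIA_{date}_aligned.pkl')
        frame['file'] = file_id              # source-file id consumed downstream by Common_Dataset
        frames.append(frame)
    data = pd.concat(frames, ignore_index=True)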