Skip to content
Snippets Groups Projects
Commit 945682f5 authored by Schneider Leo's avatar Schneider Leo
Browse files

df oktoberfest

parent e34578b4
No related branches found
No related tags found
No related merge requests found
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
# from loess.loess_1d import loess_1d
from loess.loess_1d import loess_1d
import time
ALPHABET_UNMOD = {
......
......@@ -71,10 +71,31 @@ if __name__ == '__main__':
# df.to_csv('spectral_lib/df_predicted_library_oktoberfest.csv',index=False)
#
#
# #write new .msp with new RT
#
#
df= pd.read_csv('spectral_lib/df_predicted_library_oktoberfest.csv')
#write new .msp with new RT
seq=[]
file = open("spectral_lib/predicted_library.msp", "r")
content=file.readlines()
file.close()
remove = False
predicted_lib=pd.read_csv('../output/out_lib_oktoberfest.csv')
pred_rt=predicted_lib['rt pred']
for i in range(len(content)) :
if remove:
if 'Name:' in content[i]:
remove = False
else :
pass
if 'Name:'in content[i]:
s=content[i].split(': ')[1].split('/')[0]
if 'C' in s or len(s)>30:
remove=True
else :
seq.append(s)
df = pd.DataFrame(seq,columns=['sequence'])
predicted_lib=pd.read_csv('../output/out_lib_oktoberfest.csv')
......
......@@ -5,6 +5,8 @@ import pyarrow.parquet as pq
import pyarrow as pa
import torch
import matplotlib.pyplot as plt
from loess.loess_1d import loess_1d
from model.model import ModelTransformer
from config import load_args
from data.dataset import load_data
......@@ -93,34 +95,81 @@ def predict(data_pred, model, output_path):
if __name__ =='__main__':
# df = load_lib('data/spectral_lib/first_lib.parquet')
#
df = load_lib('spectral_lib/first_lib.parquet')
# plt.hist(df['RT'])
# plt.savefig('test.png')
#
# df_2 = pd.read_csv('data/data_prosit/data.csv')
# df_2 = pd.read_csv('data_prosit/data.csv')
#
# plt.clf()
# plt.hist(df_2['irt'])
# plt.savefig('test2.png')
#
# df_2 = extract_sequence(df).reset_index(drop=True)
#
# pred = pd.read_csv('../output/out_uniprot_base.csv')
#
# pred['seq']=pred['seq'].map(numerical_to_alphabetical_str)
#
# pred['Modified.Sequence']=pred['seq']
#
# result = pd.merge(df,pred[['Modified.Sequence','rt pred']],on='Modified.Sequence',how='left')
#
# result['RT']=result['rt pred']
#
#
# #alignment
#
# ref = pd.read_csv('data_prosit/data_noc.csv')
# df_ISA = pd.read_csv('data_ISA/data_aligned_isa_noc.csv')
#
# dataset, reference, column_dataset, column_ref, seq_data, seq_ref = df_ISA, ref, 'irt_scaled', 'irt', 'sequence','sequence',
#
# dataset_ref=dataset[dataset['state']=='train']
# dataset_unique = dataset_ref[[seq_data,column_dataset]].groupby(seq_data).mean()
# print('unique',len(dataset_unique))
# reference_unique = reference[[seq_ref,column_ref]].groupby(seq_ref).mean()
# seq_ref = reference_unique.index
# seq_common = dataset_unique.index
# seq_ref = seq_ref.tolist()
# seq_common = seq_common.tolist()
#
# seq_ref = [tuple(l) for l in seq_ref]
# seq_common = [tuple(l) for l in seq_common]
#
# ind_dict_ref = dict((k, i) for i, k in enumerate(seq_ref))
# inter = set(ind_dict_ref).intersection(seq_common)
# print(len(inter))
#
# ind_dict_ref = [ind_dict_ref[x] for x in inter]
#
# indices_common = dict((k, i) for i, k in enumerate(seq_common))
# indices_common = [indices_common[x] for x in inter]
#
#
# rt_ref = reference_unique[column_ref][ind_dict_ref].reset_index()
# rt_data = dataset_unique[column_dataset][indices_common].reset_index()
#
# plt.scatter(rt_data[column_dataset].tolist(),rt_ref[column_ref].tolist(),s=0.1)
# plt.savefig('test.png')
#
# #NaN values break the realignment (temporary workaround: replace them with 0).
# result['rt pred']=result['rt pred'].fillna(value=0)
# xout, yout, wout = loess_1d(np.array(rt_data[column_dataset].tolist()), np.array(rt_ref[column_ref].tolist()),
# xnew=result['rt pred'],
# degree=1,
# npoints=None, rotate=False, sigy=None)
#
#
# #writing results
#
# result['RT'] = yout
#
# result = result.drop('rt pred', axis=1)
#
# table = pa.Table.from_pandas(result)
#
# pq.write_table(table, 'spectral_lib/custom_first_lib.parquet')
# pq.write_table(table, 'spectral_lib/custom_first_lib_prosit_aligned.parquet')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment