Skip to content
Snippets Groups Projects
Commit 34a96c5a authored by Schneider Leo's avatar Schneider Leo
Browse files

data viz

parent 0d5fcf37
No related branches found
No related tags found
No related merge requests found
......@@ -128,11 +128,11 @@ def main():
# retention_time_distribution(df_unique['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa_noc_mox_unique.png')
#prosit outlier_plasma
df = pd.read_csv('data_PXD006109/data_prosit_outlier.csv')
df = pd.read_csv('data_ISA/data_prosit_outlier.csv')
df['seq']=df['seq'].map(numerical_to_alphabetical_str)
_ = length_distribution(df['seq'],False ,True, '../fig/data_exploration/length_distribution_prosit_outlier.png')
_ = aa_distribution(df['seq'], False, True, '../fig/data_exploration/aa_distribution_prosit_outlier.png')
retention_time_distribution(df['true rt'], False, True, '../fig/data_exploration/retention_time_distribution_prosit_outlier.png')
_ = length_distribution(df['seq'],False ,True, '../fig/data_exploration/length_distribution_ISA_prosit_outlier.png')
_ = aa_distribution(df['seq'], False, True, '../fig/data_exploration/aa_distribution_ISA_prosit_outlier.png')
retention_time_distribution(df['true rt'], False, True, '../fig/data_exploration/retention_time_distribution_ISA_prosit_outlier.png')
if __name__ == '__main__':
......
......@@ -7,7 +7,7 @@ import random
import pandas as pd
from constant import ALPHABET_UNMOD_REV
import matplotlib.colors as mcolors
import peptides as pep
def histo_abs_error(dataframe, display=False, save=False, path=None):
points = dataframe['abs_error']
......@@ -266,9 +266,35 @@ def plot_augmented_dataset_size(ref_path,base_path):
plt.savefig('../fig/data_exploration/augmented_dataset_size.png')
def compute_peptide_properties(df, base_name, col='seq', format='alpha'):
    """Plot histograms of three per-peptide properties and save them as PNGs.

    For every sequence in ``df[col]`` a ``pep.Peptide`` is built and its
    hydrophobicity, isoelectric point and molecular weight are collected.
    One 50-bin histogram per property is written to
    ``../fig/data_exploration/<property>_<base_name>.png``.

    Args:
        df: DataFrame holding the peptide sequences.
        base_name: Suffix appended to each output file name.
        col: Name of the column containing the sequences.
        format: ``'alpha'`` when the column already holds alphabetical
            sequences; any other value means the sequences are first passed
            through ``numerical_to_alphabetical_str``.

    Returns:
        None. The only outputs are the three saved figures.
    """
    sequences = df[col]
    if format != 'alpha':
        # Convert into a local series instead of writing back into ``df``:
        # mutating the caller's frame would make a second call on the same
        # frame try to convert already-converted sequences.
        sequences = sequences.map(numerical_to_alphabetical_str)

    hydro = []
    isop = []
    molecular_w = []
    for p in sequences:
        pept = pep.Peptide(p)
        hydro.append(pept.hydrophobicity())
        isop.append(pept.isoelectric_point())
        molecular_w.append(pept.molecular_weight())

    # Each figure must plot its own property list; previously all three
    # histograms were drawn from ``hydro``.
    figures = (
        (hydro, "Hydrophobicity", 'hydrophobicity'),
        (isop, "Isoelectric point", 'isoelectric_point'),
        (molecular_w, "Molecular weight", 'molecular_weight'),
    )
    for values, title, stem in figures:
        plt.hist(values, bins=50)
        plt.title(title)
        plt.savefig('../fig/data_exploration/{}_{}.png'.format(stem, base_name))
        # Clear the current figure so the next histogram starts from a blank canvas.
        plt.clf()
if __name__ == '__main__' :
calc_and_plot_res()
# calc_and_plot_res()
# base = ['plasma_plasma','plasma_prosit']
# # augmented = ['ISA_aug_07_ISA_noc','ISA_aug_1_ISA_noc','ISA_aug_all_ISA_noc']
# for f_suffix_name in base:
......@@ -285,4 +311,22 @@ if __name__ == '__main__' :
# error_by_methionine(df)
# dataframe = pd.read_csv('../output/out_early_stop_plasma_prosit_0.csv')
# df = filter_outlier_rt(dataframe)
# df.to_csv('../data/data_PXD006109/data_prosit_outlier.csv', index=False)
\ No newline at end of file
# df.to_csv('../data/data_PXD006109/data_prosit_outlier.csv', index=False)
#
# dataframe = pd.read_csv('../archive_output/ISA/out_ISA_noc_prosit_0.csv')
# df2 = filter_outlier_rt(dataframe)
# df2.to_csv('../data/data_ISA/data_prosit_outlier.csv', index=False)
df = pd.read_csv('../data/data_PXD006109/data_prosit_outlier.csv')
compute_peptide_properties(df, 'plasma_prosit_outlier', 'seq', 'num')
df = pd.read_csv('../data/data_ISA/data_prosit_outlier.csv')
compute_peptide_properties(df,'ISA_prosit_outlier','seq', 'num')
df = pd.read_csv('../data/data_ISA/data_isa.csv')
compute_peptide_properties(df,'ISA','sequence')
df = pd.read_csv('../data/data_prosit/data.csv')
compute_peptide_properties(df,'prosit','sequence')
df = pd.read_csv('../data/data_PXD006109/plasma/data_plasma.csv')
compute_peptide_properties(df,'plasma','sequence')
\ No newline at end of file
......@@ -106,7 +106,7 @@ def main(args):
embedding_dim=args.embedding_dim, acti=args.activation, norm=args.norm_first)
if args.model_weigh is not None :
model.load_state_dict(torch.load(args.model_weigh+'.pt', weights_only=True))
model.load_state_dict(torch.load(args.model_weigh, weights_only=True))
if torch.cuda.is_available():
model = model.cuda()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment