Skip to content
Snippets Groups Projects
Commit 6c2e15b8 authored by Schneider Leo
Browse files

fix typo

parent c46089fb
No related branches found
No related tags found
No related merge requests found
...@@ -5,13 +5,13 @@ import pandas as pd ...@@ -5,13 +5,13 @@ import pandas as pd
from constant import ALPHABET_UNMOD from constant import ALPHABET_UNMOD
def length_distribution(data, plot=False, save=False, f_name='fig/data_exploration/length_distribution.png'): def length_distribution(data, plot=False, save=False, f_name='fig/data_exploration/length_distribution.png'):
max = 31 maximum = 31
dist = np.zeros(max) dist = np.zeros(maximum)
for seq in data: for seq in data:
dist[len(list(seq)) - seq.count('-') * 2] += 1 dist[len(list(seq)) - seq.count('-') * 2] += 1
if plot or save: if plot or save:
plt.stairs(dist, range(max + 1), fill=True) plt.stairs(dist, range(maximum + 1), fill=True)
if plot: if plot:
plt.show() plt.show()
if save: if save:
...@@ -64,9 +64,9 @@ def main(): ...@@ -64,9 +64,9 @@ def main():
#data prosit #data prosit
df = pd.read_csv('data_prosit/data.csv') df = pd.read_csv('data_prosit/data.csv')
_ = length_distribution(df['sequence'],False ,True, '../fig/data_exploration/length_distribution_prosit.png') _ = length_distribution(df['sequence'],False ,True, '../fig/data_exploration/length_distribution_prosit.png')
_ = aa_distribution(df['sequence'], False, True, '../fig/data_exploration/aa_distribution_prosit.png') _ = aa_distribution(df['mod_sequence'], False, True, '../fig/data_exploration/aa_distribution_prosit.png')
retention_time_distribution(df['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_prosit.png') retention_time_distribution(df['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_prosit.png')
df_unique = df[['sequence','irt_scaled']].groupby('sequence').mean() df_unique = df[['mod_sequence','irt_scaled']].groupby('mod_sequence').mean()
_ = length_distribution(df_unique.index, False, True, '../fig/data_exploration/length_distribution_prosit_unique.png') _ = length_distribution(df_unique.index, False, True, '../fig/data_exploration/length_distribution_prosit_unique.png')
_ = aa_distribution(df_unique.index, False, True, '../fig/data_exploration/aa_distribution_prosit_unique.png') _ = aa_distribution(df_unique.index, False, True, '../fig/data_exploration/aa_distribution_prosit_unique.png')
retention_time_distribution(df_unique['irt_scaled'], False, True, retention_time_distribution(df_unique['irt_scaled'], False, True,
...@@ -75,32 +75,32 @@ def main(): ...@@ -75,32 +75,32 @@ def main():
#prosit no cysteine #prosit no cysteine
df = pd.read_csv('data_prosit/data_noc.csv') df = pd.read_csv('data_prosit/data_noc.csv')
_ = length_distribution(df['sequence'],False ,True, '../fig/data_exploration/length_distribution_prosit_noc.png') _ = length_distribution(df['sequence'],False ,True, '../fig/data_exploration/length_distribution_prosit_noc.png')
_ = aa_distribution(df['sequence'], False, True, '../fig/data_exploration/aa_distribution_prosit_noc.png') _ = aa_distribution(df['mod_sequence'], False, True, '../fig/data_exploration/aa_distribution_prosit_noc.png')
retention_time_distribution(df['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_prosit_noc.png') retention_time_distribution(df['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_prosit_noc.png')
df_unique = df[['sequence','irt_scaled']].groupby('sequence').mean() df_unique = df[['mod_sequence','irt_scaled']].groupby('mod_sequence').mean()
_ = length_distribution(df_unique.index,False ,True, '../fig/data_exploration/length_distribution_prosit_noc_unique.png') _ = length_distribution(df_unique.index,False ,True, '../fig/data_exploration/length_distribution_prosit_noc_unique.png')
_ = aa_distribution(df_unique.index, False, True, '../fig/data_exploration/aa_distribution_prosit_noc_unique.png') _ = aa_distribution(df_unique.index, False, True, '../fig/data_exploration/aa_distribution_prosit_noc_unique.png')
retention_time_distribution(df_unique['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_prosit_noc_unique.png') retention_time_distribution(df_unique['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_prosit_noc_unique.png')
#isa #isa
df = pd.read_csv('data_ISA/data_aligned_isa.csv') # df = pd.read_csv('data_ISA/data_aligned_isa.csv')
_ = length_distribution(df['sequence'],False ,True, '../fig/data_exploration/length_distribution_isa.png') # _ = length_distribution(df['sequence'],False ,True, '../fig/data_exploration/length_distribution_isa.png')
_ = aa_distribution(df['sequence'], False, True, '../fig/data_exploration/aa_distribution_isa.png') # _ = aa_distribution(df['sequence'], False, True, '../fig/data_exploration/aa_distribution_isa.png')
retention_time_distribution(df['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa.png') # retention_time_distribution(df['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa.png')
df_unique = df[['sequence', 'irt_scaled']].groupby('sequence').mean() # df_unique = df[['sequence', 'irt_scaled']].groupby('sequence').mean()
_ = length_distribution(df_unique.index,False ,True, '../fig/data_exploration/length_distribution_isa_unique.png') # _ = length_distribution(df_unique.index,False ,True, '../fig/data_exploration/length_distribution_isa_unique.png')
_ = aa_distribution(df_unique.index, False, True, '../fig/data_exploration/aa_distribution_isa_unique.png') # _ = aa_distribution(df_unique.index, False, True, '../fig/data_exploration/aa_distribution_isa_unique.png')
retention_time_distribution(df_unique['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa_unique.png') # retention_time_distribution(df_unique['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa_unique.png')
#
#isa no cystéine # #isa no cystéine
df = pd.read_csv('data_ISA/data_aligned_isa_noc.csv') # df = pd.read_csv('data_ISA/data_aligned_isa_noc.csv')
_ = length_distribution(df['sequence'],False ,True, '../fig/data_exploration/length_distribution_isa_noc.png') # _ = length_distribution(df['sequence'],False ,True, '../fig/data_exploration/length_distribution_isa_noc.png')
_ = aa_distribution(df['sequence'], False, True, '../fig/data_exploration/aa_distribution_isa_noc.png') # _ = aa_distribution(df['sequence'], False, True, '../fig/data_exploration/aa_distribution_isa_noc.png')
retention_time_distribution(df['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa_noc.png') # retention_time_distribution(df['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa_noc.png')
df_unique = df[['sequence', 'irt_scaled']].groupby('sequence').mean() # df_unique = df[['sequence', 'irt_scaled']].groupby('sequence').mean()
_ = length_distribution(df_unique.index,False ,True, '../fig/data_exploration/length_distribution_isa_noc_unique.png') # _ = length_distribution(df_unique.index,False ,True, '../fig/data_exploration/length_distribution_isa_noc_unique.png')
_ = aa_distribution(df_unique.index, False, True, '../fig/data_exploration/aa_distribution_isa_noc_unique.png') # _ = aa_distribution(df_unique.index, False, True, '../fig/data_exploration/aa_distribution_isa_noc_unique.png')
retention_time_distribution(df_unique['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa_noc_unique.png') # retention_time_distribution(df_unique['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa_noc_unique.png')
if __name__ == '__main__': if __name__ == '__main__':
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment