Skip to content
Snippets Groups Projects
Commit 6c2e15b8 authored by Schneider Leo
Browse files

fix typo

parent c46089fb
No related branches found
No related tags found
No related merge requests found
......@@ -5,13 +5,13 @@ import pandas as pd
from constant import ALPHABET_UNMOD
def length_distribution(data, plot=False, save=False, f_name='fig/data_exploration/length_distribution.png'):
max = 31
dist = np.zeros(max)
maximum = 31
dist = np.zeros(maximum)
for seq in data:
dist[len(list(seq)) - seq.count('-') * 2] += 1
if plot or save:
plt.stairs(dist, range(max + 1), fill=True)
plt.stairs(dist, range(maximum + 1), fill=True)
if plot:
plt.show()
if save:
......@@ -64,9 +64,9 @@ def main():
#data prosit
df = pd.read_csv('data_prosit/data.csv')
_ = length_distribution(df['sequence'],False ,True, '../fig/data_exploration/length_distribution_prosit.png')
_ = aa_distribution(df['sequence'], False, True, '../fig/data_exploration/aa_distribution_prosit.png')
_ = aa_distribution(df['mod_sequence'], False, True, '../fig/data_exploration/aa_distribution_prosit.png')
retention_time_distribution(df['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_prosit.png')
df_unique = df[['sequence','irt_scaled']].groupby('sequence').mean()
df_unique = df[['mod_sequence','irt_scaled']].groupby('mod_sequence').mean()
_ = length_distribution(df_unique.index, False, True, '../fig/data_exploration/length_distribution_prosit_unique.png')
_ = aa_distribution(df_unique.index, False, True, '../fig/data_exploration/aa_distribution_prosit_unique.png')
retention_time_distribution(df_unique['irt_scaled'], False, True,
......@@ -75,32 +75,32 @@ def main():
#prosit no cysteine
df = pd.read_csv('data_prosit/data_noc.csv')
_ = length_distribution(df['sequence'],False ,True, '../fig/data_exploration/length_distribution_prosit_noc.png')
_ = aa_distribution(df['sequence'], False, True, '../fig/data_exploration/aa_distribution_prosit_noc.png')
_ = aa_distribution(df['mod_sequence'], False, True, '../fig/data_exploration/aa_distribution_prosit_noc.png')
retention_time_distribution(df['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_prosit_noc.png')
df_unique = df[['sequence','irt_scaled']].groupby('sequence').mean()
df_unique = df[['mod_sequence','irt_scaled']].groupby('mod_sequence').mean()
_ = length_distribution(df_unique.index,False ,True, '../fig/data_exploration/length_distribution_prosit_noc_unique.png')
_ = aa_distribution(df_unique.index, False, True, '../fig/data_exploration/aa_distribution_prosit_noc_unique.png')
retention_time_distribution(df_unique['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_prosit_noc_unique.png')
#isa
df = pd.read_csv('data_ISA/data_aligned_isa.csv')
_ = length_distribution(df['sequence'],False ,True, '../fig/data_exploration/length_distribution_isa.png')
_ = aa_distribution(df['sequence'], False, True, '../fig/data_exploration/aa_distribution_isa.png')
retention_time_distribution(df['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa.png')
df_unique = df[['sequence', 'irt_scaled']].groupby('sequence').mean()
_ = length_distribution(df_unique.index,False ,True, '../fig/data_exploration/length_distribution_isa_unique.png')
_ = aa_distribution(df_unique.index, False, True, '../fig/data_exploration/aa_distribution_isa_unique.png')
retention_time_distribution(df_unique['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa_unique.png')
#isa no cysteine
df = pd.read_csv('data_ISA/data_aligned_isa_noc.csv')
_ = length_distribution(df['sequence'],False ,True, '../fig/data_exploration/length_distribution_isa_noc.png')
_ = aa_distribution(df['sequence'], False, True, '../fig/data_exploration/aa_distribution_isa_noc.png')
retention_time_distribution(df['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa_noc.png')
df_unique = df[['sequence', 'irt_scaled']].groupby('sequence').mean()
_ = length_distribution(df_unique.index,False ,True, '../fig/data_exploration/length_distribution_isa_noc_unique.png')
_ = aa_distribution(df_unique.index, False, True, '../fig/data_exploration/aa_distribution_isa_noc_unique.png')
retention_time_distribution(df_unique['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa_noc_unique.png')
# df = pd.read_csv('data_ISA/data_aligned_isa.csv')
# _ = length_distribution(df['sequence'],False ,True, '../fig/data_exploration/length_distribution_isa.png')
# _ = aa_distribution(df['sequence'], False, True, '../fig/data_exploration/aa_distribution_isa.png')
# retention_time_distribution(df['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa.png')
# df_unique = df[['sequence', 'irt_scaled']].groupby('sequence').mean()
# _ = length_distribution(df_unique.index,False ,True, '../fig/data_exploration/length_distribution_isa_unique.png')
# _ = aa_distribution(df_unique.index, False, True, '../fig/data_exploration/aa_distribution_isa_unique.png')
# retention_time_distribution(df_unique['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa_unique.png')
#
# #isa no cysteine
# df = pd.read_csv('data_ISA/data_aligned_isa_noc.csv')
# _ = length_distribution(df['sequence'],False ,True, '../fig/data_exploration/length_distribution_isa_noc.png')
# _ = aa_distribution(df['sequence'], False, True, '../fig/data_exploration/aa_distribution_isa_noc.png')
# retention_time_distribution(df['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa_noc.png')
# df_unique = df[['sequence', 'irt_scaled']].groupby('sequence').mean()
# _ = length_distribution(df_unique.index,False ,True, '../fig/data_exploration/length_distribution_isa_noc_unique.png')
# _ = aa_distribution(df_unique.index, False, True, '../fig/data_exploration/aa_distribution_isa_noc_unique.png')
# retention_time_distribution(df_unique['irt_scaled'], False, True, '../fig/data_exploration/retention_time_distribution_isa_noc_unique.png')
if __name__ == '__main__':
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment