From 56dc4e7813d1c7c39f7cfe8df9e502df201a8d00 Mon Sep 17 00:00:00 2001 From: Schneider Leo <leo.schneider@etu.ec-lyon.fr> Date: Wed, 8 Jan 2025 13:55:38 +0100 Subject: [PATCH] empty line fix --- main.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/main.py b/main.py index 21f2abd..54f2c28 100644 --- a/main.py +++ b/main.py @@ -12,8 +12,11 @@ def fasta_like_to_data(path): def strip_lines(s): s = s.strip('\n') - s = s.split(' ')[1] - return s + try : + s = s.split(' ')[1] + return s + except: + return 'unidentifid seq error' def main(input_data_path): print('Reading file') @@ -24,6 +27,8 @@ def main(input_data_path): data = pd.DataFrame(content,columns=['Sequences']) data = data[~data['Sequences'].str.contains(">")] data['Sequences']=data['Sequences'].map(strip_lines) + data = data[~data['Sequences'].str.contains('unidentifid seq error')] + data['Classes']=[0]*data.shape[0] data['Proteins']=[0]*data.shape[0] @@ -95,4 +100,4 @@ def main(input_data_path): new_file.close() if __name__ == '__main__': - main('241211_FASTA_RP_GroEL_GroES_Tuf_assemble_peptides_list.txt') \ No newline at end of file + main('250107_FASTA_RP_GroEL_GroES_Tuf_5pct_assemble_peptides_list.txt') \ No newline at end of file -- GitLab