From 56dc4e7813d1c7c39f7cfe8df9e502df201a8d00 Mon Sep 17 00:00:00 2001
From: Schneider Leo <leo.schneider@etu.ec-lyon.fr>
Date: Wed, 8 Jan 2025 13:55:38 +0100
Subject: [PATCH] empty line fix

---
 main.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/main.py b/main.py
index 21f2abd..54f2c28 100644
--- a/main.py
+++ b/main.py
@@ -12,8 +12,11 @@ def fasta_like_to_data(path):
 
 def strip_lines(s):
     s = s.strip('\n')
-    s = s.split(' ')[1]
-    return s
+    try:
+        return s.split(' ')[1]
+    except IndexError:
+        # Line has no second space-separated field (e.g. an empty line).
+        return 'unidentifid seq error'
 
 def main(input_data_path):
     print('Reading file')
@@ -24,6 +27,8 @@ def main(input_data_path):
     data = pd.DataFrame(content,columns=['Sequences'])
     data = data[~data['Sequences'].str.contains(">")]
     data['Sequences']=data['Sequences'].map(strip_lines)
+    data = data[~data['Sequences'].str.contains('unidentifid seq error')]
+
 
     data['Classes']=[0]*data.shape[0]
     data['Proteins']=[0]*data.shape[0]
@@ -95,4 +100,4 @@ def main(input_data_path):
     new_file.close()
 
 if __name__ == '__main__':
-    main('241211_FASTA_RP_GroEL_GroES_Tuf_assemble_peptides_list.txt')
\ No newline at end of file
+    main('250107_FASTA_RP_GroEL_GroES_Tuf_5pct_assemble_peptides_list.txt')
\ No newline at end of file
-- 
GitLab