Skip to content
Snippets Groups Projects
Commit 56dc4e78 authored by Schneider Leo's avatar Schneider Leo
Browse files

empty line fix

parent 7f5d0e74
No related branches found
No related tags found
No related merge requests found
......@@ -12,8 +12,11 @@ def fasta_like_to_data(path):
def strip_lines(s):
    """Return the second space-separated field of a line.

    Newline characters are stripped from both ends of ``s`` first. If the
    line has no second field (for example an empty line, or a line without
    any space), the sentinel string ``'unidentifid seq error'`` is returned
    instead of raising.
    """
    s = s.strip('\n')
    # Keep the try body minimal: only the indexing can fail here.
    try:
        field = s.split(' ')[1]
    except IndexError:
        # The sentinel text (including its spelling) is kept unchanged
        # because main() filters rows on this exact string.
        return 'unidentifid seq error'
    return field
def main(input_data_path):
print('Reading file')
......@@ -24,6 +27,8 @@ def main(input_data_path):
data = pd.DataFrame(content,columns=['Sequences'])
data = data[~data['Sequences'].str.contains(">")]
data['Sequences']=data['Sequences'].map(strip_lines)
data = data[~data['Sequences'].str.contains('unidentifid seq error')]
data['Classes']=[0]*data.shape[0]
data['Proteins']=[0]*data.shape[0]
......@@ -95,4 +100,4 @@ def main(input_data_path):
new_file.close()
if __name__ == '__main__':
main('241211_FASTA_RP_GroEL_GroES_Tuf_assemble_peptides_list.txt')
\ No newline at end of file
main('250107_FASTA_RP_GroEL_GroES_Tuf_5pct_assemble_peptides_list.txt')
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment