diff --git a/data/msp_file_extraction.py b/data/msp_file_extraction.py
index c27be7ae13b66ef4279a724c13a526656ba1a1de..3dca185dd8673f9de1930992b2a0da9ec2e327bc 100644
--- a/data/msp_file_extraction.py
+++ b/data/msp_file_extraction.py
@@ -66,6 +66,7 @@ if __name__ == '__main__':
                 seq.append(s)
 
     df = pd.DataFrame(seq,columns=['sequence'])
+    df = df.drop_duplicates()
     df['irt_scaled']=0
     df['state'] = 'holdout'
     df.to_csv('spectral_lib/df_predicted_library_oktoberfest.csv',index=False)
@@ -77,6 +78,7 @@ if __name__ == '__main__':
     predicted_lib['seq'] = predicted_lib['seq'].map(numerical_to_alphabetical_str)
 
     predicted_lib['sequence']=predicted_lib['seq']
+
     pred_rt=predicted_lib['rt pred']
 
     df_joined = pd.merge(df,predicted_lib[['rt pred','sequence']],on='sequence',how='left')