Skip to content
Snippets Groups Projects
Commit 3a0634eb authored by Schneider Leo's avatar Schneider Leo
Browse files

init

parents
No related branches found
No related tags found
No related merge requests found
Sequences,Classes,Proteins
IVDDLSALTVLEASELSK,0,0
\ No newline at end of file
main.py 0 → 100644
import numpy as np
import pandas as pd
from dlomix.models import DetectabilityModel
from dlomix.constants import CLASSES_LABELS, alphabet, aa_to_int_dict
from dlomix.data import DetectabilityDataset
def fasta_like_to_data(path):
    """Read a FASTA-like text file and return its lines.

    Args:
        path: Path of the text file to read.

    Returns:
        A list with one string per line of the file; line endings are kept.
    """
    # The context manager closes the handle even if reading raises.
    with open(path, "r") as handle:
        return handle.readlines()
def strip_lines(s):
    """Return the second space-separated field of a line.

    Newline characters are removed from both ends of the line, the remainder
    is split on single spaces, and the field at index 1 is returned.

    Args:
        s: A line of text with at least one space, e.g. "<index> <sequence>".

    Returns:
        The token at position 1 of the space-split line.

    Raises:
        IndexError: If the line contains no space.
    """
    fields = s.strip('\n').split(' ')
    return fields[1]
def _strip_txt_suffix(path):
    """Return path without a trailing '.txt'; other paths come back unchanged."""
    suffix = '.txt'
    return path[:-len(suffix)] if path.endswith(suffix) else path


def main(input_data_path):
    """Predict peptide detectability and write a filtered peptide list.

    The input is a FASTA-like text file: lines containing '>' are treated as
    headers, every other line is expected to look like "<index> <sequence>".
    The sequences are written to 'temp.csv', scored with the pretrained
    detectability model, and two files are produced next to the input:

    * '<input without .txt>_detectability.csv': per-sequence class
      probabilities and the predicted class;
    * '<input without .txt>_filtered.txt': the input with every peptide
      predicted as 'Non-Flyer' removed and the remaining peptides renumbered
      from 1 after each header.

    Args:
        input_data_path: Path of the FASTA-like peptide list.

    Raises:
        ValueError: If the number of predictions differs from the number of
            peptide lines, so predictions cannot be matched back to lines.
    """
    print('Reading file')
    # Context manager so the handle is released even if reading fails.
    with open(input_data_path, "r") as source_file:
        content = source_file.readlines()

    print('Converting file')
    data = pd.DataFrame(content, columns=['Sequences'])
    # Header lines carry no peptide; keep only the "<index> <sequence>" lines.
    data = data[~data['Sequences'].str.contains(">")]
    data['Sequences'] = data['Sequences'].map(strip_lines)
    # Placeholder values: the dataset is given a label column and a
    # 'Proteins' column below, so both must exist in the CSV.
    data['Classes'] = [0] * data.shape[0]
    data['Proteins'] = [0] * data.shape[0]
    data.to_csv('temp.csv', index=False)

    print('Initialising model')
    ## Model init
    total_num_classes = len(CLASSES_LABELS)
    num_cells = 64
    # 'num_clases' is the keyword as used by the original call; left as is.
    model = DetectabilityModel(num_units=num_cells, num_clases=total_num_classes)

    ## Loading model weights
    # NOTE(review): presumably needed so load_weights works before the model
    # has been called on data -- confirm against the Keras/dlomix docs.
    model.built = True
    model_save_path = 'pretrained_model/original_detectability_fine_tuned_model_FINAL'
    model.load_weights(model_save_path)

    max_pep_length = 40
    ## Has no impact for prediction
    batch_size = 128

    print('Initialising dataset')
    ## Data init
    detectability_data = DetectabilityDataset(data_source='dummy.csv',
                                              val_data_source='temp.csv',
                                              data_format='csv',
                                              max_seq_len=max_pep_length,
                                              label_column="Classes",
                                              sequence_column="Sequences",
                                              dataset_columns_to_keep=['Proteins'],
                                              batch_size=batch_size,
                                              with_termini=False,
                                              alphabet=aa_to_int_dict)
    val_data = detectability_data.tensor_val_data
    # Rebuild the sequence strings from the dataset's parsed representation.
    seq = ["".join(parsed) for parsed in detectability_data["val"]["_parsed_sequence"]]

    print('Applying model')
    ## Applying model
    predictions = model.predict(val_data)

    print('Formatting output')
    ## Formatting output
    label_multi = [CLASSES_LABELS[idx] for idx in np.argmax(predictions, axis=1)]
    result = pd.DataFrame({'Sequences': seq,
                           'Probability no flyer': predictions[:, 0],
                           'Probability low flyer': predictions[:, 1],
                           'Probability medium flyer': predictions[:, 2],
                           'Probability high flyer': predictions[:, 3],
                           'Predicted class': label_multi})

    print('Saving output')
    # str.strip('.txt') removes any of the characters '.', 't', 'x' from both
    # ends (e.g. '..._list.txt' -> '..._lis'), so drop the suffix explicitly.
    output_stem = _strip_txt_suffix(input_data_path)
    # Model output goes to '<input without .txt>_detectability.csv'.
    result.to_csv(output_stem + '_detectability.csv', index=False)

    predicted_classes = result['Predicted class'].tolist()
    # The filtering loop pairs the i-th prediction with the i-th peptide line.
    # NOTE(review): if the dataset drops rows (presumably e.g. sequences longer
    # than max_seq_len -- confirm), that pairing is wrong; fail loudly instead
    # of writing a partially filtered file.
    if len(predicted_classes) != data.shape[0]:
        raise ValueError(
            'Got %d predictions for %d peptide lines; cannot match predictions '
            'to input lines' % (len(predicted_classes), data.shape[0])
        )

    with open(output_stem + '_filtered.txt', 'w') as filtered_file:
        print('Writting final file')
        peptide_idx = 0  # position of the current peptide line in the predictions
        kept_count = 1   # new running number of the next kept peptide
        for line in content:
            if '>' in line:
                # New header: copy it and restart the peptide numbering.
                kept_count = 1
                filtered_file.write(line)
                continue
            if predicted_classes[peptide_idx] != 'Non-Flyer':
                # Keep the original second field (sequence and line ending)
                # and renumber it.
                filtered_file.write(str(kept_count) + ' ' + line.split(' ')[1])
                kept_count += 1
            peptide_idx += 1
if __name__ == '__main__':
    # Script entry point. An input file may be given as the first command-line
    # argument; without one, the original hard-coded peptide list is used.
    import sys

    default_input_path = '241211_FASTA_RP_GroEL_GroES_Tuf_assemble_peptides_list.txt'
    main(sys.argv[1] if len(sys.argv) > 1 else default_input_path)
\ No newline at end of file
model_checkpoint_path: "fine_tuned_weights_attention_model_FINAL_NON"
all_model_checkpoint_paths: "fine_tuned_weights_attention_model_FINAL_NON"
File added
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment