diff --git a/src/main.py b/src/main.py
index 2834d5b79b785e2628f61ffc08413dcd1a31ad23..1e570910c19738077ade363d935c16e1503a668f 100644
--- a/src/main.py
+++ b/src/main.py
@@ -75,21 +75,29 @@ class Main(Nextflow_Building_Blocks):
     def get_all_executors_in_workflow(self):
         all_executors = self.get_all_executors_in_subworkflow()
         dico = {}
-        for e in all_executors:
-            if(e.get_type()=="Call"):
-                for c in e.get_all_calls():
-                    sub = c.get_first_element_called()
-                    if(sub.get_type()=="Subworkflow"):
-                        if(c not in dico):
-                            sub_calls = sub.get_all_executors_in_workflow()
-                            for sub_c in sub_calls:
-                                dico[sub_c] = ""
-            #Case it's an operation
-            else:
-                dico[e] = ""
         for e in all_executors:
             dico[e] = ""
+        calls = self.get_all_calls_in_workflow()
+        for call in calls:
+
+            sub = call.get_first_element_called()
+            if(sub.get_type()=="Subworkflow"):
+                sub_calls = sub.get_all_executors_in_workflow()
+                for sub_c in sub_calls:
+                    dico[sub_c] = ""
+
+
+        #for e in all_executors:
+        #    if(e.get_type()=="Call"):
+        #        for c in e.get_all_calls():
+        #            sub = c.get_first_element_called()
+        #            if(sub.get_type()=="Subworkflow"):
+        #                if(c not in dico):
+        #                    sub_calls = sub.get_all_executors_in_workflow()
+        #                    for sub_c in sub_calls:
+        #                        dico[sub_c] = ""
+
         return list(dico.keys())
 
 
 
diff --git a/src/nextflow_file.py b/src/nextflow_file.py
index d9ad8fa78ed5ac74575f3df70d560fef8c3e5b70..c19e022940d9b257eb214ad4a192bf0506947d48 100644
--- a/src/nextflow_file.py
+++ b/src/nextflow_file.py
@@ -277,10 +277,10 @@ class Nextflow_File(Nextflow_Building_Blocks):
         #If the file is not alreday initialised then we self.initialise it
         if(not self.initialised):
             self.initialised = True
+            if(self.workflow.get_display_info_bool()):
+                print(f"Analysing -> '{self.get_file_address()}'")
             if(self.get_DSL()=="DSL2"):
-                if(self.workflow.get_display_info_bool()):
-                    print(f"Analysing -> '{self.get_file_address()}'")
 
                 #Extarct Processes
                 self.extract_processes()
 
@@ -320,8 +320,6 @@ class Nextflow_File(Nextflow_Building_Blocks):
 #                        sub.initialise()
 #                        indice+=1
         elif(self.get_DSL()=="DSL1"):
-            if(self.workflow.get_display_info_bool()):
-                print(f"Analysing -> '{self.get_file_address()}'")
             from .main import Main
             #Extarct Processes
             self.extract_processes()
diff --git a/src/workflow.py b/src/workflow.py
index c4d8dc05d131b465420981df0bfa09dda31f2615..cb1aff1b4297018f0604d192f9f478a28f8f509b 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -336,10 +336,10 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                     processes_called.append(p)
             nb_2_select = int(alpha*len(processes_called))
             sampled = random.sample(set(processes_called), nb_2_select)
-            #name_select = []
-            #for p in sampled:
-            #    name_select.append(p.get_alias())
-            return sampled
+            name_select = []
+            for p in sampled:
+                name_select.append(p.get_alias())
+            return name_select
         else:
             raise BioFlowInsightError("Trying to generate random relevant processes however option 'duplicate' is not activated.")
 
@@ -365,18 +365,32 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                 code = code.replace(r, ankers)
                 ankers = ""
 
+
+        processes, subworkflows, functions = [], [], []
+        for c in self.get_workflow_main().get_all_calls_in_workflow():
+            ele = c.get_first_element_called()
+            if(ele.get_type()=="Process"):
+                processes.append(ele)
+            elif(ele.get_type()=="Subworkflow"):
+                subworkflows.append(ele)
+            elif(ele.get_type()=="Function"):
+                functions.append(ele)
+            else:
+                raise Exception("This shoudn't happen")
+
+
         #Adding processes into code
-        for p in self.get_processes_called():
+        for p in processes:
             if(p.get_code() not in code):
                 code = code.replace(process_section, '\n'+p.get_code_with_alias()+'\n'+process_section)
 
         #Adding subworkflows into code
-        for sub in self.get_subworkflows_called():
+        for sub in subworkflows:
             if(sub.get_code() not in code):
                 code = code.replace(subworkflow_section, subworkflow_section+'\n'+sub.get_code_with_alias()+'\n')
 
         #Adding functions into code
-        for fun in self.get_functions_called():
+        for fun in functions:
             if(fun.get_code() not in code):
                 code = code.replace(function_section, function_section+'\n'+fun.get_code()+'\n')
 
@@ -397,30 +411,63 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
     #Rewriting everything in one file + simplifying the operations and calls to simplify the analysis
     def simplify_workflow_code(self):
         code = self.get_first_file().get_code()
-        #code, ankers = self.write_workflow_into_one_file()
-        #TODO -> update method get_all_executors_from_workflow -> right now it's not searching through the subworkflows
-        for exe in self.get_workflow_main().get_all_executors_in_workflow():
+        code, ankers = self.write_workflow_into_one_file()
+        all_executors = self.get_workflow_main().get_all_executors_in_workflow()
+
+        #We do this so that the longest operation and calls are rewritten first in the code -> to avoid problems
+        executor_2_length = {}
+        for e in all_executors:
+            executor_2_length[e] = len(e.get_code(get_OG = True))
+        sorted_executor_2_length = {k: v for k, v in sorted(executor_2_length.items(), key=lambda item: item[1], reverse=True)}
+
+        for exe in sorted_executor_2_length:
             if(exe.get_type()=="Call" or exe.get_type()=="Operation"):
-                code = code.replace(exe.get_code(get_OG = True), exe.simplify_code())
+                code = code.replace(exe.get_code(get_OG = True), exe.simplify_code(), 1)
             else:
                 print(exe.get_code(), exe.get_type())
                 raise Exception("This shouldn't happen")
         return code
 
+    def rewrite_and_initialise(self, code):
+        #Write new code in temporary file
+        temp_file = self.get_output_dir()/f"temp_{str(self)[-7:-2]}.nf"
+        with open(temp_file, "w") as file:
+            file.write(code)
+
+        #Replace old analysis with new analysis (simplified code)
+        self.__init__(str(temp_file), display_info = False, duplicate=True)
+        self.initialise()
+
+    def check_relevant_processes_in_workflow(self, relevant_processes):
+        #Check all relevat processes are in wf
+        workflow_processes = []
+        for c in self.get_workflow_main().get_all_calls_in_workflow():
+            ele = c.get_first_element_called()
+            if(ele.get_type()=="Process"):
+                workflow_processes.append(ele.get_alias())
+
+        for p in relevant_processes:
+            if(p not in workflow_processes):
+                raise BioFlowInsightError(f"The element '{p}' given as a relevant processes is not present in the workflow's processes", 24)
+
+
+    def generate_user_view(self, relevant_processes = [], render_graphs = True, processes_2_remove = []):
+        self.graph.initialise(processes_2_remove = processes_2_remove)
+        self.graph.generate_user_view(relevant_processes = relevant_processes, render_graphs = render_graphs)
+
+
     #Method which rewrites the workflow follwong the user view
     #Conert workflow to user_view only makes sense when the option duplicate is activated -> otherwise is doesn't make sense + it makes the analysis way more complicated
     def convert_workflow_2_user_view(self, relevant_processes = []):
         if(self.duplicate):
-            None
             code = self.simplify_workflow_code()
-            print(code)
-            #self.rewrite_and_initialise(code)
-            #
-            ##Get the clusters and the code
-            #self.check_relevant_processes_in_workflow(relevant_processes)
-            #self.nextflow_file.generate_user_view(relevant_processes = relevant_processes, processes_2_remove = [])
-            #clusters = self.nextflow_file.graph.get_clusters_from_user_view()
+            self.rewrite_and_initialise(code)
+
+            #Get the clusters and the code
+            self.check_relevant_processes_in_workflow(relevant_processes)
+            self.generate_user_view(relevant_processes = relevant_processes, processes_2_remove = [])
+            clusters = self.graph.get_clusters_from_user_view()
 
             #
             ##DETERMING WHICH SUBWORKFLOWS ARE BROKEN WITH THE CLUSTER
             ##Creating the clusters with calls instead of processes or subworkflows
@@ -724,7 +771,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
         #code = code.replace("$OR$", "||")
         #
         #return remove_extra_jumps(format_with_tabs(code))
-        #
+        return code
         #
         ##So basically when retriving a thing (process or subworkflow)
         ##There is necessarily one call associated with the thing -> since we have the option duplicate activated