diff --git a/src/call.py b/src/call.py
index ad5867232364a76a59d8cbb0b7dbe07505e18e6f..9e22063cc5f8ce5158819ca964da450101d6360e 100644
--- a/src/call.py
+++ b/src/call.py
@@ -64,14 +64,16 @@ class Call(Executor):
 
     def simplify_code(self, new_name = ""):
+        new_call_name = self.get_first_element_called().get_alias_and_id()
         code = self.get_code()
+        code = re.sub(fr'{re.escape(self.get_first_element_called().get_alias())} *\(', f'{new_call_name}(', code)
         if(new_name!=""):
             code = f"{new_name} = {code}"
         tag_to_add = "//AREA TO ADD PARAMS"
         code = f"{tag_to_add}\n{code}"
         index = 1
         for param in self.parameters:
-            param_new_name = f"{self.get_first_element_called().get_alias()}_param_{index}"
+            param_new_name = f"{self.get_first_element_called().get_alias_and_id()}_param_{index}"
 
             #Case the param is a call
             if(param.get_type()=="Call"):
@@ -99,7 +101,7 @@ class Call(Executor):
                 None
             elif(param.get_type()=="Emitted"):
                 code = code.replace(param.get_code(get_OG=True), param_new_name)
-                new_bit = f"{param_new_name} = {param.get_code(get_OG=True)}"
+                new_bit = f"{param_new_name} = {param.simplify_code()}"
                 code = code.replace(tag_to_add, f"{tag_to_add}\n{new_bit}")
             else:
                 raise Exception("This shouldn't happen")
diff --git a/src/emitted.py b/src/emitted.py
index c3534add31ce5d4e46bc153c6858062bdaf3b934..75ad5ac33df11031eb6a0157b6f2b5fa77f9e3d0 100644
--- a/src/emitted.py
+++ b/src/emitted.py
@@ -16,6 +16,14 @@ class Emitted(Channel):
         self.source.append(emitted_by)
         self.emits = None #->this is the channel it's emits -> in the case of a subworkflow
 
+    def simplify_code(self):
+        code = self.get_code(get_OG=True)
+        thing_which_emits = self.emitted_by.get_first_element_called()
+        old_name = thing_which_emits.get_alias()
+        new_call_name = thing_which_emits.get_alias_and_id()
+        code = re.sub(fr'{re.escape(old_name)} *\.', f'{new_call_name}.', code)
+        return code
+
     def get_emitted_by(self):
         return self.emitted_by
 
diff --git a/src/main.py b/src/main.py
index 5c99072bae73252b7bf6abb4151a3cdf730a1ab7..98127ae51ebedcd35e0dfa630736e72427721761 100644
--- a/src/main.py
+++ b/src/main.py
@@ -33,6 +33,27 @@ class Main(Nextflow_Building_Blocks):
             dico[e] = pos[e]
         return dico
 
+    #We do this to differentiate the subworkflow and main case
+    def get_code_to_simplify(self):
+        return self.get_code()
+
+
+
+    def simplify_code(self):
+        code = self.get_code_to_simplify()
+        all_executors = self.get_all_executors_in_subworkflow()
+        #We do this so that the longest operations and calls are rewritten first in the code -> to avoid problems
+        executor_2_length = {}
+        for e in all_executors:
+            executor_2_length[e] = len(e.get_code(get_OG = True))
+        sorted_executor_2_length = {k: v for k, v in sorted(executor_2_length.items(), key=lambda item: item[1], reverse=True)}
+
+        for exe in sorted_executor_2_length:
+            if(exe.get_type()=="Call" or exe.get_type()=="Operation"):
+                code = code.replace(exe.get_code(get_OG = True), exe.simplify_code(), 1)
+            else:
+                raise Exception("This shouldn't happen")
+        return code
 
     def get_position_in_main(self, executor):
         code = self.get_code()
diff --git a/src/operation.py b/src/operation.py
index b14e1789a3c3ba83c12611fd7ec393f6a12792b6..6e1bf5aae71a63678ae4f3268f8bacd77eb2f0fc 100644
--- a/src/operation.py
+++ b/src/operation.py
@@ -32,6 +32,7 @@ class Operation(Executor):
         #self.condition = Condition(self)
 
     def change_code(self, code):
+        self.OG_code = self.get_code()
         self.code = Code(code, origin = self)
 
     def set_as_artificial(self):
@@ -891,7 +892,7 @@ class Operation(Executor):
         def add_origin_equals(call, index):
             simplified_code = call.simplify_code()
             lines = simplified_code.split('\n')
-            return f"{simplified_code}\noperation_{operation_id}_{index} = {call.get_first_element_called().get_alias()}.out[0]"
+            return f"{simplified_code}\noperation_{operation_id}_{index} = {call.get_first_element_called().get_alias_and_id()}.out[0]"
             #if(len(lines)==1):
             #    return f"operation_{operation_id}_{index} = {simplified_code}"
             #else:
@@ -903,6 +904,8 @@ class Operation(Executor):
             if(o.get_type()=="Call"):
                 to_add.append(add_origin_equals(o, index))
                 code = code.replace(o.get_code(get_OG=True), f"operation_{operation_id}_{index}")
+            elif(o.get_type()=="Emitted"):
+                code = code.replace(o.get_code(get_OG=True), o.simplify_code())
             index += 1
         to_add.reverse()
diff --git a/src/outils_graph.py b/src/outils_graph.py
index d2a2e00947c39daba2dbc8ea45af0c81b40a7b73..17f24dc72d37e2d429a5659c9e664a0f9e221240 100644
--- a/src/outils_graph.py
+++ b/src/outils_graph.py
@@ -1025,7 +1025,7 @@ def check_if_equal(dicoA, dicoB):
             if("src.operation.Operation" in node["id"]):
                 val = f"operation_{node['xlabel']}__££__{index}"
             elif("src.process.Process" in node["id"]):
-                val = f"process_{node['name']}__££__{index}"
+                val = f"process_{node['name'].split('_GG_')[0]}__££__{index}"
             if(val in names_already_given):
                 index+=1
             else:
diff --git a/src/process.py b/src/process.py
index 7d689ddac60b60dffb6d4612b898ee7050a018a8..85c480e89b7dcb97c92e973e9d2bcfd8d3dccbfa 100644
--- a/src/process.py
+++ b/src/process.py
@@ -75,6 +75,9 @@ class Process(Nextflow_Building_Blocks):
     def get_alias(self):
         return self.alias
 
+    def get_alias_and_id(self):
+        return f"{self.alias}_GG_{id(self)}"
+
     def get_printed_name(self):
         return self.printed_name
 
@@ -341,8 +344,17 @@ class Process(Nextflow_Building_Blocks):
         def replacer(match):
             return match.group(0).replace(match.group(1), self.get_alias())
         return re.sub(r"process\s*(\w+)\s*\{", replacer, code)
+
+    def get_code_with_alias_and_id(self):
+        code = self.get_code()
+        def replacer(match):
+            return match.group(0).replace(match.group(1), self.get_alias_and_id())
+        from .constant import PROCESS_HEADER
+        return re.sub(PROCESS_HEADER, replacer, code)
+
+    def simplify_code(self):
+        return self.get_code_with_alias_and_id()
 
     #Function that extracts the outputs from a process (DSL1)
     def initialise_outputs_DSL1(self):
diff --git a/src/subworkflow.py b/src/subworkflow.py
index c132338cdfea8700a674c5b7df783e9d860fe5dd..44ffdcaf9a3db2ab8e139abf22cbabb00537c90c 100644
--- a/src/subworkflow.py
+++ b/src/subworkflow.py
@@ -86,6 +86,23 @@ class Subworkflow(Main):
         def replacer(match):
             return match.group(0).replace(match.group(1), self.get_alias())
         return re.sub(r"workflow\s*(\w+)\s*\{", replacer, code)
+
+    def get_code_with_alias_and_id(self):
+        code = self.get_code()
+        def replacer(match):
+            return match.group(0).replace(match.group(1), self.get_alias_and_id())
+        from .constant import SUBWORKFLOW_HEADER
+        return re.sub(SUBWORKFLOW_HEADER, replacer, code)
+
+    def get_code_to_simplify(self):
+        return self.get_code_with_alias_and_id()
+
+    def simplify_code(self):
+        code = super().simplify_code()
+        for o in self.emit:
+            print(o.get_code(get_OG = True), o.simplify_code(), o.OG_code)
+            code = code.replace(o.get_code(get_OG = True), o.simplify_code(), 1)
+        return code
 
@@ -97,6 +114,9 @@ class Subworkflow(Main):
     def get_alias(self):
         return self.alias
+
+    def get_alias_and_id(self):
+        return f"{self.alias}_GG_{id(self)}"
 
     def get_printed_name(self):
         return self.printed_name
diff --git a/src/workflow.py b/src/workflow.py
index d15e45b50191c81cebb6e1d9af76d47e615e0be8..f2995a70e88c18fcfb34985c5d9a164a17e3c355 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -421,13 +421,18 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
         return tab
 
-    #This method rewrites the entire workflow into one single file
-    def write_workflow_into_one_file(self):
+
+
+
+
+    #TODO -> write tests for this method
+    #Function that rewrites the workflow code
+    #Rewriting everything in one file + simplifying the operations and calls to simplify the analysis
+    def simplify_workflow_code(self):
+        code = self.get_first_file().get_code()
         #This tag is used as an identification to safely manipulate the string
         tag = str(time.time())
-        self.get_first_file
-        code = self.get_first_file().get_code()
 
         #params_section = f"//PARAMS_SECTION_{tag}"
         function_section = f"//FUNCTION_SECTION"
@@ -456,16 +461,19 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
             else:
                 raise Exception("This shoudn't happen")
 
+        #Simplifying main
+        code = code.replace(self.get_workflow_main().get_code(), self.get_workflow_main().simplify_code())
+
         #Adding processes into code
         for p in processes:
-            if(p.get_code_with_alias() not in code):
-                code = code.replace(process_section, '\n'+p.get_code_with_alias()+'\n'+process_section)
+            if(p.get_code_with_alias_and_id() not in code):
+                code = code.replace(process_section, '\n'+p.simplify_code()+'\n'+process_section)
 
         #Adding subworkflows into code
         for sub in subworkflows:
-            if(sub.get_code_with_alias() not in code):
-                code = code.replace(subworkflow_section, subworkflow_section+'\n'+sub.get_code_with_alias()+'\n')
+            if(sub.get_code_with_alias_and_id() not in code):
+                code = code.replace(subworkflow_section, subworkflow_section+'\n'+sub.simplify_code()+'\n')
 
         #Adding functions into code
         for fun in functions:
@@ -479,29 +487,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
         ankers = {"function_section":function_section, "process_section":process_section, "subworkflow_section":subworkflow_section}
-
-        return code, ankers
-
-
-
-    #TODO -> write tests for this method
-    #Function that rewrites the workflow code
-    #Rewriting everything in one file + simplifying the operations and calls to simplify the analysis
-    def simplify_workflow_code(self):
-        code = self.get_first_file().get_code()
-        code, ankers = self.write_workflow_into_one_file()
-        all_executors = self.get_workflow_main().get_all_executors_in_workflow()
-        #We do this so that the longest operation and calls are rewritten first in the code -> to avoid problems
-        executor_2_length = {}
-        for e in all_executors:
-            executor_2_length[e] = len(e.get_code(get_OG = True))
-        sorted_executor_2_length = {k: v for k, v in sorted(executor_2_length.items(), key=lambda item: item[1], reverse=True)}
-        for exe in sorted_executor_2_length:
-            if(exe.get_type()=="Call" or exe.get_type()=="Operation"):
-                code = code.replace(exe.get_code(get_OG = True), exe.simplify_code(), 1)
-            else:
-                raise Exception("This shouldn't happen")
+
         return code
 
     def get_subworkflows_called(self):
@@ -529,6 +515,10 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
         with open(temp_file, "w") as file:
             file.write(code)
 
+        f = open(self.get_output_dir()/ "debug" / "rewritten.nf", "w")
+        f.write(code)
+        f.close()
+
         #Replace old analysis with new analysis (simplified code)
         self.__init__(str(temp_file), display_info = False, duplicate=True, processes_2_remove=processes_2_remove)
         self.initialise()
@@ -538,23 +528,29 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
             generate_graph(self.get_output_dir()/ "debug" /"spec_graph", self.graph.full_dico, render_graphs = True)
             generate_graph(self.get_output_dir()/ "debug" /"process_dependency_graph_OG", temp_process_dependency_graph, render_graphs = True)
             generate_graph(self.get_output_dir()/ "debug" /"process_dependency_graph", self.graph.get_process_dependency_graph() , render_graphs = True)
-            f = open(self.get_output_dir()/ "debug" / "rewritten.nf", "w")
-            f.write(code)
-            f.close()
-            raise Exception("Something went worng: The flat dependency graph is not the same!")
+            raise Exception("Something went wrong: The flat dependency graph is not the same!")
 
 
     def check_relevant_processes_in_workflow(self, relevant_processes):
         #Check all relevat processes are in wf
-        workflow_processes = []
+        workflow_processes = {}
        for c in self.get_workflow_main().get_all_calls_in_workflow():
             ele = c.get_first_element_called()
             if(ele.get_type()=="Process"):
-                workflow_processes.append(ele.get_alias())
+                short_name = ele.get_alias().split("_GG_")[0]
+                try:
+                    temp = workflow_processes[short_name]
+                except:
+                    workflow_processes[short_name] = []
+                workflow_processes[short_name].append(ele.get_alias())
+
+        temporary_relevant = []
         for p in relevant_processes:
             if(p not in workflow_processes):
                 raise BioFlowInsightError(f"The element '{p}' given as a relevant processes is not present in the workflow's processes", 24)
+            temporary_relevant+=workflow_processes[p]
+        relevant_processes = temporary_relevant
+        return relevant_processes
 
     def generate_user_view(self, relevant_processes = [], render_graphs = True, processes_2_remove = []):
@@ -573,7 +569,6 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
 
             #REPLACE HEADER TAKES
             subworkflow_takes = subworklfow.get_takes()
-            print(subworklfow.get_name(), subworkflow_takes)
             parameters = OG_call.get_parameters()
             if(len(subworkflow_takes)!=len(parameters)):
                 raise Exception("This shouldn't happen -> the same number of parameters should be kept")
@@ -648,7 +643,6 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
         names_added = []
         for channel in channels_2_sources:
             if(set(channels_2_sources[channel]).intersection(things_added_in_cluster)!=set(channels_2_sources[channel])):
-                print(channel.get_code())
                 if(channel.get_name() not in names_added):
                     takes.append(channel)
                     names_added.append(channel.get_name())
@@ -754,7 +748,8 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                 return broken_subworkflows
 
         #Get the clusters and the code
-        self.check_relevant_processes_in_workflow(relevant_processes)
+        relevant_processes = self.check_relevant_processes_in_workflow(relevant_processes)
+        print(relevant_processes)
         self.generate_user_view(relevant_processes = relevant_processes, processes_2_remove = [])
         clusters = self.graph.get_clusters_from_user_view()
         broken_subworkflows = get_workflows_broken(get_subworkflow_2_executors(), get_clusters_with_calls(clusters))
@@ -931,12 +926,9 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
 
         conditions_in_subworkflow = list(conditions_in_subworkflow.keys())
 
-        #The problem is here!!!!
-        print(name)
         #TAKE
         #Adding take parameters on the inside of the subworkflow
         takes_param = self.get_takes(things_added_in_cluster)
-        print()
         new_param_names, index, old_param_names = [], 1, []
         for param in takes_param:
             param_name = f"param_{name}_{index}"
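
Note for reviewers: the patch leans on two ideas that are easy to miss in the raw diff. First, every process and subworkflow alias gets a unique "_GG_<id(self)>" suffix so that duplicated calls stay distinguishable once the workflow is rewritten into one file (and can later be grouped back by their short name via split("_GG_")[0]). Second, executors are textually replaced longest-first, so a short statement that happens to be a substring of a longer one is never rewritten before the longer statement has been handled. The sketch below is a minimal, self-contained illustration of those two ideas only; the names (alias_and_id, rewrite, DummyProcess, the FASTQC example) are hypothetical and are not part of the BioFlow-Insight API.

# Hypothetical illustration only -- not BioFlow-Insight code.
def alias_and_id(alias, obj):
    # Same idea as get_alias_and_id(): append a unique suffix so two calls
    # to the same process can be told apart in the rewritten workflow.
    return f"{alias}_GG_{id(obj)}"

def rewrite(code, replacements):
    # replacements: list of (original_snippet, simplified_snippet) pairs.
    # Replacing the longest originals first means a long statement whose text
    # contains a shorter statement is rewritten before that substring is touched.
    for original, simplified in sorted(replacements, key=lambda p: len(p[0]), reverse=True):
        code = code.replace(original, simplified, 1)  # only the first occurrence
    return code

if __name__ == "__main__":
    class DummyProcess:
        # Stand-in for two distinct call objects of the same process.
        pass

    first, second = DummyProcess(), DummyProcess()
    workflow = "FASTQC(reads)\nch = FASTQC(reads).out[0]"
    print(rewrite(workflow, [
        ("FASTQC(reads)", f"{alias_and_id('FASTQC', first)}(reads)"),
        ("ch = FASTQC(reads).out[0]", f"ch = {alias_and_id('FASTQC', second)}(reads).out[0]"),
    ]))

Running the sketch shows the two occurrences of FASTQC ending up with different _GG_ suffixes, and replacing the shorter snippet first instead would have made the longer statement unmatchable, which is exactly the problem the "longest first" comment in Main.simplify_code refers to.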