diff --git a/src/outils.py b/src/outils.py
index 9415f31d5b8be5dd27653e8d602d00c71260106d..bff466056e8fff5deb2cbd2b64607697fdb157ab 100644
--- a/src/outils.py
+++ b/src/outils.py
@@ -1187,4 +1187,9 @@ def replace_thing_by_call(tab):
     for r in to_remove:
         tab.remove(r)
     tab+=to_add
-    return tab
\ No newline at end of file
+    return tab
+
+def replace_group1(text, pattern, replacement):
+    def replacer(match):
+        return match.group(0).replace(match.group(1), replacement)
+    return re.sub(pattern, replacer, text)
\ No newline at end of file
diff --git a/src/workflow.py b/src/workflow.py
index 18a10ad62734b68e3009086ef40ed48872c93191..cf5f359a8afc793ae22f7efb28869bbc83d31f3a 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -3,7 +3,7 @@
 from .nextflow_file import Nextflow_File
 from .ro_crate import RO_Crate
 from . import constant
-from .outils import is_git_directory, format_with_tabs, replace_thing_by_call
+from .outils import is_git_directory, format_with_tabs, replace_thing_by_call, replace_group1
 from .outils_graph import flatten_dico, initia_link_dico_rec, get_number_cycles
 from .outils_annotate import get_tools_commands_from_user_for_process
 from .bioflowinsighterror import BioFlowInsightError
@@ -849,13 +849,18 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
         #Creating the subworkflows from clusters
         calls_in_operations = []
         non_relevant_name = 1
+        channels_to_replace_outside_of_cluster = []
+        subworkflow_clusters_to_add, subworkflow_cluster_calls_to_add = [], []
+        index_cluster = 0
         for elements in clusters:
             #Only create the subworkflows for clusters with more than one element
             processes_added = []
             things_added_in_cluster = []
             if(len(elements)>1):
-                name, body, take, emit = "", "", "", ""
+                name, body, take, emit = "", "main:\n", "", ""
+                first_element = True
                 for ele in elements:
+
                     if(ele.get_type()=="Process"):
 
                         #Determine the name of the created subworkflow cluster
@@ -872,6 +877,15 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                             raise Exception("This shoudn't happen since duplicate mode is activated")
                         call = call[0]
+
+                        #If first element -> add marker for the subworkflow call
+                        if(first_element):
+                            code = code.replace(call.get_code(get_OG = True), f"//Anker_cluster{index_cluster}")
+                            first_element = False
+                        else:
+                            code = code.replace(call.get_code(get_OG = True), "")
+
+
                         processes_added.append(call.get_first_element_called())
                         printed_condition = " && ".join(call.get_condition().get_conditions())
                         if(printed_condition!=""):
@@ -880,7 +894,14 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                             body+=f"\n{call.get_code()}\n"
                         things_added_in_cluster.append(call)
                     elif(ele.get_type()=="Operation"):
-
+
+                        #If first element -> add marker for the subworkflow call
+                        if(first_element):
+                            code = code.replace(ele.get_code(get_OG = True), f"//Anker_cluster{index_cluster}")
+                            first_element = False
+                        else:
+                            code = code.replace(ele.get_code(get_OG = True), "")
+
                         #Ignore these cases
                         if(ele.get_code()[:4] not in ["emit", "take"]):
                             origins = ele.get_origins()
@@ -893,6 +914,8 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                             else:
                                 body+=f"\n{ele.get_code()}\n"
                             things_added_in_cluster.append(ele)
+                    else:
+                        raise Exception("This shouldn't happen")
 
                 #TODO check this part of the code is never seen
                 #Here we removing the Call_12313 thing
@@ -912,15 +935,93 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                     name = f"non_relevant_cluster_{non_relevant_name}"
                    non_relevant_name+=1
-                subworkflow_code = f"subworkflow {name} {{\n{take}\n{body}\n{emit}\n}}"
-                print(format_with_tabs(subworkflow_code))
-                for t in self.get_takes(things_added_in_cluster):
-                    print("*", t.get_code())
-
-                for t in self.get_emits(things_added_in_cluster):
-                    print("-", t.get_code())
-                print("-----------")
+                #TAKE
+                #Adding take parameters on the inside of the subworkflow
+                takes_param = self.get_takes(things_added_in_cluster)
+                new_param_names, index, old_param_names = [], 1, []
+                for param in takes_param:
+                    param_name = f"param_{name}_{index}"
+                    new_param_names.append(param_name)
+                    old_param_names.append(param.get_code())
+                    index += 1
+                if(len(new_param_names)>0):
+                    temp = '\n'.join(new_param_names)
+                    take = f"\ntake:\n{temp}\n"
+
+                #EMIT
+                #Adding the emitted outputs
+                emitted_outputs = self.get_emits(things_added_in_cluster)
+                new_output_names, index, old_output_names = [], 0, []
+                for output in emitted_outputs:
+                    output_name = f"{name}.out[{index}]"
+                    new_output_names.append(output_name)
+                    old_output_names.append(output.get_code())
+                    index += 1
+
+                if(len(old_output_names)>0):
+                    temp = '\n'.join(old_output_names)
+                    emit = f"\nemit:\n{temp}\n"
+
+
+                #Replace names inside subworkflow
+                subworkflow_code = f"subworkflow {name} {{\n{take}\n{body}\n{emit}\n}}"
+                params = ", ".join(old_param_names)
+                subworkfow_call = f"{name}({params})"
+
+                for i in range(len(new_param_names)):
+                    pattern = fr"[\=\,\(] *({re.escape(takes_param[i].get_code())})[\s\,\)]"
+                    subworkflow_code = replace_group1(subworkflow_code, pattern, new_param_names[i])
+                    #subworkflow_code = subworkflow_code.replace(takes_param[i].get_code(), new_param_names[i])
+
+                for i in range(len(new_output_names)):
+                    #In the case of channels, we just add channel = subworkflow.out[i]
+                    if(not bool(re.findall(r"\.\s*out", old_output_names[i]))):
+                        subworkfow_call+=f"\n{old_output_names[i]} = {new_output_names[i]}"
+                    #In the case of emitted values we need to replace the code on the outside
+                    else:
+                        channels_to_replace_outside_of_cluster.append((old_output_names[i], new_output_names[i]))
+
+
+                subworkflow_clusters_to_add.append(subworkflow_code)
+                subworkflow_cluster_calls_to_add.append(subworkfow_call)
+                index_cluster+=1
+                #print(format_with_tabs(subworkflow_code))
+                #print(subworkfow_call)
+                #
+                #for t in self.get_takes(things_added_in_cluster):
+                #    print("*", t.get_code())
+                #
+                #for t in self.get_emits(things_added_in_cluster):
+                #    print("-", t.get_code())
+                #print("-----------")
+
+
+        #TODO -> removing the conditions which are problematic
+        #This might not be the problem -> when rerunning, the analysis isn't totally robust
+        #still_empty_conditions = True
+        #while(still_empty_conditions):
+        #    still_empty_conditions = False
+        #    for index_cluster in range(len(subworkflow_clusters_to_add)):
+        #        #Replace empty if and else by nothing
+        #        def replace_by_empty(match):
+        #            still_empty_conditions = True
+        #            return f"//Anker_cluster{index_cluster}"
+        #        pattern = r"if\s*\([^\{]+\{\s*\/\/Anker_cluster"+str(index_cluster)+r"\s*\}\s*else\{\s*\}"
+        #        code = re.sub(pattern, replace_by_empty, code)
+        #
+        #        pattern = r"if\s*\([^\{]+\{\s*\}\s*else\{\s*\/\/Anker_cluster"+str(index_cluster)+r"\s*\}"
+        #        code = re.sub(pattern, replace_by_empty, code)
+
+
+        for i in range(len(subworkflow_clusters_to_add)):
+            #print(f"//Anker_cluster{i}", subworkflow_cluster_calls_to_add[i])
+            code = code.replace(f"//Anker_cluster{i}", subworkflow_cluster_calls_to_add[i])
+
+        for old, new in channels_to_replace_outside_of_cluster:
+            code = code.replace(old, new)
+
+
         print(code)
         #So basically when retriving a thing (process or subworkflow)
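
Note on the new replace_group1 helper in src/outils.py: it substitutes only the text captured by group 1 of the pattern while keeping the surrounding delimiters that the pattern also matched. Below is a minimal standalone sketch of that behaviour; the channel name ch_input and the replacement param_sub_1 are invented for illustration, and src/outils.py is assumed to already import re, since the patch does not add that import.

    import re

    def replace_group1(text, pattern, replacement):
        # Same helper as in src/outils.py: rebuild each match, swapping only group(1).
        def replacer(match):
            return match.group(0).replace(match.group(1), replacement)
        return re.sub(pattern, replacer, text)

    # Pattern mirrors the one built in workflow.py: a delimiter, the escaped old
    # name as group(1), then a closing delimiter, so only real usages are renamed.
    old_name = "ch_input"                      # hypothetical take parameter
    pattern = r"[\=\,\(] *(" + re.escape(old_name) + r")[\s\,\)]"
    body = "FASTQC(ch_input, ch_ref)"          # hypothetical subworkflow body line
    print(replace_group1(body, pattern, "param_sub_1"))
    # -> FASTQC(param_sub_1, ch_ref)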
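
The workflow.py changes rely on a marker-and-replace pass: the first call of each cluster is swapped for a unique //Anker_cluster<i> comment, the remaining calls are blanked out, and the marker is later replaced by the generated subworkflow call. A rough sketch of that idea, with invented Nextflow-style calls (fastqc(reads), multiqc(fastqc.out)) and an invented generated call cluster_0(reads):

    # Toy version of the anchor mechanism introduced in workflow.py (names invented).
    code = """
    fastqc(reads)
    multiqc(fastqc.out)
    """
    cluster_calls = ["fastqc(reads)", "multiqc(fastqc.out)"]   # calls grouped into one cluster
    generated_call = "cluster_0(reads)"                        # stands in for the generated call

    first_element = True
    for call in cluster_calls:
        if first_element:
            # First element -> leave a marker where the subworkflow call will go
            code = code.replace(call, "//Anker_cluster0")
            first_element = False
        else:
            # Remaining elements are absorbed by the subworkflow, so drop them
            code = code.replace(call, "")

    # Later pass: the marker is swapped for the generated subworkflow call
    code = code.replace("//Anker_cluster0", generated_call)
    print(code)   # only cluster_0(reads) remains of the two original calls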