#Replaces, in every match of `pattern` found in `text`, only the span captured by group 1 with `replacement`
#The rest of each match is kept exactly as it was
#If group 1 did not take part in a match (optional group), that match is left untouched
#Returns the new text
def replace_group1(text, pattern, replacement):
    def replacer(match):
        whole = match.group(0)
        start, end = match.span(1)
        #span(1) is (-1, -1) when group 1 did not participate in the match
        if(start == -1):
            return whole
        #Spans are relative to `text` -> shift them so they index into the matched string
        offset = match.start(0)
        #Splice by position instead of str.replace -> other occurrences of the same text inside the match are not touched
        return whole[:start-offset] + replacement + whole[end-offset:]
    return re.sub(pattern, replacer, text)


#This function takes some code and groups together consecutive ifs which have the same condition
#(this function is only to be used with the ifs I define -> cause there are no elses or if elses)
#Two ifs are merged when the second one starts right after the closing curly of the first one (only whitespace in between)
#and both conditions are equal once stripped
#The merged block is written as "if(<condition>{\n<body 1>\n<body 2>\n}\n"
#Returns the new code (the code is returned unchanged if nothing can be merged)
#NOTE(review): assumes extract_curly(code, i) returns the index just after the '}' closing the '{' which ends at index i -> to confirm
def group_together_ifs(code):
    #The captured group runs up to the opening curly -> so it still holds the closing parenthesis of the condition
    #The \b avoids matching identifiers which only end with "if"
    if_pattern = r"\bif\s*\(([^{]+)\{"
    if_header = re.compile(if_pattern)
    following_if_header = re.compile(r"\s*" + if_pattern)

    still_changing = True
    while(still_changing):
        still_changing = False

        for first in if_header.finditer(code):
            big_start, body_start_1 = first.span(0)
            end_1 = extract_curly(code, body_start_1)

            #Look for the next if exactly at the end of the first block
            #(anchoring at this position means an identical block found earlier in the code can't be picked up by mistake)
            second = following_if_header.match(code, end_1)
            if(second is None):
                continue
            condition_1, condition_2 = first.group(1), second.group(1)
            if(condition_1.strip()!=condition_2.strip()):
                continue

            #Case they are the same condition then we merge the 2
            body_start_2 = second.end(0)
            big_end = extract_curly(code, body_start_2)
            inside_1 = code[body_start_1:end_1-1].strip()
            inside_2 = code[body_start_2:big_end-1].strip()
            merged = f"if({condition_1}{{\n{inside_1}\n{inside_2}\n}}\n"
            #Splice the exact span which was found instead of replacing by text
            code = code[:big_start] + merged + code[big_end:]
            #The code changed -> the positions of the remaining matches are no longer valid -> start again
            still_changing = True
            break

    return code
the emitted outputs emitted_outputs = self.get_emits(things_added_in_cluster) new_output_names, index, old_output_names = [], 0, [] @@ -980,13 +979,17 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen subworkfow_call+=f"\n{old_output_names[i]} = {new_output_names[i]}" #In the case of emitted values we need to replace the code on the outside else: - channels_to_replace_outside_of_cluster.append((old_output_names[i], new_output_names[i])) + param_out_name= f"{name}_out_{i+1}" + subworkfow_call+=f"\n{param_out_name} = {new_output_names[i]}" + channels_to_replace_outside_of_cluster.append((old_output_names[i], param_out_name)) subworkflow_clusters_to_add.append(subworkflow_code) subworkflow_cluster_calls_to_add.append(subworkfow_call) index_cluster+=1 - #print(format_with_tabs(subworkflow_code)) + subworkflow_code = format_with_tabs(group_together_ifs(subworkflow_code)) + #subworkflow_code = format_with_tabs(subworkflow_code) + print(subworkflow_code) #print(subworkfow_call) # #for t in self.get_takes(things_added_in_cluster): @@ -994,32 +997,41 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen # #for t in self.get_emits(things_added_in_cluster): # print("-", t.get_code()) - #print("-----------") + print("-----------") #TODO -> rmoving the conditions which are problematic #This might not be the probleme -> when rerunnung the analysis isn't totally robust - #still_empty_conditions = True - #while(still_empty_conditions): - # still_empty_conditions = False - # for index_cluster in range(len(subworkflow_clusters_to_add)): - # #Replace empty if and else by nothing - # def replace_by_empty(match): - # still_empty_conditions = True - # return f"//Anker_cluster{index_cluster}" - # pattern = r"if\s*\([^\{]+\{\s*\/\/Anker_cluster"+str(index_cluster)+r"\s*\}\s*else\{\s*\}" - # code = re.sub(pattern, replace_by_empty, code) - # - # pattern = 
r"if\s*\([^\{]+\{\s*\}\s*else\{\s*\/\/Anker_cluster"+str(index_cluster)+r"\s*\}" - # code = re.sub(pattern, replace_by_empty, code) + still_simplifying_conditions = True + while(still_simplifying_conditions): + still_simplifying_conditions = False + to_replace, anker1, anker2 = "", "", "" + #Replace if/else + for match in re.finditer(r"if\s*\([^\{]+\{\s*(\/\/Anker_cluster\d|\s)\s*\}\s*else\s*\{\s*(\/\/Anker_cluster\d|\s)\s*\}", code): + to_replace = match.group(0) + anker1, anker2 = match.group(1), match.group(2) + still_simplifying_conditions = True + break + #Replace empty if on its own + if(not still_simplifying_conditions): + for match in re.finditer(r"(if\s*\([^\{]+\{\s*(\/\/Anker_cluster\d|\s)\s*\})\s*[^e]", code): + to_replace = match.group(1) + anker1 = match.group(2) + still_simplifying_conditions = True + break + if(still_simplifying_conditions): + code = code.replace(to_replace, f"{anker1}\n{anker2}") - + + #Replace the ankers by the calls of the subworkflows for i in range(len(subworkflow_clusters_to_add)): #print(f"//Anker_cluster{i}", subworkflow_cluster_calls_to_add[i]) code = code.replace(f"//Anker_cluster{i}", subworkflow_cluster_calls_to_add[i]) for old, new in channels_to_replace_outside_of_cluster: - code = code.replace(old, new) + pattern= fr"[ \(,]({re.escape(old)})" + code = replace_group1(code, pattern, new) + #code = code.replace(old, new) print(code)