#Marker comment which has to survive when the condition wrapping it is removed
_CLUSTER_ANCHOR = "//Anker_clusters"


def _balanced_paren_content(max_depth):
    """Build a regex matching the inside of a balanced, single-line parenthesis pair.

    The returned fragment accepts any characters except newlines, and accepts
    parentheses only when they are balanced, up to `max_depth` nested levels.
    Parentheses inside string literals are not treated specially.
    """
    content = r"[^()\n]*"
    for _ in range(max_depth):
        content = rf"(?:[^()\n]|\({content}\))*"
    return content


#Matches an `if (...)`, `else if (...)` or `else` whose braces hold only
#whitespace and, optionally, the cluster anchor.
# - the condition has to be balanced, so the match cannot run over the closing
#   parenthesis of the condition and swallow unrelated blocks on the same line
# - `\b` stops identifiers which merely end in "if"/"else" from matching
# - a leading `else` is consumed together with its empty `if`, otherwise the
#   leftover `else` would capture the next statement
# - the trailing lookahead keeps an empty branch while an `else` still hangs
#   on it; it is removed on a later pass once that `else` has gone
_EMPTY_CONDITION_RE = re.compile(
    r"(?:"
    r"(?:\belse\s+)?\bif *\((?!\))" + _balanced_paren_content(8) + r"\)"
    r"|\belse"
    r")"
    r"\s*\{(?P<body>\s*(?:" + re.escape(_CLUSTER_ANCHOR) + r"\s*)?)\}"
    r"(?!\s*else\b)"
)


#This function removes the empty conditions -> while keeping the anker_clusters -> if it's originally in a condition
def remove_empty_conditions(code):
    """Remove the conditional blocks of `code` which have an empty body.

    A block is considered empty when its braces contain only whitespace, or
    only whitespace and the `//Anker_clusters` marker. The block header and
    braces are dropped and the former body (whitespace and/or marker) is left
    in place, so the marker is hoisted out of the condition.

    The substitution is repeated until the text stops changing, so that a
    condition which only becomes empty once an inner one has been removed is
    removed as well.

    Only conditions written on a single line are recognised, with at most 8
    levels of nested parentheses inside the condition. An empty branch which
    is directly followed by an `else` that still has content is left
    untouched, since removing it would leave that `else` dangling.

    Parameters:
        code (str): the source text to simplify.

    Returns:
        str: the text without the empty conditions.
    """
    while True:
        #A callable is used so that the kept body is inserted verbatim
        simplified = _EMPTY_CONDITION_RE.sub(lambda match: match.group("body"), code)
        #Every substitution shortens the text, so a fixpoint is always reached
        if simplified == code:
            return code
        code = simplified
import constant -from .outils import is_git_directory, format_with_tabs, replace_thing_by_call, replace_group1, group_together_ifs, extract_curly, remove_extra_jumps, get_channels_to_add_in_false_conditions, extract_conditions +from .outils import is_git_directory, format_with_tabs, replace_thing_by_call, replace_group1, group_together_ifs, extract_curly, remove_extra_jumps, get_channels_to_add_in_false_conditions, extract_conditions, remove_empty_conditions from .outils_graph import get_flatten_dico, initia_link_dico_rec, get_number_cycles, generate_graph from .outils_annotate import get_tools_commands_from_user_for_process from .bioflowinsighterror import BioFlowInsightError @@ -766,7 +766,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen relevant_processes = self.check_relevant_processes_in_workflow(relevant_processes) self.generate_user_view(relevant_processes = relevant_processes, processes_2_remove = [], render_graphs=render_graphs) clusters = self.graph.get_clusters_from_user_view() - print(len(clusters)) + broken_subworkflows = get_workflows_broken(get_subworkflow_2_executors(), get_clusters_with_calls(clusters)) #While there still are broken workflows -> need to redo the analysis while(len(broken_subworkflows)>0): @@ -782,7 +782,6 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen clusters = self.graph.get_clusters_from_user_view() broken_subworkflows = get_workflows_broken(get_subworkflow_2_executors(), get_clusters_with_calls(clusters)) - #print(code) @@ -823,41 +822,85 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen # else: # print(c, c.get_code()[:20], c.artificial) + + + #This function returns the last executor in the clusters + #This is used to place the anker + def get_last_executor_in_cluster(executors_in_order, clusters): + dico = {} + for cluster in clusters: + for ele in cluster: + dico[ele] = executors_in_order.index(ele) + for ele in {k: v for k, 
v in sorted(dico.items(), key=lambda item: item[1], reverse=True)}: + return ele + + #Replace the last executor in the clusters by the cluster anker + last_executor_in_cluster = get_last_executor_in_cluster(executors_in_order, clusters) + if(last_executor_in_cluster.get_type()=="Process"): + call = last_executor_in_cluster.get_call() + code = code.replace(call.get_code(get_OG = True), "//Anker_clusters") + elif(last_executor_in_cluster.get_type()=="Operation"): + if(not last_executor_in_cluster.get_artificial_status()): + code = code.replace(last_executor_in_cluster.get_code(get_OG = True), "//Anker_clusters", 1) + else: + raise Exception("This shoudn't happen") + else: + raise Exception("This shoudn't happen") + + + #Removing elements from clusters from the code + for cluster in clusters: + for ele in cluster: + if(ele.get_type()=="Process"): + call = ele.get_call() + code = code.replace(call.get_code(get_OG = True), "") + elif(ele.get_type()=="Operation"): + if(not ele.get_artificial_status()): + code = code.replace(ele.get_code(get_OG = True), "", 1) + else: + raise Exception("This shoudn't happen") + else: + raise Exception("This shoudn't happen") + + #Remove the empty conditions left in the code + code = remove_empty_conditions(code) + + + #Add the subworkflow defintions + #------------------------------------- + #Adding the anker + subworkflow_section = f"//ANKER 4 SUBWORKFLOW DEF" + to_replace = "" + for match in re.finditer(r"workflow\s*\w*\s*\{", code): + to_replace = match.group(0) + break + if(to_replace==""): + raise Exception("No call to a workflow") + code = code.replace(to_replace, f"{subworkflow_section}\n\n{to_replace}") #Creating the subworkflows from clusters calls_in_operations = [] non_relevant_name = 1 - channels_to_replace_outside_of_cluster = [] + subworkflow_clusters_to_add, subworkflow_cluster_calls_to_add = [], [] - index_cluster = 0 - for elements in clusters: + index_cluster = len(clusters) + for elements in list(reversed(clusters)): + 
+ channels_to_replace_outside_of_cluster = [] + #Check that there is at least one process in cluster at_least_one_process = False for e in elements: if(e.get_type()=="Process"): at_least_one_process = True - #Only create the subworkflows for clusters with more than one element + #Only create the subworkflows for clusters with onr more elements (and that element in a process) processes_added = [] things_added_in_cluster = [] if(len(elements)>=1 and at_least_one_process): name, body, take, emit = "", "", "", "" first_element = True - def get_last_operation_or_call(elements): - index = -1 - while(True): - if(elements[index].get_type()=="Process"): - return elements[index].get_call() - else: - if(not elements[index].get_artificial_status()): - return elements[index] - else: - index = index-1 - anker_thing = get_last_operation_or_call(elements).get_code(get_OG = True) - code = code.replace(anker_thing, f"//Anker_cluster{index_cluster}\n\n{anker_thing}") - - for ele in elements: if(ele.get_type()=="Process"): @@ -867,14 +910,6 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen name = f"cluster_{ele.get_alias()}" #Get the call of thing (either process or subworkflow) call = ele.get_call() - - #If first element -> add marker for the subworkflow call - #if(first_element): - # code = code.replace(call.get_code(get_OG = True), f"//Anker_cluster{index_cluster}") - # first_element = False - #else: - code = code.replace(call.get_code(get_OG = True), "") - processes_added.append(call.get_first_element_called()) values = [] @@ -892,13 +927,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen #TODO -> check this verification there might be some "effet de bord" if(not ele.get_artificial_status()): - ##If first element -> add marker for the subworkflow call - #if(first_element): - # code = code.replace(ele.get_code(get_OG = True), f"//Anker_cluster{index_cluster}", 1) - # first_element = False - #else: - code = 
code.replace(ele.get_code(get_OG = True), "", 1) - + #Ignore these cases #TODO -> you should be able to remove this if(ele.get_code()[:4] not in ["emit", "take"]): @@ -1034,11 +1063,32 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen subworkfow_call = subworkfow_call_case_true - subworkflow_clusters_to_add.append(subworkflow_code) - subworkflow_cluster_calls_to_add.append(subworkfow_call) - index_cluster+=1 + #subworkflow_clusters_to_add.append(subworkflow_code) + #subworkflow_cluster_calls_to_add.append(subworkfow_call) + + #Add the subworkflow call + new_code = f"//Anker_clusters\n\n//Cluster_{index_cluster}\n{subworkfow_call}\n" + code = code.replace("//Anker_clusters", new_code) + + for old, new in channels_to_replace_outside_of_cluster: + pattern= fr"[ \(,]({re.escape(old)})[^\w]" + code = replace_group1(code, pattern, new) + #code = code.replace(old, new) + + #Add the subworkflow defintions + #------------------------------------- + code = code.replace(f'{subworkflow_section}', f"{subworkflow_code}\n\n{subworkflow_section}") - + + else: + #If there is only one element then we put it back in the code + new_code = f"//Anker_clusters\n\n//Cluster_{index_cluster}\n{elements[0].get_code(get_OG = True)}\n" + code = code.replace("//Anker_clusters", new_code) + index_cluster-=1 + + + + """ #TODO -> rmoving the conditions which are problematic #This might not be the probleme -> when rerunnung the analysis isn't totally robust still_simplifying_conditions = True @@ -1088,22 +1138,8 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen #code = code.replace(old, new) - #Add the subworkflow defintions - #------------------------------------- - #Add anker - subworkflow_section = f"//ANKER 4 SUBWORKFLOW DEF" - to_replace = "" - for match in re.finditer(r"workflow\s*\w*\s*\{", code): - to_replace = match.group(0) - break - if(to_replace==""): - raise Exception("No call to a workflow") - - code = 
code.replace(to_replace, f"{subworkflow_section}\n\n{to_replace}") - - for sub in subworkflow_clusters_to_add: - code = code.replace(f'{subworkflow_section}', f"{sub}\n\n{subworkflow_section}") + """ #Putting || back code = code.replace("$OR$", "||")