From c06e5e04270156933bc41e8183dadb7cacc3545e Mon Sep 17 00:00:00 2001
From: George Marchment <georgemarchment@yahoo.fr>
Date: Wed, 12 Mar 2025 10:33:24 +0100
Subject: [PATCH] Implemented a rewrite which places the clusters based on
 topological order -> not at the first or last place in the code -> but
 following the order of execution of the clusters

---
 src/outils.py   |  15 +++++
 src/workflow.py | 146 ++++++++++++++++++++++++++++++------------------
 2 files changed, 106 insertions(+), 55 deletions(-)

diff --git a/src/outils.py b/src/outils.py
index 88e3e9b..9049cda 100644
--- a/src/outils.py
+++ b/src/outils.py
@@ -1343,3 +1343,18 @@ def get_channels_to_add_in_false_conditions(body, emitted_channels):
     
     return body
 
+#Removes the empty conditions (`if (...) {}` / `else {}`) from the code -> a lone
+#//Anker_clusters marker that was originally inside such a condition is kept
+def remove_empty_conditions(code):
+    """Return `code` with empty condition blocks replaced by their (blank or anker) content."""
+    #\b prevents identifiers which only end in "if"/"else" (e.g. `verif(x) {}`) from matching
+    pattern = re.compile(r"(\bif *\(.+\)|\belse)\s*{(\s*|\s*\/\/Anker_clusters\s*)}")
+    #Repeat until nothing changes -> removing an inner condition can empty the outer one
+    while(True):
+        #Group 2 is what was between the curlies: only whitespace and/or the anker
+        cleaned = pattern.sub(lambda match: match.group(2), code)
+        if(cleaned == code):
+            return code
+        code = cleaned
+    
+
diff --git a/src/workflow.py b/src/workflow.py
index 80585ba..7869116 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -3,7 +3,7 @@
 from .nextflow_file import Nextflow_File
 from .ro_crate import RO_Crate
 from . import constant
-from .outils import is_git_directory, format_with_tabs, replace_thing_by_call, replace_group1, group_together_ifs, extract_curly, remove_extra_jumps, get_channels_to_add_in_false_conditions, extract_conditions
+from .outils import is_git_directory, format_with_tabs, replace_thing_by_call, replace_group1, group_together_ifs, extract_curly, remove_extra_jumps, get_channels_to_add_in_false_conditions, extract_conditions, remove_empty_conditions
 from .outils_graph import get_flatten_dico, initia_link_dico_rec, get_number_cycles, generate_graph
 from .outils_annotate import get_tools_commands_from_user_for_process
 from .bioflowinsighterror import BioFlowInsightError
@@ -766,7 +766,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
             relevant_processes = self.check_relevant_processes_in_workflow(relevant_processes)
             self.generate_user_view(relevant_processes = relevant_processes, processes_2_remove =  [], render_graphs=render_graphs)
             clusters = self.graph.get_clusters_from_user_view()
-            print(len(clusters))
+
             broken_subworkflows = get_workflows_broken(get_subworkflow_2_executors(), get_clusters_with_calls(clusters))
             #While there still are broken workflows -> need to redo the analysis
             while(len(broken_subworkflows)>0):
@@ -782,7 +782,6 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                 clusters = self.graph.get_clusters_from_user_view()
                 broken_subworkflows = get_workflows_broken(get_subworkflow_2_executors(), get_clusters_with_calls(clusters))
             
-            
             #print(code)
 
 
@@ -823,41 +822,85 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
             #        else:
             #            print(c, c.get_code()[:20], c.artificial)
 
+            
+
+            #Returns the element of the clusters which is executed last (its position in
+            #executors_in_order is the highest) -> this is where the cluster anker is placed
+            def get_last_executor_in_cluster(executors_in_order, clusters):
+                """Return the latest-executed cluster element, or None if the clusters are empty."""
+                #Flatten the clusters into a single stream of elements
+                candidates = (ele for cluster in clusters for ele in cluster)
+                #max with the position as key replaces sorting a whole dictionary just to
+                #read its first entry; default=None keeps the original result for no elements
+                return max(candidates, key=executors_in_order.index, default=None)
+
+            #Replace the last executor in the clusters by the cluster anker
+            last_executor_in_cluster = get_last_executor_in_cluster(executors_in_order, clusters)
+            if(last_executor_in_cluster.get_type()=="Process"):
+                call = last_executor_in_cluster.get_call()    
+                code = code.replace(call.get_code(get_OG = True), "//Anker_clusters")
+            elif(last_executor_in_cluster.get_type()=="Operation"):
+                if(not last_executor_in_cluster.get_artificial_status()):
+                    code = code.replace(last_executor_in_cluster.get_code(get_OG = True), "//Anker_clusters", 1)
+                else:
+                    raise Exception("This shoudn't happen")
+            else:
+                    raise Exception("This shoudn't happen")
+
+
+            #Removing elements from clusters from the code
+            for cluster in clusters:
+                for ele in cluster:
+                    if(ele.get_type()=="Process"):
+                        call = ele.get_call()    
+                        code = code.replace(call.get_code(get_OG = True), "")
+                    elif(ele.get_type()=="Operation"):
+                        if(not ele.get_artificial_status()):
+                            code = code.replace(ele.get_code(get_OG = True), "", 1)
+                        else:
+                            raise Exception("This shoudn't happen")
+                    else:
+                            raise Exception("This shoudn't happen")
+
+            #Remove the empty conditions left in the code
+            code = remove_empty_conditions(code)
+
+
+            #Add the subworkflow definitions
+            #-------------------------------------
+            #Adding the anker
+            subworkflow_section = f"//ANKER 4 SUBWORKFLOW DEF"
+            to_replace = ""
+            for match in re.finditer(r"workflow\s*\w*\s*\{", code):
+                to_replace = match.group(0)
+                break
+            if(to_replace==""):
+                raise Exception("No call to a workflow")
+            code = code.replace(to_replace, f"{subworkflow_section}\n\n{to_replace}")
 
             #Creating the subworkflows from clusters
             calls_in_operations = []
             non_relevant_name = 1
-            channels_to_replace_outside_of_cluster = []
+            
             subworkflow_clusters_to_add, subworkflow_cluster_calls_to_add = [], []
-            index_cluster = 0
-            for elements in clusters:
+            index_cluster = len(clusters)
+            for elements in list(reversed(clusters)):
+
+                channels_to_replace_outside_of_cluster = []
+
                 #Check that there is at least one process in cluster
                 at_least_one_process = False
                 for e in elements:
                     if(e.get_type()=="Process"):
                         at_least_one_process = True
 
-                #Only create the subworkflows for clusters with more than one element
+                #Only create the subworkflows for clusters with one or more elements (at least one of which is a process)
                 processes_added = []
                 things_added_in_cluster = []
                 if(len(elements)>=1 and at_least_one_process):
                     name, body, take, emit = "", "", "", ""
                     first_element = True
 
-                    def get_last_operation_or_call(elements):
-                        index = -1
-                        while(True):
-                            if(elements[index].get_type()=="Process"):
-                                return elements[index].get_call()
-                            else:
-                                if(not elements[index].get_artificial_status()):
-                                    return elements[index]
-                                else:
-                                    index = index-1
-                    anker_thing = get_last_operation_or_call(elements).get_code(get_OG = True)
-                    code = code.replace(anker_thing, f"//Anker_cluster{index_cluster}\n\n{anker_thing}")
-
-
                     for ele in elements:
             
                         if(ele.get_type()=="Process"):
@@ -867,14 +910,6 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                                 name = f"cluster_{ele.get_alias()}"
                             #Get the call of thing (either process or subworkflow)
                             call = ele.get_call()
-            
-                            #If first element -> add marker for the subworkflow call
-                            #if(first_element):
-                            #    code = code.replace(call.get_code(get_OG = True), f"//Anker_cluster{index_cluster}")
-                            #    first_element = False
-                            #else:
-                            code = code.replace(call.get_code(get_OG = True), "")
-            
                             
                             processes_added.append(call.get_first_element_called())
                             values = []
@@ -892,13 +927,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                             #TODO -> check this verification there might be some "effet de bord"
                             if(not ele.get_artificial_status()):
             
-                                ##If first element -> add marker for the subworkflow call
-                                #if(first_element):
-                                #    code = code.replace(ele.get_code(get_OG = True), f"//Anker_cluster{index_cluster}", 1)
-                                #    first_element = False
-                                #else:
-                                code = code.replace(ele.get_code(get_OG = True), "", 1)
-                
+             
                                 #Ignore these cases
                                 #TODO -> you should be able to remove this
                                 if(ele.get_code()[:4] not in ["emit", "take"]):
@@ -1034,11 +1063,32 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                         subworkfow_call = subworkfow_call_case_true
                     
                     
-                    subworkflow_clusters_to_add.append(subworkflow_code)
-                    subworkflow_cluster_calls_to_add.append(subworkfow_call)
-                    index_cluster+=1
+                    #subworkflow_clusters_to_add.append(subworkflow_code)
+                    #subworkflow_cluster_calls_to_add.append(subworkfow_call)
+
+                    #Add the subworkflow call
+                    new_code = f"//Anker_clusters\n\n//Cluster_{index_cluster}\n{subworkfow_call}\n"
+                    code = code.replace("//Anker_clusters", new_code)
+
+                    for old, new in channels_to_replace_outside_of_cluster:
+                        pattern= fr"[ \(,]({re.escape(old)})[^\w]"
+                        code = replace_group1(code, pattern, new)
+                        #code = code.replace(old, new)
+
+                    #Add the subworkflow definitions
+                    #-------------------------------------                
+                    code = code.replace(f'{subworkflow_section}', f"{subworkflow_code}\n\n{subworkflow_section}")
                     
-              
+                
+                else:
+                    #If the cluster has no process, no subworkflow is created -> its first element is put back in the code
+                    new_code = f"//Anker_clusters\n\n//Cluster_{index_cluster}\n{elements[0].get_code(get_OG = True)}\n"
+                    code = code.replace("//Anker_clusters", new_code)
+                index_cluster-=1
+
+
+                    
+            """  
             #TODO -> rmoving the conditions which are problematic
             #This might not be the probleme -> when rerunnung the analysis isn't totally robust
             still_simplifying_conditions = True
@@ -1088,22 +1138,8 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                 #code = code.replace(old, new)
             
           
-            #Add the subworkflow defintions
-            #-------------------------------------
-            #Add anker
-            subworkflow_section = f"//ANKER 4 SUBWORKFLOW DEF"
-            to_replace = ""
-            for match in re.finditer(r"workflow\s*\w*\s*\{", code):
-                to_replace = match.group(0)
-                break
-            if(to_replace==""):
-                raise Exception("No call to a workflow")
-            
-            code = code.replace(to_replace, f"{subworkflow_section}\n\n{to_replace}")
-            
-            for sub in subworkflow_clusters_to_add:
-                code = code.replace(f'{subworkflow_section}', f"{sub}\n\n{subworkflow_section}")
             
+            """
             
             #Putting || back
             code = code.replace("$OR$", "||")
-- 
GitLab