From 19346fe40ac630985e5d00ca2b8e742d572ecb69 Mon Sep 17 00:00:00 2001 From: George Marchment <georgemarchment@yahoo.fr> Date: Tue, 11 Mar 2025 13:44:29 +0100 Subject: [PATCH] Made the rewrite deterministic -> debugged -> by removing the zoom opti + removed the artificial nodes from the calculation of the user view --- src/graph.py | 3 +- src/nextflow_file.py | 27 +++++++------- src/operation.py | 2 +- src/outils_graph.py | 86 +++++++++++++++++++++++++++++++++++--------- src/process.py | 2 +- src/workflow.py | 20 +++++++++-- 6 files changed, 106 insertions(+), 34 deletions(-) diff --git a/src/graph.py b/src/graph.py index c04159d..11d9872 100644 --- a/src/graph.py +++ b/src/graph.py @@ -143,6 +143,7 @@ class Graph(): def get_specification_graph(self, dirc = 'graphs', filename = "specification_graph", render_graphs = True): generate_graph(self.get_output_dir()/ dirc /filename, self.full_dico, render_graphs = render_graphs) + generate_graph(self.get_output_dir()/ dirc /(filename+"_without_artificial_nodes"), remove_artificial_nodes(self.full_dico), render_graphs = render_graphs) def get_specification_graph_wo_labels(self, filename = "specification_graph_wo_labels", render_graphs = True): generate_graph(self.get_output_dir()/'graphs'/filename, self.full_dico, label_edge=False, label_node=False, render_graphs = render_graphs) @@ -310,7 +311,7 @@ class Graph(): #For now i'm only gonna work from the flattened dico #self.initialise_flattened_dico(self.dico_process_dependency_graph) self.initialise_flattened_dico(self.full_dico) - dico = self.dico_flattened + dico = remove_artificial_nodes(self.dico_flattened) user_view, self.new_nodes_user_view = relev_user_view_builder(dico, relevant_modules=relevant_processes) diff --git a/src/nextflow_file.py b/src/nextflow_file.py index c19e022..b7a523f 100644 --- a/src/nextflow_file.py +++ b/src/nextflow_file.py @@ -160,13 +160,14 @@ class Nextflow_File(Nextflow_Building_Blocks): #---------------------- #This method extracts the 
"main" workflow from the file def extract_main(self): - from .main import Main - #This returns the code without the comments - code = "\n"+self.get_code()+"\n" - #Find pattern - twice = False - for match in re.finditer(constant.WORKFLOW_HEADER_2, code): - if(self.first_file): + if(self.first_file): + from .main import Main + #This returns the code without the comments + code = "\n"+self.get_code()+"\n" + #Find pattern + twice = False + for match in re.finditer(constant.WORKFLOW_HEADER_2, code): + start = match.span(1)[0] end = extract_curly(code, match.span(1)[1])#This function is defined in the functions file self.main = Main(code= code[start:end], nextflow_file=self) @@ -174,9 +175,9 @@ class Nextflow_File(Nextflow_Building_Blocks): #TODO turn into biofow insight error raise Exception(f"Found multiple 'main workflows' in {self.get_file_address()}") twice = True - else: - #TODO add num - BioFlowInsightError("A 'main' workflow was found in the Nextflow file") + if(self.main==None): + raise BioFlowInsightError("A 'main' workflow was not found in the Nextflow file") + #---------------------- #FUNCTIONS @@ -194,8 +195,10 @@ class Nextflow_File(Nextflow_Building_Blocks): start = match.span(0)[0] end = extract_curly(code, match.span(0)[1])#This function is defined in the functions file #f = Code(code=code[start:end], origin=self) - f = Function(code = code[start:end], name = match.group(2), origin =self) - self.functions.append(f) + #Fobiden names of functions + if(match.group(2) not in ['if']): + f = Function(code = code[start:end], name = match.group(2), origin =self) + self.functions.append(f) def get_functions(self): return self.functions diff --git a/src/operation.py b/src/operation.py index 1a88d6e..5e838ab 100644 --- a/src/operation.py +++ b/src/operation.py @@ -817,7 +817,7 @@ class Operation(Executor): fillcolor = "" - dico['nodes'].append({'id':str(self), 'name':"", "shape":"point", 'xlabel': code, 'fillcolor':fillcolor}) + 
dico['nodes'].append({'id':str(self), 'name':"", "shape":"point", 'xlabel': code, 'fillcolor':fillcolor, "artificial": self.get_artificial_status()}) for o in self.origins: #Case origins is a channel diff --git a/src/outils_graph.py b/src/outils_graph.py index 17f24dc..0283822 100644 --- a/src/outils_graph.py +++ b/src/outils_graph.py @@ -596,14 +596,14 @@ def nr_path_pred(r, n, dico, R): #Added a dico so it knows what it's already searched dico_rSucc = {} def rSucc(n, dico, R, outputs): - try: - tab = dico_rSucc[n] - except: - tab = [] - for r in set(R).union(set(outputs)): - if(nr_path_succ(n, r, dico, R+list(outputs))): - tab.append(r) - dico_rSucc[n] = tab + #try: + # tab = dico_rSucc[n] + #except: + tab = [] + for r in set(R).union(set(outputs)): + if(nr_path_succ(n, r, dico, R+list(outputs))): + tab.append(r) + dico_rSucc[n] = tab return tab def rSuccM(M, dico, R, outputs): @@ -615,14 +615,14 @@ def rSuccM(M, dico, R, outputs): #Added a dico so it knows what it's already searched dico_rPred = {} def rPred(n, dico, R, inputs): - try: - tab = dico_rPred[n] - except: - tab = [] - for r in set(R).union(set(inputs)): - if(nr_path_pred(r, n, dico, R+list(inputs))): - tab.append(r) - dico_rPred[n] = tab + #try: + # tab = dico_rPred[n] + #except: + tab = [] + for r in set(R).union(set(inputs)): + if(nr_path_pred(r, n, dico, R+list(inputs))): + tab.append(r) + dico_rPred[n] = tab return tab def rPredM(M, dico, R, inputs): @@ -1082,4 +1082,58 @@ def check_if_equal(dicoA, dicoB): #TO do that we rewrite the structure using a commun language (without using the ids) -> then just check if the translated structures are the same return translate_dico(dicoA) ==translate_dico(dicoB) +#This function removes the artificial nodes from the dico +#MAtching the nodes together between the artificial nodes +def remove_artificial_nodes(param_dico): + dico = copy.copy(param_dico) + def get_list_artificial_nodes_rec(dico, tab): + for n in dico["nodes"]: + if(n["artificial"]): + 
tab+=[n["id"]] + for sub in dico["subworkflows"]: + tab = get_list_artificial_nodes_rec(dico["subworkflows"][sub], tab) + return tab + def get_list_artificial_nodes(dico): + tab = [] + return get_list_artificial_nodes_rec(dico, tab) + + def remove_node_rec(dico, node_id): + to_remove = [] + for n in dico["nodes"]: + if(n["id"]==node_id): + to_remove.append(n) + for n in to_remove: + dico["nodes"].remove(n) + for sub in dico["subworkflows"]: + remove_node_rec(dico["subworkflows"][sub], node_id) + def remove_node(dico, node_id): + remove_node_rec(dico, node_id) + + def get_links_and_remove_edges_rec(dico, node_id, links): + to_remove = [] + for e in dico["edges"]: + if(e["A"]==node_id): + links["sink"].append({"sink": e["B"], "label": e["label"]}) + to_remove.append(e) + if(e["B"]==node_id): + links["source"].append({"source": e["A"], "label": e["label"]}) + to_remove.append(e) + for e in to_remove: + dico["edges"].remove(e) + for sub in dico["subworkflows"]: + get_links_and_remove_edges_rec(dico["subworkflows"][sub], node_id, links) + def get_links_and_remove_edges(dico, node_id, links): + get_links_and_remove_edges_rec(dico, node_id, links) + + for n_id in get_list_artificial_nodes(dico): + remove_node(dico, n_id) + links = {} + links["source"] = [] + links["sink"] = [] + get_links_and_remove_edges(dico, n_id, links) + for source in links["source"]: + for sink in links["sink"]: + edge = {"A" : source['source'], "B" : sink['sink'], "label" : source['label']} + dico["edges"].append(edge) + return dico diff --git a/src/process.py b/src/process.py index 85c480e..b4cef1a 100644 --- a/src/process.py +++ b/src/process.py @@ -416,7 +416,7 @@ class Process(Nextflow_Building_Blocks): return self.printed_name def get_structure(self, dico): - dico['nodes'].append({'id':str(self), 'name':self.get_name_to_print(), "shape":"ellipse", 'xlabel':"", 'fillcolor':''}) + dico['nodes'].append({'id':str(self), 'name':self.get_name_to_print(), "shape":"ellipse", 'xlabel':"", 
'fillcolor':'', "artificial": False}) def initialise_inputs_outputs(self): DSL = self.nextflow_file.get_DSL() diff --git a/src/workflow.py b/src/workflow.py index be488f2..efaab88 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -765,6 +765,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen relevant_processes = self.check_relevant_processes_in_workflow(relevant_processes) self.generate_user_view(relevant_processes = relevant_processes, processes_2_remove = [], render_graphs=render_graphs) clusters = self.graph.get_clusters_from_user_view() + print(len(clusters)) broken_subworkflows = get_workflows_broken(get_subworkflow_2_executors(), get_clusters_with_calls(clusters)) #While there still are broken workflows -> need to redo the analysis while(len(broken_subworkflows)>0): @@ -794,6 +795,14 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen #Get the topological order executors_in_order = self.get_order_execution_executors() new_clusters = [] + #for clust in clusters: + # print("*") + # for c in clust: + # if(c.get_type()=="Process"): + # print(c, c.get_code()[:20]) + # else: + # print(c, c.get_code()[:20], c.artificial) + for cluster in clusters: tab = [] for e in executors_in_order: @@ -802,9 +811,14 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen new_clusters.append(tab) clusters = new_clusters #clusters = self.graph.get_topogical_order(clusters) - #for e in executors_in_order: - # print(e.get_code()[:20]) - #print(executors_in_order) + #print('_________________') + #for clust in clusters: + # print("*") + # for c in clust: + # if(c.get_type()=="Process"): + # print(c, c.get_code()[:20]) + # else: + # print(c, c.get_code()[:20], c.artificial) #Creating the subworkflows from clusters -- GitLab