From b0dec6bea34ec2577c36057a74f34793ba08dac5 Mon Sep 17 00:00:00 2001
From: George Marchment <georgemarchment@yahoo.fr>
Date: Mon, 21 Oct 2024 16:45:47 +0200
Subject: [PATCH] update

---
 src/graph.py         | 88 ++++++++++++++++++++++++++++++++++++++++++++
 src/nextflow_file.py |  3 ++
 src/outils_graph.py  | 24 ++++++++++--
 src/workflow.py      |  5 ++-
 4 files changed, 115 insertions(+), 5 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index 0f26b79..7c02415 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -3,6 +3,7 @@ import json
 import networkx as nx
 import numpy as np
 import copy
+import re
 
 from .outils_graph import *
 
@@ -358,6 +359,93 @@ class Graph():
                         new_node_2_subworkflows[node] = node_2_subworkflows[id]
         return new_node_2_subworkflows 
 
+    #==========================================================
+    #Check if a fake dependency is created when creating the user view
+    #==========================================================
+    #To check if a fake dependency is created, the edges of the level graphs
+    #of the user view are compared with the ones of the process dependency graph
+    #Each of the user view edges (with subworkflows) should be in the process dependency edges
+    def check_fake_dependency_user_view(self):
+        #Returns True (after printing the edge) as soon as an edge of the user view which touches a subworkflow
+        #has neither a matching edge nor a path in the process dependency graph of the same level
+        #Returns False otherwise
+
+        #This function removes the "<>" from the node name
+        #And for a subworkflow it keeps what follows the "id_<number>.<number>_" prefix
+        def clean_node(node):
+            #Case the node is a process
+            if(node[0]=="<"):
+                #We just remove the '<>' around the name
+                node = node[1:-1]
+            else:#it's a subworkflow
+                for match in re.finditer(r"id_\d+\.\d+\_(.+)", node):
+                    node = match.group(1)
+            return node
+
+        #First check if the node_2_subworkflows are the same (the methods have to be called, 2 different bound methods are never equal), if it's the case there is nothing to compare
+        if(self.node_2_subworkflows_process_dependency_graph()!=self.node_2_subworkflows_user_view()):
+            dico_process_dependency_graph = self.dico_process_dependency_graph
+            user_view_with_subworkflows = self.user_view_with_subworkflows
+            user_view_subworkflows = get_subworkflows_names(user_view_with_subworkflows, [])
+            #Get the level graphs for the process dependency graph
+            max_level = get_max_level(dico_process_dependency_graph)
+            dependency_levels = []
+            for l in range(max_level+1):
+                new_dico = get_graph_level_l(dico_process_dependency_graph, l)
+                dependency_levels.append(new_dico)
+            #Get the level graphs for the user view
+            max_level = get_max_level(user_view_with_subworkflows)
+            user_view_levels = []
+            for l in range(max_level+1):
+                new_dico = get_graph_level_l(user_view_with_subworkflows, l)
+                user_view_levels.append(new_dico)
+            #For each level, i'm gonna check the edges
+            for i in range(len(user_view_levels)):
+                user_view_level = user_view_levels[i]
+                dependency_level = dependency_levels[i]
+                #Every edge of the level (nested subworkflows included), a new list is given explicitly so the result never depends on a default accumulator
+                edges_level = get_edges(dependency_level, [])
+                for sub in user_view_subworkflows:
+                    for edge_user in user_view_level["edges"]:
+                        if(f"_{sub}" in edge_user["A"] or f"_{sub}" in edge_user["B"]):
+                            if(edge_user["A"]!="input" and edge_user["A"]!="output" and edge_user["B"]!="input" and edge_user["B"]!="output"):
+                                #This boolean is to check if the edge 'edge_user' has an equivalent in the process dependency graph
+                                has_matching_user_dependency = False
+                                for edge_process in edges_level:
+                                    if(f"_{sub}" in edge_process["A"] or f"_{sub}" in edge_process["B"]):
+                                        node = ""
+                                        side = ""
+                                        #Determine if it's A or B
+                                        if(f"_{sub}" in edge_process["A"]):
+                                            node = edge_process["B"]
+                                            side = "B"
+                                        if(f"_{sub}" in edge_process["B"]):
+                                            node = edge_process["A"]
+                                            side = "A"
+                                        node = clean_node(node)
+                                        if(node in edge_user[side]):
+                                            has_matching_user_dependency = True
+                                if(not has_matching_user_dependency):
+                                    #Check if there is an indirect path that exist
+                                    node_A = clean_node(edge_user["A"])
+                                    node_B = clean_node(edge_user["B"])
+                                    nodes_level = get_nodes_from_edges(edges_level)
+                                    node_A_temp, node_B_temp = "", ""
+                                    for A in node_A.split("_$$_"):
+                                        for tmp in nodes_level:
+                                            if A in tmp:
+                                                node_A_temp = tmp
+                                    for B in node_B.split("_$$_"):
+                                        for tmp in nodes_level:
+                                            if B in tmp:
+                                                node_B_temp = tmp
+                                    if(not exist_path(node_A_temp, node_B_temp, edges_level)):
+                                        print("False dependency", edge_user)
+                                        return True
+            return False
+        else:
+            return False
+
     #============================
     #METADATA FROM GRAPH
     #============================
diff --git a/src/nextflow_file.py b/src/nextflow_file.py
index 4180fd0..30e3daa 100644
--- a/src/nextflow_file.py
+++ b/src/nextflow_file.py
@@ -732,6 +732,9 @@ class Nextflow_File(Nextflow_Building_Blocks):
     def node_2_subworkflows_user_view(self):
         return self.graph.node_2_subworkflows_user_view()
     
+    def check_fake_dependency_user_view(self):
+        #Forwards the call to the graph's 'check_fake_dependency_user_view' and returns its result
+        return self.graph.check_fake_dependency_user_view()
     
 
     def add_main_DSL1_2_rocrate(self, dico, file_dico, file_name):
diff --git a/src/outils_graph.py b/src/outils_graph.py
index 0f5c1b1..1ef2b2d 100644
--- a/src/outils_graph.py
+++ b/src/outils_graph.py
@@ -493,6 +493,16 @@ def exist_path(A, B, edges):
         visited[n] = False
     return exist_path_rec(A, B, edges, visited)
 
+def get_edges(dico, val=None):
+    #Appends to 'val' the edges of 'dico' followed by the edges of its nested subworkflows (depth first) and returns 'val'. 'val' defaults to a new list at each call: a '[]' default is created only once and would keep the edges of the previous calls
+    val = [] if val is None else val
+    val += dico["edges"] + [edge for sub in dico["subworkflows"].values() for edge in get_edges(sub)]
+    return val
+
+def exist_path_dico(A, B, dico):
+    #Same as 'exist_path' but the edges are the ones collected by 'get_edges' on 'dico'
+    return exist_path(A, B, get_edges(dico))
+
 
 def nr_path_succ(n, r, dico, R):
     rest_of_R = set(R)-set([r])
@@ -589,7 +599,7 @@ def get_name_new_node(new_nodes, relevant_modules):
     #Arbitrary choice of choosing the name with the longest name
     longest_name = new_nodes[0][0]
     for name in new_nodes:
-        if(len(longest_name)<len(name[0])):
+        if(len(longest_name)>len(name[0])):
             longest_name = name[0]
 
     return longest_name
@@ -638,7 +648,7 @@ def relev_user_view_builder(dico_param, relevant_modules):
     for out in outputs:
         dico["edges"].append({'A':out, 'B':'output'})
     #TODO remove this -> it's to replicate the one in the algortihm demo
-    #dico["edges"].append({'A':get_id_from_name(dico, f"M5{tag}0")[0], 'B':'output'})
+    #dico["edges"].append({'A':get_id_from_name(dico, f"M5_0{tag}0")[0], 'B':'output'})
     for input in inputs:
         dico["edges"].append({'A':"input", 'B':input})
     U = []
@@ -738,7 +748,7 @@ def relev_user_view_builder(dico_param, relevant_modules):
     for i in range(len(new_nodes)):
         new_nodes[i].sort()
         new_name = get_name_new_node(get_names_tab(dico, new_nodes[i]), relevant_modules)
-        node = {"id": ''.join(new_nodes[i]).replace('<', '').replace('>', ''),
+        node = {"id": '_$$_'.join(new_nodes[i]).replace('<', '').replace('>', ''),
                 "name": new_name.split(tag)[0],
                 "shape": "ellipse",
                 "xlabel": f"{len(new_nodes[i])}",
@@ -897,12 +907,18 @@ def get_graph_level_l(dico, level):
     return new_dico
 
 def get_number_of_subworkflows(dico, val= 0):
-    
     for sub in dico["subworkflows"]:
         if(dico["subworkflows"][sub]["nodes"]!=[]):
             val += 1
         val=get_number_of_subworkflows(dico["subworkflows"][sub], val)
     return val
 
+def get_subworkflows_names(dico, val=None):
+    #Appends to 'val' (depth first) the name of each nested subworkflow whose "nodes" is not empty and returns 'val'. 'val' defaults to a new list at each call: a '[]' default is created only once and would keep the names of the previous calls
+    val = [] if val is None else val
+    for name, sub in dico["subworkflows"].items():
+        val += [name] if sub["nodes"]!=[] else []
+        get_subworkflows_names(sub, val)
+    return val
 
 
diff --git a/src/workflow.py b/src/workflow.py
index 1636137..61df16e 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -593,4 +593,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
         return self.nextflow_file.node_2_subworkflows_process_dependency_graph()
     
     def node_2_subworkflows_user_view(self):
-        return self.nextflow_file.node_2_subworkflows_user_view()
\ No newline at end of file
+        return self.nextflow_file.node_2_subworkflows_user_view()
+    
+    def check_fake_dependency_user_view(self): #Forwards the call to the Nextflow file and returns its result
+        return self.nextflow_file.check_fake_dependency_user_view()
\ No newline at end of file
-- 
GitLab