def get_edges(dico, val=None):
    """Return the edges of ``dico`` and of all its nested subworkflows.

    Args:
        dico: a graph dictionary holding an "edges" list and a
            "subworkflows" dictionary whose values have the same layout.
        val: optional accumulator. When given, it is extended in place and
            returned; when omitted, a new list is created for this call.

    Returns:
        A flat list with the edges of ``dico`` first, followed by the edges
        of each subworkflow (depth-first, in dictionary order).
    """
    # The accumulator used to default to ``[]``: that single list was shared
    # by every call, so edges from earlier calls leaked into later results.
    if val is None:
        val = []
    val += dico["edges"]
    for sub_dico in dico["subworkflows"].values():
        val = get_edges(sub_dico, val)
    return val


def exist_path_dico(A, B, dico):
    """Tell whether ``exist_path`` finds a path from A to B in ``dico``.

    The search runs over the flattened edges returned by ``get_edges``, i.e.
    the edges of ``dico`` together with those of its nested subworkflows.
    """
    return exist_path(A, B, get_edges(dico))


#==========================================================
#Check if fake dependency is created when created user view
#==========================================================
#To check whether a fake dependency is created, the edges of the level graphs
#of the user view are compared with those of the process dependency graph:
#each user view edge touching a subworkflow should have an equivalent there.
#NOTE(review): the source of this block had lost its indentation; the nesting
#below is reconstructed from the statement order -- confirm against the repo.
def check_fake_dependency_user_view(self):
    """Detect a dependency present in the user view but not in the process graph.

    Reads ``self.node_2_subworkflows_process_dependency_graph``,
    ``self.node_2_subworkflows_user_view``,
    ``self.dico_process_dependency_graph`` and
    ``self.user_view_with_subworkflows``.

    Returns:
        True (after printing the offending edge) as soon as one user view
        edge touching a subworkflow has neither a matching edge nor a path in
        the same level of the process dependency graph; False otherwise,
        including when both node-to-subworkflows mappings are equal.

    Raises:
        IndexError: if the user view has more levels than the process
            dependency graph (levels are paired by index).
    """
    import re

    def clean_node(node):
        """Strip the '<>' around a process name, or the id prefix of a subworkflow."""
        if node.startswith("<"):
            # Process: just drop the surrounding '<' and '>'
            return node[1:-1]
        # Subworkflow: keep what follows the "id_<digits>.<digits>_" prefix
        for match in re.finditer(r"id_\d+\.\d+\_(.+)", node):
            node = match.group(1)
        return node

    def as_value(attribute):
        """Call ``attribute`` when it is a method, otherwise return it as is."""
        return attribute() if callable(attribute) else attribute

    def find_level_node(cleaned_name, nodes_level):
        """Return the last node of the level containing one part of the name."""
        found = ""
        for part in cleaned_name.split("_$$_"):
            for candidate in nodes_level:
                if part in candidate:
                    found = candidate
        return found

    # If both mappings are identical there is nothing to compare.
    # nextflow_file.py calls graph.node_2_subworkflows_user_view() as a
    # method; comparing the bare attributes compared two bound methods, which
    # are never equal, so this shortcut could not trigger.
    # NOTE(review): confirm both names are methods of Graph.
    process_mapping = as_value(self.node_2_subworkflows_process_dependency_graph)
    user_mapping = as_value(self.node_2_subworkflows_user_view)
    if process_mapping == user_mapping:
        return False

    dico_process_dependency_graph = self.dico_process_dependency_graph
    user_view_with_subworkflows = self.user_view_with_subworkflows
    user_view_subworkflows = get_subworkflows_names(user_view_with_subworkflows)

    # One graph per level, for the process dependency graph and the user view
    dependency_levels = [
        get_graph_level_l(dico_process_dependency_graph, level)
        for level in range(get_max_level(dico_process_dependency_graph) + 1)
    ]
    user_view_levels = [
        get_graph_level_l(user_view_with_subworkflows, level)
        for level in range(get_max_level(user_view_with_subworkflows) + 1)
    ]

    boundaries = ("input", "output")
    for i, user_view_level in enumerate(user_view_levels):
        dependency_level = dependency_levels[i]
        # Same for every edge of this level, so computed once
        dependency_edges = get_edges(dependency_level)
        for sub in user_view_subworkflows:
            tag = f"_{sub}"
            for edge_user in user_view_level["edges"]:
                # Only edges touching the subworkflow are of interest
                if tag not in edge_user["A"] and tag not in edge_user["B"]:
                    continue
                if edge_user["A"] in boundaries or edge_user["B"] in boundaries:
                    continue

                # Does 'edge_user' have an equivalent in the process dependency graph?
                has_matching_user_dependency = False
                for edge_process in dependency_edges:
                    in_A = tag in edge_process["A"]
                    in_B = tag in edge_process["B"]
                    if not (in_A or in_B):
                        continue
                    # Take the endpoint opposite to the subworkflow; when
                    # both endpoints carry the tag, the B test wins as before
                    if in_B:
                        node, side = edge_process["A"], "A"
                    else:
                        node, side = edge_process["B"], "B"
                    if clean_node(node) in edge_user[side]:
                        has_matching_user_dependency = True
                        break
                if has_matching_user_dependency:
                    continue

                # No direct equivalent: check whether an indirect path exists
                nodes_level = get_nodes_from_edges(dependency_edges)
                node_A_temp = find_level_node(clean_node(edge_user["A"]), nodes_level)
                node_B_temp = find_level_node(clean_node(edge_user["B"]), nodes_level)
                if not exist_path_dico(node_A_temp, node_B_temp, dependency_level):
                    print("False dependency", edge_user)
                    return True

    return False
#NOTE(review): the source of these two functions had lost its indentation;
#the recursive call is assumed to sit outside the "nodes" test -- confirm.
def get_number_of_subworkflows(dico, val=0):
    """Count the nested subworkflows of ``dico`` whose "nodes" list is not empty.

    Args:
        dico: a graph dictionary holding a "subworkflows" dictionary whose
            values each hold "nodes" and "subworkflows".
        val: count to start from (used by the recursion).

    Returns:
        ``val`` plus the number of subworkflows, at any depth, with nodes.
    """
    for sub_dico in dico["subworkflows"].values():
        if sub_dico["nodes"] != []:
            val += 1
        val = get_number_of_subworkflows(sub_dico, val)
    return val


def get_subworkflows_names(dico, val=None):
    """List the names of the nested subworkflows of ``dico`` that have nodes.

    Args:
        dico: a graph dictionary holding a "subworkflows" dictionary whose
            values each hold "nodes" and "subworkflows".
        val: optional accumulator. When given, it is extended in place and
            returned; when omitted, a new list is created for this call.

    Returns:
        The names (keys of "subworkflows"), depth-first in dictionary order,
        of every subworkflow whose "nodes" list is not empty.
    """
    # The accumulator used to default to ``[]``: that single list was shared
    # by every call, so names from earlier calls leaked into later results.
    if val is None:
        val = []
    for name, sub_dico in dico["subworkflows"].items():
        if sub_dico["nodes"] != []:
            val.append(name)
        val = get_subworkflows_names(sub_dico, val)
    return val