From 0315c99af192318bea5c76b1fc5a268887ceab12 Mon Sep 17 00:00:00 2001 From: George Marchment <georgemarchment@yahoo.fr> Date: Tue, 5 Nov 2024 14:47:13 +0100 Subject: [PATCH] added functionnality + started work on metro map --- src/graph.py | 30 +++++++-- src/nextflow_file.py | 7 +++ src/outils_graph.py | 143 ++++++++++++++++++++++++++++++++----------- src/workflow.py | 5 +- 4 files changed, 145 insertions(+), 40 deletions(-) diff --git a/src/graph.py b/src/graph.py index 7c02415..0eee6a7 100644 --- a/src/graph.py +++ b/src/graph.py @@ -208,7 +208,7 @@ class Graph(): def render_graph_wo_operations(self, filename = "process_dependency_graph", render_graphs = True): - generate_graph(self.get_output_dir()/'graphs'/filename, self.dico_process_dependency_graph, render_graphs = render_graphs, label_edge=False, label_node=False) + generate_graph(self.get_output_dir()/'graphs'/filename, self.dico_process_dependency_graph, render_graphs = render_graphs, label_edge=False, label_node=False, root = True) def get_dependency_graph(self): @@ -314,8 +314,8 @@ class Graph(): def generate_user_view(self, relevant_processes = [], render_graphs = True): user_view, user_view_with_subworkflows = self.get_user_view_graph(relevant_processes = relevant_processes) self.user_view_with_subworkflows = user_view_with_subworkflows - generate_graph(self.get_output_dir()/'graphs'/"user_view", user_view, label_edge=True, label_node=True, render_graphs = render_graphs) - generate_graph(self.get_output_dir()/'graphs'/"user_view_with_subworkflows", user_view_with_subworkflows, label_edge=True, label_node=True, render_graphs = render_graphs) + generate_graph(self.get_output_dir()/'graphs'/"user_view", user_view, label_edge=True, label_node=True, render_graphs = render_graphs, root = False, relevant_nodes = copy.deepcopy(relevant_processes)) + generate_graph(self.get_output_dir()/'graphs'/"user_view_with_subworkflows", user_view_with_subworkflows, label_edge=True, label_node=True, render_graphs = render_graphs, root = False, relevant_nodes = copy.deepcopy(relevant_processes)) #============================ #GENERATE LEVEL GRAPHS @@ -531,7 +531,6 @@ class Graph(): links_flattened = initia_link_dico_rec(self.dico_flattened) not_source_2_sink = [] node_2_sink = [] - for node in links_flattened: if(links_flattened[node]==[]): node_2_sink.append(node) @@ -555,6 +554,24 @@ class Graph(): for A, B in edges_create_cycles: links_flattened_source_sink[A].remove(B) + + #Here we need to update the sink source since some edges have been removed + #See phyloplace worklfow (all nodes have an output channel) -> none connected to sink + #TODO clean this cause it's just a copy of what is above + not_source_2_sink = [] + node_2_sink = [] + for node in links_flattened: + if(links_flattened_source_sink[node]==[]): + node_2_sink.append(node) + else: + not_source_2_sink+=links_flattened_source_sink[node] + not_source_2_sink = set(not_source_2_sink) + source_2_node = list(set(links_flattened.keys()).difference(not_source_2_sink)) + links_flattened_source_sink["source"], links_flattened_source_sink["sink"] = source_2_node, [] + for node in node_2_sink: + links_flattened_source_sink[node].append("sink") + + structure_type = "" if(len(edges_create_cycles)==0): structure_type = "DAG" @@ -617,6 +634,11 @@ class Graph(): with open(self.get_output_dir()/ "graphs/metadata_process_dependency_graph.json", 'w') as output_file : json.dump(dico, output_file, indent=4) + def get_metadata_user_view(self): + dico = self.get_metadata(self.user_view_with_subworkflows ) + with open(self.get_output_dir()/ "graphs/metadata_user_view.json", 'w') as output_file : + json.dump(dico, output_file, indent=4) + #def get_metadata_graph_wo_operations(self): # G = self.networkX_wo_operations # dico = self.get_metadata(G) diff --git a/src/nextflow_file.py b/src/nextflow_file.py index 30e3daa..b12bea6 100644 --- a/src/nextflow_file.py +++ b/src/nextflow_file.py @@ -715,6 +715,13 @@ class Nextflow_File(Nextflow_Building_Blocks): self.graph.initialise(processes_2_remove = processes_2_remove) self.graph.generate_level_graphs(render_graphs = render_graphs, label_edge=label_edge, label_node=label_node) + def generate_user_and_process_metadata(self): + #TODO -> this first line is added in reality it needs to be commented + self.graph.get_metadata_specification_graph() + self.graph.get_metadata_process_dependency_graph() + self.graph.get_metadata_user_view() + + def get_graph(self): return self.graph #def get_metadata_graph_wo_operations(self): diff --git a/src/outils_graph.py b/src/outils_graph.py index 577239d..e88d68e 100644 --- a/src/outils_graph.py +++ b/src/outils_graph.py @@ -2,11 +2,11 @@ import graphviz import copy import numpy as np -process_id = "<src.process.Process" +process_id = "src.process.Process" operation_id = "<src.operation.Operation" def is_process(node_id): - if(node_id[:len(process_id)]==process_id): + if(process_id in node_id): return True return False @@ -31,8 +31,11 @@ def add_nodes(dot, dico, label_node = True): except: fillcolor = "" if(label_node): + #here + #dot.node(n["id"], "", shape="circle", fillcolor=fillcolor, color = color, style="filled") dot.node(n["id"], n["name"], shape=n["shape"], xlabel= xlabel, fillcolor=fillcolor, color = color, style="filled") else: + #dot.node(n["id"], "", shape="circle", fillcolor=fillcolor, color = color, style="filled") dot.node(n["id"], n["name"], shape=n["shape"], fillcolor=fillcolor, color=color, style="filled") for sub in dico["subworkflows"]: @@ -56,6 +59,48 @@ def fill_dot(dot, dico, label_node = True, label_edge = True): add_edges(dot, dico, label_edge = label_edge) + +def add_nodes_metro(dot, dico, relevant_nodes = -1): + nodes_relevant = [] + #Recupering the relvant nodes + if(relevant_nodes == -1): + nodes_relevant = dico["nodes"] + else: + for n in dico["nodes"]: + if(n["name"] in relevant_nodes): + nodes_relevant.append(n) + + for n in dico["nodes"]: + if(n in nodes_relevant): + #dot.node(n["id"], "", shape="circle", style="filled") + dot.node(n["id"], "", xlabel = n["name"],shape="circle", style="filled") + else: + dot.node(n["id"], n["name"], shape="point", style="filled") + + for sub in dico["subworkflows"]: + with dot.subgraph(name="cluster"+sub) as c: + add_nodes_metro(c, dico["subworkflows"][sub], relevant_nodes = relevant_nodes ) + c.attr(label=sub) + +def add_edges_metro(dot, dico): + for e in dico["edges"]: + dot.edge(e['A'], e['B'], + arrowhead = "none", #https://graphviz.org/doc/info/arrows.html + arrowsize= "1", #If the arrowhead is 'none' this parameter doesn't change anything + penwidth= "2" + ) + + for sub in dico["subworkflows"]: + with dot.subgraph(name="cluster"+sub) as c: + add_edges_metro(dot, dico["subworkflows"][sub]) + +def metro_dot(dot, dico, relevant_nodes = -1): + dot.attr(rankdir='LR') + dot.attr(ranksep="2") + add_nodes_metro(dot, dico, relevant_nodes = relevant_nodes) + add_edges_metro(dot, dico) + + def fill_dot_2(dot, dico, label_node = True, label_edge = True): def add_nodes(dot, dico, label_node = True): for n in dico["nodes"]: @@ -92,10 +137,16 @@ def fill_dot_2(dot, dico, label_node = True, label_edge = True): c.attr(label=sub) -def generate_graph_dot(filename, dico, label_node = True, label_edge = True, render_graphs = True): - dot = graphviz.Digraph(filename=filename, format='png', comment="temp") - fill_dot(dot, dico, label_node, label_edge) +def generate_graph_dot(filename, dico, label_node = True, label_edge = True, render_graphs = True, relevant_nodes = -1): + #dot = graphviz.Digraph(filename=filename, format='png', comment="temp") + dot = graphviz.Digraph() + if(relevant_nodes==-1): + fill_dot(dot, dico, label_node, label_edge) + else: + metro_dot(dot, dico, relevant_nodes = relevant_nodes) dot.save(filename=f'{filename}.dot') + dot.format = 'dot' + dot.render(filename=f'{filename}_pos') if(render_graphs): dot.render(filename=f'{filename}.dot', outfile=f'{filename}.png') @@ -143,11 +194,26 @@ def generate_graph_mermaid(filename, dico, label_node = True, label_edge = True, with open(f"{filename}.mmd", "w") as text_file: text_file.write(txt) - - -def generate_graph(filename, dico, label_node = True, label_edge = True, render_graphs = True, dot = True, mermaid = True): +def get_number_simple_loops(link_dico): + nb = 0 + for node in link_dico: + if(node in link_dico[node]): + nb += 1 + return nb + +def generate_graph(filename, param_dico, label_node = True, label_edge = True, render_graphs = True, dot = True, mermaid = True, root = False, relevant_nodes = -1): + dico = copy.deepcopy(param_dico) + if(root): + outputs = get_output_nodes(dico) + inputs = get_input_nodes(dico) + dico["nodes"].append({"id": "input","name": "i","shape": "triangle", "fillcolor":"#ffffff"}) + dico["nodes"].append({"id": "output","name": "o","shape": "triangle", "fillcolor":"#ffffff"}) + for out in outputs: + dico["edges"].append({'A':out, 'B':'output', "label": ""}) + for input in inputs: + dico["edges"].append({'A':"input", 'B':input, "label": ""}) if(dot): - generate_graph_dot(filename, dico, label_node, label_edge, render_graphs) + generate_graph_dot(filename, dico, label_node, label_edge, render_graphs, relevant_nodes = relevant_nodes) if(mermaid): generate_graph_mermaid(filename, dico, label_node, label_edge, render_graphs) @@ -306,7 +372,6 @@ def topological_sort(graph): #A variant of this answer https://stackoverflow.com/a/5164820 def get_number_paths_source_2_sink(graph): topo_sort = topological_sort(graph) - dict_paths_from_node_2_sink = {} for node in topo_sort: dict_paths_from_node_2_sink[node] = 1 @@ -429,11 +494,10 @@ def get_name_from_id(dico, ID): return names def get_output_nodes(dico): - N = [] - for n in dico["nodes"]: - N.append(n['id']) + edges = get_all_edges(dico) + N = get_all_nodes_id(dico) none_outputs = [] - for e in dico["edges"]: + for e in edges: none_outputs.append(e['A']) outputs = list(set(N) - set(none_outputs)) #outputs_names = [] @@ -442,11 +506,11 @@ def get_output_nodes(dico): return outputs def get_input_nodes(dico): - N = [] - for n in dico["nodes"]: - N.append(n['id']) + edges = get_all_edges(dico) + N = get_all_nodes_id(dico) + none_inputs = [] - for e in dico["edges"]: + for e in edges: none_inputs.append(e['B']) inputs = list(set(N) - set(none_inputs)) #inputs_names = [] @@ -776,14 +840,15 @@ def relev_user_view_builder(dico_param, relevant_modules): added_edges.append(edge_string) #The output nodes are the nodes which their outputs aren't connected to anything else - outputs = get_output_nodes(new_dico) - inputs = get_input_nodes(new_dico) - new_dico["nodes"].append({"id": "input","name": "i","shape": "triangle", "fillcolor":"#ffffff"}) - new_dico["nodes"].append({"id": "output","name": "o","shape": "triangle", "fillcolor":"#ffffff"}) - for out in outputs: - new_dico["edges"].append({'A':out, 'B':'output', "label": ""}) - for input in inputs: - new_dico["edges"].append({'A':"input", 'B':input, "label": ""}) + #TODO -> remove these comments if you want to root the graph + #outputs = get_output_nodes(new_dico) + #inputs = get_input_nodes(new_dico) + #new_dico["nodes"].append({"id": "input","name": "i","shape": "triangle", "fillcolor":"#ffffff"}) + #new_dico["nodes"].append({"id": "output","name": "o","shape": "triangle", "fillcolor":"#ffffff"}) + #for out in outputs: + # new_dico["edges"].append({'A':out, 'B':'output', "label": ""}) + #for input in inputs: + # new_dico["edges"].append({'A':"input", 'B':input, "label": ""}) return new_dico, new_nodes #This function fills the new_dico with the flattened_dico but @@ -805,9 +870,9 @@ def add_subworkflows_2_dico(full_dico, flattened_dico, add_root_nodes = True): new_dico = add_nodes(full_dico, flattened_dico) new_dico["edges"] = flattened_dico["edges"] - if(add_root_nodes): - new_dico["nodes"].append({"id": "input","name": "i","shape": "triangle", "fillcolor":"#ffffff"}) - new_dico["nodes"].append({"id": "output","name": "o","shape": "triangle", "fillcolor":"#ffffff"}) + #if(add_root_nodes): + # new_dico["nodes"].append({"id": "input","name": "i","shape": "triangle", "fillcolor":"#ffffff"}) + # new_dico["nodes"].append({"id": "output","name": "o","shape": "triangle", "fillcolor":"#ffffff"}) return new_dico def get_max_level(dico, val = 0): @@ -825,6 +890,20 @@ def fill_node_2_subworkflows(dico, node_2_subworkflows, back_log_subworklows = [ for sub in dico["subworkflows"]: fill_node_2_subworkflows(dico["subworkflows"][sub], node_2_subworkflows, back_log_subworklows+[sub]) +def get_all_edges(dico): + edges = [] + edges+=dico["edges"] + for sub in dico["subworkflows"]: + edges+=get_all_edges(dico["subworkflows"][sub]) + return edges + +def get_all_nodes_id(dico): + nodes = [] + for n in dico["nodes"]: + nodes.append(n["id"]) + for sub in dico["subworkflows"]: + nodes+=get_all_nodes_id(dico["subworkflows"][sub]) + return nodes def get_graph_level_l(dico, level): @@ -856,12 +935,6 @@ def get_graph_level_l(dico, level): new_dico = add_nodes(dico, level, current_level=0) - def get_all_edges(dico): - edges = [] - edges+=dico["edges"] - for sub in dico["subworkflows"]: - edges+=get_all_edges(dico["subworkflows"][sub]) - return edges already_added = [] for edge in get_all_edges(dico): diff --git a/src/workflow.py b/src/workflow.py index 61df16e..7de7c82 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -596,4 +596,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen return self.nextflow_file.node_2_subworkflows_user_view() def check_fake_dependency_user_view(self): - return self.nextflow_file.check_fake_dependency_user_view() \ No newline at end of file + return self.nextflow_file.check_fake_dependency_user_view() + + def generate_user_and_process_metadata(self): + self.nextflow_file.generate_user_and_process_metadata() \ No newline at end of file -- GitLab