From 0315c99af192318bea5c76b1fc5a268887ceab12 Mon Sep 17 00:00:00 2001
From: George Marchment <georgemarchment@yahoo.fr>
Date: Tue, 5 Nov 2024 14:47:13 +0100
Subject: [PATCH] added functionality + started work on metro map

---
 src/graph.py         |  30 +++++++--
 src/nextflow_file.py |   7 +++
 src/outils_graph.py  | 143 ++++++++++++++++++++++++++++++++-----------
 src/workflow.py      |   5 +-
 4 files changed, 145 insertions(+), 40 deletions(-)

diff --git a/src/graph.py b/src/graph.py
index 7c02415..0eee6a7 100644
--- a/src/graph.py
+++ b/src/graph.py
@@ -208,7 +208,7 @@ class Graph():
 
     
     def render_graph_wo_operations(self, filename = "process_dependency_graph", render_graphs = True):
-        generate_graph(self.get_output_dir()/'graphs'/filename, self.dico_process_dependency_graph, render_graphs = render_graphs, label_edge=False, label_node=False)
+        generate_graph(self.get_output_dir()/'graphs'/filename, self.dico_process_dependency_graph, render_graphs = render_graphs, label_edge=False, label_node=False, root = True)
     
 
     def get_dependency_graph(self):
@@ -314,8 +314,8 @@ class Graph():
     def generate_user_view(self, relevant_processes = [], render_graphs = True):
         user_view, user_view_with_subworkflows = self.get_user_view_graph(relevant_processes = relevant_processes)
         self.user_view_with_subworkflows = user_view_with_subworkflows
-        generate_graph(self.get_output_dir()/'graphs'/"user_view", user_view, label_edge=True, label_node=True, render_graphs = render_graphs)
-        generate_graph(self.get_output_dir()/'graphs'/"user_view_with_subworkflows", user_view_with_subworkflows, label_edge=True, label_node=True, render_graphs = render_graphs)
+        generate_graph(self.get_output_dir()/'graphs'/"user_view", user_view, label_edge=True, label_node=True, render_graphs = render_graphs, root = False, relevant_nodes = copy.deepcopy(relevant_processes))
+        generate_graph(self.get_output_dir()/'graphs'/"user_view_with_subworkflows", user_view_with_subworkflows, label_edge=True, label_node=True, render_graphs = render_graphs, root = False, relevant_nodes = copy.deepcopy(relevant_processes))
 
     #============================
     #GENERATE LEVEL GRAPHS
@@ -531,7 +531,6 @@ class Graph():
         links_flattened = initia_link_dico_rec(self.dico_flattened)
         not_source_2_sink = []
         node_2_sink = []
-
         for node in links_flattened:
             if(links_flattened[node]==[]):
                 node_2_sink.append(node)
@@ -555,6 +554,24 @@ class Graph():
         for A, B in edges_create_cycles:
             links_flattened_source_sink[A].remove(B)
 
+
+        #Here we need to update the source and sink links since some edges have been removed
+        #See the phyloplace workflow (all nodes have an output channel) -> none were connected to the sink
+        #TODO clean this up because it is just a copy of the code above
+        not_source_2_sink = []
+        node_2_sink = []
+        for node in links_flattened:
+            if(links_flattened_source_sink[node]==[]):
+                node_2_sink.append(node)
+            else:
+                not_source_2_sink+=links_flattened_source_sink[node]
+        not_source_2_sink = set(not_source_2_sink)
+        source_2_node = list(set(links_flattened.keys()).difference(not_source_2_sink))
+        links_flattened_source_sink["source"], links_flattened_source_sink["sink"] = source_2_node, []
+        for node in node_2_sink:
+            links_flattened_source_sink[node].append("sink")    
+        
+
         structure_type = ""
         if(len(edges_create_cycles)==0):
             structure_type = "DAG"
@@ -617,6 +634,11 @@ class Graph():
         with open(self.get_output_dir()/ "graphs/metadata_process_dependency_graph.json", 'w') as output_file :
             json.dump(dico, output_file, indent=4)
 
+    def get_metadata_user_view(self):
+        dico = self.get_metadata(self.user_view_with_subworkflows )
+        with open(self.get_output_dir()/ "graphs/metadata_user_view.json", 'w') as output_file :
+            json.dump(dico, output_file, indent=4)
+
     #def get_metadata_graph_wo_operations(self):
     #    G = self.networkX_wo_operations
     #    dico = self.get_metadata(G)
diff --git a/src/nextflow_file.py b/src/nextflow_file.py
index 30e3daa..b12bea6 100644
--- a/src/nextflow_file.py
+++ b/src/nextflow_file.py
@@ -715,6 +715,13 @@ class Nextflow_File(Nextflow_Building_Blocks):
         self.graph.initialise(processes_2_remove = processes_2_remove)
         self.graph.generate_level_graphs(render_graphs = render_graphs, label_edge=label_edge, label_node=label_node)
 
+    def generate_user_and_process_metadata(self):
+        #TODO -> the first line below was added here; in reality it needs to be commented out
+        self.graph.get_metadata_specification_graph()
+        self.graph.get_metadata_process_dependency_graph()
+        self.graph.get_metadata_user_view()
+
+
     def get_graph(self):
         return self.graph
     #def get_metadata_graph_wo_operations(self):
diff --git a/src/outils_graph.py b/src/outils_graph.py
index 577239d..e88d68e 100644
--- a/src/outils_graph.py
+++ b/src/outils_graph.py
@@ -2,11 +2,11 @@ import graphviz
 import copy
 import numpy as np
 
-process_id = "<src.process.Process"
+process_id = "src.process.Process"
 operation_id = "<src.operation.Operation"
 
 def is_process(node_id):
-    if(node_id[:len(process_id)]==process_id):
+    if(process_id in node_id):
         return True
     return False
 
@@ -31,8 +31,11 @@ def add_nodes(dot, dico, label_node = True):
         except:
             fillcolor = ""
         if(label_node):
+            #here
+            #dot.node(n["id"], "", shape="circle", fillcolor=fillcolor, color = color, style="filled")
             dot.node(n["id"], n["name"], shape=n["shape"], xlabel= xlabel, fillcolor=fillcolor, color = color, style="filled")
         else:
+            #dot.node(n["id"], "", shape="circle", fillcolor=fillcolor, color = color, style="filled")
             dot.node(n["id"], n["name"], shape=n["shape"], fillcolor=fillcolor, color=color, style="filled")
 
     for sub in dico["subworkflows"]:
@@ -56,6 +59,48 @@ def fill_dot(dot, dico, label_node = True, label_edge = True):
     add_edges(dot, dico, label_edge = label_edge)
 
 
+
+def add_nodes_metro(dot, dico, relevant_nodes = -1):
+    nodes_relevant = []
+    #Retrieving the relevant nodes
+    if(relevant_nodes == -1):
+        nodes_relevant = dico["nodes"]
+    else:
+        for n in dico["nodes"]:
+            if(n["name"] in relevant_nodes):
+                nodes_relevant.append(n)
+    
+    for n in dico["nodes"]:
+        if(n in nodes_relevant):
+            #dot.node(n["id"], "", shape="circle", style="filled")
+            dot.node(n["id"], "", xlabel = n["name"],shape="circle", style="filled")
+        else:
+            dot.node(n["id"], n["name"], shape="point", style="filled")
+
+    for sub in dico["subworkflows"]:
+        with dot.subgraph(name="cluster"+sub) as c:
+            add_nodes_metro(c, dico["subworkflows"][sub], relevant_nodes = relevant_nodes )
+            c.attr(label=sub)
+
+def add_edges_metro(dot, dico):
+    for e in dico["edges"]:
+        dot.edge(e['A'], e['B'], 
+                 arrowhead = "none", #https://graphviz.org/doc/info/arrows.html
+                 arrowsize= "1", #If the arrowhead is 'none' this parameter doesn't change anything
+                 penwidth= "2"
+                 )
+
+    for sub in dico["subworkflows"]:
+        with dot.subgraph(name="cluster"+sub) as c:
+            add_edges_metro(dot, dico["subworkflows"][sub])
+
+def metro_dot(dot, dico, relevant_nodes = -1):
+    dot.attr(rankdir='LR')
+    dot.attr(ranksep="2") 
+    add_nodes_metro(dot, dico, relevant_nodes = relevant_nodes)
+    add_edges_metro(dot, dico)
+
+
 def fill_dot_2(dot, dico, label_node = True, label_edge = True):
     def add_nodes(dot, dico, label_node = True):
         for n in dico["nodes"]:
@@ -92,10 +137,16 @@ def fill_dot_2(dot, dico, label_node = True, label_edge = True):
             c.attr(label=sub)
 
 
-def generate_graph_dot(filename, dico, label_node = True, label_edge = True, render_graphs = True):
-    dot = graphviz.Digraph(filename=filename, format='png', comment="temp")
-    fill_dot(dot, dico, label_node, label_edge)
+def generate_graph_dot(filename, dico, label_node = True, label_edge = True, render_graphs = True, relevant_nodes = -1):
+    #dot = graphviz.Digraph(filename=filename, format='png', comment="temp")
+    dot = graphviz.Digraph()
+    if(relevant_nodes==-1):
+        fill_dot(dot, dico, label_node, label_edge)
+    else:
+        metro_dot(dot, dico, relevant_nodes = relevant_nodes)
     dot.save(filename=f'{filename}.dot')
+    dot.format = 'dot'
+    dot.render(filename=f'{filename}_pos')
     if(render_graphs):
         dot.render(filename=f'{filename}.dot', outfile=f'{filename}.png')
 
@@ -143,11 +194,26 @@ def generate_graph_mermaid(filename, dico, label_node = True, label_edge = True,
     with open(f"{filename}.mmd", "w") as text_file:
         text_file.write(txt)
 
-
-
-def generate_graph(filename, dico, label_node = True, label_edge = True, render_graphs = True, dot = True, mermaid = True):
+def get_number_simple_loops(link_dico):
+    nb = 0
+    for node in link_dico:
+        if(node in link_dico[node]):
+            nb += 1
+    return nb
+
+def generate_graph(filename, param_dico, label_node = True, label_edge = True, render_graphs = True, dot = True, mermaid = True, root = False, relevant_nodes = -1):
+    dico = copy.deepcopy(param_dico)
+    if(root):
+        outputs = get_output_nodes(dico)
+        inputs = get_input_nodes(dico)
+        dico["nodes"].append({"id": "input","name": "i","shape": "triangle", "fillcolor":"#ffffff"})
+        dico["nodes"].append({"id": "output","name": "o","shape": "triangle", "fillcolor":"#ffffff"})
+        for out in outputs:
+            dico["edges"].append({'A':out, 'B':'output', "label": ""})
+        for input in inputs:
+            dico["edges"].append({'A':"input", 'B':input, "label": ""})
     if(dot):
-        generate_graph_dot(filename, dico, label_node, label_edge, render_graphs)
+        generate_graph_dot(filename, dico, label_node, label_edge, render_graphs, relevant_nodes = relevant_nodes)
     if(mermaid):
         generate_graph_mermaid(filename, dico, label_node, label_edge, render_graphs)
 
@@ -306,7 +372,6 @@ def topological_sort(graph):
 #A variant of this answer https://stackoverflow.com/a/5164820
 def get_number_paths_source_2_sink(graph):
     topo_sort  = topological_sort(graph)
-
     dict_paths_from_node_2_sink = {}
     for node in topo_sort:
         dict_paths_from_node_2_sink[node] = 1
@@ -429,11 +494,10 @@ def get_name_from_id(dico, ID):
     return names
 
 def get_output_nodes(dico):
-    N = []
-    for n in dico["nodes"]:
-        N.append(n['id'])
+    edges = get_all_edges(dico)
+    N = get_all_nodes_id(dico)
     none_outputs = []
-    for e in dico["edges"]:
+    for e in edges:
         none_outputs.append(e['A'])
     outputs = list(set(N) - set(none_outputs))
     #outputs_names = []
@@ -442,11 +506,11 @@ def get_output_nodes(dico):
     return outputs
 
 def get_input_nodes(dico):
-    N = []
-    for n in dico["nodes"]:
-        N.append(n['id'])
+    edges = get_all_edges(dico)
+    N = get_all_nodes_id(dico)
+
     none_inputs = []
-    for e in dico["edges"]:
+    for e in edges:
         none_inputs.append(e['B'])
     inputs = list(set(N) - set(none_inputs))
     #inputs_names = []
@@ -776,14 +840,15 @@ def relev_user_view_builder(dico_param, relevant_modules):
                         added_edges.append(edge_string)
     
     #The output nodes are the nodes which their outputs aren't connected to anything else 
-    outputs = get_output_nodes(new_dico)
-    inputs = get_input_nodes(new_dico)
-    new_dico["nodes"].append({"id": "input","name": "i","shape": "triangle", "fillcolor":"#ffffff"})
-    new_dico["nodes"].append({"id": "output","name": "o","shape": "triangle", "fillcolor":"#ffffff"})
-    for out in outputs:
-        new_dico["edges"].append({'A':out, 'B':'output', "label": ""})
-    for input in inputs:
-        new_dico["edges"].append({'A':"input", 'B':input, "label": ""})
+    #TODO -> uncomment the lines below if you want to root the graph
+    #outputs = get_output_nodes(new_dico)
+    #inputs = get_input_nodes(new_dico)
+    #new_dico["nodes"].append({"id": "input","name": "i","shape": "triangle", "fillcolor":"#ffffff"})
+    #new_dico["nodes"].append({"id": "output","name": "o","shape": "triangle", "fillcolor":"#ffffff"})
+    #for out in outputs:
+    #    new_dico["edges"].append({'A':out, 'B':'output', "label": ""})
+    #for input in inputs:
+    #    new_dico["edges"].append({'A':"input", 'B':input, "label": ""})
     return new_dico, new_nodes
 
 #This function fills the new_dico with the flattened_dico but 
@@ -805,9 +870,9 @@ def add_subworkflows_2_dico(full_dico, flattened_dico, add_root_nodes = True):
             
     new_dico = add_nodes(full_dico, flattened_dico)
     new_dico["edges"] = flattened_dico["edges"]
-    if(add_root_nodes):
-        new_dico["nodes"].append({"id": "input","name": "i","shape": "triangle", "fillcolor":"#ffffff"})
-        new_dico["nodes"].append({"id": "output","name": "o","shape": "triangle", "fillcolor":"#ffffff"})
+    #if(add_root_nodes):
+    #    new_dico["nodes"].append({"id": "input","name": "i","shape": "triangle", "fillcolor":"#ffffff"})
+    #    new_dico["nodes"].append({"id": "output","name": "o","shape": "triangle", "fillcolor":"#ffffff"})
     return new_dico
 
 def get_max_level(dico, val = 0):
@@ -825,6 +890,20 @@ def fill_node_2_subworkflows(dico, node_2_subworkflows, back_log_subworklows = [
     for sub in dico["subworkflows"]:
         fill_node_2_subworkflows(dico["subworkflows"][sub], node_2_subworkflows, back_log_subworklows+[sub])
 
+def get_all_edges(dico):
+    edges = []
+    edges+=dico["edges"]
+    for sub in dico["subworkflows"]:
+        edges+=get_all_edges(dico["subworkflows"][sub])
+    return edges
+
+def get_all_nodes_id(dico):
+    nodes = []
+    for n in dico["nodes"]:
+        nodes.append(n["id"])
+    for sub in dico["subworkflows"]:
+        nodes+=get_all_nodes_id(dico["subworkflows"][sub])
+    return nodes
 
 def get_graph_level_l(dico, level):
 
@@ -856,12 +935,6 @@ def get_graph_level_l(dico, level):
     
     new_dico = add_nodes(dico, level, current_level=0)
  
-    def get_all_edges(dico):
-        edges = []
-        edges+=dico["edges"]
-        for sub in dico["subworkflows"]:
-            edges+=get_all_edges(dico["subworkflows"][sub])
-        return edges
 
     already_added = []
     for edge in get_all_edges(dico):
diff --git a/src/workflow.py b/src/workflow.py
index 61df16e..7de7c82 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -596,4 +596,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
         return self.nextflow_file.node_2_subworkflows_user_view()
     
     def check_fake_dependency_user_view(self):
-        return self.nextflow_file.check_fake_dependency_user_view()
\ No newline at end of file
+        return self.nextflow_file.check_fake_dependency_user_view()
+    
+    def generate_user_and_process_metadata(self):
+        self.nextflow_file.generate_user_and_process_metadata()
\ No newline at end of file
-- 
GitLab