From 613e6a5ccec50b8739d136add9780ed5988b36af Mon Sep 17 00:00:00 2001
From: Fize Jacques <jacques.fize@cirad.fr>
Date: Fri, 12 Feb 2021 14:03:33 +0100
Subject: [PATCH] Add Visualisation tools of link prediction results+ Debug

---
 draw_visu.py                     | 148 +++++++++++++++++++++++++++++++
 generate_theoric_random_graph.py |  38 ++++----
 lib/random.py                    |   8 +-
 3 files changed, 173 insertions(+), 21 deletions(-)
 create mode 100644 draw_visu.py

diff --git a/draw_visu.py b/draw_visu.py
new file mode 100644
index 0000000..04e34c3
--- /dev/null
+++ b/draw_visu.py
@@ -0,0 +1,148 @@
+# coding = utf-8
+
+import pandas as pd
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+import re
+import os
+
+import networkx as nx
+
+
+def get_graph_attr(fn, graph_dir):
+    """Return the graph-level attribute dict of the GML file `fn` located in `graph_dir`."""
+    gml_path = os.path.join(graph_dir, fn)
+    if os.path.exists(gml_path):
+        return nx.read_gml(gml_path).graph
+    raise FileNotFoundError(gml_path)  # the missing path is carried as the exception argument
+
+
+def get_sample_id_old(ch):
+    """Return, as a string, the sample id fused to the end of the first run of digits found in `ch`."""
+    id_graph = re.findall(r"\d+", ch)[0]  # raw pattern: "\d" in a plain literal is an invalid escape sequence
+    # A 3-digit run keeps its last two digits; a run of any other length keeps only its last digit.
+    # An IndexError is raised when `ch` contains no digit at all.
+    return id_graph[-2:] if len(id_graph) == 3 else id_graph[-1:]
+
+
+def get_sample_id(fn, file_format="gml"):  # sample id = integer after the last "_" once the ".<file_format>" suffix is removed
+    return int((fn[:-len(file_format) - 1] if fn.endswith("." + file_format) else fn).split("_")[-1])  # str.strip would drop a character set, not the suffix
+
+
+def load_data(fn, graph_dir):
+    """Read the tab-separated results file `fn`; add type_graph, parameters, sample and type_method columns."""
+    df = pd.read_csv(fn, sep="\t")
+    df["type_graph"] = df.filename.apply(lambda x: re.sub(r"_\d+\.gml", "", x[6:]).replace("_", " "))
+    df["parameters"] = df.filename.apply(lambda x: get_graph_attr(x, graph_dir))
+    df["sample"] = df.filename.apply(get_sample_id_old)
+    non_ne = {'random_prediction', 'common_neighbours', 'jaccard_coefficient', 'adamic_adar_index',
+              'preferential_attachment', 'resource_allocation_index', 'stochastic_block_model',
+              'stochastic_block_model_degree_corrected', 'spatial_link_prediction'}
+    return df.assign(type_method=df.name.apply(lambda x: "heuristic" if x in non_ne else "network_embedding_based"))
+
+
+def set_custom_palette(x, y, max_color='red', close_color='turquoise', other_color='lightgrey'):
+    """Return one colour per entry of `x`, chosen from the mean of `y` within each `x` group.
+
+    A group whose mean equals the highest group mean gets `max_color`; a group whose mean lies strictly
+    within 0.01 + the median gap to that highest mean gets `close_color`; other groups get `other_color`.
+    """
+    def pick(mean_value, best, tolerance):
+        if mean_value == best:
+            return max_color
+        if best - tolerance < mean_value < best + tolerance:
+            return close_color
+        return other_color
+
+    group_means = pd.concat((x, y), axis=1).groupby(x.name, as_index=False).mean()
+    best = group_means[y.name].max()
+    # The tolerance grows with the median distance between the group means and the best one.
+    tolerance = 0.01 + (best - group_means[y.name]).median()
+    # With as_index=False the frame holds the group column and the mean column, so rows read as pairs.
+    mean_by_group = dict(group_means.values)
+    colour_of = {key: pick(value, best, tolerance) for key, value in mean_by_group.items()}
+    return [colour_of[label] for label in x]
+
+def highlight_barplot(x, y, **kwargs):
+    """Draw `sns.barplot(x=x, y=y)` coloured by `set_custom_palette(x, y)`; other keyword arguments are forwarded."""
+    # Always overwrite the `palette` key instead of passing it next to **kwargs: a caller-supplied
+    # falsy palette (e.g. palette=None) used to raise "got multiple values for keyword argument".
+    kwargs["palette"] = set_custom_palette(x, y)
+    sns.barplot(x=x, y=y, **kwargs)
+
+class DrawingResults():
+    """Seaborn FacetGrid views over the link-prediction results table stored in `self.df`.
+
+    Plotting methods accept `**draw_args`: `figsize`, `rotation` (x tick labels, default 90),
+    `output_filename` (save the grid instead of showing it), `save_param` (dict forwarded to `savefig`)
+    and `plot_func` (callable given to `FacetGrid.map`, default `sns.barplot`; histograms ignore it).
+    """
+
+    _AGG_KEYS = "name nb_edge size type_graph type_method".split()
+
+    def __init__(self, df_results):
+        self.df = df_results
+
+    @staticmethod
+    def _check_agg_func(agg_func):
+        """Raise ValueError unless `agg_func` names one of the supported groupby reductions."""
+        if agg_func not in "mean max min std".split():
+            raise ValueError("Method {0} does not exists in pandas.core.groupby.generic.DataFrameGroupBy".format(agg_func))
+
+    def __draw(self, g, **kwargs):
+        """Resize the grid if asked, rotate the x tick labels, then save to `output_filename` or show the figure."""
+        if "figsize" in kwargs:
+            g.fig.set_size_inches(*kwargs["figsize"])
+        for ax in g.axes.flat:  # plain loop: the calls are made for their side effect only
+            plt.setp(ax.get_xticklabels(), rotation=kwargs.get("rotation", 90))
+        g.fig.subplots_adjust(wspace=.09, hspace=.02)
+
+        if kwargs.get("output_filename", None):
+            save_params = {}
+            if isinstance(kwargs.get("save_param"), dict):  # dict subclasses are accepted as well
+                save_params.update(kwargs["save_param"])
+            g.savefig(kwargs["output_filename"], **save_params)
+        else:
+            plt.show()
+
+    def metric_per_nodes_edges(self, type_graph=None, agg_func=None, metric="auroc", **draw_args):
+        """Plot `metric` per method on a size x nb_edge grid, optionally aggregated and limited to `type_graph`."""
+        new_df = self.df.copy()
+        if agg_func:
+            self._check_agg_func(agg_func)
+            new_df = getattr(new_df.groupby(self._AGG_KEYS, as_index=False), agg_func)()
+
+        if type_graph and type_graph in new_df.type_graph.unique():  # an unknown type_graph leaves the data unfiltered
+            new_df = new_df[new_df.type_graph == type_graph].copy()
+
+        g = sns.FacetGrid(new_df, row="size", col="nb_edge", margin_titles=True, height=2.5)
+        g.map(draw_args.get('plot_func', sns.barplot), "name", metric)
+        return self.__draw(g, **draw_args)
+
+    def metric_global(self, agg_func=None, metric="auroc", **draw_args):
+        """Plot `metric` per method, one column per graph type; `agg_func` is applied per setting, then overall."""
+        new_df = self.df.copy()
+        if agg_func:
+            self._check_agg_func(agg_func)
+            new_df = getattr(self.df.groupby(self._AGG_KEYS, as_index=False), agg_func)()
+            new_df = getattr(new_df.groupby("name type_graph type_method".split(), as_index=False), agg_func)()
+
+        g = sns.FacetGrid(new_df, col="type_graph", margin_titles=True, height=2.5)
+        g.map(draw_args.get('plot_func', sns.barplot), "name", metric, palette="tab20")
+        return self.__draw(g, **draw_args)
+
+    def caracteristic_distribution(self, caracteristic, **draw_args):
+        """Plot a histogram of column `caracteristic` for each graph type, wrapping after four facets."""
+        g = sns.FacetGrid(self.df, col="type_graph", col_wrap=4)
+        g.map(sns.histplot, caracteristic)
+        return self.__draw(g, **draw_args)
+
+    def parameter_impact(self, type_graph, parameter, second_parameter="size", metric="auroc", **draw_args):
+        """Plot `metric` per method for one graph type, faceted by `parameter` (columns) and `second_parameter` (rows)."""
+        _df = self.df[self.df.type_graph == type_graph].copy()
+        _df[parameter] = _df.parameters.apply(lambda x: x[parameter])  # look `parameter` up in each row's parameters entry
+        g = sns.FacetGrid(_df, row=second_parameter, col=parameter, margin_titles=True, height=2.5)
+        g.map(draw_args.get('plot_func', sns.barplot), "name", metric, palette="tab20")
+        return self.__draw(g, **draw_args)
+
diff --git a/generate_theoric_random_graph.py b/generate_theoric_random_graph.py
index d7f5796..65c4555 100644
--- a/generate_theoric_random_graph.py
+++ b/generate_theoric_random_graph.py
@@ -19,30 +19,30 @@ args = parser.parse_args()
 
 GRAPH_SIZE = [80,800]
 EDGE_SIZE = [2,4,5]
-sample_per_params  = 4
+sample_per_params  = 10
 
 OUTPUT_DIR = args.output_dir
 if not os.path.exists(OUTPUT_DIR):
-    raise FileExistsError("Output directory does not exists !")
+    os.makedirs(OUTPUT_DIR, exist_ok=True)  # same path as the check above; tolerate the directory appearing meanwhile
 
 
 parameters = {
-    # "stochastic_block_model_graph": {
-    #     "nb_nodes":GRAPH_SIZE,
-    #     "nb_edges":EDGE_SIZE,
-    #     "nb_com" :[2,5,8,16],
-    #     "percentage_edge_betw":[0.1,0.01]
-    # },
-    # "ER_graph": {
-    #     "nb_nodes":GRAPH_SIZE,
-    #     "nb_edges":EDGE_SIZE
-    # },
-    # "powerlaw_graph": {  # configuration_model
-    #     "nb_nodes":GRAPH_SIZE,
-    #     "nb_edges":EDGE_SIZE,
-    #     "exponent":[2,3],
-    #     "tries":[100]
-    # },
+    "stochastic_block_model_graph": {
+        "nb_nodes":GRAPH_SIZE,
+        "nb_edges":EDGE_SIZE,
+        "nb_com" :[2,5,8,16],
+        "percentage_edge_betw":[0.1,0.01]
+    },
+    "ER_graph": {
+        "nb_nodes":GRAPH_SIZE,
+        "nb_edges":EDGE_SIZE
+    },
+    "powerlaw_graph": {  # configuration_model
+        "nb_nodes":GRAPH_SIZE,
+        "nb_edges":EDGE_SIZE,
+        "exponent":[2,3],
+        "tries":[100]
+    },
     "spatial_graph":{
         "nb_nodes":GRAPH_SIZE,
         "nb_edges":EDGE_SIZE,
@@ -66,7 +66,7 @@ for method,args in pbar:
             try:
                 G = func(**params)
                 G.graph.update(params)
-                nx.write_gml(G, OUTPUT_DIR+"/graph_{method}_{ix}{sp_id}.gml".format(method=method,ix=ix,sp_id=sp_id),stringizer=str)
+                nx.write_gml(G, OUTPUT_DIR+"/graph_{method}_{ix}_{sp_id}.gml".format(method=method,ix=ix,sp_id=sp_id),stringizer=str)
             except Exception as e:
                 print(e)
                 print("Can't generate graphs using these parameters")
diff --git a/lib/random.py b/lib/random.py
index 9b95de8..55867e2 100644
--- a/lib/random.py
+++ b/lib/random.py
@@ -150,7 +150,7 @@ def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=1000, min_deg=0):
     return G
 
 
-def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: np.linalg.norm(a - b), self_link=False):
+def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: np.linalg.norm(a - b), self_link=False,weighted = False):
     """
     Generate a spatial graph with a specific number of vertices and edges
     Parameters
@@ -202,6 +202,7 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
 
     nodes = np.arange(nb_nodes).astype(int)
     sizes = [len(x) for x in np.array_split(np.arange(nb_edges), nb_nodes)]
+
     new_df = df[(df.src == nodes[0]) | (df.tar == nodes[0])].sample(n=sizes[0], weights="weight").copy()
     add_register(new_df.hash.values)
     df = df[~in_register(df.hash.values)]
@@ -212,7 +213,10 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
         add_register(new_df.hash.values)
         df = df[~in_register(df.hash.values)]
 
-    G = nx.from_pandas_edgelist(new_df, source="src", target="tar", edge_attr="weight")
+    if weighted:
+        G = nx.from_pandas_edgelist(new_df, source="src", target="tar", edge_attr="weight")
+    else:
+        G = nx.from_pandas_edgelist(new_df, source="src", target="tar")
     for n in list(G.nodes()): G.nodes[n]["pos"] = coords[n]
     return G
 
-- 
GitLab