Skip to content
Snippets Groups Projects
Commit 613e6a5c authored by Fize Jacques's avatar Fize Jacques
Browse files

Add Visualisation tools of link prediction results+ Debug

parent 111adf7f
No related branches found
No related tags found
No related merge requests found
# coding = utf-8
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re
import os
import networkx as nx
def get_graph_attr(fn, graph_dir):
    """
    Read the graph-level attributes stored in a GML file.

    Parameters
    ----------
    fn : str
        File name of the GML file, relative to ``graph_dir``.
    graph_dir : str
        Directory that contains the graph files.

    Returns
    -------
    dict
        The ``graph`` attribute dictionary of the graph loaded with
        ``networkx.read_gml``.

    Raises
    ------
    FileNotFoundError
        If the joined path does not exist; the path is the exception argument.
    """
    gml_path = os.path.join(graph_dir, fn)
    if os.path.exists(gml_path):
        return nx.read_gml(gml_path).graph
    raise FileNotFoundError(gml_path)
def get_sample_id_old(ch):
    """
    Extract the sample identifier from a file name (legacy naming scheme).

    The first run of digits found in ``ch`` is used. When that run is exactly
    three characters long its last two characters are returned; otherwise only
    its last character is returned.

    Parameters
    ----------
    ch : str
        File name (or any string) containing at least one digit.

    Returns
    -------
    str
        The sample identifier, as a string of one or two digits.

    Raises
    ------
    IndexError
        If ``ch`` does not contain any digit.
    """
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    id_graph = re.findall(r"\d+", ch)[0]
    return id_graph[-2:] if len(id_graph) == 3 else id_graph[-1:]
def get_sample_id(fn, file_format="gml"):
    """
    Extract the sample identifier from a file name.

    The identifier is the integer found after the last underscore of the file
    name, once the ``.<file_format>`` extension has been removed.

    Parameters
    ----------
    fn : str
        File name, e.g. ``"graph_ER_graph_1_3.gml"``.
    file_format : str, optional
        Extension of the file, without the leading dot (default ``"gml"``).

    Returns
    -------
    int
        The sample identifier.

    Raises
    ------
    ValueError
        If the text after the last underscore is not an integer.
    """
    suffix = ".{0}".format(file_format)
    # Remove the extension as a suffix. str.strip() would treat it as a set of
    # characters and could also eat trailing characters of the identifier
    # (e.g. the "6" of "..._16.g6").
    stem = fn[:-len(suffix)] if fn.endswith(suffix) else fn
    return int(stem.split("_")[-1])
def load_data(fn, graph_dir):
    """
    Load a tab-separated result file and add the derived columns used for plotting.

    Parameters
    ----------
    fn : str
        Path of the tab-separated result file. It must provide at least the
        ``filename`` and ``name`` columns.
    graph_dir : str
        Directory containing the GML files referenced by the ``filename`` column.

    Returns
    -------
    pandas.DataFrame
        The loaded data with four additional columns:

        * ``type_graph``: the file name without its first six characters and
          without the ``_<digits>.gml`` part, underscores replaced by spaces;
        * ``parameters``: graph attributes returned by ``get_graph_attr``;
        * ``sample``: identifier returned by ``get_sample_id_old``;
        * ``type_method``: ``"heuristic"`` or ``"network_embedding_based"``.

    Raises
    ------
    FileNotFoundError
        Propagated from ``get_graph_attr`` when a referenced graph file is missing.
    """
    df = pd.read_csv(fn, sep="\t")

    def graph_type(filename):
        # Drop the first six characters (presumably the "graph_" prefix -- TODO
        # confirm against the generator), then the "_<digits>.gml" part.
        # Raw string with an escaped dot so that only a literal ".gml" matches.
        return re.sub(r"_\d+\.gml", "", filename[6:]).replace("_", " ")

    df["type_graph"] = df.filename.apply(graph_type)
    df["parameters"] = df.filename.apply(lambda x: get_graph_attr(x, graph_dir))
    df["sample"] = df.filename.apply(get_sample_id_old)

    # Methods that are not based on a network embedding.
    non_ne = {'random_prediction', 'common_neighbours', 'jaccard_coefficient', 'adamic_adar_index',
              'preferential_attachment', 'resource_allocation_index', 'stochastic_block_model',
              'stochastic_block_model_degree_corrected', 'spatial_link_prediction'}
    df["type_method"] = df.name.apply(lambda x: "heuristic" if x in non_ne else "network_embedding_based")
    return df
def set_custom_palette(x, y, max_color='red', close_color='turquoise', other_color='lightgrey'):
    """
    Build a per-observation colour list that highlights the best category.

    The values of ``y`` are averaged per category of ``x``. The category with
    the highest mean gets ``max_color``; categories whose mean lies strictly
    within ``0.01 + median(best - means)`` of the best mean get
    ``close_color``; all the others get ``other_color``.

    Parameters
    ----------
    x : pandas.Series
        Category of each observation. Its ``name`` is used as grouping column.
    y : pandas.Series
        Value of each observation. Its ``name`` is used as value column.
    max_color : str, optional
        Colour of the best category.
    close_color : str, optional
        Colour of the categories close to the best one.
    other_color : str, optional
        Colour of the remaining categories.

    Returns
    -------
    list
        One colour per element of ``x``, in the same order.
    """
    group_means = pd.concat((x, y), axis=1).groupby(x.name, as_index=False).mean()
    scores = group_means[y.name]
    best = scores.max()
    # Half-width of the "close to the best" band around the best mean.
    tolerance = 0.01 + (best - scores).median()

    def pick_color(score):
        if score == best:
            return max_color
        if best - tolerance < score < best + tolerance:
            return close_color
        return other_color

    color_of = {label: pick_color(score) for label, score in zip(group_means[x.name], scores)}
    return [color_of[label] for label in x]
def highlight_barplot(x, y, **kwargs):
    """
    Draw a seaborn bar plot whose colours highlight the best category.

    The palette is always the one computed by ``set_custom_palette(x, y)``;
    any ``palette`` given by the caller is replaced.

    Parameters
    ----------
    x : pandas.Series
        Category of each observation.
    y : pandas.Series
        Value of each observation.
    **kwargs
        Additional keyword arguments forwarded to ``seaborn.barplot``.
    """
    # Overwrite rather than add the key: passing ``palette=`` explicitly next
    # to a kwargs dict that already holds a falsy ``palette`` (e.g. None)
    # raises "got multiple values for keyword argument".
    kwargs["palette"] = set_custom_palette(x, y)
    sns.barplot(x=x, y=y, **kwargs)
class DrawingResults():
    """
    Plotting helpers for a data frame of link prediction results.

    Every plotting method builds a ``seaborn.FacetGrid`` and accepts the
    following optional drawing keyword arguments (``**draw_args``):

    * ``figsize``: (width, height) of the figure, in inches;
    * ``rotation``: rotation of the x tick labels (default 90);
    * ``output_filename``: if given, the figure is saved there instead of shown;
    * ``save_param``: dict of extra keyword arguments for ``FacetGrid.savefig``;
    * ``plot_func``: plotting function mapped on the grid, where the method
      supports it (default ``seaborn.barplot``).
    """

    # Aggregation methods accepted by the ``agg_func`` arguments.
    _AGG_FUNCS = ("mean", "max", "min", "std")

    def __init__(self, df_results):
        """
        Parameters
        ----------
        df_results : pandas.DataFrame
            Results to plot. The methods read the columns ``name``,
            ``nb_edge``, ``size``, ``type_graph``, ``type_method``,
            ``parameters`` and the requested metric column.
        """
        self.df = df_results

    @staticmethod
    def _aggregate_metric(df, keys, metric, agg_func):
        """
        Aggregate the ``metric`` column of ``df`` per group of ``keys``.

        Only the metric column is aggregated, so non-numeric columns (file
        names, parameter dictionaries, ...) cannot make the aggregation fail.

        Returns a data frame holding the key columns and the aggregated metric.
        Raises ``ValueError`` if ``agg_func`` is not a supported method.
        """
        if agg_func not in DrawingResults._AGG_FUNCS:
            raise ValueError("Method {0} does not exists in pandas.core.groupby.generic.DataFrameGroupBy".format(agg_func))
        grouped = df.groupby(keys, as_index=False)[[metric]]
        return getattr(grouped, agg_func)()

    def __draw(self, g, **kwargs):
        """Apply the common layout to the grid ``g``, then save or show it."""
        if "figsize" in kwargs:
            g.fig.set_size_inches(*kwargs["figsize"])
        rotation = kwargs.get("rotation", 90)
        for ax in g.axes.flat:
            plt.setp(ax.get_xticklabels(), rotation=rotation)
        g.fig.subplots_adjust(wspace=.09, hspace=.02)
        output_filename = kwargs.get("output_filename", None)
        if output_filename:
            save_params = {}
            if isinstance(kwargs.get("save_param"), dict):
                save_params.update(kwargs["save_param"])
            g.savefig(output_filename, **save_params)
        else:
            plt.show()

    def metric_per_nodes_edges(self, type_graph=None, agg_func=None, metric="auroc", **draw_args):
        """
        Plot ``metric`` per method, one facet per (``size``, ``nb_edge``) pair.

        Parameters
        ----------
        type_graph : str, optional
            If given and present in the data, only this type of graph is kept.
        agg_func : str, optional
            One of "mean", "max", "min", "std". If given, the metric is first
            aggregated per (name, nb_edge, size, type_graph, type_method).
        metric : str, optional
            Name of the column to plot (default "auroc").
        **draw_args
            Drawing options, see the class documentation.

        Raises
        ------
        ValueError
            If ``agg_func`` is not a supported aggregation method.
        """
        if agg_func:
            new_df = self._aggregate_metric(
                self.df, "name nb_edge size type_graph type_method".split(), metric, agg_func)
        else:
            new_df = self.df.copy()
        if type_graph and type_graph in new_df.type_graph.unique():
            new_df = new_df[new_df.type_graph == type_graph].copy()
        g = sns.FacetGrid(new_df, row="size", col="nb_edge", margin_titles=True, height=2.5)
        plot_func = draw_args.get('plot_func', sns.barplot)
        g.map(plot_func, "name", metric)
        return self.__draw(g, **draw_args)

    def metric_global(self, agg_func=None, metric="auroc", **draw_args):
        """
        Plot ``metric`` per method, one facet per type of graph.

        Parameters
        ----------
        agg_func : str, optional
            One of "mean", "max", "min", "std". If given, the metric is
            aggregated per (name, nb_edge, size, type_graph, type_method), and
            the result is aggregated again per (name, type_graph, type_method).
        metric : str, optional
            Name of the column to plot (default "auroc").
        **draw_args
            Drawing options, see the class documentation.

        Raises
        ------
        ValueError
            If ``agg_func`` is not a supported aggregation method.
        """
        if agg_func:
            new_df = self._aggregate_metric(
                self.df, "name nb_edge size type_graph type_method".split(), metric, agg_func)
            new_df = self._aggregate_metric(
                new_df, "name type_graph type_method".split(), metric, agg_func)
        else:
            new_df = self.df.copy()
        g = sns.FacetGrid(new_df, col="type_graph", margin_titles=True, height=2.5)
        plot_func = draw_args.get('plot_func', sns.barplot)
        g.map(plot_func, "name", metric, palette="tab20")
        return self.__draw(g, **draw_args)

    def caracteristic_distribution(self, caracteristic, **draw_args):
        """
        Plot the histogram of the column ``caracteristic``, one facet per type of graph.

        Parameters
        ----------
        caracteristic : str
            Name of the column whose distribution is plotted.
        **draw_args
            Drawing options, see the class documentation.
        """
        g = sns.FacetGrid(self.df, col="type_graph", col_wrap=4, )
        g.map(sns.histplot, caracteristic)
        return self.__draw(g, **draw_args)

    def parameter_impact(self, type_graph, parameter, second_parameter="size", metric="auroc", **draw_args):
        """
        Plot ``metric`` per method for each value of a graph generation parameter.

        Parameters
        ----------
        type_graph : str
            Type of graph to keep.
        parameter : str
            Key read from the ``parameters`` dictionary of every row; its
            values define the columns of the grid.
        second_parameter : str, optional
            Column whose values define the rows of the grid (default "size").
        metric : str, optional
            Name of the column to plot (default "auroc").
        **draw_args
            Drawing options, see the class documentation.
        """
        _df = self.df[self.df.type_graph == type_graph].copy()
        _df[parameter] = _df.parameters.apply(lambda x: x[parameter])
        g = sns.FacetGrid(_df, row=second_parameter, col=parameter, margin_titles=True, height=2.5)
        plot_func = draw_args.get('plot_func', sns.barplot)
        g.map(plot_func, "name", metric, palette="tab20")
        return self.__draw(g, **draw_args)
...@@ -19,30 +19,30 @@ args = parser.parse_args() ...@@ -19,30 +19,30 @@ args = parser.parse_args()
GRAPH_SIZE = [80,800] GRAPH_SIZE = [80,800]
EDGE_SIZE = [2,4,5] EDGE_SIZE = [2,4,5]
sample_per_params = 4 sample_per_params = 10
OUTPUT_DIR = args.output_dir OUTPUT_DIR = args.output_dir
if not os.path.exists(OUTPUT_DIR): if not os.path.exists(OUTPUT_DIR):
raise FileExistsError("Output directory does not exists !") os.makedirs(args.output_dir)
parameters = { parameters = {
# "stochastic_block_model_graph": { "stochastic_block_model_graph": {
# "nb_nodes":GRAPH_SIZE, "nb_nodes":GRAPH_SIZE,
# "nb_edges":EDGE_SIZE, "nb_edges":EDGE_SIZE,
# "nb_com" :[2,5,8,16], "nb_com" :[2,5,8,16],
# "percentage_edge_betw":[0.1,0.01] "percentage_edge_betw":[0.1,0.01]
# }, },
# "ER_graph": { "ER_graph": {
# "nb_nodes":GRAPH_SIZE, "nb_nodes":GRAPH_SIZE,
# "nb_edges":EDGE_SIZE "nb_edges":EDGE_SIZE
# }, },
# "powerlaw_graph": { # configuration_model "powerlaw_graph": { # configuration_model
# "nb_nodes":GRAPH_SIZE, "nb_nodes":GRAPH_SIZE,
# "nb_edges":EDGE_SIZE, "nb_edges":EDGE_SIZE,
# "exponent":[2,3], "exponent":[2,3],
# "tries":[100] "tries":[100]
# }, },
"spatial_graph":{ "spatial_graph":{
"nb_nodes":GRAPH_SIZE, "nb_nodes":GRAPH_SIZE,
"nb_edges":EDGE_SIZE, "nb_edges":EDGE_SIZE,
...@@ -66,7 +66,7 @@ for method,args in pbar: ...@@ -66,7 +66,7 @@ for method,args in pbar:
try: try:
G = func(**params) G = func(**params)
G.graph.update(params) G.graph.update(params)
nx.write_gml(G, OUTPUT_DIR+"/graph_{method}_{ix}{sp_id}.gml".format(method=method,ix=ix,sp_id=sp_id),stringizer=str) nx.write_gml(G, OUTPUT_DIR+"/graph_{method}_{ix}_{sp_id}.gml".format(method=method,ix=ix,sp_id=sp_id),stringizer=str)
except Exception as e: except Exception as e:
print(e) print(e)
print("Can't generate graphs using these parameters") print("Can't generate graphs using these parameters")
......
...@@ -150,7 +150,7 @@ def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=1000, min_deg=0): ...@@ -150,7 +150,7 @@ def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=1000, min_deg=0):
return G return G
def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: np.linalg.norm(a - b), self_link=False): def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: np.linalg.norm(a - b), self_link=False,weighted = False):
""" """
Generate a spatial graph with a specific number of vertices and edges Generate a spatial graph with a specific number of vertices and edges
Parameters Parameters
...@@ -202,6 +202,7 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n ...@@ -202,6 +202,7 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
nodes = np.arange(nb_nodes).astype(int) nodes = np.arange(nb_nodes).astype(int)
sizes = [len(x) for x in np.array_split(np.arange(nb_edges), nb_nodes)] sizes = [len(x) for x in np.array_split(np.arange(nb_edges), nb_nodes)]
new_df = df[(df.src == nodes[0]) | (df.tar == nodes[0])].sample(n=sizes[0], weights="weight").copy() new_df = df[(df.src == nodes[0]) | (df.tar == nodes[0])].sample(n=sizes[0], weights="weight").copy()
add_register(new_df.hash.values) add_register(new_df.hash.values)
df = df[~in_register(df.hash.values)] df = df[~in_register(df.hash.values)]
...@@ -212,7 +213,10 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n ...@@ -212,7 +213,10 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
add_register(new_df.hash.values) add_register(new_df.hash.values)
df = df[~in_register(df.hash.values)] df = df[~in_register(df.hash.values)]
G = nx.from_pandas_edgelist(new_df, source="src", target="tar", edge_attr="weight") if weighted:
G = nx.from_pandas_edgelist(new_df, source="src", target="tar", edge_attr="weight")
else:
G = nx.from_pandas_edgelist(new_df, source="src", target="tar")
for n in list(G.nodes()): G.nodes[n]["pos"] = coords[n] for n in list(G.nodes()): G.nodes[n]["pos"] = coords[n]
return G return G
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment