From 3f21589136efb04d6404028543df0eb077a71ce4 Mon Sep 17 00:00:00 2001
From: Fize Jacques <jacques.fize@cirad.fr>
Date: Mon, 29 Mar 2021 16:39:38 +0200
Subject: [PATCH] Debug graph generators and add mixed spatial/SBM model

---
 generate_theoric_random_graph.py |  48 ++++----
 lib/random.py                    | 204 ++++++++++++++++++++++---------
 lib/visualisation.py             |   9 +-
 3 files changed, 180 insertions(+), 81 deletions(-)

diff --git a/generate_theoric_random_graph.py b/generate_theoric_random_graph.py
index 65c4555..53d6b9b 100644
--- a/generate_theoric_random_graph.py
+++ b/generate_theoric_random_graph.py
@@ -17,8 +17,8 @@ parser.add_argument("output_dir")
 
 args = parser.parse_args()
 
-GRAPH_SIZE = [80,800]
-EDGE_SIZE = [2,4,5]
+GRAPH_SIZE = [80,800,5000]
+EDGE_SIZE = [2,4,5,10]
 sample_per_params = 10
 
 OUTPUT_DIR = args.output_dir
@@ -27,30 +27,36 @@ if not os.path.exists(OUTPUT_DIR):
 
 parameters = {
-    "stochastic_block_model_graph": {
+    # "stochastic_block_model_graph": {
+    #     "nb_nodes":GRAPH_SIZE,
+    #     "nb_edges":EDGE_SIZE,
+    #     "nb_com" :[2,5,8,16,10,25],
+    #     "percentage_edge_betw":[0.1,0.01]
+    # },
+    # "ER_graph": {
+    #     "nb_nodes":GRAPH_SIZE,
+    #     "nb_edges":EDGE_SIZE
+    # },
+    # "powerlaw_graph": { # configuration_model
+    #     "nb_nodes":GRAPH_SIZE,
+    #     "nb_edges":EDGE_SIZE,
+    #     "exponent":[2,3],
+    #     "tries":[100]
+    # },
+    # "spatial_graph":{
+    #     "nb_nodes":GRAPH_SIZE,
+    #     "nb_edges":EDGE_SIZE,
+    #     "coords":["random","country"],
+    # },
+    "mixed_model_spat_sbm":{
         "nb_nodes":GRAPH_SIZE,
         "nb_edges":EDGE_SIZE,
-        "nb_com" :[2,5,8,16],
-        "percentage_edge_betw":[0.1,0.01]
-    },
-    "ER_graph": {
-        "nb_nodes":GRAPH_SIZE,
-        "nb_edges":EDGE_SIZE
-    },
-    "powerlaw_graph": { # configuration_model
-        "nb_nodes":GRAPH_SIZE,
-        "nb_edges":EDGE_SIZE,
-        "exponent":[2,3],
-        "tries":[100]
-    },
-    "spatial_graph":{
-        "nb_nodes":GRAPH_SIZE,
-        "nb_edges":EDGE_SIZE,
-        "coords":["random","country"],
+        "nb_com":[2,4,8,16],
+        "alpha":[0,0.01,0.1,0.5,0.7,1]
+    }
 }
 
-
 def get_params(inp):
     return (dict(zip(inp.keys(), values)) for values in itertools.product(*inp.values()))
 
diff --git a/lib/random.py b/lib/random.py
index 29f164c..1d09352 100644
--- a/lib/random.py
+++ b/lib/random.py
@@ -1,10 +1,12 @@
 # coding = utf-8
+import copy
 from collections import Iterable
 
 import numpy as np
 import networkx as nx
 import pandas as pd
 from networkx.generators.degree_seq import _to_stublist
+from cdlib import algorithms
 
 import random
 
@@ -150,7 +152,8 @@ def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=1000, min_deg=0):
     return G
 
 
-def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: np.linalg.norm(a - b), self_link=False,weighted = False):
+def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: np.linalg.norm(a - b) ** 2,
+                  self_link=False, weighted=False):
     """
     Generate a spatial graph with a specific number of vertices and edges
     Parameters
@@ -167,6 +170,9 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
     nx.Graph
         generated graph
     """
+    if nb_nodes > nb_edges:
+        raise ValueError(
+            "To obtain the requested number of nodes, the number of edges must be greater than or equal to the number of nodes!")
     if coords and isinstance(coords, Iterable) and not isinstance(coords, str):
         if len(coords) != nb_nodes:
             raise ValueError("number of nodes must match the size of the coords dict")
@@ -182,43 +188,64 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
         coords_index = np.random.choice(np.arange(len(coords)), nb_nodes)
         coords = coords[coords_index]
     data = []
+    float_epsilon = np.finfo(float).eps
     for i in range(nb_nodes):
         for j in range(nb_nodes):
             if i == j and not self_link:
                 continue
-            data.append([i, j, 1/((1+dist_func(coords[i], coords[j])**4))])
+            data.append([i, j, 1 / (float_epsilon + dist_func(coords[i], coords[j]))])
 
     df = pd.DataFrame(data, columns="src tar weight".split()).astype({"src": int, "tar": int})
     df["hash"] = df.apply(lambda x: "_".join(sorted([str(int(x.src)), str(int(x.tar))])), axis=1)
     df = df.drop_duplicates(subset="hash")
-    df["weight"] = df.weight/df.weight.sum()
-
-    register = set([])
-
-    def add_register(hashes):
-        for hash_ in hashes:
-            register.add(hash_)
-
-    def in_register(hashes):
-        return np.array([True if hash_ in register else False for hash_ in hashes])
-
-    nodes = np.arange(nb_nodes).astype(int)
-    sizes = [len(x) for x in np.array_split(np.arange(nb_edges), nb_nodes)]
-
-    new_df = df[(df.src == nodes[0]) | (df.tar == nodes[0])].sample(n=sizes[0], weights="weight").copy()
-    add_register(new_df.hash.values)
-    df = df[~in_register(df.hash.values)]
-
-    for ix, node in enumerate(nodes[1:]):
-        sample = df[(df.src == node) | (df.tar == node)].sample(n=sizes[ix + 1], weights="weight").copy()
-        new_df = pd.concat((new_df, sample))
-        add_register(new_df.hash.values)
-        df = df[~in_register(df.hash.values)]
-
-    if weighted:
-        G = nx.from_pandas_edgelist(new_df, source="src", target="tar", edge_attr="weight")
-    else:
-        G = nx.from_pandas_edgelist(new_df, source="src", target="tar")
-    for n in list(G.nodes()): G.nodes[n]["pos"] = coords[n]
+    df["weight"] = df.weight / df.weight.sum()
+
+    iter_ = 0  # retry the weighted edge sampling up to 50 times
+    f = False
+    best_G = None
+    new_df = None
+    while iter_ < 50 and not f:
+        new_df = df.sample(n=nb_edges, weights="weight")
+        if weighted:
+            G = nx.from_pandas_edgelist(new_df, source="src", target="tar", edge_attr="weight")
+        else:
+            G = nx.from_pandas_edgelist(new_df, source="src", target="tar")
+        f = (len(G) == nb_nodes and G.size() == nb_edges)
+        if best_G is not None:  # keep the sample closest to the target node count
+            if abs(len(best_G) - nb_nodes) > abs(len(G) - nb_nodes):
+                best_G = G.copy()
+        else:
+            best_G = G.copy()
+        iter_ += 1
+    G = best_G.copy()
+
+    if len(G) != nb_nodes:  # prune surplus edges from high-degree nodes, then attach missing nodes
+        diff = abs(len(G) - nb_nodes)
+        df_deg = pd.DataFrame(nx.degree(G), columns="node degree".split())
+        df_deg = df_deg[df_deg.degree > 2].sort_values(by="degree", ascending=False)
+        i = 1
+        while True:
+            new_df = df_deg.head(i)
+            if (new_df.degree - 1).sum() > diff:
+                break
+            i += 1
+        df_deg = df_deg.head(i)
+        edges_ = []
+        for node in df_deg.node.values:
+            node_edges = list(G.edges(node))
+            random.shuffle(node_edges)
+            edges_.extend([[ed[0], ed[1]] for ed in node_edges[1:]])  # keep one edge per node
+        idx = np.random.choice(np.arange(len(edges_)), size=diff, replace=False)
+        edges_ = np.array(edges_)[idx]
+        G.remove_edges_from(edges_)
+
+        missing_nodes = list(set(range(nb_nodes)) - set(list(G.nodes())))
+        for node in missing_nodes:
+            new_df = df[(df.src == node) | (df.tar == node)].sample(n=1, weights="weight")
+            edges = new_df["src tar".split()].values
+            G.add_edges_from(edges)
+
+    for n in list(G.nodes()):
+        G.nodes[n]["pos"] = coords[n]
     return G
 
 
@@ -271,44 +298,31 @@ def stochastic_block_model_graph(nb_nodes, nb_edges, nb_com, percentage_edge_bet
     if verbose:
         print(G.size())
 
+    u_in = sum([nb_of_pair((b_assign_array==b).sum()) for b in range(nb_com)])
     u_out = nb_of_pair(len(G)) - u_in
 
-    l_out = u_out*percentage_edge_betw
+    l_out = nb_edges*percentage_edge_betw
+    p_out = l_out/u_out
 
     l_in = nb_edges - l_out
-    percentage_edge_within = l_in / u_in
+    p_in = l_in / u_in
 
     if verbose:
-        print("u_out",u_out)
         print("u_in",u_in)
+        print("u_out", u_out)
print("l_out",l_out) print("l_in", l_in) - print("p_in",percentage_edge_within) - # percentage_edge_within = 1 - percentage_edge_betw + print("p_in",p_in) + print("p_out", p_out) - inter_edges, intra_edges = [], [] - register = set([]) - for n1 in list(G.nodes()): - for n2 in list(G.nodes()): - hash_ = "_".join(sorted([str(n1), str(n2)])) - if (n1 == n2) or (hash_ in register): - continue - b1, b2 = block_assign[n1], block_assign[n2] - if b1 != b2: - inter_edges.append([n1, n2]) - else: - intra_edges.append([n1, n2]) - register.add(hash_) - + inter_edges, intra_edges = get_inter_intra_edges(G,G.is_directed()) inter_edges = np.asarray(inter_edges) intra_edges = np.asarray(intra_edges) inter_N, intra_N = len(inter_edges), len(intra_edges) - probs_inter = np.ones(inter_N) * percentage_edge_betw - probs_intra = np.ones(intra_N) * percentage_edge_within + probs_inter = np.ones(inter_N) * p_out + probs_intra = np.ones(intra_N) * p_in all_edges = np.concatenate((inter_edges, intra_edges)) - del inter_edges - del intra_edges all_probs = np.concatenate((probs_inter, probs_intra)) del probs_inter del probs_intra @@ -316,7 +330,6 @@ def stochastic_block_model_graph(nb_nodes, nb_edges, nb_com, percentage_edge_bet if verbose: print(inter_N, intra_N) - print(int(np.ceil(nb_edges * percentage_edge_betw)), int(np.ceil(nb_edges * percentage_edge_within))) final_edges = [] index_selected_pairs = np.random.choice(np.arange(len(all_edges)), nb_edges, p=all_probs, replace=False) @@ -327,7 +340,7 @@ def stochastic_block_model_graph(nb_nodes, nb_edges, nb_com, percentage_edge_bet G2 = nx.from_edgelist(final_edges) if len(G2) != nb_nodes: - equilibrate(G2, nb_nodes, percentage_edge_betw, percentage_edge_within, inter_edges, intra_edges, block_assign) + equilibrate(G2, nb_nodes, percentage_edge_betw, 1-percentage_edge_betw, inter_edges, intra_edges, block_assign) for n in list(G2.nodes()): G2.nodes[n]["block"] = block_assign[n] @@ -407,3 +420,82 @@ def equilibrate(G, nb_nodes, percentage_edge_betw, percentage_edge_within, inter for ed in new_edges: G.add_edge(*ed) return G + + +def add_partitions_G(G, nb_com=5): + blocks = np.random.choice(np.arange(nb_com), len(G), p=[1 / nb_com] * nb_com) + for ix, node in enumerate(list(G.nodes())): + try: + G.nodes[node]["block"] = blocks[ix] + except KeyError: + G.nodes[node]["block"] = 0 + return G + + +def get_inter_intra_edges(G, directed=False): + block_assign = nx.get_node_attributes(G, "block") + assert (len(block_assign) ==len(G)) + + inter_edges, intra_edges = [], [] + register = set([]) + for n1 in list(G.nodes()): + for n2 in list(G.nodes()): + if directed: + "_".join([str(n1), str(n2)]) + else: + hash_ = "_".join(sorted([str(n1), str(n2)])) + if (n1 == n2) or (hash_ in register): + continue + b1, b2 = block_assign[n1], block_assign[n2] + if b1 != b2: + inter_edges.append([n1, n2]) + else: + intra_edges.append([n1, n2]) + register.add(hash_) + return inter_edges,intra_edges + +def mixed_model_spat_sbm(nb_nodes, nb_edges, nb_com, alpha, percentage_edge_betw=0.01, + dist_func=lambda p1, p2: np.linalg.norm(p1 - p2) ** 2): + G = spatial_graph(nb_nodes, nb_edges, coords="random", dist_func=dist_func) + G = add_partitions_G(G, nb_com) + nb_edges = G.size() + + def nb_of_pair(N): + return (N * (N - 1)) / 2 + + block_assign = nx.get_node_attributes(G, "block") + assert (len(block_assign) == len(G)) + b_assign_array = np.asarray(list(block_assign.values())) + + u_in = sum([nb_of_pair((b_assign_array == b).sum()) for b in range(np.max(b_assign_array))]) + u_out = 
+    u_out = nb_of_pair(len(G)) - u_in
+    l_out = nb_edges * percentage_edge_betw
+    p_out = l_out / u_out
+    l_in = nb_edges - l_out
+    p_in = l_in / u_in
+
+
+    inter_edges, intra_edges = get_inter_intra_edges(G, G.is_directed())
+    inter_N, intra_N = len(inter_edges), len(intra_edges)
+    probs_sbm_inter = np.ones(inter_N) * p_out
+    probs_sbm_intra = np.ones(intra_N) * p_in
+
+    all_edges = np.concatenate((inter_edges, intra_edges))
+    all_probs_sbm = np.concatenate((probs_sbm_inter, probs_sbm_intra))
+    all_probs_sbm /= all_probs_sbm.sum()
+
+    all_probs_spa = np.asarray([1 / (np.finfo(float).eps + dist_func(G.nodes[e[0]]["pos"], G.nodes[e[1]]["pos"])) for e in all_edges])  # distance between node positions, not node ids
+    all_probs_spa /= all_probs_spa.sum()
+
+    all_probs = alpha * all_probs_sbm + (1 - alpha) * all_probs_spa
+
+    final_edges = []
+    index_selected_pairs = np.random.choice(np.arange(len(all_edges)), nb_edges, p=all_probs, replace=False)
+    final_edges.extend(all_edges[index_selected_pairs])
+    G2 = nx.from_edgelist(final_edges)
+
+    for n in list(G2.nodes()):
+        G2.nodes[n]["block"] = block_assign[n]
+        G2.nodes[n]["pos"] = G.nodes[n]["pos"]
+
+    return G2
\ No newline at end of file
diff --git a/lib/visualisation.py b/lib/visualisation.py
index aad6b85..032a64d 100644
--- a/lib/visualisation.py
+++ b/lib/visualisation.py
@@ -92,7 +92,7 @@ class DrawingResults():
             g.fig.set_size_inches(*kwargs["figsize"])
 
         [plt.setp(ax.get_xticklabels(), rotation=kwargs.get("rotation", 90)) for ax in g.axes.flat]
-        g.fig.subplots_adjust(wspace=.09, hspace=.02)
+        g.fig.subplots_adjust(wspace=.09)  # hspace=.02 disabled
 
         if kwargs.get("output_filename",None):
             save_params = {}
@@ -114,7 +114,7 @@ class DrawingResults():
         if type_graph and type_graph in new_df.type_graph.unique():
             new_df = new_df[new_df.type_graph == type_graph].copy()
 
-        g = sns.FacetGrid(new_df, row="size", col="nb_edge", margin_titles=True, height=2.5)
+        g = sns.FacetGrid(new_df, row="size", col="nb_edge", margin_titles=True)
 
         plot_func = draw_args.get('plot_func', sns.barplot)
         g.map(plot_func, "name", metric)
@@ -134,7 +134,8 @@ class DrawingResults():
         else:
             raise ValueError("Method {0} does not exists in pandas.core.groupby.generic.DataFrameGroupBy".format(agg_func))
 
-        g = sns.FacetGrid(new_df, col="type_graph", margin_titles=True, height=2.5)
+
+        g = sns.FacetGrid(new_df, col="type_graph", col_wrap=2, margin_titles=True)
 
         plot_func = draw_args.get('plot_func', sns.barplot)
         g.map(plot_func, "name", metric, palette="tab20")
@@ -153,7 +154,7 @@ class DrawingResults():
         g = sns.FacetGrid(_df, row=second_parameter, col=parameter, margin_titles=True, height=2.5)
 
         plot_func = draw_args.get('plot_func', sns.barplot)
-        g.map(plot_func, "name", metric, palette="tab20")
+        g.map(plot_func, "name", metric, palette=draw_args.get("cmap", "tab20"))
 
         return self.__draw(g,**draw_args)
-- 
GitLab
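
Usage note (kept below the signature, so it stays out of the applied diff): a
minimal sketch of how the new mixed spatial/SBM generator might be called. It
assumes lib/ is on the import path; the parameter values are illustrative,
with alpha picked from the grid in generate_theoric_random_graph.py and
nb_edges chosen to satisfy the new nb_edges >= nb_nodes check in spatial_graph.

    # Illustrative only -- not part of the patch.
    import networkx as nx
    from lib.random import mixed_model_spat_sbm

    # alpha blends the two edge-probability models:
    # alpha=0 -> purely spatial, alpha=1 -> purely SBM.
    G = mixed_model_spat_sbm(nb_nodes=80, nb_edges=400, nb_com=4, alpha=0.5)

    print(len(G), G.size())                      # target: 80 nodes, ~400 edges
    blocks = nx.get_node_attributes(G, "block")  # community label per node
    pos = nx.get_node_attributes(G, "pos")       # coordinates per node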