Debug powerlaw configuration model graph generation, plus spatial graph sampling and evaluation tweaks

ea7ea1d7 · Fize Jacques · cadf30a2 · ea7ea1d7 · ea7ea1d7 · ea7ea1d7
Commit ea7ea1d7 authored 4 years ago by Fize Jacques
--- a/generate_theoric_random_graph.py
+++ b/generate_theoric_random_graph.py
@@ -41,9 +41,9 @@ parameters = {
        "exponent":[2,3]
    },
    "spatial_graph":{
-        "nb_nodes":GRAPH_SIZE,
+        "nb_nodes":[100,150],
        "nb_edges":EDGE_SIZE,
-        "coords":["random"],
+        "coords":["random","country"],
    }
 }


--- a/lib/random.py
+++ b/lib/random.py
@@ -4,7 +4,7 @@ from collections import Iterable
 import numpy as np
 import networkx as nx
 import pandas as pd
-
+from networkx.generators.degree_seq import _to_stublist
 import random


@@ -75,7 +75,33 @@ def get_countries_coords():
    return np.asarray(gdf.centroid.apply(lambda x: [x.x, x.y]).values.tolist())


-def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1):
+def _conf_model(degree_seq):
+    """Pair the shuffled stubs of ``degree_seq`` into distinct edges; return the graph.
+
+    Pairs already registered are reshuffled and retried (100 passes max), then dropped.
+    """
+    stubs_list = _to_stublist(degree_seq)
+    random.shuffle(stubs_list)
+    register = set()
+    edges = []
+    tries = 0
+    while len(stubs_list) > 1 and tries < 100:
+        to_del = set()
+        # Stop at len - 1 (not len - 2) so the final pair of stubs is not skipped.
+        for i in range(0, len(stubs_list) - 1, 2):
+            u, v = stubs_list[i], stubs_list[i + 1]
+            hash_ = "_".join(sorted([str(u), str(v)]))  # order-independent edge key
+            if hash_ not in register:
+                register.add(hash_)
+                edges.append([u, v])
+                to_del.update((i, i + 1))
+        stubs_list = [s for i, s in enumerate(stubs_list) if i not in to_del]
+        random.shuffle(stubs_list)
+        tries += 1
+    return nx.from_edgelist(edges)
+
+
+def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=1000, min_deg=1):
    """
    Generate a graph with a definied number of vertices, edges, and a degree distribution that fit the power law.
    Parameters
@@ -91,8 +117,36 @@ def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1):
    nx.Graph
            generated graph
    """
-    seq = powerlaw(nb_nodes, nb_edges, exponent, tries, min_deg)
-    return nx.configuration_model(seq.astype(int))
+    G = _conf_model(powerlaw(nb_nodes, nb_edges, exponent, tries, min_deg).astype(int))
+    tries_ = 0
+    # Redraw until the graph has exactly nb_nodes nodes, giving up after `tries` attempts.
+    while len(G) != nb_nodes and tries_ < tries:
+        G = _conf_model(powerlaw(nb_nodes, nb_edges, exponent, tries, min_deg).astype(int))
+        tries_ += 1
+    if len(G) != nb_nodes:
+        print(nb_nodes, nb_edges, exponent)
+        raise Exception("Cant compute configuration model based on parameters")
+
+    missing = nb_edges - G.size()
+    if missing > 0:
+        # Too few edges: link distinct, not-yet-adjacent nodes until the target is met.
+        nodes_ = list(G.nodes())
+        for n in nodes_:
+            for n2 in nodes_:
+                if G.size() == nb_edges:
+                    break
+                if n != n2 and not G.has_edge(n, n2):
+                    G.add_edge(n, n2)
+    elif missing < 0:
+        # Too many edges: drop random edges whose two endpoints both have degree > 1.
+        edges_ = list(G.edges())
+        random.shuffle(edges_)
+        for u, v in edges_:
+            if G.size() == nb_edges:
+                break
+            if G.degree(u) > 1 and G.degree(v) > 1:
+                G.remove_edge(u, v)
+    return G


 def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: np.linalg.norm(a - b), self_link=False):
@@ -132,11 +186,32 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
            if i == j and not self_link:
                continue
            data.append([i, j, dist_func(coords[i], coords[j])])
-    df = pd.DataFrame(data, columns="src tar weight".split())
-    df["hash"] = df.apply(lambda x: "_".join(sorted([str(x.src), str(x.tar)])), axis=1)
-    df = df.drop_duplicates(subset=["hash"])
-    df = df.sample(nb_edges, weights="weight")
-    G = nx.from_pandas_edgelist(df, source="src", target="tar", edge_attr="weight")
+    df = pd.DataFrame(data, columns="src tar weight".split()).astype({"src": int, "tar": int})
+    df["hash"] = df.apply(lambda x: "_".join(sorted([str(int(x.src)), str(int(x.tar))])), axis=1)
+    df = df.drop_duplicates(subset="hash")
+
+    register = set([])
+
+    def add_register(hashes):
+        for hash_ in hashes:
+            register.add(hash_)
+
+    def in_register(hashes):
+        return np.array([True if hash_ in register else False for hash_ in hashes])
+
+    nodes = np.arange(nb_nodes).astype(int)
+    sizes = [len(x) for x in np.array_split(np.arange(nb_edges), nb_nodes)]
+    new_df = df[(df.src == nodes[0]) | (df.tar == nodes[0])].sample(n=sizes[0], weights="weight").copy()
+    add_register(new_df.hash.values)
+    df = df[~in_register(df.hash.values)]
+
+    for ix, node in enumerate(nodes[1:]):
+        sample = df[(df.src == node) | (df.tar == node)].sample(n=sizes[ix + 1], weights="weight").copy()
+        new_df = pd.concat((new_df, sample))
+        add_register(new_df.hash.values)
+        df = df[~in_register(df.hash.values)]
+
+    G = nx.from_pandas_edgelist(new_df, source="src", target="tar", edge_attr="weight")
    for n in list(G.nodes()): G.nodes[n]["pos"] = coords[n]
    return G

@@ -254,7 +329,7 @@ def equilibrate(G, nb_nodes, percentage_edge_betw, percentage_edge_within, inter

    def draw_(array, register, hash_func=lambda x, y: "_".join(sorted([str(x), str(y)]))):
        tries = 0
-        while tries <1000:
+        while tries < 1000:
            index_array = np.random.choice(np.arange(len(array)), 1)
            res = array[index_array]
            res = res[0]
@@ -262,7 +337,7 @@ def equilibrate(G, nb_nodes, percentage_edge_betw, percentage_edge_within, inter
            if not hash_ in register:
                register.add(hash_)
                return index_array
-            tries +=1
+            tries += 1
        raise Exception("Error ! (TODO)")

    # Draw new edges

--- a/run_eval.py
+++ b/run_eval.py
@@ -42,7 +42,7 @@ for fn in pbar:

    top10node = pd.DataFrame(list(G.degree()), columns="node degree".split()).sort_values("degree",
                                                                                          ascending=False).head(10).node.values
-    df_results["nb_edge"] = len(list(G.edges()))
+    df_results["nb_edge"] = G.size()
    df_results["transitivity"] = nx.transitivity(G)
    df_results["density"] = nx.density(G)
    df_results["top10_node"] = "|".join(top10node)