def _conf_model(degree_seq):
    """
    Build a simple graph from a degree sequence by randomly pairing stubs.

    Each node contributes as many "stubs" as its target degree. The stubs are
    shuffled and paired two by two; a pair becomes an edge only when it is not
    a self-loop and has not been drawn before. Stubs that could not be paired
    are reshuffled and retried for a bounded number of rounds, so the returned
    graph may contain fewer edges than ``sum(degree_seq) / 2``.

    Parameters
    ----------
    degree_seq : sequence of int
        Target degree of each node.

    Returns
    -------
    nx.Graph
        Graph built from the accepted pairs. Nodes that never received an
        edge are absent from it.
    """
    # NOTE(review): `_to_stublist` is a private networkx helper; it is expected
    # to return node index i repeated degree_seq[i] times -- confirm on upgrade.
    stubs = _to_stublist(degree_seq)
    random.shuffle(stubs)

    seen_pairs = set()
    edges = []
    max_rounds = 100
    for _ in range(max_rounds):
        if len(stubs) < 2:
            # Nothing left to pair (a lone stub can never form an edge).
            break
        leftover = []
        # Stop at len - 1 so that the final pair is visited too; stopping at
        # len - 2 silently dropped the last two stubs on every round.
        for i in range(0, len(stubs) - 1, 2):
            u, v = stubs[i], stubs[i + 1]
            pair = frozenset((u, v))
            if u == v or pair in seen_pairs:
                # Self-loop or duplicate edge: give both stubs another chance.
                leftover.extend((u, v))
                continue
            seen_pairs.add(pair)
            edges.append((u, v))
        if len(stubs) % 2:
            # Odd stub count: carry the unpaired trailing stub over.
            leftover.append(stubs[-1])
        stubs = leftover
        random.shuffle(stubs)
    return nx.from_edgelist(edges)


def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=1000, min_deg=1):
    """
    Generate a graph with a defined number of vertices and edges, and a degree
    distribution that fits a power law.

    A degree sequence is drawn with ``powerlaw`` and wired with ``_conf_model``.
    The draw is repeated until the graph has exactly ``nb_nodes`` nodes, then
    edges are added or removed so that the edge count reaches ``nb_edges``
    whenever that is possible.

    Parameters
    ----------
    nb_nodes : int
        Expected number of nodes.
    nb_edges : int
        Expected number of edges.
    exponent : int or float
        Forwarded to ``powerlaw`` (presumably the power-law exponent).
    tries : int
        Forwarded to ``powerlaw``; also the maximum number of times the
        degree sequence is redrawn when the node count is wrong.
    min_deg : int
        Forwarded to ``powerlaw`` (presumably the minimum node degree).

    Returns
    -------
    nx.Graph
        generated graph. Its edge count can still differ from ``nb_edges``
        when no further edge can be added, or when no edge can be removed
        without leaving an endpoint with degree 0.

    Raises
    ------
    Exception
        If no draw produced a graph with ``nb_nodes`` nodes.
    """

    def draw_graph():
        sequence = powerlaw(nb_nodes, nb_edges, exponent, tries, min_deg)
        return _conf_model(sequence.astype(int))

    G = draw_graph()
    attempts = 0
    while len(G) != nb_nodes and attempts < tries:
        G = draw_graph()
        attempts += 1
    if len(G) != nb_nodes:
        print(nb_nodes, nb_edges, exponent)
        raise Exception("Cant compute configuration model based on parameters")

    missing = nb_edges - G.size()
    if missing > 0:
        # Too few edges: connect not-yet-linked node pairs in node order.
        nodes = list(G.nodes())
        for u in nodes:
            if missing == 0:
                break
            for v in nodes:
                if u == v or G.has_edge(u, v):
                    continue
                G.add_edge(u, v)
                missing -= 1
                if missing == 0:
                    break
    elif missing < 0:
        # Too many edges: drop random edges, but never one whose removal
        # would leave an endpoint without any edge.
        surplus = -missing
        candidates = list(G.edges())
        random.shuffle(candidates)
        for u, v in candidates:
            if surplus == 0:
                break
            if G.degree(u) > 1 and G.degree(v) > 1:
                G.remove_edge(u, v)
                surplus -= 1
    return G
set([]) + + def add_register(hashes): + for hash_ in hashes: + register.add(hash_) + + def in_register(hashes): + return np.array([True if hash_ in register else False for hash_ in hashes]) + + nodes = np.arange(nb_nodes).astype(int) + sizes = [len(x) for x in np.array_split(np.arange(nb_edges), nb_nodes)] + new_df = df[(df.src == nodes[0]) | (df.tar == nodes[0])].sample(n=sizes[0], weights="weight").copy() + add_register(new_df.hash.values) + df = df[~in_register(df.hash.values)] + + for ix, node in enumerate(nodes[1:]): + sample = df[(df.src == node) | (df.tar == node)].sample(n=sizes[ix + 1], weights="weight").copy() + new_df = pd.concat((new_df, sample)) + add_register(new_df.hash.values) + df = df[~in_register(df.hash.values)] + + G = nx.from_pandas_edgelist(new_df, source="src", target="tar", edge_attr="weight") for n in list(G.nodes()): G.nodes[n]["pos"] = coords[n] return G @@ -254,7 +329,7 @@ def equilibrate(G, nb_nodes, percentage_edge_betw, percentage_edge_within, inter def draw_(array, register, hash_func=lambda x, y: "_".join(sorted([str(x), str(y)]))): tries = 0 - while tries <1000: + while tries < 1000: index_array = np.random.choice(np.arange(len(array)), 1) res = array[index_array] res = res[0] @@ -262,7 +337,7 @@ def equilibrate(G, nb_nodes, percentage_edge_betw, percentage_edge_within, inter if not hash_ in register: register.add(hash_) return index_array - tries +=1 + tries += 1 raise Exception("Error ! 
(TODO)") # Draw new edges diff --git a/run_eval.py b/run_eval.py index c4a739b792cf9df17a05c7c47582d40038192720..6567a89735e0f7166a98c8674b85c7933d38f83a 100644 --- a/run_eval.py +++ b/run_eval.py @@ -42,7 +42,7 @@ for fn in pbar: top10node = pd.DataFrame(list(G.degree()), columns="node degree".split()).sort_values("degree", ascending=False).head(10).node.values - df_results["nb_edge"] = len(list(G.edges())) + df_results["nb_edge"] = G.size() df_results["transitivity"] = nx.transitivity(G) df_results["density"] = nx.density(G) df_results["top10_node"] = "|".join(top10node)