diff --git a/generate_theoric_random_graph.py b/generate_theoric_random_graph.py index 1dde7cf38929f72fb20345da0e069d5576913cd3..fe5959dfb76729be9a2616de8b26dfab241dfd03 100644 --- a/generate_theoric_random_graph.py +++ b/generate_theoric_random_graph.py @@ -1,4 +1,5 @@ # coding = utf-8 +import itertools import os import networkx as nx @@ -8,76 +9,63 @@ import pandas as pd import random import copy from tqdm import tqdm +import lib.random as ra # COMMAND PARSING parser = argparse.ArgumentParser() parser.add_argument("output_dir") args = parser.parse_args() -def generate_sbm_prob_matrix(nb_of_blocks,prob_btw_block=0.1): - M = np.zeros((nb_of_blocks,nb_of_blocks)) - np.fill_diagonal(M,[random.random() for i in range(nb_of_blocks)]) - for i in range(nb_of_blocks): - for j in range(nb_of_blocks): - if i == j:continue - M[i,j] = prob_btw_block - M[j,i] = prob_btw_block - return M - -GRAPH_SIZE = [50,75,100] -EDGE_SIZE = [] +GRAPH_SIZE = [100,150,200] +EDGE_SIZE = [300,500] OUTPUT_DIR = args.output_dir if not os.path.exists(OUTPUT_DIR): raise FileExistsError("Output directory does not exists !") -nx.waxman_graph + parameters = { - "planted_partition_graph": { - "l": [3,5,8], # nb of groups - "k": [10,20], # nb de noeud - "p_in": [0.2,0.5,0.7], - "p_out": [0.1] + "stochastic_block_model_graph": { + "nb_nodes":GRAPH_SIZE, + "nb_edges":EDGE_SIZE, + "nb_com" :[2,5], + "percentage_edge_betw":[0.1,0.01] }, - "stochastic_block_model": { - "sizes": [[random.choice([10,20,30]) for k in range(i)] for i in [3,5,8]], - "p": [] # Filled later - }, - "dense_gnm_random_graph": { - "n": GRAPH_SIZE, - "m": EDGE_SIZE + "ER_graph": { + "nb_nodes":GRAPH_SIZE, + "nb_edges":EDGE_SIZE }, "powerlaw_graph": { # configuration_model - "n": GRAPH_SIZE, + "nb_nodes":GRAPH_SIZE, + "nb_edges":EDGE_SIZE, + "exponent":[2,3] }, + "spatial_graph":{ + "nb_nodes":GRAPH_SIZE, + "nb_edges":EDGE_SIZE, + "coords":["random"], + } } -# Generating transition matrices for stochastic block model 
-parameters["stochastic_block_model"]["p"] = [generate_sbm_prob_matrix(len(l)) for l in parameters["stochastic_block_model"]["sizes"]] #getattr(nx,"geographical_threshold_graph")(**dict(n=20,theta=0.4)) -def get_params(dict_params): - nb_of_parameter = np.prod([len(a) for _,a in dict_params.items()]) - parameters_dicts = [{} for i in range(nb_of_parameter)] - for par,values in dict_params.items(): - division = nb_of_parameter/len(values) - for ix in range(nb_of_parameter): - parameters_dicts[ix][par] = values[int(ix//division)] - return parameters_dicts + + +def get_params(inp): + return (dict(zip(inp.keys(), values)) for values in itertools.product(*inp.values())) + + pbar = tqdm(parameters.items(),total=len(parameters)) for method,args in pbar: pbar.set_description("Generating graphs using : " + method) list_of_params = get_params(parameters[method]) - func = getattr(nx,method) + func = getattr(ra,method) for ix,params in enumerate(list_of_params): # try: - if method == "random_powerlaw_tree_sequence": - sequence = func(**params) - G = nx.configuration_model(sequence) - else: - G = func(**params) + print(params) + G = func(**params) G.graph.update(params) nx.write_gml(G, OUTPUT_DIR+"/graph_{method}_{ix}.gml".format(method=method,ix=ix),stringizer=str) # except Exception as e: diff --git a/lib/random.py b/lib/random.py index 8b57697a345fcc598674e3acafb38f3f87cba0b1..8af1cd407a9aee39315950178c058ce02db04402 100644 --- a/lib/random.py +++ b/lib/random.py @@ -9,6 +9,21 @@ import random def powerlaw(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1): + """ + Return a degree distribution that fits the power law and the specified number of edges and vertices. 
+ Parameters + ---------- + nb_nodes : int + nb_edges : int + exponent : int + tries : int + min_deg : int + + Returns + ------- + np.ndarray + degree sequence + """ nb_stubs = nb_edges * 2 # Draw a first time a powerlaw degree sequence degs = np.round(nx.utils.powerlaw_sequence(nb_nodes, exponent=exponent)) @@ -40,7 +55,7 @@ def powerlaw(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1): for ind in indexes: degs[ind] = degs[ind] + signe - return degs + return degs.astype(int) def get_countries_coords(): @@ -56,13 +71,46 @@ def get_countries_coords(): except: raise ImportError("Geopandas is not installed !") gdf = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres")) + return np.asarray(gdf.centroid.apply(lambda x: [x.x, x.y]).values.tolist()) def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1): - return nx.configuration_model(powerlaw(nb_nodes,nb_edges,exponent,tries,min_deg)) + """ + Generate a graph with a defined number of vertices, edges, and a degree distribution that fits the power law. 
+ Parameters + ---------- + nb_nodes : int + nb_edges : int + exponent : int + tries : int + min_deg : int + + Returns + ------- + nx.Graph + generated graph + """ + seq = powerlaw(nb_nodes, nb_edges, exponent, tries, min_deg) + return nx.configuration_model(seq.astype(int)) def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: np.linalg.norm(a - b), self_link=False): + """ + Generate a spatial graph with a specific number of vertices and edges + Parameters + ---------- + nb_nodes : int + nb_edges : int + coords : array of shape (n,2) or str + if str, possible choices are "random" or "country" + dist_func : callable + self_link : bool + + Returns + ------- + nx.Graph + generated graph + """ if coords and isinstance(coords, Iterable) and not isinstance(coords, str): if len(coords) != nb_nodes: raise ValueError("number of nodes must match the size of the coords dict") @@ -84,20 +132,54 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n continue data.append([i, j, dist_func(coords[i], coords[j])]) df = pd.DataFrame(data, columns="src tar weight".split()) + df["hash"] = df.apply(lambda x : "_".join(sorted([str(x.src),str(x.tar)])) ,axis=1) + df = df.drop_duplicates(subset=["hash"]) df = df.sample(nb_edges, weights="weight") G = nx.from_pandas_edgelist(df, source="src", target="tar", edge_attr="weight") for n in list(G.nodes()): G.nodes[n]["pos"] = coords[n] return G def ER_graph(nb_nodes,nb_edges): + """ + Generate a random graph with a specific number of nodes and edges. + Parameters + ---------- + nb_nodes : int + nb_edges : int + + Returns + ------- + nx.Graph + generated graph + """ return nx.dense_gnm_random_graph(nb_nodes,nb_edges) def stochastic_block_model_graph(nb_nodes,nb_edges,nb_com,percentage_edge_betw,verbose=False): + """ + Generate a stochastic block model graph with a defined number of vertices and edges. 
+ Parameters + ---------- + nb_nodes : int + nb_edges : int + nb_com : int + percentage_edge_betw : float + verbose : bool + + Returns + ------- + nx.Graph + generated graph + """ + + if nb_nodes%nb_com != 0: + raise ValueError("Modulo between the number of nodes and community must be equal to 0") + + edge_max = (1 / nb_com) * ((nb_nodes * (nb_nodes - 1)) / 2) + if nb_edges > edge_max: + raise ValueError("nb_edges must be inferior to {0}".format(edge_max)) percentage_edge_within = 1 - percentage_edge_betw - if nb_edges > (1 / nb_com) * (nb_nodes * (nb_nodes - 1)) / 2: - raise ValueError("nb_edges must be inferior to {0}".format((1 / nb_com) * (nb_nodes * (nb_nodes - 1)) / 2)) G = nx.planted_partition_graph(nb_com, int(np.round(nb_nodes / nb_com)), 1, 1) if verbose: @@ -112,7 +194,7 @@ def stochastic_block_model_graph(nb_nodes,nb_edges,nb_com,percentage_edge_betw,v if (n1 == n2) or (hash_ in register): continue b1, b2 = block_assign[n1], block_assign[n2] - if b1 != b2: + if b1 != b2 : inter_edges.append([n1, n2]) else: intra_edges.append([n1, n2])