Skip to content
Snippets Groups Projects
Commit cadf30a2 authored by Fize Jacques's avatar Fize Jacques
Browse files

debug SBM - generated graph can have a number of nodes inferior to the one expected

parent de800bfd
No related branches found
No related tags found
No related merge requests found
......@@ -64,7 +64,6 @@ for method,args in pbar:
func = getattr(ra,method)
for ix,params in enumerate(list_of_params):
# try:
print(params)
G = func(**params)
G.graph.update(params)
nx.write_gml(G, OUTPUT_DIR+"/graph_{method}_{ix}.gml".format(method=method,ix=ix),stringizer=str)
......
......@@ -94,6 +94,7 @@ def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1):
seq = powerlaw(nb_nodes, nb_edges, exponent, tries, min_deg)
return nx.configuration_model(seq.astype(int))
def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: np.linalg.norm(a - b), self_link=False):
"""
Generate a spatial graph with a specific number of vertices and edges
......@@ -132,14 +133,15 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
continue
data.append([i, j, dist_func(coords[i], coords[j])])
df = pd.DataFrame(data, columns="src tar weight".split())
df["hash"] = df.apply(lambda x : "_".join(sorted([str(x.src),str(x.tar)])) ,axis=1)
df["hash"] = df.apply(lambda x: "_".join(sorted([str(x.src), str(x.tar)])), axis=1)
df = df.drop_duplicates(subset=["hash"])
df = df.sample(nb_edges, weights="weight")
G = nx.from_pandas_edgelist(df, source="src", target="tar", edge_attr="weight")
for n in list(G.nodes()): G.nodes[n]["pos"] = coords[n]
return G
def ER_graph(nb_nodes,nb_edges):
def ER_graph(nb_nodes, nb_edges):
"""
Generate a random graph with a specific nb of nodes and edges.
Parameters
......@@ -152,10 +154,10 @@ def ER_graph(nb_nodes,nb_edges):
nx.Graph
generated graph
"""
return nx.dense_gnm_random_graph(nb_nodes,nb_edges)
return nx.dense_gnm_random_graph(nb_nodes, nb_edges)
def stochastic_block_model_graph(nb_nodes,nb_edges,nb_com,percentage_edge_betw,verbose=False):
def stochastic_block_model_graph(nb_nodes, nb_edges, nb_com, percentage_edge_betw, verbose=False):
"""
Generate a stochastic block model graph with defined number of vertices and edges.
Parameters
......@@ -172,7 +174,7 @@ def stochastic_block_model_graph(nb_nodes,nb_edges,nb_com,percentage_edge_betw,v
generated graph
"""
if nb_nodes%nb_com != 0:
if nb_nodes % nb_com != 0:
raise ValueError("Modulo between the number of nodes and community must be equal to 0")
edge_max = (1 / nb_com) * ((nb_nodes * (nb_nodes - 1)) / 2)
......@@ -186,7 +188,7 @@ def stochastic_block_model_graph(nb_nodes,nb_edges,nb_com,percentage_edge_betw,v
print(G.size())
block_assign = nx.get_node_attributes(G, "block")
inter_edges,intra_edges = [], []
inter_edges, intra_edges = [], []
register = set([])
for n1 in list(G.nodes()):
for n2 in list(G.nodes()):
......@@ -194,7 +196,7 @@ def stochastic_block_model_graph(nb_nodes,nb_edges,nb_com,percentage_edge_betw,v
if (n1 == n2) or (hash_ in register):
continue
b1, b2 = block_assign[n1], block_assign[n2]
if b1 != b2 :
if b1 != b2:
inter_edges.append([n1, n2])
else:
intra_edges.append([n1, n2])
......@@ -209,14 +211,93 @@ def stochastic_block_model_graph(nb_nodes,nb_edges,nb_com,percentage_edge_betw,v
print(int(np.ceil(nb_edges * percentage_edge_betw)), int(np.ceil(nb_edges * percentage_edge_within)))
final_edges = []
index_inter = np.random.choice(np.arange(inter_N), int(np.ceil(nb_edges * percentage_edge_betw)), replace=False)
index_intra = np.random.choice(np.arange(intra_N), int(np.ceil(nb_edges * percentage_edge_within)), replace=False)
index_inter = np.random.choice(np.arange(inter_N), int(np.round(nb_edges * percentage_edge_betw)), replace=False)
index_intra = np.random.choice(np.arange(intra_N), int(np.round(nb_edges * percentage_edge_within)), replace=False)
final_edges.extend(inter_edges[index_inter])
final_edges.extend(intra_edges[index_intra])
if verbose:
print(len(final_edges))
G2 = nx.from_edgelist(final_edges)
if len(G2) != nb_nodes:
equilibrate(G2, nb_nodes, percentage_edge_betw, percentage_edge_within, inter_edges, intra_edges, block_assign)
for n in list(G2.nodes()):
G2.nodes[n]["block"] = block_assign[n]
return G2
def equilibrate(G, nb_nodes, percentage_edge_betw, percentage_edge_within, inter_edges, intra_edges, block_assign):
"""
Sometimes the generated graph from the stochastic block model have some missing nodes due to the sampling method.
This function fix this issue.
Parameters
----------
G
nb_nodes
percentage_edge_betw
percentage_edge_within
inter_edges
intra_edges
block_assign
Returns
-------
"""
diff = nb_nodes - len(G)
nodes_missing = list(set(list(range(nb_nodes))) - set(list(G.nodes())))
nb_edge_inter = int(np.round(len(nodes_missing) * percentage_edge_betw))
nodes_inter = np.random.choice(nodes_missing, nb_edge_inter, replace=False)
nodes_intra = list(set(nodes_missing) - set(nodes_inter))
def draw_(array, register, hash_func=lambda x, y: "_".join(sorted([str(x), str(y)]))):
tries = 0
while tries <1000:
index_array = np.random.choice(np.arange(len(array)), 1)
res = array[index_array]
res = res[0]
hash_ = hash_func(res[0], res[1])
if not hash_ in register:
register.add(hash_)
return index_array
tries +=1
raise Exception("Error ! (TODO)")
# Draw new edges
new_edges = []
register = set([])
for node in nodes_inter:
mask_inter = np.isin(inter_edges, node).sum(axis=1).astype(bool)
index_inter = draw_(inter_edges[mask_inter], register)
new_edges.extend(inter_edges[mask_inter][index_inter])
for node in nodes_intra:
mask_intra = np.isin(intra_edges, node).sum(axis=1).astype(bool)
index_intra = draw_(intra_edges[mask_intra], register)
new_edges.extend(intra_edges[mask_intra][index_intra])
# Draw existing edge to delete
edge_to_delete = []
inter_edges_c, intra_edges_c = [], []
for ed in list(G.edges()):
if G.degree(ed[0]) > 1 and G.degree(ed[1]) > 1: # We don't want the nodes edge to be removed with the
# disappearance of the edge
if block_assign[ed[0]] != block_assign[ed[1]]:
inter_edges_c.append([ed[0], ed[1]])
else:
intra_edges_c.append([ed[0], ed[1]])
index_inter = np.random.choice(np.arange(len(inter_edges_c)), int(np.round(diff * percentage_edge_betw)),
replace=False)
index_intra = np.random.choice(np.arange(len(intra_edges_c)), int(np.round(diff * percentage_edge_within)),
replace=False)
edge_to_delete.extend(np.asarray(inter_edges_c)[index_inter])
edge_to_delete.extend(np.asarray(intra_edges_c)[index_intra])
for ed in edge_to_delete:
G.remove_edge(*ed)
for ed in new_edges:
G.add_edge(*ed)
return G
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment