diff --git a/generate_mixed_model_graph.py b/generate_mixed_model_graph.py index ee7646a4749979c5c35b39e8458ef330c0196f8a..fce2101acb0b7a0d98ca5c08e21660e7a5d8fab1 100644 --- a/generate_mixed_model_graph.py +++ b/generate_mixed_model_graph.py @@ -19,7 +19,7 @@ args = parser.parse_args() GRAPH_SIZE = [300,1000] EDGE_SIZE = [2] -sample_per_params = 10 +sample_per_params = 1 OUTPUT_DIR = args.output_dir if not os.path.exists(OUTPUT_DIR): diff --git a/lib/erosion_model.py b/lib/erosion_model.py index 5f503b1ad33d0c48fb066e2d94d9bd3c528c1d7c..ccc64ff3425d4fe1b2aa8669fd7052bca093bae6 100644 --- a/lib/erosion_model.py +++ b/lib/erosion_model.py @@ -7,6 +7,7 @@ from .random import get_spat_probs, get_sbm_probs from .lambda_func import euclid_dist as dist from .lambda_func import hash_func +from evalne.utils import preprocess as pp from evalne.methods.similarity import stochastic_block_model,spatial_link_prediction import pandas as pd @@ -14,6 +15,10 @@ import networkx as nx import numpy as np float_epsilon = np.finfo(float).eps +VERBOSE = True +def log(x): + if VERBOSE: + print(x) class ErosionModel(): def __init__(self, G): @@ -27,35 +32,46 @@ class ErosionModel(): self.nb_of_erosion = 0 def erode(self): - self.nb_of_erosion += 1 + test_H, _ = pp.prep_graph(self.H.copy(), maincc=True, relabel=False) + if len(test_H) < 30: + return False if self.H.size() < 30: - self.probs_df["p_{0}".format(self.nb_of_erosion)] = self.probs_df["p_{0}".format(self.nb_of_erosion - 1)] - return + return False + self.nb_of_erosion += 1 + old_probs = dict(self.probs_df["hash_ p_{0}".format(self.nb_of_erosion - 1).split()].values) auc_sbm, auc_spatial = get_auc_heuristics(self.H, 60) + print(auc_spatial,auc_sbm) edges = get_all_possible_edges(self.H) if auc_sbm > auc_spatial: probs = stochastic_block_model(self.H, edges) else: probs = spatial_link_prediction(self.H, edges) + edges = np.asarray(edges) probs_dom = np.asarray(probs) - probs_dom /= probs_dom.sum() + sum_prob_dom = probs_dom.sum() + 
sum_prob_dom_H = sum([probs[ix] for ix, ed in enumerate(edges) if self.H.has_edge(*ed)]) + probs_dom /= sum_prob_dom edge_prob = dict(zip([hash_func(ed) for ed in edges], probs_dom)) self.probs_df["p_{0}".format(self.nb_of_erosion)] = self.probs_df.apply( lambda x: edge_prob[hash_func([int(x.u), int(x.v)])] if hash_func([int(x.u), int(x.v)]) in edge_prob else 0, axis=1) - new_nb_edges = (np.asarray( - [(1 / self.H.size()) - probs_dom[ix] for ix, ed in enumerate(edges) if self.H.has_edge(*ed)])).sum() * self.H.size() + hhh = np.asarray( + [(1 / self.H.size()) - ((probs_dom[ix]*sum_prob_dom)/sum_prob_dom_H) for ix, ed in enumerate(edges) if self.H.has_edge(*ed)]) + hhh[hhh < 0] = 0 + new_nb_edges = hhh.sum() * self.H.size() + #print(hhh) probs_erosion = np.asarray([old_probs[hash_func(ed)] - probs_dom[ix] for ix, ed in enumerate(edges)]) - probs_erosion[probs_erosion < 0] = float_epsilon + probs_erosion[probs_erosion <= 0] = float_epsilon probs_erosion /= probs_erosion.sum() + final_edges = [] index_selected_pairs = np.random.choice(np.arange(len(edges)), round(new_nb_edges), p=probs_erosion, replace=False) # round(0.7*H.size()) @@ -66,6 +82,7 @@ class ErosionModel(): G2.nodes[n]["block"] = self.block_assign[n] G2.nodes[n]["pos"] = self.coordinates[n] self.H = G2.copy() + return probs_erosion def erode_n_times(self,n): if self.nb_of_erosion >0: @@ -75,7 +92,12 @@ class ErosionModel(): self.nb_of_erosion = 0 self.H = self.G.copy() for i in range(n): - self.erode() + log(i) + log(self.H.size()) + r = self.erode() + if r == False: # we cannot erode further + log("Cannot erode further") + break def initialize(self): diff --git a/lib/random.py b/lib/random.py index 207bcba41e76763b80485567fccdd5bd9b42baef..9347d95acd5c7089df5eea2a03a708da48e6120b 100644 --- a/lib/random.py +++ b/lib/random.py @@ -488,15 +488,23 @@ def mixed_model_spat_sbm(nb_nodes, nb_edges, nb_com, alpha, percentage_edge_betw all_probs_sbm /= all_probs_sbm.sum() pos = nx.get_node_attributes(G,"pos") - 
all_probs_spa = np.asarray([1 / (float_epsilon +dist_func(pos[edge[0]], pos[edge[1]])) for edge in all_edges]) + all_probs_spa = np.asarray([1 / (float_epsilon + dist_func(pos[edge[0]], pos[edge[1]])) for edge in all_edges]) all_probs_spa /= all_probs_spa.sum() - all_probs = alpha * (all_probs_sbm) + (1 - alpha) * all_probs_spa + #all_probs = alpha * (all_probs_sbm) + (1 - alpha) * all_probs_spa + nb_edges_sbm,nb_edges_spa = round(alpha*nb_edges),round((1-alpha)*nb_edges) final_edges = [] - index_selected_pairs = np.random.choice(np.arange(len(all_edges)), nb_edges, p=all_probs, replace=False) - final_edges.extend(all_edges[index_selected_pairs]) + index_selected_pairs_sbm = np.random.choice(np.arange(len(all_edges)), nb_edges_sbm, p=all_probs_sbm, replace=False) + final_edges.extend(all_edges[index_selected_pairs_sbm]) + + all_probs_spa[index_selected_pairs_sbm] = all_probs_spa.min() + all_probs_spa/= all_probs_spa.sum() + + index_selected_pairs_spa = np.random.choice(np.arange(len(all_edges)), nb_edges_spa, p=all_probs_spa, replace=False) + final_edges.extend(all_edges[index_selected_pairs_spa]) + G2 = nx.from_edgelist(final_edges) for n in list(G2.nodes()):