diff --git a/eval_mixed_model.py b/eval_mixed_model.py
index 9d10dc384bd7d17b5d0225f83d2e93d9f38fd511..62187ff87444eafa8b4be9a691f2fa48945ed591 100644
--- a/eval_mixed_model.py
+++ b/eval_mixed_model.py
@@ -41,7 +41,7 @@ parser.add_argument('-f', '--features', help='Feature(s) used in the model train
parser.add_argument("-v","--verbose",action="store_true")
parser.add_argument("-t","--timeout",default=30,type=int)
-args= parser.parse_args()
+args= parser.parse_args()  # read options from the real command line (sys.argv), not a hard-coded debug argv
# COMMAND LINE ARGS VALUES
GRAPH_NODE_NB = args.nb_nodes
@@ -81,13 +81,16 @@ for n1 in list(G.nodes()):
register.add(hash_func((n1,n2)))
df_data = pd.DataFrame(data,columns="u v".split())
df_data["hash_"] = df_data.apply(lambda row:hash_func((int(row.u),int(row.v))), axis=1)
+df_data["p_0"] = df_data.apply(lambda x:1 if G.has_edge(x.u,x.v) else 0,axis =1)
pos = nx.get_node_attributes(G,"pos")
block_assign = nx.get_node_attributes(G,"block")
+
H = G.copy()
float_epsilon = np.finfo(float).eps
-df_data["p_0"] = df_data.apply(lambda x:1 if G.has_edge(x.u,x.v) else 0,axis =1)
+
+
for i in range(1,NB_ITERATION+1):
if H.size() < 30:
df_data["p_{0}".format(i)] = df_data["p_{0}".format(i-1)]
@@ -96,27 +99,29 @@ for i in range(1,NB_ITERATION+1):
auc_sbm,auc_spatial = get_aucs(H)
if VERBOSE : print("SBM: ",auc_sbm,"SPATIAL: ",auc_spatial)
if auc_sbm> auc_spatial:
- edges,probs = get_sbm_probs(H,ALPHA)
+ edges,probs = get_sbm_probs(H,ALPHA)  # pass the ALPHA setting (as the replaced line did) rather than a literal 0.01
else:
edges,probs = get_spat_probs(H)
- probs = np.asarray(probs)
- probs /= probs.sum()
- h_probs = np.asarray([(1 / H.size()) - probs[ix] for ix, ed in enumerate(edges) if H.has_edge(*ed)])
+ edges = np.asarray(edges)
+
+ probs_dom = np.asarray(probs)
+ probs_dom /= probs_dom.sum()
+
+ edge_prob = dict(zip([hash_func(ed) for ed in edges], probs_dom))
+ df_data["p_{0}".format(i)] = df_data.apply(lambda x: edge_prob.get(hash_func([int(x.u),int(x.v)]), 0),axis=1)  # one hash + one lookup per row; pairs absent from edge_prob get 0
+
+ h_probs = np.asarray([(1 / H.size()) - probs_dom[ix] for ix, ed in enumerate(edges) if H.has_edge(*ed)])
new_nb_edges = h_probs.sum() * H.size()
if VERBOSE:print("new NB of Edges",new_nb_edges)
- edges = np.asarray(edges)
- probs = np.asarray([old_probs[hash_func(ed)]-probs[ix] for ix,ed in enumerate(edges)])
- probs[probs <0] = float_epsilon
- probs /= probs.sum()
- edge_prob = dict(zip([hash_func(ed) for ed in edges],probs))
+ probs_erosion = np.asarray([old_probs[hash_func(ed)]-probs_dom[ix] for ix,ed in enumerate(edges)])
+ probs_erosion[probs_erosion <0] = float_epsilon
+ probs_erosion /= probs_erosion.sum()
-
- df_data["p_{0}".format(i)] = df_data.apply(lambda x: edge_prob[hash_func([int(x.u),int(x.v)])] if hash_func([int(x.u),int(x.v)]) in edge_prob else 0,axis=1)
final_edges = []
- index_selected_pairs = np.random.choice(np.arange(len(edges)),round(new_nb_edges) , p=probs, replace=False)#round(0.7*H.size())
+ index_selected_pairs = np.random.choice(np.arange(len(edges)),round(new_nb_edges) , p=probs_erosion, replace=False)#round(0.7*H.size())
final_edges.extend(edges[index_selected_pairs])
G2 = nx.from_edgelist(final_edges)
for n in list(G2.nodes()):
@@ -125,12 +130,12 @@ for i in range(1,NB_ITERATION+1):
H=G2.copy()
if VERBOSE:print(df_data)
+
edge_feature= {hash_func([int(row.u),int(row.v)]):[row["p_{0}".format(i)] for i in range(1,NB_ITERATION+1)] for ix,row in df_data.iterrows()}
-G, _ = pp.prep_graph(G,maincc=True)
+G, _ = pp.prep_graph(G,maincc=True,relabel=False)
traintest_split = LPEvalSplit()
traintest_split.compute_splits(G, split_alg="spanning_tree", train_frac=0.90, fe_ratio=1)
-nee = LPEvaluator(traintest_split)
X_train = traintest_split.train_edges
y_train = traintest_split.train_labels
@@ -166,7 +171,7 @@ X_test = X_test[:,2:]
classifier_dict = {
"naive-bayes":GaussianNB(),
- "svm":SVC(),
+ #"svm":SVC(),
"sgd":SGDClassifier(),
"knn":KNeighborsClassifier(),
"decision-tree": DecisionTreeClassifier(),
@@ -191,7 +196,7 @@ auc_sbm, auc_spa = get_aucs(G)
if VERBOSE: print("SBM AUUROC",auc_sbm,"SPATIAL AUROC",auc_spa)
data = []
pbar = tqdm(parameters)
-for classi_ in parameters:
+for classi_ in classifier_dict:
pbar.set_description(classi_)
if len(parameters[classi_])>0:
clf = GridSearchCV(