Big debug

7a88c508 · Fize Jacques · 57c967d2 · 7a88c508
Commit 7a88c508 authored 4 years ago by Fize Jacques
--- a/eval_mixed_model.py
+++ b/eval_mixed_model.py
@@ -41,7 +41,7 @@ parser.add_argument('-f', '--features', help='Feature(s) used in the model train
 parser.add_argument("-v","--verbose",action="store_true")
 parser.add_argument("-t","--timeout",default=30,type=int)

-args= parser.parse_args()
+args= parser.parse_args("300 600 5 1 1 -v -f it_probs".split())

 # COMMAND LINE ARGS VALUES
 GRAPH_NODE_NB = args.nb_nodes
@@ -81,13 +81,16 @@ for n1 in list(G.nodes()):
                register.add(hash_func((n1,n2)))
 df_data = pd.DataFrame(data,columns="u v".split())
 df_data["hash_"] = df_data.apply(lambda row:hash_func((int(row.u),int(row.v))), axis=1)
+df_data["p_0"] = df_data.apply(lambda x:1 if G.has_edge(x.u,x.v) else 0,axis =1)


 pos = nx.get_node_attributes(G,"pos")
 block_assign = nx.get_node_attributes(G,"block")
+
 H = G.copy()
 float_epsilon = np.finfo(float).eps
-df_data["p_0"] = df_data.apply(lambda x:1 if G.has_edge(x.u,x.v) else 0,axis =1)
+
+
 for i in range(1,NB_ITERATION+1):
    if H.size() < 30:
        df_data["p_{0}".format(i)] = df_data["p_{0}".format(i-1)]
@@ -96,27 +99,29 @@ for i in range(1,NB_ITERATION+1):
    auc_sbm,auc_spatial = get_aucs(H)
    if VERBOSE : print("SBM: ",auc_sbm,"SPATIAL: ",auc_spatial)
    if auc_sbm> auc_spatial:
-        edges,probs = get_sbm_probs(H,ALPHA)
+        edges,probs = get_sbm_probs(H,0.01)
    else:
        edges,probs = get_spat_probs(H)
-    probs = np.asarray(probs)
-    probs /= probs.sum()

-    h_probs = np.asarray([(1 / H.size()) - probs[ix] for ix, ed in enumerate(edges) if H.has_edge(*ed)])
+    edges = np.asarray(edges)
+
+    probs_dom = np.asarray(probs)
+    probs_dom /= probs_dom.sum()
+
+    edge_prob = dict(zip([hash_func(ed) for ed in edges], probs_dom))
+    df_data["p_{0}".format(i)] = df_data.apply(lambda x: edge_prob[hash_func([int(x.u),int(x.v)])] if hash_func([int(x.u),int(x.v)]) in edge_prob else 0,axis=1)
+
+    h_probs = np.asarray([(1 / H.size()) - probs_dom[ix] for ix, ed in enumerate(edges) if H.has_edge(*ed)])
    new_nb_edges = h_probs.sum() * H.size()
    if VERBOSE:print("new NB of Edges",new_nb_edges)

-    edges = np.asarray(edges)
-    probs = np.asarray([old_probs[hash_func(ed)]-probs[ix] for ix,ed in enumerate(edges)])
-    probs[probs <0] = float_epsilon
-    probs /= probs.sum()
-    edge_prob = dict(zip([hash_func(ed) for ed in edges],probs))

+    probs_erosion = np.asarray([old_probs[hash_func(ed)]-probs_dom[ix] for ix,ed in enumerate(edges)])
+    probs_erosion[probs_erosion <0] = float_epsilon
+    probs_erosion /= probs_erosion.sum()

-
-    df_data["p_{0}".format(i)] = df_data.apply(lambda x: edge_prob[hash_func([int(x.u),int(x.v)])] if hash_func([int(x.u),int(x.v)]) in edge_prob else 0,axis=1)
    final_edges = []
-    index_selected_pairs = np.random.choice(np.arange(len(edges)),round(new_nb_edges) , p=probs, replace=False)#round(0.7*H.size())
+    index_selected_pairs = np.random.choice(np.arange(len(edges)),round(new_nb_edges) , p=probs_erosion, replace=False)#round(0.7*H.size())
    final_edges.extend(edges[index_selected_pairs])
    G2 = nx.from_edgelist(final_edges)
    for n in list(G2.nodes()):
@@ -125,12 +130,12 @@ for i in range(1,NB_ITERATION+1):
    H=G2.copy()

 if VERBOSE:print(df_data)
+
 edge_feature= {hash_func([int(row.u),int(row.v)]):[row["p_{0}".format(i)] for i in range(1,NB_ITERATION+1)] for ix,row in df_data.iterrows()}

-G, _ = pp.prep_graph(G,maincc=True)
+G, _ = pp.prep_graph(G,maincc=True,relabel=False)
 traintest_split = LPEvalSplit()
 traintest_split.compute_splits(G, split_alg="spanning_tree", train_frac=0.90, fe_ratio=1)
-nee = LPEvaluator(traintest_split)

 X_train = traintest_split.train_edges
 y_train = traintest_split.train_labels
@@ -166,7 +171,7 @@ X_test = X_test[:,2:]

 classifier_dict = {
    "naive-bayes":GaussianNB(),
-    "svm":SVC(),
+    #"svm":SVC(),
    "sgd":SGDClassifier(),
    "knn":KNeighborsClassifier(),
    "decision-tree": DecisionTreeClassifier(),
@@ -191,7 +196,7 @@ auc_sbm, auc_spa = get_aucs(G)
 if VERBOSE: print("SBM AUUROC",auc_sbm,"SPATIAL AUROC",auc_spa)
 data = []
 pbar = tqdm(parameters)
-for classi_ in parameters:
+for classi_ in classifier_dict:
    pbar.set_description(classi_)
    if len(parameters[classi_])>0:
        clf = GridSearchCV(