From d5036dabf3d8a4435096c0da81821cb8c5a972f0 Mon Sep 17 00:00:00 2001
From: Fize Jacques <jacques.fize@cirad.fr>
Date: Mon, 12 Apr 2021 18:32:14 +0200
Subject: [PATCH] Fix iterative edge-probability update and extend evaluation sweep

---
 eval_mixed_model.py     | 49 +++++++++++++++++++++++++++--------------
 lib/random.py           |  1 -
 run_eval_mixed_model.sh | 17 +++++++++-----
 3 files changed, 43 insertions(+), 24 deletions(-)
 mode change 100644 => 100755 run_eval_mixed_model.sh

diff --git a/eval_mixed_model.py b/eval_mixed_model.py
index 678dcda..54cce18 100644
--- a/eval_mixed_model.py
+++ b/eval_mixed_model.py
@@ -74,28 +74,41 @@ for n1 in list(G.nodes()):
                 data.append([n1,n2])
                 register.add(hash_func((n1,n2)))
 df_data = pd.DataFrame(data,columns="u v".split())
+df_data["hash_"] = df_data.apply(lambda row:hash_func((int(row.u),int(row.v))), axis=1)
 
 
 pos = nx.get_node_attributes(G,"pos")
 block_assign = nx.get_node_attributes(G,"block")
 H = G.copy()
 float_epsilon = np.finfo(float).eps
-for i in range(NB_ITERATION):
+df_data["p_0"] = df_data.apply(lambda x:1 if G.has_edge(x.u,x.v) else 0,axis =1)
+print(df_data)
+for i in range(1,NB_ITERATION+1):
+    old_probs = dict(df_data["hash_ p_{0}".format(i-1).split()].values)
     auc_sbm,auc_spatial = get_aucs(H)
-    if VERBOSE : print(auc_sbm,auc_spatial)
+    if VERBOSE : print("SBM: ",auc_sbm,"SPATIAL: ",auc_spatial)
     if auc_sbm> auc_spatial:
         edges,probs = get_sbm_probs(H,ALPHA)
     else:
         edges,probs = get_spat_probs(H)
     probs = np.asarray(probs)
+    probs /= probs.sum()
+
+    h_probs = np.asarray([(1 / H.size()) - probs[ix] for ix, ed in enumerate(edges) if H.has_edge(*ed)])
+    new_nb_edges = h_probs.sum() * H.size()
+    if VERBOSE:print("new NB of Edges",new_nb_edges)
+
     edges = np.asarray(edges)
-    edge_prob = dict(zip([hash_func(ed) for ed in edges],probs))
-    probs = np.asarray([(1 if H.has_edge(*ed) else 0)-probs[ix] for ix,ed in enumerate(edges)])
-    probs = np.asarray([ float_epsilon if p<=0 else p for p in probs])
+    probs = np.asarray([old_probs[hash_func(ed)]-probs[ix] for ix,ed in enumerate(edges)])
+    probs[probs <0] = float_epsilon
     probs /= probs.sum()
+    edge_prob = dict(zip([hash_func(ed) for ed in edges],probs))
+
+
+
     df_data["p_{0}".format(i)] = df_data.apply(lambda x: edge_prob[hash_func([int(x.u),int(x.v)])] if hash_func([int(x.u),int(x.v)]) in edge_prob else 0,axis=1)
     final_edges = []
-    index_selected_pairs = np.random.choice(np.arange(len(edges)), round((H.size()*0.7)), p=probs, replace=False)
+    index_selected_pairs = np.random.choice(np.arange(len(edges)), round(new_nb_edges), p=probs, replace=False)  # previously round(0.7*H.size())
     final_edges.extend(edges[index_selected_pairs])
     G2 = nx.from_edgelist(final_edges)
     for n in list(G2.nodes()):
@@ -104,7 +117,7 @@ for i in range(NB_ITERATION):
     H=G2.copy()
 
 
-edge_feature= {hash_func([int(row.u),int(row.v)]):[row.p_0,row.p_1] for ix,row in df_data.iterrows()}
+edge_feature= {hash_func([int(row.u),int(row.v)]):[row["p_{0}".format(i)] for i in range(1,NB_ITERATION+1)] for ix,row in df_data.iterrows()}
 
 G, _ = pp.prep_graph(G,maincc=True)
 traintest_split = LPEvalSplit()
@@ -118,30 +131,30 @@ y_test = traintest_split.test_labels
 
 if "pos" in FEATURES:
     pos = nx.get_node_attributes(G,"pos")
-    dist_X_train = np.asarray([dist(pos[ed[0]],pos[ed[1]]) for ed in X_train]).reshape(-1,1)
-    dist_X_test = np.asarray([dist(pos[ed[0]],pos[ed[1]]) for ed in X_test]).reshape(-1,1)
+    dist_X_train = np.asarray([dist(pos[ed[0]],pos[ed[1]]) for ed in X_train[:,:2]]).reshape(-1,1)
+    dist_X_test = np.asarray([dist(pos[ed[0]],pos[ed[1]]) for ed in X_test[:,:2]]).reshape(-1,1)
 
     X_train = np.concatenate((X_train, dist_X_train), axis=1)
     X_test = np.concatenate((X_test, dist_X_test), axis=1)
 
 if "centrality" in FEATURES:
     centrality = nx.degree_centrality(G)
-    centrality_X_train = np.asarray([[centrality[ed[0]],centrality[ed[1]]] for ed in X_train])
-    centrality_X_test = np.asarray([[centrality[ed[0]],centrality[ed[1]]] for ed in X_test])
+    centrality_X_train = np.asarray([[centrality[ed[0]],centrality[ed[1]]] for ed in X_train[:,:2]])
+    centrality_X_test = np.asarray([[centrality[ed[0]],centrality[ed[1]]] for ed in X_test[:,:2]])
 
     X_train = np.concatenate((X_train, centrality_X_train), axis=1)
     X_test = np.concatenate((X_test, centrality_X_test), axis=1)
 
 
 if "it_probs":
-    if_not =[0 for i in range(NB_ITERATION-1)]
-    feature_X_train = np.asarray([ (edge_feature[hash_func(ed)] if hash_func(ed) in edge_feature else if_not) for ed in X_train])
-    feature_X_test = np.asarray([ (edge_feature[hash_func(ed)] if hash_func(ed) in edge_feature else if_not) for ed in X_test])
-
+    if_not =[0 for i in range(NB_ITERATION)]
+    feature_X_train = np.asarray([ (edge_feature[hash_func(ed)] if hash_func(ed) in edge_feature else if_not) for ed in X_train[:,:2]])
+    feature_X_test = np.asarray([ (edge_feature[hash_func(ed)] if hash_func(ed) in edge_feature else if_not) for ed in X_test[:,:2]])
     X_train = np.concatenate((X_train, feature_X_train), axis=1)
     X_test = np.concatenate((X_test, feature_X_test ), axis=1)
 
-
+X_train = X_train[:,2:]
+X_test = X_test[:,2:]
 
 classifier_dict = {
     "naive-bayes":GaussianNB(),
@@ -191,5 +204,7 @@ df["alpha"] = ALPHA
 df["nb_nodes"] = GRAPH_NODE_NB
 df["nb_edges"] = GRAPH_EDGE_NB
 df["nb_com"] = NB_COM
+df["nb_iterations"] = NB_ITERATION
+df["features"] = "_".join(FEATURES)
 if VERBOSE : print(df)
-df.to_csv("{0}_{1}_{2}_{3}.csv".format(GRAPH_NODE_NB,GRAPH_EDGE_NB,NB_COM,ALPHA),sep="\t",index=None)
\ No newline at end of file
+df.to_csv("{0}_{1}_{2}_{3}_{4}_{5}.csv".format(GRAPH_NODE_NB,GRAPH_EDGE_NB,NB_COM,ALPHA,NB_ITERATION,"_".join(FEATURES)),sep="\t",index=None)
\ No newline at end of file
diff --git a/lib/random.py b/lib/random.py
index 7980f43..3253f2e 100644
--- a/lib/random.py
+++ b/lib/random.py
@@ -539,7 +539,6 @@ def get_sbm_probs(G, percentage_edge_betw, verbose=False):
     all_probs = np.concatenate((probs_inter, probs_intra))
     del probs_inter
     del probs_intra
-    all_probs /= all_probs.sum()
     return all_edges,all_probs
 
 
diff --git a/run_eval_mixed_model.sh b/run_eval_mixed_model.sh
old mode 100644
new mode 100755
index 1f59ee2..a927bb5
--- a/run_eval_mixed_model.sh
+++ b/run_eval_mixed_model.sh
@@ -1,11 +1,16 @@
 #!/bin/bash
-nb_iteration = 3
-for alpha in 0 0.2 0.5 0.7 1
+for nb_iteration in 2 3 4 5
 do
-  for nbcom in 2 3 4 5
+  for feats in "it_probs" "it_probs,pos" "it_probs,centrality" "it_probs,centrality,pos"
   do
-    echo "alpha= "$alpha", nb_com= "$nbcom
-    python eval_mixed_model.py 100 200 $nbcom $alpha $nb_iteration -f pos,centrality,it_probs
-    python eval_mixed_model.py 300 600 $nbcom $alpha $nb_iteration -f pos,centrality,it_probs
+    for alpha in 0 0.2 0.5 0.7 1
+    do
+      for nbcom in 2 3 4 5
+      do
+        echo "alpha= "$alpha", nb_com= "$nbcom", conf= "$conf", nb_iteration= "$nb_iteration
+        python eval_mixed_model.py 100 200 $nbcom $alpha $nb_iteration -f $feats
+        python eval_mixed_model.py 300 600 $nbcom $alpha $nb_iteration -f $feats
+      done
+    done
   done
 done
\ No newline at end of file
-- 
GitLab