Skip to content
Snippets Groups Projects
Commit d5036dab authored by Fize Jacques
Browse files

debug

parent 991d74bc
No related branches found
No related tags found
No related merge requests found
...@@ -74,28 +74,41 @@ for n1 in list(G.nodes()): ...@@ -74,28 +74,41 @@ for n1 in list(G.nodes()):
data.append([n1,n2]) data.append([n1,n2])
register.add(hash_func((n1,n2))) register.add(hash_func((n1,n2)))
df_data = pd.DataFrame(data,columns="u v".split()) df_data = pd.DataFrame(data,columns="u v".split())
df_data["hash_"] = df_data.apply(lambda row:hash_func((int(row.u),int(row.v))), axis=1)
pos = nx.get_node_attributes(G,"pos") pos = nx.get_node_attributes(G,"pos")
block_assign = nx.get_node_attributes(G,"block") block_assign = nx.get_node_attributes(G,"block")
H = G.copy() H = G.copy()
float_epsilon = np.finfo(float).eps float_epsilon = np.finfo(float).eps
for i in range(NB_ITERATION): df_data["p_0"] = df_data.apply(lambda x:1 if G.has_edge(x.u,x.v) else 0,axis =1)
print(df_data)
for i in range(1,NB_ITERATION+1):
old_probs = dict(df_data["hash_ p_{0}".format(i-1).split()].values)
auc_sbm,auc_spatial = get_aucs(H) auc_sbm,auc_spatial = get_aucs(H)
if VERBOSE : print(auc_sbm,auc_spatial) if VERBOSE : print("SBM: ",auc_sbm,"SPATIAL: ",auc_spatial)
if auc_sbm> auc_spatial: if auc_sbm> auc_spatial:
edges,probs = get_sbm_probs(H,ALPHA) edges,probs = get_sbm_probs(H,ALPHA)
else: else:
edges,probs = get_spat_probs(H) edges,probs = get_spat_probs(H)
probs = np.asarray(probs) probs = np.asarray(probs)
probs /= probs.sum()
h_probs = np.asarray([(1 / H.size()) - probs[ix] for ix, ed in enumerate(edges) if H.has_edge(*ed)])
new_nb_edges = h_probs.sum() * H.size()
if VERBOSE:print("new NB of Edges",new_nb_edges)
edges = np.asarray(edges) edges = np.asarray(edges)
edge_prob = dict(zip([hash_func(ed) for ed in edges],probs)) probs = np.asarray([old_probs[hash_func(ed)]-probs[ix] for ix,ed in enumerate(edges)])
probs = np.asarray([(1 if H.has_edge(*ed) else 0)-probs[ix] for ix,ed in enumerate(edges)]) probs[probs <0] = float_epsilon
probs = np.asarray([ float_epsilon if p<=0 else p for p in probs])
probs /= probs.sum() probs /= probs.sum()
edge_prob = dict(zip([hash_func(ed) for ed in edges],probs))
df_data["p_{0}".format(i)] = df_data.apply(lambda x: edge_prob[hash_func([int(x.u),int(x.v)])] if hash_func([int(x.u),int(x.v)]) in edge_prob else 0,axis=1) df_data["p_{0}".format(i)] = df_data.apply(lambda x: edge_prob[hash_func([int(x.u),int(x.v)])] if hash_func([int(x.u),int(x.v)]) in edge_prob else 0,axis=1)
final_edges = [] final_edges = []
index_selected_pairs = np.random.choice(np.arange(len(edges)), round((H.size()*0.7)), p=probs, replace=False) index_selected_pairs = np.random.choice(np.arange(len(edges)),round(new_nb_edges) , p=probs, replace=False)#round(0.7*H.size())
final_edges.extend(edges[index_selected_pairs]) final_edges.extend(edges[index_selected_pairs])
G2 = nx.from_edgelist(final_edges) G2 = nx.from_edgelist(final_edges)
for n in list(G2.nodes()): for n in list(G2.nodes()):
...@@ -104,7 +117,7 @@ for i in range(NB_ITERATION): ...@@ -104,7 +117,7 @@ for i in range(NB_ITERATION):
H=G2.copy() H=G2.copy()
edge_feature= {hash_func([int(row.u),int(row.v)]):[row.p_0,row.p_1] for ix,row in df_data.iterrows()} edge_feature= {hash_func([int(row.u),int(row.v)]):[row["p_{0}".format(i)] for i in range(1,NB_ITERATION+1)] for ix,row in df_data.iterrows()}
G, _ = pp.prep_graph(G,maincc=True) G, _ = pp.prep_graph(G,maincc=True)
traintest_split = LPEvalSplit() traintest_split = LPEvalSplit()
...@@ -118,30 +131,30 @@ y_test = traintest_split.test_labels ...@@ -118,30 +131,30 @@ y_test = traintest_split.test_labels
if "pos" in FEATURES: if "pos" in FEATURES:
pos = nx.get_node_attributes(G,"pos") pos = nx.get_node_attributes(G,"pos")
dist_X_train = np.asarray([dist(pos[ed[0]],pos[ed[1]]) for ed in X_train]).reshape(-1,1) dist_X_train = np.asarray([dist(pos[ed[0]],pos[ed[1]]) for ed in X_train[:,:2]]).reshape(-1,1)
dist_X_test = np.asarray([dist(pos[ed[0]],pos[ed[1]]) for ed in X_test]).reshape(-1,1) dist_X_test = np.asarray([dist(pos[ed[0]],pos[ed[1]]) for ed in X_test[:,:2]]).reshape(-1,1)
X_train = np.concatenate((X_train, dist_X_train), axis=1) X_train = np.concatenate((X_train, dist_X_train), axis=1)
X_test = np.concatenate((X_test, dist_X_test), axis=1) X_test = np.concatenate((X_test, dist_X_test), axis=1)
if "centrality" in FEATURES: if "centrality" in FEATURES:
centrality = nx.degree_centrality(G) centrality = nx.degree_centrality(G)
centrality_X_train = np.asarray([[centrality[ed[0]],centrality[ed[1]]] for ed in X_train]) centrality_X_train = np.asarray([[centrality[ed[0]],centrality[ed[1]]] for ed in X_train[:,:2]])
centrality_X_test = np.asarray([[centrality[ed[0]],centrality[ed[1]]] for ed in X_test]) centrality_X_test = np.asarray([[centrality[ed[0]],centrality[ed[1]]] for ed in X_test[:,:2]])
X_train = np.concatenate((X_train, centrality_X_train), axis=1) X_train = np.concatenate((X_train, centrality_X_train), axis=1)
X_test = np.concatenate((X_test, centrality_X_test), axis=1) X_test = np.concatenate((X_test, centrality_X_test), axis=1)
if "it_probs": if "it_probs":
if_not =[0 for i in range(NB_ITERATION-1)] if_not =[0 for i in range(NB_ITERATION)]
feature_X_train = np.asarray([ (edge_feature[hash_func(ed)] if hash_func(ed) in edge_feature else if_not) for ed in X_train]) feature_X_train = np.asarray([ (edge_feature[hash_func(ed)] if hash_func(ed) in edge_feature else if_not) for ed in X_train[:,:2]])
feature_X_test = np.asarray([ (edge_feature[hash_func(ed)] if hash_func(ed) in edge_feature else if_not) for ed in X_test]) feature_X_test = np.asarray([ (edge_feature[hash_func(ed)] if hash_func(ed) in edge_feature else if_not) for ed in X_test[:,:2]])
X_train = np.concatenate((X_train, feature_X_train), axis=1) X_train = np.concatenate((X_train, feature_X_train), axis=1)
X_test = np.concatenate((X_test, feature_X_test ), axis=1) X_test = np.concatenate((X_test, feature_X_test ), axis=1)
X_train = X_train[:,2:]
X_test = X_test[:,2:]
classifier_dict = { classifier_dict = {
"naive-bayes":GaussianNB(), "naive-bayes":GaussianNB(),
...@@ -191,5 +204,7 @@ df["alpha"] = ALPHA ...@@ -191,5 +204,7 @@ df["alpha"] = ALPHA
df["nb_nodes"] = GRAPH_NODE_NB df["nb_nodes"] = GRAPH_NODE_NB
df["nb_edges"] = GRAPH_EDGE_NB df["nb_edges"] = GRAPH_EDGE_NB
df["nb_com"] = NB_COM df["nb_com"] = NB_COM
df["nb_iterations"] = NB_ITERATION
df["features"] = "_".join(FEATURES)
if VERBOSE : print(df) if VERBOSE : print(df)
df.to_csv("{0}_{1}_{2}_{3}.csv".format(GRAPH_NODE_NB,GRAPH_EDGE_NB,NB_COM,ALPHA),sep="\t",index=None) df.to_csv("{0}_{1}_{2}_{3}_{4}_{5}.csv".format(GRAPH_NODE_NB,GRAPH_EDGE_NB,NB_COM,ALPHA,NB_ITERATION,"_".join(FEATURES)),sep="\t",index=None)
\ No newline at end of file \ No newline at end of file
...@@ -539,7 +539,6 @@ def get_sbm_probs(G, percentage_edge_betw, verbose=False): ...@@ -539,7 +539,6 @@ def get_sbm_probs(G, percentage_edge_betw, verbose=False):
all_probs = np.concatenate((probs_inter, probs_intra)) all_probs = np.concatenate((probs_inter, probs_intra))
del probs_inter del probs_inter
del probs_intra del probs_intra
all_probs /= all_probs.sum()
return all_edges,all_probs return all_edges,all_probs
......
#!/bin/bash #!/bin/bash
nb_iteration = 3 for nb_iteration in 2 3 4 5
for alpha in 0 0.2 0.5 0.7 1
do do
for nbcom in 2 3 4 5 for feats in "it_probs" "it_probs,pos" "it_probs,centrality" "it_probs,centrality,pos"
do do
echo "alpha= "$alpha", nb_com= "$nbcom for alpha in 0 0.2 0.5 0.7 1
python eval_mixed_model.py 100 200 $nbcom $alpha $nb_iteration -f pos,centrality,it_probs do
python eval_mixed_model.py 300 600 $nbcom $alpha $nb_iteration -f pos,centrality,it_probs for nbcom in 2 3 4 5
do
echo "alpha= "$alpha", nb_com= "$nbcom", conf= "$conf", nb_iteration= "$nb_iteration
python eval_mixed_model.py 100 200 $nbcom $alpha $nb_iteration -f $feats
python eval_mixed_model.py 300 600 $nbcom $alpha $nb_iteration -f $feats
done
done
done done
done done
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment