From d5036dabf3d8a4435096c0da81821cb8c5a972f0 Mon Sep 17 00:00:00 2001 From: Fize Jacques <jacques.fize@cirad.fr> Date: Mon, 12 Apr 2021 18:32:14 +0200 Subject: [PATCH] debug --- eval_mixed_model.py | 49 +++++++++++++++++++++++++++-------------- lib/random.py | 1 - run_eval_mixed_model.sh | 17 +++++++++----- 3 files changed, 43 insertions(+), 24 deletions(-) mode change 100644 => 100755 run_eval_mixed_model.sh diff --git a/eval_mixed_model.py b/eval_mixed_model.py index 678dcda..54cce18 100644 --- a/eval_mixed_model.py +++ b/eval_mixed_model.py @@ -74,28 +74,41 @@ for n1 in list(G.nodes()): data.append([n1,n2]) register.add(hash_func((n1,n2))) df_data = pd.DataFrame(data,columns="u v".split()) +df_data["hash_"] = df_data.apply(lambda row:hash_func((int(row.u),int(row.v))), axis=1) pos = nx.get_node_attributes(G,"pos") block_assign = nx.get_node_attributes(G,"block") H = G.copy() float_epsilon = np.finfo(float).eps -for i in range(NB_ITERATION): +df_data["p_0"] = df_data.apply(lambda x:1 if G.has_edge(x.u,x.v) else 0,axis =1) +print(df_data) +for i in range(1,NB_ITERATION+1): + old_probs = dict(df_data["hash_ p_{0}".format(i-1).split()].values) auc_sbm,auc_spatial = get_aucs(H) - if VERBOSE : print(auc_sbm,auc_spatial) + if VERBOSE : print("SBM: ",auc_sbm,"SPATIAL: ",auc_spatial) if auc_sbm> auc_spatial: edges,probs = get_sbm_probs(H,ALPHA) else: edges,probs = get_spat_probs(H) probs = np.asarray(probs) + probs /= probs.sum() + + h_probs = np.asarray([(1 / H.size()) - probs[ix] for ix, ed in enumerate(edges) if H.has_edge(*ed)]) + new_nb_edges = h_probs.sum() * H.size() + if VERBOSE:print("new NB of Edges",new_nb_edges) + edges = np.asarray(edges) - edge_prob = dict(zip([hash_func(ed) for ed in edges],probs)) - probs = np.asarray([(1 if H.has_edge(*ed) else 0)-probs[ix] for ix,ed in enumerate(edges)]) - probs = np.asarray([ float_epsilon if p<=0 else p for p in probs]) + probs = np.asarray([old_probs[hash_func(ed)]-probs[ix] for ix,ed in enumerate(edges)]) + probs[probs <0] = float_epsilon probs /= probs.sum() + edge_prob = dict(zip([hash_func(ed) for ed in edges],probs)) + + + df_data["p_{0}".format(i)] = df_data.apply(lambda x: edge_prob[hash_func([int(x.u),int(x.v)])] if hash_func([int(x.u),int(x.v)]) in edge_prob else 0,axis=1) final_edges = [] - index_selected_pairs = np.random.choice(np.arange(len(edges)), round((H.size()*0.7)), p=probs, replace=False) + index_selected_pairs = np.random.choice(np.arange(len(edges)),round(new_nb_edges) , p=probs, replace=False)#round(0.7*H.size()) final_edges.extend(edges[index_selected_pairs]) G2 = nx.from_edgelist(final_edges) for n in list(G2.nodes()): @@ -104,7 +117,7 @@ for i in range(NB_ITERATION): H=G2.copy() -edge_feature= {hash_func([int(row.u),int(row.v)]):[row.p_0,row.p_1] for ix,row in df_data.iterrows()} +edge_feature= {hash_func([int(row.u),int(row.v)]):[row["p_{0}".format(i)] for i in range(1,NB_ITERATION+1)] for ix,row in df_data.iterrows()} G, _ = pp.prep_graph(G,maincc=True) traintest_split = LPEvalSplit() @@ -118,30 +131,30 @@ y_test = traintest_split.test_labels if "pos" in FEATURES: pos = nx.get_node_attributes(G,"pos") - dist_X_train = np.asarray([dist(pos[ed[0]],pos[ed[1]]) for ed in X_train]).reshape(-1,1) - dist_X_test = np.asarray([dist(pos[ed[0]],pos[ed[1]]) for ed in X_test]).reshape(-1,1) + dist_X_train = np.asarray([dist(pos[ed[0]],pos[ed[1]]) for ed in X_train[:,:2]]).reshape(-1,1) + dist_X_test = np.asarray([dist(pos[ed[0]],pos[ed[1]]) for ed in X_test[:,:2]]).reshape(-1,1) X_train = np.concatenate((X_train, dist_X_train), axis=1) X_test = np.concatenate((X_test, dist_X_test), axis=1) if "centrality" in FEATURES: centrality = nx.degree_centrality(G) - centrality_X_train = np.asarray([[centrality[ed[0]],centrality[ed[1]]] for ed in X_train]) - centrality_X_test = np.asarray([[centrality[ed[0]],centrality[ed[1]]] for ed in X_test]) + centrality_X_train = np.asarray([[centrality[ed[0]],centrality[ed[1]]] for ed in X_train[:,:2]]) + centrality_X_test = np.asarray([[centrality[ed[0]],centrality[ed[1]]] for ed in X_test[:,:2]]) X_train = np.concatenate((X_train, centrality_X_train), axis=1) X_test = np.concatenate((X_test, centrality_X_test), axis=1) if "it_probs": - if_not =[0 for i in range(NB_ITERATION-1)] - feature_X_train = np.asarray([ (edge_feature[hash_func(ed)] if hash_func(ed) in edge_feature else if_not) for ed in X_train]) - feature_X_test = np.asarray([ (edge_feature[hash_func(ed)] if hash_func(ed) in edge_feature else if_not) for ed in X_test]) - + if_not =[0 for i in range(NB_ITERATION)] + feature_X_train = np.asarray([ (edge_feature[hash_func(ed)] if hash_func(ed) in edge_feature else if_not) for ed in X_train[:,:2]]) + feature_X_test = np.asarray([ (edge_feature[hash_func(ed)] if hash_func(ed) in edge_feature else if_not) for ed in X_test[:,:2]]) X_train = np.concatenate((X_train, feature_X_train), axis=1) X_test = np.concatenate((X_test, feature_X_test ), axis=1) - +X_train = X_train[:,2:] +X_test = X_test[:,2:] classifier_dict = { "naive-bayes":GaussianNB(), @@ -191,5 +204,7 @@ df["alpha"] = ALPHA df["nb_nodes"] = GRAPH_NODE_NB df["nb_edges"] = GRAPH_EDGE_NB df["nb_com"] = NB_COM +df["nb_iterations"] = NB_ITERATION +df["features"] = "_".join(FEATURES) if VERBOSE : print(df) -df.to_csv("{0}_{1}_{2}_{3}.csv".format(GRAPH_NODE_NB,GRAPH_EDGE_NB,NB_COM,ALPHA),sep="\t",index=None) \ No newline at end of file +df.to_csv("{0}_{1}_{2}_{3}_{4}_{5}.csv".format(GRAPH_NODE_NB,GRAPH_EDGE_NB,NB_COM,ALPHA,NB_ITERATION,"_".join(FEATURES)),sep="\t",index=None) \ No newline at end of file diff --git a/lib/random.py b/lib/random.py index 7980f43..3253f2e 100644 --- a/lib/random.py +++ b/lib/random.py @@ -539,7 +539,6 @@ def get_sbm_probs(G, percentage_edge_betw, verbose=False): all_probs = np.concatenate((probs_inter, probs_intra)) del probs_inter del probs_intra - all_probs /= all_probs.sum() return all_edges,all_probs diff --git a/run_eval_mixed_model.sh b/run_eval_mixed_model.sh old mode 100644 new mode 100755 index 1f59ee2..a927bb5 --- a/run_eval_mixed_model.sh +++ b/run_eval_mixed_model.sh @@ -1,11 +1,16 @@ #!/bin/bash -nb_iteration = 3 -for alpha in 0 0.2 0.5 0.7 1 +for nb_iteration in 2 3 4 5 do - for nbcom in 2 3 4 5 + for feats in "it_probs" "it_probs,pos" "it_probs,centrality" "it_probs,centrality,pos" do - echo "alpha= "$alpha", nb_com= "$nbcom - python eval_mixed_model.py 100 200 $nbcom $alpha $nb_iteration -f pos,centrality,it_probs - python eval_mixed_model.py 300 600 $nbcom $alpha $nb_iteration -f pos,centrality,it_probs + for alpha in 0 0.2 0.5 0.7 1 + do + for nbcom in 2 3 4 5 + do + echo "alpha= "$alpha", nb_com= "$nbcom", conf= "$conf", nb_iteration= "$nb_iteration + python eval_mixed_model.py 100 200 $nbcom $alpha $nb_iteration -f $feats + python eval_mixed_model.py 300 600 $nbcom $alpha $nb_iteration -f $feats + done + done done done \ No newline at end of file -- GitLab