diff --git a/eval_mixed_model.py b/eval_mixed_model.py index 9d10dc384bd7d17b5d0225f83d2e93d9f38fd511..62187ff87444eafa8b4be9a691f2fa48945ed591 100644 --- a/eval_mixed_model.py +++ b/eval_mixed_model.py @@ -41,7 +41,7 @@ parser.add_argument('-f', '--features', help='Feature(s) used in the model train parser.add_argument("-v","--verbose",action="store_true") parser.add_argument("-t","--timeout",default=30,type=int) -args= parser.parse_args() +args= parser.parse_args("300 600 5 1 1 -v -f it_probs".split()) # COMMAND LINE ARGS VALUES GRAPH_NODE_NB = args.nb_nodes @@ -81,13 +81,16 @@ for n1 in list(G.nodes()): register.add(hash_func((n1,n2))) df_data = pd.DataFrame(data,columns="u v".split()) df_data["hash_"] = df_data.apply(lambda row:hash_func((int(row.u),int(row.v))), axis=1) +df_data["p_0"] = df_data.apply(lambda x:1 if G.has_edge(x.u,x.v) else 0,axis =1) pos = nx.get_node_attributes(G,"pos") block_assign = nx.get_node_attributes(G,"block") + H = G.copy() float_epsilon = np.finfo(float).eps -df_data["p_0"] = df_data.apply(lambda x:1 if G.has_edge(x.u,x.v) else 0,axis =1) + + for i in range(1,NB_ITERATION+1): if H.size() < 30: df_data["p_{0}".format(i)] = df_data["p_{0}".format(i-1)] @@ -96,27 +99,29 @@ for i in range(1,NB_ITERATION+1): auc_sbm,auc_spatial = get_aucs(H) if VERBOSE : print("SBM: ",auc_sbm,"SPATIAL: ",auc_spatial) if auc_sbm> auc_spatial: - edges,probs = get_sbm_probs(H,ALPHA) + edges,probs = get_sbm_probs(H,0.01) else: edges,probs = get_spat_probs(H) - probs = np.asarray(probs) - probs /= probs.sum() - h_probs = np.asarray([(1 / H.size()) - probs[ix] for ix, ed in enumerate(edges) if H.has_edge(*ed)]) + edges = np.asarray(edges) + + probs_dom = np.asarray(probs) + probs_dom /= probs_dom.sum() + + edge_prob = dict(zip([hash_func(ed) for ed in edges], probs_dom)) + df_data["p_{0}".format(i)] = df_data.apply(lambda x: edge_prob[hash_func([int(x.u),int(x.v)])] if hash_func([int(x.u),int(x.v)]) in edge_prob else 0,axis=1) + + h_probs = np.asarray([(1 / H.size()) - probs_dom[ix] for ix, ed in enumerate(edges) if H.has_edge(*ed)]) new_nb_edges = h_probs.sum() * H.size() if VERBOSE:print("new NB of Edges",new_nb_edges) - edges = np.asarray(edges) - probs = np.asarray([old_probs[hash_func(ed)]-probs[ix] for ix,ed in enumerate(edges)]) - probs[probs <0] = float_epsilon - probs /= probs.sum() - edge_prob = dict(zip([hash_func(ed) for ed in edges],probs)) + probs_erosion = np.asarray([old_probs[hash_func(ed)]-probs_dom[ix] for ix,ed in enumerate(edges)]) + probs_erosion[probs_erosion <0] = float_epsilon + probs_erosion /= probs_erosion.sum() - - df_data["p_{0}".format(i)] = df_data.apply(lambda x: edge_prob[hash_func([int(x.u),int(x.v)])] if hash_func([int(x.u),int(x.v)]) in edge_prob else 0,axis=1) final_edges = [] - index_selected_pairs = np.random.choice(np.arange(len(edges)),round(new_nb_edges) , p=probs, replace=False)#round(0.7*H.size()) + index_selected_pairs = np.random.choice(np.arange(len(edges)),round(new_nb_edges) , p=probs_erosion, replace=False)#round(0.7*H.size()) final_edges.extend(edges[index_selected_pairs]) G2 = nx.from_edgelist(final_edges) for n in list(G2.nodes()): @@ -125,12 +130,12 @@ for i in range(1,NB_ITERATION+1): H=G2.copy() if VERBOSE:print(df_data) + edge_feature= {hash_func([int(row.u),int(row.v)]):[row["p_{0}".format(i)] for i in range(1,NB_ITERATION+1)] for ix,row in df_data.iterrows()} -G, _ = pp.prep_graph(G,maincc=True) +G, _ = pp.prep_graph(G,maincc=True,relabel=False) traintest_split = LPEvalSplit() traintest_split.compute_splits(G, split_alg="spanning_tree", train_frac=0.90, fe_ratio=1) -nee = LPEvaluator(traintest_split) X_train = traintest_split.train_edges y_train = traintest_split.train_labels @@ -166,7 +171,7 @@ X_test = X_test[:,2:] classifier_dict = { "naive-bayes":GaussianNB(), - "svm":SVC(), + #"svm":SVC(), "sgd":SGDClassifier(), "knn":KNeighborsClassifier(), "decision-tree": DecisionTreeClassifier(), @@ -191,7 +196,7 @@ auc_sbm, auc_spa = get_aucs(G) if VERBOSE: print("SBM AUUROC",auc_sbm,"SPATIAL AUROC",auc_spa) data = [] pbar = tqdm(parameters) -for classi_ in parameters: +for classi_ in classifier_dict: pbar.set_description(classi_) if len(parameters[classi_])>0: clf = GridSearchCV(