Skip to content
Snippets Groups Projects
Commit 7a88c508 authored by Fize Jacques
Browse files

Big debug

parent 57c967d2
No related branches found
No related tags found
No related merge requests found
......@@ -41,7 +41,7 @@ parser.add_argument('-f', '--features', help='Feature(s) used in the model train
# Optional flags: -v/--verbose is a boolean switch; -t/--timeout is an integer defaulting to 30.
parser.add_argument("-v","--verbose",action="store_true")
parser.add_argument("-t","--timeout",default=30,type=int)
# NOTE(review): the next two lines look like the before/after sides of this diff hunk.
# The first parses the real command line; the second parses a hard-coded argument
# string instead, so if both ran the second would overwrite the first. Confirm the
# hard-coded debug arguments are not meant to stay in the script.
args= parser.parse_args()
args= parser.parse_args("300 600 5 1 1 -v -f it_probs".split())
# COMMAND LINE ARGS VALUES
GRAPH_NODE_NB = args.nb_nodes
......@@ -81,13 +81,16 @@ for n1 in list(G.nodes()):
register.add(hash_func((n1,n2)))
# One row per entry of `data` (built above, outside this excerpt); the columns are named u and v.
df_data = pd.DataFrame(data,columns="u v".split())
# Key each row by hash_func applied to the (u, v) pair cast to integers.
df_data["hash_"] = df_data.apply(lambda row:hash_func((int(row.u),int(row.v))), axis=1)
# p_0 is 1 when G contains the edge (u, v), otherwise 0.
df_data["p_0"] = df_data.apply(lambda x:1 if G.has_edge(x.u,x.v) else 0,axis =1)
# Node attributes "pos" and "block" read from G.
pos = nx.get_node_attributes(G,"pos")
block_assign = nx.get_node_attributes(G,"block")
# H starts as a copy of G; the iteration loop below reads H and later replaces it.
H = G.copy()
# Machine epsilon of the float type, as reported by numpy.
float_epsilon = np.finfo(float).eps
# NOTE(review): identical to the p_0 assignment a few lines up; the diff appears to
# show this statement being moved, so only one of the two copies is expected to remain.
df_data["p_0"] = df_data.apply(lambda x:1 if G.has_edge(x.u,x.v) else 0,axis =1)
for i in range(1,NB_ITERATION+1):
if H.size() < 30:
df_data["p_{0}".format(i)] = df_data["p_{0}".format(i-1)]
......@@ -96,27 +99,29 @@ for i in range(1,NB_ITERATION+1):
auc_sbm,auc_spatial = get_aucs(H)
if VERBOSE : print("SBM: ",auc_sbm,"SPATIAL: ",auc_spatial)
if auc_sbm> auc_spatial:
edges,probs = get_sbm_probs(H,ALPHA)
edges,probs = get_sbm_probs(H,0.01)
else:
edges,probs = get_spat_probs(H)
probs = np.asarray(probs)
probs /= probs.sum()
h_probs = np.asarray([(1 / H.size()) - probs[ix] for ix, ed in enumerate(edges) if H.has_edge(*ed)])
edges = np.asarray(edges)
probs_dom = np.asarray(probs)
probs_dom /= probs_dom.sum()
edge_prob = dict(zip([hash_func(ed) for ed in edges], probs_dom))
df_data["p_{0}".format(i)] = df_data.apply(lambda x: edge_prob[hash_func([int(x.u),int(x.v)])] if hash_func([int(x.u),int(x.v)]) in edge_prob else 0,axis=1)
h_probs = np.asarray([(1 / H.size()) - probs_dom[ix] for ix, ed in enumerate(edges) if H.has_edge(*ed)])
new_nb_edges = h_probs.sum() * H.size()
if VERBOSE:print("new NB of Edges",new_nb_edges)
edges = np.asarray(edges)
probs = np.asarray([old_probs[hash_func(ed)]-probs[ix] for ix,ed in enumerate(edges)])
probs[probs <0] = float_epsilon
probs /= probs.sum()
edge_prob = dict(zip([hash_func(ed) for ed in edges],probs))
probs_erosion = np.asarray([old_probs[hash_func(ed)]-probs_dom[ix] for ix,ed in enumerate(edges)])
probs_erosion[probs_erosion <0] = float_epsilon
probs_erosion /= probs_erosion.sum()
df_data["p_{0}".format(i)] = df_data.apply(lambda x: edge_prob[hash_func([int(x.u),int(x.v)])] if hash_func([int(x.u),int(x.v)]) in edge_prob else 0,axis=1)
final_edges = []
index_selected_pairs = np.random.choice(np.arange(len(edges)),round(new_nb_edges) , p=probs, replace=False)#round(0.7*H.size())
index_selected_pairs = np.random.choice(np.arange(len(edges)),round(new_nb_edges) , p=probs_erosion, replace=False)#round(0.7*H.size())
final_edges.extend(edges[index_selected_pairs])
G2 = nx.from_edgelist(final_edges)
for n in list(G2.nodes()):
......@@ -125,12 +130,12 @@ for i in range(1,NB_ITERATION+1):
H=G2.copy()
if VERBOSE:print(df_data)
# Map hash_func([u, v]) of every df_data row to the list of its p_1 .. p_NB_ITERATION values.
edge_feature= {hash_func([int(row.u),int(row.v)]):[row["p_{0}".format(i)] for i in range(1,NB_ITERATION+1)] for ix,row in df_data.iterrows()}
# NOTE(review): the two prep_graph calls look like the before/after sides of this diff
# hunk; they differ only in relabel=False. Presumably relabelling would change the node
# ids that the hash-keyed edge_feature lookup relies on -- confirm against pp.prep_graph.
G, _ = pp.prep_graph(G,maincc=True)
G, _ = pp.prep_graph(G,maincc=True,relabel=False)
# Compute the train/test split on G with the "spanning_tree" algorithm,
# train_frac=0.90 and fe_ratio=1, then build the evaluator from that split.
traintest_split = LPEvalSplit()
traintest_split.compute_splits(G, split_alg="spanning_tree", train_frac=0.90, fe_ratio=1)
nee = LPEvaluator(traintest_split)
# Training edges and their labels as exposed by the split object.
X_train = traintest_split.train_edges
y_train = traintest_split.train_labels
......@@ -166,7 +171,7 @@ X_test = X_test[:,2:]
classifier_dict = {
"naive-bayes":GaussianNB(),
"svm":SVC(),
#"svm":SVC(),
"sgd":SGDClassifier(),
"knn":KNeighborsClassifier(),
"decision-tree": DecisionTreeClassifier(),
......@@ -191,7 +196,7 @@ auc_sbm, auc_spa = get_aucs(G)
if VERBOSE: print("SBM AUUROC",auc_sbm,"SPATIAL AUROC",auc_spa)
data = []
pbar = tqdm(parameters)
for classi_ in parameters:
for classi_ in classifier_dict:
pbar.set_description(classi_)
if len(parameters[classi_])>0:
clf = GridSearchCV(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment