Skip to content
Snippets Groups Projects
Commit 7a88c508 authored by Fize Jacques
Browse files

Big debug

parent 57c967d2
No related branches found
No related tags found
No related merge requests found
...@@ -41,7 +41,7 @@ parser.add_argument('-f', '--features', help='Feature(s) used in the model train ...@@ -41,7 +41,7 @@ parser.add_argument('-f', '--features', help='Feature(s) used in the model train
parser.add_argument("-v","--verbose",action="store_true") parser.add_argument("-v","--verbose",action="store_true")
parser.add_argument("-t","--timeout",default=30,type=int) parser.add_argument("-t","--timeout",default=30,type=int)
args= parser.parse_args() args= parser.parse_args("300 600 5 1 1 -v -f it_probs".split())
# COMMAND LINE ARGS VALUES # COMMAND LINE ARGS VALUES
GRAPH_NODE_NB = args.nb_nodes GRAPH_NODE_NB = args.nb_nodes
...@@ -81,13 +81,16 @@ for n1 in list(G.nodes()): ...@@ -81,13 +81,16 @@ for n1 in list(G.nodes()):
register.add(hash_func((n1,n2))) register.add(hash_func((n1,n2)))
df_data = pd.DataFrame(data,columns="u v".split()) df_data = pd.DataFrame(data,columns="u v".split())
df_data["hash_"] = df_data.apply(lambda row:hash_func((int(row.u),int(row.v))), axis=1) df_data["hash_"] = df_data.apply(lambda row:hash_func((int(row.u),int(row.v))), axis=1)
df_data["p_0"] = df_data.apply(lambda x:1 if G.has_edge(x.u,x.v) else 0,axis =1)
pos = nx.get_node_attributes(G,"pos") pos = nx.get_node_attributes(G,"pos")
block_assign = nx.get_node_attributes(G,"block") block_assign = nx.get_node_attributes(G,"block")
H = G.copy() H = G.copy()
float_epsilon = np.finfo(float).eps float_epsilon = np.finfo(float).eps
df_data["p_0"] = df_data.apply(lambda x:1 if G.has_edge(x.u,x.v) else 0,axis =1)
for i in range(1,NB_ITERATION+1): for i in range(1,NB_ITERATION+1):
if H.size() < 30: if H.size() < 30:
df_data["p_{0}".format(i)] = df_data["p_{0}".format(i-1)] df_data["p_{0}".format(i)] = df_data["p_{0}".format(i-1)]
...@@ -96,27 +99,29 @@ for i in range(1,NB_ITERATION+1): ...@@ -96,27 +99,29 @@ for i in range(1,NB_ITERATION+1):
auc_sbm,auc_spatial = get_aucs(H) auc_sbm,auc_spatial = get_aucs(H)
if VERBOSE : print("SBM: ",auc_sbm,"SPATIAL: ",auc_spatial) if VERBOSE : print("SBM: ",auc_sbm,"SPATIAL: ",auc_spatial)
if auc_sbm> auc_spatial: if auc_sbm> auc_spatial:
edges,probs = get_sbm_probs(H,ALPHA) edges,probs = get_sbm_probs(H,0.01)
else: else:
edges,probs = get_spat_probs(H) edges,probs = get_spat_probs(H)
probs = np.asarray(probs)
probs /= probs.sum()
h_probs = np.asarray([(1 / H.size()) - probs[ix] for ix, ed in enumerate(edges) if H.has_edge(*ed)]) edges = np.asarray(edges)
probs_dom = np.asarray(probs)
probs_dom /= probs_dom.sum()
edge_prob = dict(zip([hash_func(ed) for ed in edges], probs_dom))
df_data["p_{0}".format(i)] = df_data.apply(lambda x: edge_prob[hash_func([int(x.u),int(x.v)])] if hash_func([int(x.u),int(x.v)]) in edge_prob else 0,axis=1)
h_probs = np.asarray([(1 / H.size()) - probs_dom[ix] for ix, ed in enumerate(edges) if H.has_edge(*ed)])
new_nb_edges = h_probs.sum() * H.size() new_nb_edges = h_probs.sum() * H.size()
if VERBOSE:print("new NB of Edges",new_nb_edges) if VERBOSE:print("new NB of Edges",new_nb_edges)
edges = np.asarray(edges)
probs = np.asarray([old_probs[hash_func(ed)]-probs[ix] for ix,ed in enumerate(edges)])
probs[probs <0] = float_epsilon
probs /= probs.sum()
edge_prob = dict(zip([hash_func(ed) for ed in edges],probs))
probs_erosion = np.asarray([old_probs[hash_func(ed)]-probs_dom[ix] for ix,ed in enumerate(edges)])
probs_erosion[probs_erosion <0] = float_epsilon
probs_erosion /= probs_erosion.sum()
df_data["p_{0}".format(i)] = df_data.apply(lambda x: edge_prob[hash_func([int(x.u),int(x.v)])] if hash_func([int(x.u),int(x.v)]) in edge_prob else 0,axis=1)
final_edges = [] final_edges = []
index_selected_pairs = np.random.choice(np.arange(len(edges)),round(new_nb_edges) , p=probs, replace=False)#round(0.7*H.size()) index_selected_pairs = np.random.choice(np.arange(len(edges)),round(new_nb_edges) , p=probs_erosion, replace=False)#round(0.7*H.size())
final_edges.extend(edges[index_selected_pairs]) final_edges.extend(edges[index_selected_pairs])
G2 = nx.from_edgelist(final_edges) G2 = nx.from_edgelist(final_edges)
for n in list(G2.nodes()): for n in list(G2.nodes()):
...@@ -125,12 +130,12 @@ for i in range(1,NB_ITERATION+1): ...@@ -125,12 +130,12 @@ for i in range(1,NB_ITERATION+1):
H=G2.copy() H=G2.copy()
if VERBOSE:print(df_data) if VERBOSE:print(df_data)
edge_feature= {hash_func([int(row.u),int(row.v)]):[row["p_{0}".format(i)] for i in range(1,NB_ITERATION+1)] for ix,row in df_data.iterrows()} edge_feature= {hash_func([int(row.u),int(row.v)]):[row["p_{0}".format(i)] for i in range(1,NB_ITERATION+1)] for ix,row in df_data.iterrows()}
G, _ = pp.prep_graph(G,maincc=True) G, _ = pp.prep_graph(G,maincc=True,relabel=False)
traintest_split = LPEvalSplit() traintest_split = LPEvalSplit()
traintest_split.compute_splits(G, split_alg="spanning_tree", train_frac=0.90, fe_ratio=1) traintest_split.compute_splits(G, split_alg="spanning_tree", train_frac=0.90, fe_ratio=1)
nee = LPEvaluator(traintest_split)
X_train = traintest_split.train_edges X_train = traintest_split.train_edges
y_train = traintest_split.train_labels y_train = traintest_split.train_labels
...@@ -166,7 +171,7 @@ X_test = X_test[:,2:] ...@@ -166,7 +171,7 @@ X_test = X_test[:,2:]
classifier_dict = { classifier_dict = {
"naive-bayes":GaussianNB(), "naive-bayes":GaussianNB(),
"svm":SVC(), #"svm":SVC(),
"sgd":SGDClassifier(), "sgd":SGDClassifier(),
"knn":KNeighborsClassifier(), "knn":KNeighborsClassifier(),
"decision-tree": DecisionTreeClassifier(), "decision-tree": DecisionTreeClassifier(),
...@@ -191,7 +196,7 @@ auc_sbm, auc_spa = get_aucs(G) ...@@ -191,7 +196,7 @@ auc_sbm, auc_spa = get_aucs(G)
if VERBOSE: print("SBM AUUROC",auc_sbm,"SPATIAL AUROC",auc_spa) if VERBOSE: print("SBM AUUROC",auc_sbm,"SPATIAL AUROC",auc_spa)
data = [] data = []
pbar = tqdm(parameters) pbar = tqdm(parameters)
for classi_ in parameters: for classi_ in classifier_dict:
pbar.set_description(classi_) pbar.set_description(classi_)
if len(parameters[classi_])>0: if len(parameters[classi_])>0:
clf = GridSearchCV( clf = GridSearchCV(
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment