Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
L
linkprediction_depo
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Jacques Fize
linkprediction_depo
Commits
299c9dcd
Commit
299c9dcd
authored
4 years ago
by
Fize Jacques
Browse files
Options
Downloads
Patches
Plain Diff
add evaluation script for mixed model
parent
0f3317b7
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
eval_mixed_model.py
+184
-0
184 additions, 0 deletions
eval_mixed_model.py
lib/random.py
+60
-2
60 additions, 2 deletions
lib/random.py
run_eval_mixed_model.sh
+11
-0
11 additions, 0 deletions
run_eval_mixed_model.sh
with
255 additions
and
2 deletions
eval_mixed_model.py
0 → 100644
+
184
−
0
View file @
299c9dcd
# coding = utf-8
import
argparse
from
lib.random
import
mixed_model_spat_sbm
,
get_spat_probs
,
get_sbm_probs
import
networkx
as
nx
import
numpy
as
np
import
pandas
as
pd
from
tqdm
import
tqdm
from
evalne.evaluation.evaluator
import
LPEvaluator
from
evalne.evaluation.split
import
EvalSplit
as
LPEvalSplit
from
evalne.utils
import
preprocess
as
pp
from
sklearn.linear_model
import
LogisticRegression
from
sklearn.tree
import
DecisionTreeClassifier
from
sklearn.svm
import
SVC
,
LinearSVC
from
sklearn.neighbors
import
KNeighborsClassifier
from
sklearn.ensemble
import
RandomForestClassifier
,
AdaBoostClassifier
from
sklearn.naive_bayes
import
GaussianNB
from
sklearn.neural_network
import
MLPClassifier
from
sklearn.linear_model
import
SGDClassifier
from
sklearn.model_selection
import
GridSearchCV
from
sklearn.metrics
import
roc_auc_score
,
precision_score
,
recall_score
,
f1_score
from
sklearn.metrics
import
make_scorer
roc_auc_scorer
=
make_scorer
(
roc_auc_score
,
greater_is_better
=
True
,
needs_threshold
=
True
)
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"
nb_nodes
"
,
type
=
int
)
parser
.
add_argument
(
"
nb_edges
"
,
type
=
int
)
parser
.
add_argument
(
"
nb_com
"
,
type
=
int
)
parser
.
add_argument
(
"
alpha
"
,
type
=
float
)
parser
.
add_argument
(
"
-v
"
,
"
--verbose
"
,
action
=
"
store_true
"
)
args
=
parser
.
parse_args
()
GRAPH_NODE_NB
=
args
.
nb_nodes
GRAPH_EDGE_NB
=
args
.
nb_edges
ALPHA
=
args
.
alpha
NB_COM
=
args
.
nb_com
NB_ITERATION
=
3
VERBOSE
=
args
.
verbose
dist
=
lambda
a
,
b
:
np
.
linalg
.
norm
(
a
-
b
)
**
2
hash_func
=
lambda
x
:
"
_
"
.
join
(
sorted
([
str
(
x
[
0
]),
str
(
x
[
1
])]))
def
get_aucs
(
G
):
H
,
_
=
pp
.
prep_graph
(
G
.
copy
(),
maincc
=
True
)
traintest_split
=
LPEvalSplit
()
traintest_split
.
compute_splits
(
H
,
split_alg
=
"
spanning_tree
"
,
train_frac
=
0.90
,
fe_ratio
=
1
)
nee
=
LPEvaluator
(
traintest_split
)
auc_spatial
=
nee
.
evaluate_baseline
(
method
=
"
spatial_link_prediction
"
).
test_scores
.
auroc
()
auc_sbm
=
nee
.
evaluate_baseline
(
method
=
"
stochastic_block_model
"
).
test_scores
.
auroc
()
return
auc_sbm
,
auc_spatial
dist
=
lambda
a
,
b
:
np
.
linalg
.
norm
(
a
-
b
)
G
,
all_probs_sbm
,
all_probs_spa
=
mixed_model_spat_sbm
(
GRAPH_NODE_NB
,
GRAPH_EDGE_NB
,
NB_COM
,
alpha
=
ALPHA
)
register
=
set
([])
data
=
[]
for
n1
in
list
(
G
.
nodes
()):
for
n2
in
list
(
G
.
nodes
()):
if
n1
!=
n2
and
hash_func
((
n1
,
n2
))
not
in
register
:
data
.
append
([
n1
,
n2
])
register
.
add
(
hash_func
((
n1
,
n2
)))
df_data
=
pd
.
DataFrame
(
data
,
columns
=
"
u v
"
.
split
())
pos
=
nx
.
get_node_attributes
(
G
,
"
pos
"
)
block_assign
=
nx
.
get_node_attributes
(
G
,
"
block
"
)
H
=
G
.
copy
()
float_epsilon
=
np
.
finfo
(
float
).
eps
for
i
in
range
(
NB_ITERATION
):
auc_sbm
,
auc_spatial
=
get_aucs
(
H
)
if
VERBOSE
:
print
(
auc_sbm
,
auc_spatial
)
if
auc_sbm
>
auc_spatial
:
edges
,
probs
=
get_sbm_probs
(
H
,
ALPHA
)
else
:
edges
,
probs
=
get_spat_probs
(
H
)
probs
=
np
.
asarray
(
probs
)
edges
=
np
.
asarray
(
edges
)
edge_prob
=
dict
(
zip
([
hash_func
(
ed
)
for
ed
in
edges
],
probs
))
probs
=
np
.
asarray
([(
1
if
H
.
has_edge
(
*
ed
)
else
0
)
-
probs
[
ix
]
for
ix
,
ed
in
enumerate
(
edges
)])
probs
=
np
.
asarray
([
float_epsilon
if
p
<=
0
else
p
for
p
in
probs
])
probs
/=
probs
.
sum
()
df_data
[
"
p_{0}
"
.
format
(
i
)]
=
df_data
.
apply
(
lambda
x
:
edge_prob
[
hash_func
([
int
(
x
.
u
),
int
(
x
.
v
)])]
if
hash_func
([
int
(
x
.
u
),
int
(
x
.
v
)])
in
edge_prob
else
0
,
axis
=
1
)
final_edges
=
[]
index_selected_pairs
=
np
.
random
.
choice
(
np
.
arange
(
len
(
edges
)),
round
((
H
.
size
()
*
0.7
)),
p
=
probs
,
replace
=
False
)
final_edges
.
extend
(
edges
[
index_selected_pairs
])
G2
=
nx
.
from_edgelist
(
final_edges
)
for
n
in
list
(
G2
.
nodes
()):
G2
.
nodes
[
n
][
"
block
"
]
=
block_assign
[
n
]
G2
.
nodes
[
n
][
"
pos
"
]
=
pos
[
n
]
H
=
G2
.
copy
()
edge_feature
=
{
hash_func
([
int
(
row
.
u
),
int
(
row
.
v
)]):[
row
.
p_0
,
row
.
p_1
]
for
ix
,
row
in
df_data
.
iterrows
()}
G
,
_
=
pp
.
prep_graph
(
G
,
maincc
=
True
)
traintest_split
=
LPEvalSplit
()
traintest_split
.
compute_splits
(
G
,
split_alg
=
"
spanning_tree
"
,
train_frac
=
0.90
,
fe_ratio
=
1
)
nee
=
LPEvaluator
(
traintest_split
)
X_train
=
traintest_split
.
train_edges
y_train
=
traintest_split
.
train_labels
X_test
=
traintest_split
.
test_edges
y_test
=
traintest_split
.
test_labels
pos
=
nx
.
get_node_attributes
(
G
,
"
pos
"
)
dist_X_train
=
np
.
asarray
([
dist
(
pos
[
ed
[
0
]],
pos
[
ed
[
1
]])
for
ed
in
X_train
]).
reshape
(
-
1
,
1
)
dist_X_test
=
np
.
asarray
([
dist
(
pos
[
ed
[
0
]],
pos
[
ed
[
1
]])
for
ed
in
X_test
]).
reshape
(
-
1
,
1
)
centrality
=
nx
.
degree_centrality
(
G
)
centrality_X_train
=
np
.
asarray
([[
centrality
[
ed
[
0
]],
centrality
[
ed
[
1
]]]
for
ed
in
X_train
])
centrality_X_test
=
np
.
asarray
([[
centrality
[
ed
[
0
]],
centrality
[
ed
[
1
]]]
for
ed
in
X_test
])
if_not
=
[
0
for
i
in
range
(
NB_ITERATION
-
1
)]
feature_X_train
=
np
.
asarray
([
(
edge_feature
[
hash_func
(
ed
)]
if
hash_func
(
ed
)
in
edge_feature
else
if_not
)
for
ed
in
X_train
])
feature_X_test
=
np
.
asarray
([
(
edge_feature
[
hash_func
(
ed
)]
if
hash_func
(
ed
)
in
edge_feature
else
if_not
)
for
ed
in
X_test
])
##ADD centrality and distance to X train
X_train
=
np
.
concatenate
((
X_train
,
dist_X_train
,
centrality_X_train
),
axis
=
1
)
X_test
=
np
.
concatenate
((
X_test
,
dist_X_test
,
centrality_X_test
),
axis
=
1
)
classifier_dict
=
{
"
naive-bayes
"
:
GaussianNB
(),
"
svm
"
:
SVC
(),
"
sgd
"
:
SGDClassifier
(),
"
knn
"
:
KNeighborsClassifier
(),
"
decision-tree
"
:
DecisionTreeClassifier
(),
"
random-forest
"
:
RandomForestClassifier
(),
"
mlp
"
:
MLPClassifier
(),
"
logistic_reg
"
:
LogisticRegression
(),
"
linear_svm
"
:
LinearSVC
()
}
parameters
=
{
"
naive-bayes
"
:[],
#"svm":[{"kernel":["rbf","linear"], 'gamma': [1e-1,1e-2,1e-3, 1,10,100]}],
"
sgd
"
:[{
"
penalty
"
:[
"
l1
"
,
"
l2
"
],
"
loss
"
:[
"
hinge
"
,
"
modified_huber
"
,
"
log
"
]}],
"
knn
"
:[{
"
n_neighbors
"
:
list
(
range
(
4
,
8
)),
"
p
"
:[
1
,
2
]}],
"
decision-tree
"
:
[{
"
criterion
"
:[
"
gini
"
,
"
entropy
"
]}],
"
random-forest
"
:[{
"
criterion
"
:[
"
gini
"
,
"
entropy
"
],
"
n_estimators
"
:[
10
,
50
,
100
]}],
"
mlp
"
:[],
"
logistic_reg
"
:[],
"
linear_svm
"
:[]
}
auc_sbm
,
auc_spa
=
get_aucs
(
G
)
if
VERBOSE
:
print
(
"
SBM AUUROC
"
,
auc_sbm
,
"
SPATIAL AUROC
"
,
auc_spa
)
data
=
[]
pbar
=
tqdm
(
parameters
)
for
classi_
in
parameters
:
pbar
.
set_description
(
classi_
)
if
len
(
parameters
[
classi_
])
>
0
:
clf
=
GridSearchCV
(
classifier_dict
[
classi_
],
parameters
[
classi_
],
scoring
=
roc_auc_scorer
,
n_jobs
=-
1
)
clf
.
fit
(
X_train
,
y_train
)
y_pred
=
clf
.
best_estimator_
.
predict
(
X_test
)
else
:
classifier_dict
[
classi_
].
fit
(
X_train
,
y_train
)
y_pred
=
classifier_dict
[
classi_
].
predict
(
X_test
)
data
.
append
([
classifier_dict
[
classi_
].
__class__
.
__name__
,
precision_score
(
y_test
,
y_pred
),
recall_score
(
y_test
,
y_pred
),
f1_score
(
y_test
,
y_pred
),
roc_auc_score
(
y_test
,
y_pred
)])
df
=
pd
.
DataFrame
(
data
,
columns
=
"
method precision recall f1-score auroc
"
.
split
())
df
[
"
auc_sbm
"
]
=
auc_sbm
df
[
"
auc_spatial
"
]
=
auc_spa
df
[
"
alpha
"
]
=
ALPHA
df
[
"
nb_nodes
"
]
=
GRAPH_NODE_NB
df
[
"
nb_edges
"
]
=
GRAPH_EDGE_NB
df
[
"
nb_com
"
]
=
NB_COM
if
VERBOSE
:
print
(
df
)
df
.
to_csv
(
"
{0}_{1}_{2}_{3}.csv
"
.
format
(
GRAPH_NODE_NB
,
GRAPH_EDGE_NB
,
NB_COM
,
ALPHA
),
sep
=
"
\t
"
,
index
=
None
)
\ No newline at end of file
This diff is collapsed.
Click to expand it.
lib/random.py
+
60
−
2
View file @
299c9dcd
...
@@ -8,6 +8,9 @@ import pandas as pd
...
@@ -8,6 +8,9 @@ import pandas as pd
from
networkx.generators.degree_seq
import
_to_stublist
from
networkx.generators.degree_seq
import
_to_stublist
from
cdlib
import
algorithms
from
cdlib
import
algorithms
import
random
import
random
float_epsilon
=
np
.
finfo
(
float
).
eps
def
powerlaw
(
nb_nodes
,
nb_edges
,
exponent
=
2
,
tries
=
100
,
min_deg
=
1
):
def
powerlaw
(
nb_nodes
,
nb_edges
,
exponent
=
2
,
tries
=
100
,
min_deg
=
1
):
...
@@ -441,7 +444,7 @@ def get_inter_intra_edges(G, directed=False):
...
@@ -441,7 +444,7 @@ def get_inter_intra_edges(G, directed=False):
for
n1
in
list
(
G
.
nodes
()):
for
n1
in
list
(
G
.
nodes
()):
for
n2
in
list
(
G
.
nodes
()):
for
n2
in
list
(
G
.
nodes
()):
if
directed
:
if
directed
:
"
_
"
.
join
([
str
(
n1
),
str
(
n2
)])
hash_
=
"
_
"
.
join
([
str
(
n1
),
str
(
n2
)])
else
:
else
:
hash_
=
"
_
"
.
join
(
sorted
([
str
(
n1
),
str
(
n2
)]))
hash_
=
"
_
"
.
join
(
sorted
([
str
(
n1
),
str
(
n2
)]))
if
(
n1
==
n2
)
or
(
hash_
in
register
):
if
(
n1
==
n2
)
or
(
hash_
in
register
):
...
@@ -487,6 +490,8 @@ def mixed_model_spat_sbm(nb_nodes, nb_edges, nb_com, alpha, percentage_edge_betw
...
@@ -487,6 +490,8 @@ def mixed_model_spat_sbm(nb_nodes, nb_edges, nb_com, alpha, percentage_edge_betw
pos
=
nx
.
get_node_attributes
(
G
,
"
pos
"
)
pos
=
nx
.
get_node_attributes
(
G
,
"
pos
"
)
all_probs_spa
=
np
.
asarray
([
1
/
(
float_epsilon
+
dist_func
(
pos
[
edge
[
0
]],
pos
[
edge
[
1
]]))
for
edge
in
all_edges
])
all_probs_spa
=
np
.
asarray
([
1
/
(
float_epsilon
+
dist_func
(
pos
[
edge
[
0
]],
pos
[
edge
[
1
]]))
for
edge
in
all_edges
])
all_probs_spa
/=
all_probs_spa
.
sum
()
all_probs_spa
/=
all_probs_spa
.
sum
()
all_probs
=
alpha
*
(
all_probs_sbm
)
+
(
1
-
alpha
)
*
all_probs_spa
all_probs
=
alpha
*
(
all_probs_sbm
)
+
(
1
-
alpha
)
*
all_probs_spa
final_edges
=
[]
final_edges
=
[]
...
@@ -498,4 +503,57 @@ def mixed_model_spat_sbm(nb_nodes, nb_edges, nb_com, alpha, percentage_edge_betw
...
@@ -498,4 +503,57 @@ def mixed_model_spat_sbm(nb_nodes, nb_edges, nb_com, alpha, percentage_edge_betw
G2
.
nodes
[
n
][
"
block
"
]
=
block_assign
[
n
]
G2
.
nodes
[
n
][
"
block
"
]
=
block_assign
[
n
]
G2
.
nodes
[
n
][
"
pos
"
]
=
G
.
nodes
[
n
][
"
pos
"
]
G2
.
nodes
[
n
][
"
pos
"
]
=
G
.
nodes
[
n
][
"
pos
"
]
return
G2
return
G2
,
all_probs_sbm
,
all_probs_spa
\ No newline at end of file
def
get_sbm_probs
(
G
,
percentage_edge_betw
,
verbose
=
False
):
hash_func
=
lambda
x
:
"
_
"
.
join
(
sorted
([
str
(
x
[
0
]),
str
(
x
[
1
])]))
def
nb_of_pair
(
N
):
return
(
N
*
(
N
-
1
))
/
2
block_assign
=
nx
.
get_node_attributes
(
G
,
"
block
"
)
nb_com
=
len
(
set
(
block_assign
.
values
()))
nb_nodes
=
len
(
G
)
nb_edges
=
G
.
size
()
b_assign_array
=
np
.
asarray
(
list
(
nx
.
get_node_attributes
(
G
,
"
block
"
).
values
()))
u_in
=
sum
([
nb_of_pair
((
b_assign_array
==
b
).
sum
())
for
b
in
range
(
nb_com
)])
u_out
=
nb_of_pair
(
len
(
G
))
-
u_in
l_out
=
nb_edges
*
percentage_edge_betw
p_out
=
l_out
/
u_out
l_in
=
nb_edges
-
l_out
p_in
=
l_in
/
u_in
inter_edges
,
intra_edges
=
get_inter_intra_edges
(
G
,
G
.
is_directed
())
inter_edges
=
np
.
asarray
(
inter_edges
)
intra_edges
=
np
.
asarray
(
intra_edges
)
inter_N
,
intra_N
=
len
(
inter_edges
),
len
(
intra_edges
)
probs_inter
=
np
.
ones
(
inter_N
)
*
p_out
probs_intra
=
np
.
ones
(
intra_N
)
*
p_in
all_edges
=
np
.
concatenate
((
inter_edges
,
intra_edges
))
all_probs
=
np
.
concatenate
((
probs_inter
,
probs_intra
))
del
probs_inter
del
probs_intra
all_probs
/=
all_probs
.
sum
()
return
all_edges
,
all_probs
def
get_spat_probs
(
G
,
dist
=
lambda
a
,
b
:
np
.
linalg
.
norm
(
a
-
b
)
**
2
):
hash_func
=
lambda
x
:
"
_
"
.
join
(
sorted
([
str
(
x
[
0
]),
str
(
x
[
1
])]))
pos
=
nx
.
get_node_attributes
(
G
,
"
pos
"
)
spat_model
=
lambda
u
,
v
:
1
/
(
float_epsilon
+
dist
(
pos
[
u
],
pos
[
v
]))
register
=
set
([])
edges
,
probs
=
[],
[]
for
n1
in
list
(
G
.
nodes
()):
for
n2
in
list
(
G
.
nodes
()):
if
n1
!=
n2
and
hash_func
((
n1
,
n2
))
not
in
register
:
edges
.
append
([
n1
,
n2
])
probs
.
append
(
spat_model
(
n1
,
n2
))
register
.
add
(
hash_func
((
n1
,
n2
)))
return
edges
,
probs
\ No newline at end of file
This diff is collapsed.
Click to expand it.
run_eval_mixed_model.sh
0 → 100644
+
11
−
0
View file @
299c9dcd
#!/bin/bash
for
alpha
in
0 0.2 0.5 0.7 1
do
for
nbcom
in
2 3 4 5
do
echo
"alpha= "
$alpha
", nb_com= "
$nbcom
python eval_mixed_model.py 100 200
$nbcom
$alpha
python eval_mixed_model.py 300 600
$nbcom
$alpha
done
done
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment