Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
L
linkprediction_depo
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Jacques Fize
linkprediction_depo
Commits
613e6a5c
Commit
613e6a5c
authored
4 years ago
by
Fize Jacques
Browse files
Options
Downloads
Patches
Plain Diff
Add Visualisation tools of link prediction results+ Debug
parent
111adf7f
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
draw_visu.py
+148
-0
148 additions, 0 deletions
draw_visu.py
generate_theoric_random_graph.py
+19
-19
19 additions, 19 deletions
generate_theoric_random_graph.py
lib/random.py
+6
-2
6 additions, 2 deletions
lib/random.py
with
173 additions
and
21 deletions
draw_visu.py
0 → 100644
+
148
−
0
View file @
613e6a5c
# coding = utf-8
import
pandas
as
pd
import
numpy
as
np
import
seaborn
as
sns
import
matplotlib.pyplot
as
plt
import
re
import
os
import
networkx
as
nx
def get_graph_attr(fn, graph_dir):
    """Return the graph-level attributes stored in a GML file.

    Parameters
    ----------
    fn : str
        File name of the GML graph, relative to ``graph_dir``.
    graph_dir : str
        Directory containing the graph files.

    Returns
    -------
    dict
        The ``.graph`` attribute mapping of the graph read by
        ``networkx.read_gml``.

    Raises
    ------
    FileNotFoundError
        If the joined path does not exist; the path is the exception argument.
    """
    gml_path = os.path.join(graph_dir, fn)
    if os.path.exists(gml_path):
        # Only the graph-level attribute dict is needed, not the graph itself.
        return nx.read_gml(gml_path).graph
    raise FileNotFoundError(gml_path)
def get_sample_id_old(ch):
    """Extract the sample identifier from a legacy graph file name.

    The first run of digits found in ``ch`` is taken. When that run is exactly
    three characters long its last two characters are returned; otherwise only
    its last character is returned.

    NOTE(review): this presumably targets the older naming scheme in which the
    parameter index and the sample id were concatenated without a separator;
    confirm against the files actually on disk.

    Parameters
    ----------
    ch : str
        File name (or any string) containing at least one digit.

    Returns
    -------
    str
        The sample identifier, as a string of one or two characters.

    Raises
    ------
    IndexError
        If ``ch`` contains no digit at all.
    """
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    id_graph = re.findall(r"\d+", ch)[0]
    nb_kept = 2 if len(id_graph) == 3 else 1
    return id_graph[-nb_kept:]
def get_sample_id(fn, file_format="gml"):
    """Return the sample identifier encoded at the end of a graph file name.

    The identifier is the last ``_``-separated field of the file name once the
    ``.<file_format>`` extension has been removed.

    Parameters
    ----------
    fn : str
        Graph file name, e.g. ``"graph_ER_graph_3_12.gml"``.
    file_format : str, optional
        File extension without the leading dot (default ``"gml"``).

    Returns
    -------
    int
        The sample identifier.

    Raises
    ------
    ValueError
        If the last ``_``-separated field is not an integer.
    """
    suffix = ".{0}".format(file_format)
    # str.strip() removes a *set of characters* from both ends, not a suffix:
    # with an extension containing digits (e.g. "h5") it would also eat the
    # trailing digits of the sample id. Remove the exact suffix instead.
    stem = fn[:-len(suffix)] if fn.endswith(suffix) else fn
    return int(stem.split("_")[-1])
def load_data(fn, graph_dir):
    """Load a link-prediction results table and add descriptive columns.

    The file is read as tab-separated values and must provide at least the
    ``filename`` and ``name`` columns. Four columns are added:

    * ``type_graph``: graph family, derived from ``filename`` by dropping its
      first six characters (NOTE(review): presumably the ``graph_`` prefix
      written by the generator script; confirm), removing the trailing
      ``_<digits>.gml`` part and replacing underscores with spaces;
    * ``parameters``: graph-level attributes read from the matching GML file
      in ``graph_dir`` (see ``get_graph_attr``);
    * ``sample``: sample identifier returned by ``get_sample_id_old``;
    * ``type_method``: ``"heuristic"`` when ``name`` is one of the known
      non-embedding methods, ``"network_embedding_based"`` otherwise.

    Parameters
    ----------
    fn : str
        Path of the tab-separated results file.
    graph_dir : str
        Directory containing the GML files listed in the ``filename`` column.

    Returns
    -------
    pandas.DataFrame
        The results table with the additional columns.

    Raises
    ------
    FileNotFoundError
        If a graph listed in ``filename`` is missing from ``graph_dir``.
    """
    df = pd.read_csv(fn, sep="\t")

    # Raw string with an escaped dot: the previous pattern used an invalid
    # "\d" escape and an unescaped "." that matched any character.
    df["type_graph"] = df.filename.apply(
        lambda name: re.sub(r"_\d+\.gml", "", name[6:]).replace("_", " ")
    )
    df["parameters"] = df.filename.apply(lambda name: get_graph_attr(name, graph_dir))
    df["sample"] = df.filename.apply(get_sample_id_old)

    # Methods that do not rely on a network embedding.
    non_ne = {
        'random_prediction',
        'common_neighbours',
        'jaccard_coefficient',
        'adamic_adar_index',
        'preferential_attachment',
        'resource_allocation_index',
        'stochastic_block_model',
        'stochastic_block_model_degree_corrected',
        'spatial_link_prediction',
    }
    df["type_method"] = df.name.apply(
        lambda method: "heuristic" if method in non_ne else "network_embedding_based"
    )
    return df
def set_custom_palette(x, y, max_color='red', close_color='turquoise', other_color='lightgrey'):
    """Build a colour list highlighting the best-scoring categories.

    ``y`` is averaged per distinct value of ``x``. The category with the
    highest mean gets ``max_color``; categories whose mean lies strictly
    within ``0.01 + m`` of that maximum get ``close_color``, where ``m`` is
    the median gap between the maximum and every category mean; all other
    categories get ``other_color``.

    Parameters
    ----------
    x : pandas.Series
        Named series of category labels.
    y : pandas.Series
        Named series of values, aligned with ``x``.
    max_color, close_color, other_color : str, optional
        Colours for the best, near-best and remaining categories.

    Returns
    -------
    list
        One colour per element of ``x`` (not per distinct label), in the
        order of ``x``.
    """
    table = pd.concat((x, y), axis=1)
    means = table.groupby(x.name, as_index=False).mean()

    best = means[y.name].max()
    median_gap = (best - means[y.name]).median()
    tolerance = 0.01 + median_gap

    def pick_color(avg):
        # Exact equality is intended: ``best`` is itself one of the means.
        if avg == best:
            return max_color
        if best - tolerance < avg < best + tolerance:
            return close_color
        return other_color

    # ``means`` has exactly two columns (label, mean), so its rows form pairs.
    color_of = {label: pick_color(avg) for label, avg in dict(means.values).items()}
    return [color_of[label] for label in x]
def highlight_barplot(x, y, **kwargs):
    """Draw a seaborn bar plot coloured by ``set_custom_palette``.

    Any ``palette`` supplied by the caller is replaced by the highlighting
    palette computed from ``x`` and ``y``.

    Parameters
    ----------
    x : pandas.Series
        Named series of category labels.
    y : pandas.Series
        Named series of values to plot.
    **kwargs
        Extra keyword arguments forwarded to ``seaborn.barplot``.
    """
    # Always overwrite the key. Passing ``palette=`` explicitly next to
    # ``**kwargs`` raised a TypeError (duplicate keyword) whenever the caller
    # supplied a falsy palette such as ``palette=None``.
    kwargs["palette"] = set_custom_palette(x, y)
    sns.barplot(x=x, y=y, **kwargs)
class DrawingResults():
    """Draw seaborn facet plots from a link-prediction results table.

    The wrapped DataFrame is expected to provide the columns used by the
    plotting methods: ``name``, ``nb_edge``, ``size``, ``type_graph``,
    ``type_method``, ``parameters`` and the metric column(s) such as
    ``auroc``.

    Every plotting method accepts extra keyword arguments (``draw_args``):

    * ``plot_func``: plotting callable handed to ``FacetGrid.map``
      (default ``seaborn.barplot``), where the method supports it;
    * ``figsize``: ``(width, height)`` of the figure in inches;
    * ``rotation``: rotation of the x tick labels (default 90);
    * ``output_filename``: when set, the figure is saved there instead of
      being shown;
    * ``save_param``: dict of keyword arguments forwarded to ``savefig``.
    """

    # Aggregation names accepted by the ``agg_func`` arguments.
    _AGG_FUNCS = ("mean", "max", "min", "std")

    def __init__(self, df_results):
        """Store the results table to plot.

        Parameters
        ----------
        df_results : pandas.DataFrame
            Results table, e.g. as returned by ``load_data``.
        """
        self.df = df_results

    @staticmethod
    def _check_agg_func(agg_func):
        """Raise ValueError unless ``agg_func`` is a supported aggregation."""
        if agg_func not in DrawingResults._AGG_FUNCS:
            raise ValueError(
                "Method {0} does not exists in pandas.core.groupby.generic.DataFrameGroupBy".format(agg_func)
            )

    def __draw(self, g, **kwargs):
        """Apply the common layout to a FacetGrid, then save or show it.

        Parameters
        ----------
        g : seaborn.FacetGrid
            Grid that has already been populated.
        **kwargs
            Drawing options; see the class docstring.
        """
        if "figsize" in kwargs:
            g.fig.set_size_inches(*kwargs["figsize"])

        rotation = kwargs.get("rotation", 90)
        for ax in g.axes.flat:
            plt.setp(ax.get_xticklabels(), rotation=rotation)
        g.fig.subplots_adjust(wspace=.09, hspace=.02)

        output_filename = kwargs.get("output_filename", None)
        if not output_filename:
            plt.show()
            return

        save_params = {}
        # isinstance also accepts dict subclasses (e.g. OrderedDict).
        if isinstance(kwargs.get("save_param"), dict):
            save_params.update(kwargs["save_param"])
        g.savefig(output_filename, **save_params)

    def metric_per_nodes_edges(self, type_graph=None, agg_func=None, metric="auroc", **draw_args):
        """Plot a metric per method, faceted by graph size and edge count.

        Parameters
        ----------
        type_graph : str, optional
            Restrict the plot to this graph family. The filter is ignored
            when the value is not present in the ``type_graph`` column.
        agg_func : str, optional
            One of ``"mean"``, ``"max"``, ``"min"``, ``"std"``; when given,
            rows are first aggregated per
            (name, nb_edge, size, type_graph, type_method).
        metric : str, optional
            Column plotted on the y axis (default ``"auroc"``).
        **draw_args
            Drawing options; see the class docstring.

        Raises
        ------
        ValueError
            If ``agg_func`` is not a supported aggregation.
        """
        new_df = self.df.copy()
        if agg_func:
            self._check_agg_func(agg_func)
            grouped = new_df.groupby("name nb_edge size type_graph type_method".split(), as_index=False)
            new_df = getattr(grouped, agg_func)()

        if type_graph and type_graph in new_df.type_graph.unique():
            new_df = new_df[new_df.type_graph == type_graph].copy()

        g = sns.FacetGrid(new_df, row="size", col="nb_edge", margin_titles=True, height=2.5)
        plot_func = draw_args.get('plot_func', sns.barplot)
        g.map(plot_func, "name", metric)
        return self.__draw(g, **draw_args)

    def metric_global(self, agg_func=None, metric="auroc", **draw_args):
        """Plot a metric per method, with one facet per graph family.

        Parameters
        ----------
        agg_func : str, optional
            One of ``"mean"``, ``"max"``, ``"min"``, ``"std"``; when given,
            rows are aggregated per
            (name, nb_edge, size, type_graph, type_method) and the result is
            aggregated again per (name, type_graph, type_method).
        metric : str, optional
            Column plotted on the y axis (default ``"auroc"``).
        **draw_args
            Drawing options; see the class docstring.

        Raises
        ------
        ValueError
            If ``agg_func`` is not a supported aggregation.
        """
        if agg_func:
            # Validate before grouping so a bad name fails fast.
            self._check_agg_func(agg_func)
            per_config = self.df.groupby("name nb_edge size type_graph type_method".split(), as_index=False)
            per_config = getattr(per_config, agg_func)()
            per_method = per_config.groupby("name type_graph type_method".split(), as_index=False)
            new_df = getattr(per_method, agg_func)()
        else:
            new_df = self.df.copy()

        g = sns.FacetGrid(new_df, col="type_graph", margin_titles=True, height=2.5)
        plot_func = draw_args.get('plot_func', sns.barplot)
        g.map(plot_func, "name", metric, palette="tab20")
        return self.__draw(g, **draw_args)

    def caracteristic_distribution(self, caracteristic, **draw_args):
        """Plot the histogram of a column, with one facet per graph family.

        Parameters
        ----------
        caracteristic : str
            Name of the column whose distribution is drawn.
        **draw_args
            Drawing options; see the class docstring.
        """
        g = sns.FacetGrid(self.df, col="type_graph", col_wrap=4)
        g.map(sns.histplot, caracteristic)
        return self.__draw(g, **draw_args)

    def parameter_impact(self, type_graph, parameter, second_parameter="size", metric="auroc", **draw_args):
        """Plot a metric per method against one generation parameter.

        Only rows of the given graph family are kept. The value of
        ``parameter`` is read from each row's ``parameters`` mapping and used
        as the facet column; ``second_parameter`` is used as the facet row.

        Parameters
        ----------
        type_graph : str
            Graph family to plot.
        parameter : str
            Key looked up in each row's ``parameters`` mapping.
        second_parameter : str, optional
            Column used for the facet rows (default ``"size"``).
        metric : str, optional
            Column plotted on the y axis (default ``"auroc"``).
        **draw_args
            Drawing options; see the class docstring.
        """
        _df = self.df[self.df.type_graph == type_graph].copy()
        _df[parameter] = _df.parameters.apply(lambda attrs: attrs[parameter])

        g = sns.FacetGrid(_df, row=second_parameter, col=parameter, margin_titles=True, height=2.5)
        plot_func = draw_args.get('plot_func', sns.barplot)
        g.map(plot_func, "name", metric, palette="tab20")
        return self.__draw(g, **draw_args)
This diff is collapsed.
Click to expand it.
generate_theoric_random_graph.py
+
19
−
19
View file @
613e6a5c
...
@@ -19,30 +19,30 @@ args = parser.parse_args()
...
@@ -19,30 +19,30 @@ args = parser.parse_args()
GRAPH_SIZE
=
[
80
,
800
]
GRAPH_SIZE
=
[
80
,
800
]
EDGE_SIZE
=
[
2
,
4
,
5
]
EDGE_SIZE
=
[
2
,
4
,
5
]
sample_per_params
=
4
sample_per_params
=
10
OUTPUT_DIR
=
args
.
output_dir
OUTPUT_DIR
=
args
.
output_dir
if
not
os
.
path
.
exists
(
OUTPUT_DIR
):
if
not
os
.
path
.
exists
(
OUTPUT_DIR
):
raise
FileExistsError
(
"
O
utput
dir
ectory does not exists !
"
)
os
.
makedirs
(
args
.
o
utput
_
dir
)
parameters
=
{
parameters
=
{
#
"stochastic_block_model_graph": {
"
stochastic_block_model_graph
"
:
{
#
"nb_nodes":GRAPH_SIZE,
"
nb_nodes
"
:
GRAPH_SIZE
,
#
"nb_edges":EDGE_SIZE,
"
nb_edges
"
:
EDGE_SIZE
,
#
"nb_com" :[2,5,8,16],
"
nb_com
"
:[
2
,
5
,
8
,
16
],
#
"percentage_edge_betw":[0.1,0.01]
"
percentage_edge_betw
"
:[
0.1
,
0.01
]
#
},
},
#
"ER_graph": {
"
ER_graph
"
:
{
#
"nb_nodes":GRAPH_SIZE,
"
nb_nodes
"
:
GRAPH_SIZE
,
#
"nb_edges":EDGE_SIZE
"
nb_edges
"
:
EDGE_SIZE
#
},
},
#
"powerlaw_graph": { # configuration_model
"
powerlaw_graph
"
:
{
# configuration_model
#
"nb_nodes":GRAPH_SIZE,
"
nb_nodes
"
:
GRAPH_SIZE
,
#
"nb_edges":EDGE_SIZE,
"
nb_edges
"
:
EDGE_SIZE
,
#
"exponent":[2,3],
"
exponent
"
:[
2
,
3
],
#
"tries":[100]
"
tries
"
:[
100
]
#
},
},
"
spatial_graph
"
:{
"
spatial_graph
"
:{
"
nb_nodes
"
:
GRAPH_SIZE
,
"
nb_nodes
"
:
GRAPH_SIZE
,
"
nb_edges
"
:
EDGE_SIZE
,
"
nb_edges
"
:
EDGE_SIZE
,
...
@@ -66,7 +66,7 @@ for method,args in pbar:
...
@@ -66,7 +66,7 @@ for method,args in pbar:
try
:
try
:
G
=
func
(
**
params
)
G
=
func
(
**
params
)
G
.
graph
.
update
(
params
)
G
.
graph
.
update
(
params
)
nx
.
write_gml
(
G
,
OUTPUT_DIR
+
"
/graph_{method}_{ix}{sp_id}.gml
"
.
format
(
method
=
method
,
ix
=
ix
,
sp_id
=
sp_id
),
stringizer
=
str
)
nx
.
write_gml
(
G
,
OUTPUT_DIR
+
"
/graph_{method}_{ix}
_
{sp_id}.gml
"
.
format
(
method
=
method
,
ix
=
ix
,
sp_id
=
sp_id
),
stringizer
=
str
)
except
Exception
as
e
:
except
Exception
as
e
:
print
(
e
)
print
(
e
)
print
(
"
Can
'
t generate graphs using these parameters
"
)
print
(
"
Can
'
t generate graphs using these parameters
"
)
...
...
This diff is collapsed.
Click to expand it.
lib/random.py
+
6
−
2
View file @
613e6a5c
...
@@ -150,7 +150,7 @@ def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=1000, min_deg=0):
...
@@ -150,7 +150,7 @@ def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=1000, min_deg=0):
return
G
return
G
def
spatial_graph
(
nb_nodes
,
nb_edges
,
coords
=
"
country
"
,
dist_func
=
lambda
a
,
b
:
np
.
linalg
.
norm
(
a
-
b
),
self_link
=
False
):
def
spatial_graph
(
nb_nodes
,
nb_edges
,
coords
=
"
country
"
,
dist_func
=
lambda
a
,
b
:
np
.
linalg
.
norm
(
a
-
b
),
self_link
=
False
,
weighted
=
False
):
"""
"""
Generate a spatial graph with a specific number of vertices and edges
Generate a spatial graph with a specific number of vertices and edges
Parameters
Parameters
...
@@ -202,6 +202,7 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
...
@@ -202,6 +202,7 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
nodes
=
np
.
arange
(
nb_nodes
).
astype
(
int
)
nodes
=
np
.
arange
(
nb_nodes
).
astype
(
int
)
sizes
=
[
len
(
x
)
for
x
in
np
.
array_split
(
np
.
arange
(
nb_edges
),
nb_nodes
)]
sizes
=
[
len
(
x
)
for
x
in
np
.
array_split
(
np
.
arange
(
nb_edges
),
nb_nodes
)]
new_df
=
df
[(
df
.
src
==
nodes
[
0
])
|
(
df
.
tar
==
nodes
[
0
])].
sample
(
n
=
sizes
[
0
],
weights
=
"
weight
"
).
copy
()
new_df
=
df
[(
df
.
src
==
nodes
[
0
])
|
(
df
.
tar
==
nodes
[
0
])].
sample
(
n
=
sizes
[
0
],
weights
=
"
weight
"
).
copy
()
add_register
(
new_df
.
hash
.
values
)
add_register
(
new_df
.
hash
.
values
)
df
=
df
[
~
in_register
(
df
.
hash
.
values
)]
df
=
df
[
~
in_register
(
df
.
hash
.
values
)]
...
@@ -212,7 +213,10 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
...
@@ -212,7 +213,10 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
add_register
(
new_df
.
hash
.
values
)
add_register
(
new_df
.
hash
.
values
)
df
=
df
[
~
in_register
(
df
.
hash
.
values
)]
df
=
df
[
~
in_register
(
df
.
hash
.
values
)]
G
=
nx
.
from_pandas_edgelist
(
new_df
,
source
=
"
src
"
,
target
=
"
tar
"
,
edge_attr
=
"
weight
"
)
if
weighted
:
G
=
nx
.
from_pandas_edgelist
(
new_df
,
source
=
"
src
"
,
target
=
"
tar
"
,
edge_attr
=
"
weight
"
)
else
:
G
=
nx
.
from_pandas_edgelist
(
new_df
,
source
=
"
src
"
,
target
=
"
tar
"
)
for
n
in
list
(
G
.
nodes
()):
G
.
nodes
[
n
][
"
pos
"
]
=
coords
[
n
]
for
n
in
list
(
G
.
nodes
()):
G
.
nodes
[
n
][
"
pos
"
]
=
coords
[
n
]
return
G
return
G
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment