Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
L
linkprediction_depo
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Jacques Fize
linkprediction_depo
Commits
de800bfd
Commit
de800bfd
authored
4 years ago
by
Fize Jacques
Browse files
Options
Downloads
Patches
Plain Diff
debug
parent
b5992610
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
generate_theoric_random_graph.py
+30
-42
30 additions, 42 deletions
generate_theoric_random_graph.py
lib/random.py
+87
-5
87 additions, 5 deletions
lib/random.py
with
117 additions
and
47 deletions
generate_theoric_random_graph.py
+
30
−
42
View file @
de800bfd
# coding = utf-8
import
itertools
import
os
import
networkx
as
nx
...
...
@@ -8,76 +9,63 @@ import pandas as pd
import
random
import
copy
from
tqdm
import
tqdm
import
lib.random
as
ra
# COMMAND PARSING
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"
output_dir
"
)
args
=
parser
.
parse_args
()
def generate_sbm_prob_matrix(nb_of_blocks, prob_btw_block=0.1):
    """
    Build the block-to-block edge probability matrix of a stochastic block model.

    Parameters
    ----------
    nb_of_blocks : int
        number of blocks; the result has shape (nb_of_blocks, nb_of_blocks)
    prob_btw_block : float
        value written to every off-diagonal cell

    Returns
    -------
    np.ndarray
        symmetric float matrix whose off-diagonal cells equal ``prob_btw_block``
        and whose diagonal cells are each drawn with ``random.random()``
    """
    # Fill the whole matrix in one call instead of visiting every (i, j) pair
    # in Python and writing each off-diagonal cell twice. dtype is forced to
    # float so that an integer prob_btw_block cannot yield an integer matrix.
    M = np.full((nb_of_blocks, nb_of_blocks), prob_btw_block, dtype=float)
    # One independent draw per block, in block order, for the diagonal.
    np.fill_diagonal(M, [random.random() for _ in range(nb_of_blocks)])
    return M
GRAPH_SIZE
=
[
50
,
75
,
100
]
EDGE_SIZE
=
[]
GRAPH_SIZE
=
[
100
,
150
,
200
]
EDGE_SIZE
=
[
300
,
500
]
# Directory that receives the generated .gml files (taken from the CLI).
OUTPUT_DIR = args.output_dir
if not os.path.exists(OUTPUT_DIR):
    # The path is missing, so FileNotFoundError is the matching OSError
    # subclass (FileExistsError means the opposite condition).
    raise FileNotFoundError("Output directory does not exists !")
nx
.
waxman_graph
parameters
=
{
"
planted_partition
_graph
"
:
{
"
l
"
:
[
3
,
5
,
8
],
# nb of groups
"
k
"
:
[
10
,
20
],
# nb de noeud
"
p_in
"
:
[
0.2
,
0.5
,
0.7
],
"
p
_out
"
:
[
0.1
]
"
stochastic_block_model
_graph
"
:
{
"
nb_nodes
"
:
GRAPH_SIZE
,
"
nb_edges
"
:
EDGE_SIZE
,
"
nb_com
"
:[
2
,
5
],
"
p
ercentage_edge_betw
"
:[
0.1
,
0.01
]
},
"
stochastic_block_model
"
:
{
"
sizes
"
:
[[
random
.
choice
([
10
,
20
,
30
])
for
k
in
range
(
i
)]
for
i
in
[
3
,
5
,
8
]],
"
p
"
:
[]
# Filled later
},
"
dense_gnm_random_graph
"
:
{
"
n
"
:
GRAPH_SIZE
,
"
m
"
:
EDGE_SIZE
"
ER_graph
"
:
{
"
nb_nodes
"
:
GRAPH_SIZE
,
"
nb_edges
"
:
EDGE_SIZE
},
"
powerlaw_graph
"
:
{
# configuration_model
"
n
"
:
GRAPH_SIZE
,
"
nb_nodes
"
:
GRAPH_SIZE
,
"
nb_edges
"
:
EDGE_SIZE
,
"
exponent
"
:[
2
,
3
]
},
"
spatial_graph
"
:{
"
nb_nodes
"
:
GRAPH_SIZE
,
"
nb_edges
"
:
EDGE_SIZE
,
"
coords
"
:[
"
random
"
],
}
}
# Generating transition matrices for stochastic block model
parameters
[
"
stochastic_block_model
"
][
"
p
"
]
=
[
generate_sbm_prob_matrix
(
len
(
l
))
for
l
in
parameters
[
"
stochastic_block_model
"
][
"
sizes
"
]]
#getattr(nx,"geographical_threshold_graph")(**dict(n=20,theta=0.4))
def get_params(dict_params):
    """
    Expand a grid of parameter values into every combination.

    Parameters
    ----------
    dict_params : dict
        maps each parameter name to the sequence of values to try

    Returns
    -------
    list of dict
        one dict per element of the Cartesian product of the value lists,
        keyed by parameter name; the first parameter varies slowest
    """
    names = list(dict_params)
    # itertools.product enumerates each combination exactly once. The former
    # ``ix // (total / len(values))`` indexing moved parameters with
    # equal-length value lists in lockstep, so combinations were duplicated
    # and others never produced.
    return [
        dict(zip(names, combination))
        for combination in itertools.product(*dict_params.values())
    ]
def get_params(inp):
    """
    Lazily enumerate every combination of a parameter grid.

    Parameters
    ----------
    inp : dict
        maps each parameter name to the iterable of values to try

    Returns
    -------
    generator of dict
        yields one ``{name: value}`` dict per element of the Cartesian
        product of the value iterables
    """
    combinations = itertools.product(*inp.values())
    return (dict(zip(inp.keys(), combination)) for combination in combinations)
pbar
=
tqdm
(
parameters
.
items
(),
total
=
len
(
parameters
))
for
method
,
args
in
pbar
:
pbar
.
set_description
(
"
Generating graphs using :
"
+
method
)
list_of_params
=
get_params
(
parameters
[
method
])
func
=
getattr
(
nx
,
method
)
func
=
getattr
(
ra
,
method
)
for
ix
,
params
in
enumerate
(
list_of_params
):
# try:
if
method
==
"
random_powerlaw_tree_sequence
"
:
sequence
=
func
(
**
params
)
G
=
nx
.
configuration_model
(
sequence
)
else
:
G
=
func
(
**
params
)
print
(
params
)
G
=
func
(
**
params
)
G
.
graph
.
update
(
params
)
nx
.
write_gml
(
G
,
OUTPUT_DIR
+
"
/graph_{method}_{ix}.gml
"
.
format
(
method
=
method
,
ix
=
ix
),
stringizer
=
str
)
# except Exception as e:
...
...
This diff is collapsed.
Click to expand it.
lib/random.py
+
87
−
5
View file @
de800bfd
...
...
@@ -9,6 +9,21 @@ import random
def
powerlaw
(
nb_nodes
,
nb_edges
,
exponent
=
2
,
tries
=
100
,
min_deg
=
1
):
"""
Return a degree sequence that fits a power law and matches the specified numbers of edges and vertices.
Parameters
----------
nb_nodes : int
nb_edges : int
exponent : int
tries : int
min_deg : int
Returns
-------
np.ndarray
degree sequence
"""
nb_stubs
=
nb_edges
*
2
# Draw a first time a powerlaw degree sequence
degs
=
np
.
round
(
nx
.
utils
.
powerlaw_sequence
(
nb_nodes
,
exponent
=
exponent
))
...
...
@@ -40,7 +55,7 @@ def powerlaw(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1):
for
ind
in
indexes
:
degs
[
ind
]
=
degs
[
ind
]
+
signe
return
degs
return
degs
.
astype
(
int
)
def
get_countries_coords
():
...
...
@@ -56,13 +71,46 @@ def get_countries_coords():
except
:
raise
ImportError
(
"
Geopandas is not installed !
"
)
gdf
=
gpd
.
read_file
(
gpd
.
datasets
.
get_path
(
"
naturalearth_lowres
"
))
return
np
.
asarray
(
gdf
.
centroid
.
apply
(
lambda
x
:
[
x
.
x
,
x
.
y
]).
values
.
tolist
())
def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1):
    """
    Generate a graph with a defined number of vertices and edges whose degree
    distribution fits a power law.

    Parameters
    ----------
    nb_nodes : int
    nb_edges : int
    exponent : int
    tries : int
    min_deg : int

    Returns
    -------
    nx.Graph
        graph built by ``nx.configuration_model`` from the degree sequence
        returned by ``powerlaw``
    """
    # All arguments are forwarded unchanged to powerlaw(), which produces the
    # degree sequence.
    seq = powerlaw(nb_nodes, nb_edges, exponent, tries, min_deg)
    # The sequence is cast to int before being handed to configuration_model.
    return nx.configuration_model(seq.astype(int))
def
spatial_graph
(
nb_nodes
,
nb_edges
,
coords
=
"
country
"
,
dist_func
=
lambda
a
,
b
:
np
.
linalg
.
norm
(
a
-
b
),
self_link
=
False
):
"""
Generate a spatial graph with a specific number of vertices and edges
Parameters
----------
nb_nodes : int
nb_edges : int
coords : array of shape (n,2) or str
if str, possible choice are
"
random
"
or
"
country
"
dist_func : callable
self_link : bool
Returns
-------
nx.Graph
generated graph
"""
if
coords
and
isinstance
(
coords
,
Iterable
)
and
not
isinstance
(
coords
,
str
):
if
len
(
coords
)
!=
nb_nodes
:
raise
ValueError
(
"
number of nodes must match the size of the coords dict
"
)
...
...
@@ -84,20 +132,54 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
continue
data
.
append
([
i
,
j
,
dist_func
(
coords
[
i
],
coords
[
j
])])
df
=
pd
.
DataFrame
(
data
,
columns
=
"
src tar weight
"
.
split
())
df
[
"
hash
"
]
=
df
.
apply
(
lambda
x
:
"
_
"
.
join
(
sorted
([
str
(
x
.
src
),
str
(
x
.
tar
)]))
,
axis
=
1
)
df
=
df
.
drop_duplicates
(
subset
=
[
"
hash
"
])
df
=
df
.
sample
(
nb_edges
,
weights
=
"
weight
"
)
G
=
nx
.
from_pandas_edgelist
(
df
,
source
=
"
src
"
,
target
=
"
tar
"
,
edge_attr
=
"
weight
"
)
for
n
in
list
(
G
.
nodes
()):
G
.
nodes
[
n
][
"
pos
"
]
=
coords
[
n
]
return
G
def ER_graph(nb_nodes, nb_edges):
    """
    Draw a random graph with a fixed number of nodes and edges.

    Parameters
    ----------
    nb_nodes : int
        number of nodes passed to ``nx.dense_gnm_random_graph``
    nb_edges : int
        number of edges passed to ``nx.dense_gnm_random_graph``

    Returns
    -------
    nx.Graph
        generated graph
    """
    graph = nx.dense_gnm_random_graph(nb_nodes, nb_edges)
    return graph
def
stochastic_block_model_graph
(
nb_nodes
,
nb_edges
,
nb_com
,
percentage_edge_betw
,
verbose
=
False
):
"""
Generate a stochastic block model graph with defined number of vertices and edges.
Parameters
----------
nb_nodes : int
nb_edges : int
nb_com : int
percentage_edge_betw : float
verbose : bool
Returns
-------
nx.Graph
generated graph
"""
if
nb_nodes
%
nb_com
!=
0
:
raise
ValueError
(
"
Modulo between the number of nodes and community must be equal to 0
"
)
edge_max
=
(
1
/
nb_com
)
*
((
nb_nodes
*
(
nb_nodes
-
1
))
/
2
)
if
nb_edges
>
edge_max
:
raise
ValueError
(
"
nb_edges must be inferior to {0}
"
.
format
(
edge_max
))
percentage_edge_within
=
1
-
percentage_edge_betw
if
nb_edges
>
(
1
/
nb_com
)
*
(
nb_nodes
*
(
nb_nodes
-
1
))
/
2
:
raise
ValueError
(
"
nb_edges must be inferior to {0}
"
.
format
((
1
/
nb_com
)
*
(
nb_nodes
*
(
nb_nodes
-
1
))
/
2
))
G
=
nx
.
planted_partition_graph
(
nb_com
,
int
(
np
.
round
(
nb_nodes
/
nb_com
)),
1
,
1
)
if
verbose
:
...
...
@@ -112,7 +194,7 @@ def stochastic_block_model_graph(nb_nodes,nb_edges,nb_com,percentage_edge_betw,v
if
(
n1
==
n2
)
or
(
hash_
in
register
):
continue
b1
,
b2
=
block_assign
[
n1
],
block_assign
[
n2
]
if
b1
!=
b2
:
if
b1
!=
b2
:
inter_edges
.
append
([
n1
,
n2
])
else
:
intra_edges
.
append
([
n1
,
n2
])
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment