Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
L
linkprediction_depo
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Jacques Fize
linkprediction_depo
Commits
de800bfd
Commit
de800bfd
authored
4 years ago
by
Fize Jacques
Browse files
Options
Downloads
Patches
Plain Diff
debug
parent
b5992610
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
generate_theoric_random_graph.py
+30
-42
30 additions, 42 deletions
generate_theoric_random_graph.py
lib/random.py
+87
-5
87 additions, 5 deletions
lib/random.py
with
117 additions
and
47 deletions
generate_theoric_random_graph.py
+
30
−
42
View file @
de800bfd
# coding = utf-8
# coding = utf-8
import
itertools
import
os
import
os
import
networkx
as
nx
import
networkx
as
nx
...
@@ -8,76 +9,63 @@ import pandas as pd
...
@@ -8,76 +9,63 @@ import pandas as pd
import
random
import
random
import
copy
import
copy
from
tqdm
import
tqdm
from
tqdm
import
tqdm
import
lib.random
as
ra
# COMMAND PARSING
# COMMAND PARSING
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"
output_dir
"
)
parser
.
add_argument
(
"
output_dir
"
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
def
generate_sbm_prob_matrix
(
nb_of_blocks
,
prob_btw_block
=
0.1
):
M
=
np
.
zeros
((
nb_of_blocks
,
nb_of_blocks
))
np
.
fill_diagonal
(
M
,[
random
.
random
()
for
i
in
range
(
nb_of_blocks
)])
for
i
in
range
(
nb_of_blocks
):
for
j
in
range
(
nb_of_blocks
):
if
i
==
j
:
continue
M
[
i
,
j
]
=
prob_btw_block
M
[
j
,
i
]
=
prob_btw_block
return
M
GRAPH_SIZE
=
[
100
,
150
,
200
]
GRAPH_SIZE
=
[
50
,
75
,
100
]
EDGE_SIZE
=
[
300
,
500
]
EDGE_SIZE
=
[]
OUTPUT_DIR
=
args
.
output_dir
OUTPUT_DIR
=
args
.
output_dir
if
not
os
.
path
.
exists
(
OUTPUT_DIR
):
if
not
os
.
path
.
exists
(
OUTPUT_DIR
):
raise
FileExistsError
(
"
Output directory does not exists !
"
)
raise
FileExistsError
(
"
Output directory does not exists !
"
)
nx
.
waxman_graph
parameters
=
{
parameters
=
{
"
planted_partition
_graph
"
:
{
"
stochastic_block_model
_graph
"
:
{
"
l
"
:
[
3
,
5
,
8
],
# nb of groups
"
nb_nodes
"
:
GRAPH_SIZE
,
"
k
"
:
[
10
,
20
],
# nb de noeud
"
nb_edges
"
:
EDGE_SIZE
,
"
p_in
"
:
[
0.2
,
0.5
,
0.7
],
"
nb_com
"
:[
2
,
5
],
"
p
_out
"
:
[
0.1
]
"
p
ercentage_edge_betw
"
:[
0.1
,
0.01
]
},
},
"
stochastic_block_model
"
:
{
"
ER_graph
"
:
{
"
sizes
"
:
[[
random
.
choice
([
10
,
20
,
30
])
for
k
in
range
(
i
)]
for
i
in
[
3
,
5
,
8
]],
"
nb_nodes
"
:
GRAPH_SIZE
,
"
p
"
:
[]
# Filled later
"
nb_edges
"
:
EDGE_SIZE
},
"
dense_gnm_random_graph
"
:
{
"
n
"
:
GRAPH_SIZE
,
"
m
"
:
EDGE_SIZE
},
},
"
powerlaw_graph
"
:
{
# configuration_model
"
powerlaw_graph
"
:
{
# configuration_model
"
n
"
:
GRAPH_SIZE
,
"
nb_nodes
"
:
GRAPH_SIZE
,
"
nb_edges
"
:
EDGE_SIZE
,
"
exponent
"
:[
2
,
3
]
},
},
"
spatial_graph
"
:{
"
nb_nodes
"
:
GRAPH_SIZE
,
"
nb_edges
"
:
EDGE_SIZE
,
"
coords
"
:[
"
random
"
],
}
}
}
# Generating transition matrices for stochastic block model
parameters
[
"
stochastic_block_model
"
][
"
p
"
]
=
[
generate_sbm_prob_matrix
(
len
(
l
))
for
l
in
parameters
[
"
stochastic_block_model
"
][
"
sizes
"
]]
#getattr(nx,"geographical_threshold_graph")(**dict(n=20,theta=0.4))
#getattr(nx,"geographical_threshold_graph")(**dict(n=20,theta=0.4))
def
get_params
(
dict_params
):
nb_of_parameter
=
np
.
prod
([
len
(
a
)
for
_
,
a
in
dict_params
.
items
()])
parameters_dicts
=
[{}
for
i
in
range
(
nb_of_parameter
)]
def
get_params
(
inp
):
for
par
,
values
in
dict_params
.
items
():
return
(
dict
(
zip
(
inp
.
keys
(),
values
))
for
values
in
itertools
.
product
(
*
inp
.
values
()))
division
=
nb_of_parameter
/
len
(
values
)
for
ix
in
range
(
nb_of_parameter
):
parameters_dicts
[
ix
][
par
]
=
values
[
int
(
ix
//
division
)]
return
parameters_dicts
pbar
=
tqdm
(
parameters
.
items
(),
total
=
len
(
parameters
))
pbar
=
tqdm
(
parameters
.
items
(),
total
=
len
(
parameters
))
for
method
,
args
in
pbar
:
for
method
,
args
in
pbar
:
pbar
.
set_description
(
"
Generating graphs using :
"
+
method
)
pbar
.
set_description
(
"
Generating graphs using :
"
+
method
)
list_of_params
=
get_params
(
parameters
[
method
])
list_of_params
=
get_params
(
parameters
[
method
])
func
=
getattr
(
nx
,
method
)
func
=
getattr
(
ra
,
method
)
for
ix
,
params
in
enumerate
(
list_of_params
):
for
ix
,
params
in
enumerate
(
list_of_params
):
# try:
# try:
if
method
==
"
random_powerlaw_tree_sequence
"
:
print
(
params
)
sequence
=
func
(
**
params
)
G
=
func
(
**
params
)
G
=
nx
.
configuration_model
(
sequence
)
else
:
G
=
func
(
**
params
)
G
.
graph
.
update
(
params
)
G
.
graph
.
update
(
params
)
nx
.
write_gml
(
G
,
OUTPUT_DIR
+
"
/graph_{method}_{ix}.gml
"
.
format
(
method
=
method
,
ix
=
ix
),
stringizer
=
str
)
nx
.
write_gml
(
G
,
OUTPUT_DIR
+
"
/graph_{method}_{ix}.gml
"
.
format
(
method
=
method
,
ix
=
ix
),
stringizer
=
str
)
# except Exception as e:
# except Exception as e:
...
...
This diff is collapsed.
Click to expand it.
lib/random.py
+
87
−
5
View file @
de800bfd
...
@@ -9,6 +9,21 @@ import random
...
@@ -9,6 +9,21 @@ import random
def
powerlaw
(
nb_nodes
,
nb_edges
,
exponent
=
2
,
tries
=
100
,
min_deg
=
1
):
def
powerlaw
(
nb_nodes
,
nb_edges
,
exponent
=
2
,
tries
=
100
,
min_deg
=
1
):
"""
Return a degree distribution that fit the power law and specified number of edges and vertices.
Parameters
----------
nb_nodes : int
nb_edges : int
exponent : int
tries : int
min_deg : int
Returns
-------
np.ndarray
degree sequence
"""
nb_stubs
=
nb_edges
*
2
nb_stubs
=
nb_edges
*
2
# Draw a first time a powerlaw degree sequence
# Draw a first time a powerlaw degree sequence
degs
=
np
.
round
(
nx
.
utils
.
powerlaw_sequence
(
nb_nodes
,
exponent
=
exponent
))
degs
=
np
.
round
(
nx
.
utils
.
powerlaw_sequence
(
nb_nodes
,
exponent
=
exponent
))
...
@@ -40,7 +55,7 @@ def powerlaw(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1):
...
@@ -40,7 +55,7 @@ def powerlaw(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1):
for
ind
in
indexes
:
for
ind
in
indexes
:
degs
[
ind
]
=
degs
[
ind
]
+
signe
degs
[
ind
]
=
degs
[
ind
]
+
signe
return
degs
return
degs
.
astype
(
int
)
def
get_countries_coords
():
def
get_countries_coords
():
...
@@ -56,13 +71,46 @@ def get_countries_coords():
...
@@ -56,13 +71,46 @@ def get_countries_coords():
except
:
except
:
raise
ImportError
(
"
Geopandas is not installed !
"
)
raise
ImportError
(
"
Geopandas is not installed !
"
)
gdf
=
gpd
.
read_file
(
gpd
.
datasets
.
get_path
(
"
naturalearth_lowres
"
))
gdf
=
gpd
.
read_file
(
gpd
.
datasets
.
get_path
(
"
naturalearth_lowres
"
))
return
np
.
asarray
(
gdf
.
centroid
.
apply
(
lambda
x
:
[
x
.
x
,
x
.
y
]).
values
.
tolist
())
return
np
.
asarray
(
gdf
.
centroid
.
apply
(
lambda
x
:
[
x
.
x
,
x
.
y
]).
values
.
tolist
())
def
powerlaw_graph
(
nb_nodes
,
nb_edges
,
exponent
=
2
,
tries
=
100
,
min_deg
=
1
):
def
powerlaw_graph
(
nb_nodes
,
nb_edges
,
exponent
=
2
,
tries
=
100
,
min_deg
=
1
):
return
nx
.
configuration_model
(
powerlaw
(
nb_nodes
,
nb_edges
,
exponent
,
tries
,
min_deg
))
"""
Generate a graph with a definied number of vertices, edges, and a degree distribution that fit the power law.
Parameters
----------
nb_nodes : int
nb_edges : int
exponent : int
tries : int
min_deg : int
Returns
-------
nx.Graph
generated graph
"""
seq
=
powerlaw
(
nb_nodes
,
nb_edges
,
exponent
,
tries
,
min_deg
)
return
nx
.
configuration_model
(
seq
.
astype
(
int
))
def
spatial_graph
(
nb_nodes
,
nb_edges
,
coords
=
"
country
"
,
dist_func
=
lambda
a
,
b
:
np
.
linalg
.
norm
(
a
-
b
),
self_link
=
False
):
def
spatial_graph
(
nb_nodes
,
nb_edges
,
coords
=
"
country
"
,
dist_func
=
lambda
a
,
b
:
np
.
linalg
.
norm
(
a
-
b
),
self_link
=
False
):
"""
Generate a spatial graph with a specific number of vertices and edges
Parameters
----------
nb_nodes : int
nb_edges : int
coords : array of shape (n,2) or str
if str, possible choice are
"
random
"
or
"
country
"
dist_func : callable
self_link : bool
Returns
-------
nx.Graph
generated graph
"""
if
coords
and
isinstance
(
coords
,
Iterable
)
and
not
isinstance
(
coords
,
str
):
if
coords
and
isinstance
(
coords
,
Iterable
)
and
not
isinstance
(
coords
,
str
):
if
len
(
coords
)
!=
nb_nodes
:
if
len
(
coords
)
!=
nb_nodes
:
raise
ValueError
(
"
number of nodes must match the size of the coords dict
"
)
raise
ValueError
(
"
number of nodes must match the size of the coords dict
"
)
...
@@ -84,20 +132,54 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
...
@@ -84,20 +132,54 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
continue
continue
data
.
append
([
i
,
j
,
dist_func
(
coords
[
i
],
coords
[
j
])])
data
.
append
([
i
,
j
,
dist_func
(
coords
[
i
],
coords
[
j
])])
df
=
pd
.
DataFrame
(
data
,
columns
=
"
src tar weight
"
.
split
())
df
=
pd
.
DataFrame
(
data
,
columns
=
"
src tar weight
"
.
split
())
df
[
"
hash
"
]
=
df
.
apply
(
lambda
x
:
"
_
"
.
join
(
sorted
([
str
(
x
.
src
),
str
(
x
.
tar
)]))
,
axis
=
1
)
df
=
df
.
drop_duplicates
(
subset
=
[
"
hash
"
])
df
=
df
.
sample
(
nb_edges
,
weights
=
"
weight
"
)
df
=
df
.
sample
(
nb_edges
,
weights
=
"
weight
"
)
G
=
nx
.
from_pandas_edgelist
(
df
,
source
=
"
src
"
,
target
=
"
tar
"
,
edge_attr
=
"
weight
"
)
G
=
nx
.
from_pandas_edgelist
(
df
,
source
=
"
src
"
,
target
=
"
tar
"
,
edge_attr
=
"
weight
"
)
for
n
in
list
(
G
.
nodes
()):
G
.
nodes
[
n
][
"
pos
"
]
=
coords
[
n
]
for
n
in
list
(
G
.
nodes
()):
G
.
nodes
[
n
][
"
pos
"
]
=
coords
[
n
]
return
G
return
G
def
ER_graph
(
nb_nodes
,
nb_edges
):
def
ER_graph
(
nb_nodes
,
nb_edges
):
"""
Generate a random graph with a specific nb of nodes and edges.
Parameters
----------
nb_nodes : int
nb_edges : int
Returns
-------
nx.Graph
generated graph
"""
return
nx
.
dense_gnm_random_graph
(
nb_nodes
,
nb_edges
)
return
nx
.
dense_gnm_random_graph
(
nb_nodes
,
nb_edges
)
def
stochastic_block_model_graph
(
nb_nodes
,
nb_edges
,
nb_com
,
percentage_edge_betw
,
verbose
=
False
):
def
stochastic_block_model_graph
(
nb_nodes
,
nb_edges
,
nb_com
,
percentage_edge_betw
,
verbose
=
False
):
"""
Generate a stochastic block model graph with defined number of vertices and edges.
Parameters
----------
nb_nodes : int
nb_edges : int
nb_com : int
percentage_edge_betw : float
verbose : bool
Returns
-------
nx.Graph
generated graph
"""
if
nb_nodes
%
nb_com
!=
0
:
raise
ValueError
(
"
Modulo between the number of nodes and community must be equal to 0
"
)
edge_max
=
(
1
/
nb_com
)
*
((
nb_nodes
*
(
nb_nodes
-
1
))
/
2
)
if
nb_edges
>
edge_max
:
raise
ValueError
(
"
nb_edges must be inferior to {0}
"
.
format
(
edge_max
))
percentage_edge_within
=
1
-
percentage_edge_betw
percentage_edge_within
=
1
-
percentage_edge_betw
if
nb_edges
>
(
1
/
nb_com
)
*
(
nb_nodes
*
(
nb_nodes
-
1
))
/
2
:
raise
ValueError
(
"
nb_edges must be inferior to {0}
"
.
format
((
1
/
nb_com
)
*
(
nb_nodes
*
(
nb_nodes
-
1
))
/
2
))
G
=
nx
.
planted_partition_graph
(
nb_com
,
int
(
np
.
round
(
nb_nodes
/
nb_com
)),
1
,
1
)
G
=
nx
.
planted_partition_graph
(
nb_com
,
int
(
np
.
round
(
nb_nodes
/
nb_com
)),
1
,
1
)
if
verbose
:
if
verbose
:
...
@@ -112,7 +194,7 @@ def stochastic_block_model_graph(nb_nodes,nb_edges,nb_com,percentage_edge_betw,v
...
@@ -112,7 +194,7 @@ def stochastic_block_model_graph(nb_nodes,nb_edges,nb_com,percentage_edge_betw,v
if
(
n1
==
n2
)
or
(
hash_
in
register
):
if
(
n1
==
n2
)
or
(
hash_
in
register
):
continue
continue
b1
,
b2
=
block_assign
[
n1
],
block_assign
[
n2
]
b1
,
b2
=
block_assign
[
n1
],
block_assign
[
n2
]
if
b1
!=
b2
:
if
b1
!=
b2
:
inter_edges
.
append
([
n1
,
n2
])
inter_edges
.
append
([
n1
,
n2
])
else
:
else
:
intra_edges
.
append
([
n1
,
n2
])
intra_edges
.
append
([
n1
,
n2
])
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment