Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
L
linkprediction_depo
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Jacques Fize
linkprediction_depo
Commits
cadf30a2
Commit
cadf30a2
authored
4 years ago
by
Fize Jacques
Browse files
Options
Downloads
Patches
Plain Diff
debug SBM - generated graph can have a number of nodes inferior to the one expected
parent
de800bfd
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
generate_theoric_random_graph.py
+0
-1
0 additions, 1 deletion
generate_theoric_random_graph.py
lib/random.py
+90
-9
90 additions, 9 deletions
lib/random.py
with
90 additions
and
10 deletions
generate_theoric_random_graph.py
+
0
−
1
View file @
cadf30a2
...
...
@@ -64,7 +64,6 @@ for method,args in pbar:
func
=
getattr
(
ra
,
method
)
for
ix
,
params
in
enumerate
(
list_of_params
):
# try:
print
(
params
)
G
=
func
(
**
params
)
G
.
graph
.
update
(
params
)
nx
.
write_gml
(
G
,
OUTPUT_DIR
+
"
/graph_{method}_{ix}.gml
"
.
format
(
method
=
method
,
ix
=
ix
),
stringizer
=
str
)
...
...
This diff is collapsed.
Click to expand it.
lib/random.py
+
90
−
9
View file @
cadf30a2
...
...
@@ -94,6 +94,7 @@ def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1):
seq
=
powerlaw
(
nb_nodes
,
nb_edges
,
exponent
,
tries
,
min_deg
)
return
nx
.
configuration_model
(
seq
.
astype
(
int
))
def
spatial_graph
(
nb_nodes
,
nb_edges
,
coords
=
"
country
"
,
dist_func
=
lambda
a
,
b
:
np
.
linalg
.
norm
(
a
-
b
),
self_link
=
False
):
"""
Generate a spatial graph with a specific number of vertices and edges
...
...
@@ -132,14 +133,15 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
continue
data
.
append
([
i
,
j
,
dist_func
(
coords
[
i
],
coords
[
j
])])
df
=
pd
.
DataFrame
(
data
,
columns
=
"
src tar weight
"
.
split
())
df
[
"
hash
"
]
=
df
.
apply
(
lambda
x
:
"
_
"
.
join
(
sorted
([
str
(
x
.
src
),
str
(
x
.
tar
)]))
,
axis
=
1
)
df
[
"
hash
"
]
=
df
.
apply
(
lambda
x
:
"
_
"
.
join
(
sorted
([
str
(
x
.
src
),
str
(
x
.
tar
)])),
axis
=
1
)
df
=
df
.
drop_duplicates
(
subset
=
[
"
hash
"
])
df
=
df
.
sample
(
nb_edges
,
weights
=
"
weight
"
)
G
=
nx
.
from_pandas_edgelist
(
df
,
source
=
"
src
"
,
target
=
"
tar
"
,
edge_attr
=
"
weight
"
)
for
n
in
list
(
G
.
nodes
()):
G
.
nodes
[
n
][
"
pos
"
]
=
coords
[
n
]
return
G
def
ER_graph
(
nb_nodes
,
nb_edges
):
def
ER_graph
(
nb_nodes
,
nb_edges
):
"""
Generate a random graph with a specific nb of nodes and edges.
Parameters
...
...
@@ -152,10 +154,10 @@ def ER_graph(nb_nodes,nb_edges):
nx.Graph
generated graph
"""
return
nx
.
dense_gnm_random_graph
(
nb_nodes
,
nb_edges
)
return
nx
.
dense_gnm_random_graph
(
nb_nodes
,
nb_edges
)
def
stochastic_block_model_graph
(
nb_nodes
,
nb_edges
,
nb_com
,
percentage_edge_betw
,
verbose
=
False
):
def
stochastic_block_model_graph
(
nb_nodes
,
nb_edges
,
nb_com
,
percentage_edge_betw
,
verbose
=
False
):
"""
Generate a stochastic block model graph with defined number of vertices and edges.
Parameters
...
...
@@ -172,7 +174,7 @@ def stochastic_block_model_graph(nb_nodes,nb_edges,nb_com,percentage_edge_betw,v
generated graph
"""
if
nb_nodes
%
nb_com
!=
0
:
if
nb_nodes
%
nb_com
!=
0
:
raise
ValueError
(
"
Modulo between the number of nodes and community must be equal to 0
"
)
edge_max
=
(
1
/
nb_com
)
*
((
nb_nodes
*
(
nb_nodes
-
1
))
/
2
)
...
...
@@ -186,7 +188,7 @@ def stochastic_block_model_graph(nb_nodes,nb_edges,nb_com,percentage_edge_betw,v
print
(
G
.
size
())
block_assign
=
nx
.
get_node_attributes
(
G
,
"
block
"
)
inter_edges
,
intra_edges
=
[],
[]
inter_edges
,
intra_edges
=
[],
[]
register
=
set
([])
for
n1
in
list
(
G
.
nodes
()):
for
n2
in
list
(
G
.
nodes
()):
...
...
@@ -194,7 +196,7 @@ def stochastic_block_model_graph(nb_nodes,nb_edges,nb_com,percentage_edge_betw,v
if
(
n1
==
n2
)
or
(
hash_
in
register
):
continue
b1
,
b2
=
block_assign
[
n1
],
block_assign
[
n2
]
if
b1
!=
b2
:
if
b1
!=
b2
:
inter_edges
.
append
([
n1
,
n2
])
else
:
intra_edges
.
append
([
n1
,
n2
])
...
...
@@ -209,14 +211,93 @@ def stochastic_block_model_graph(nb_nodes,nb_edges,nb_com,percentage_edge_betw,v
print
(
int
(
np
.
ceil
(
nb_edges
*
percentage_edge_betw
)),
int
(
np
.
ceil
(
nb_edges
*
percentage_edge_within
)))
final_edges
=
[]
index_inter
=
np
.
random
.
choice
(
np
.
arange
(
inter_N
),
int
(
np
.
ceil
(
nb_edges
*
percentage_edge_betw
)),
replace
=
False
)
index_intra
=
np
.
random
.
choice
(
np
.
arange
(
intra_N
),
int
(
np
.
ceil
(
nb_edges
*
percentage_edge_within
)),
replace
=
False
)
index_inter
=
np
.
random
.
choice
(
np
.
arange
(
inter_N
),
int
(
np
.
round
(
nb_edges
*
percentage_edge_betw
)),
replace
=
False
)
index_intra
=
np
.
random
.
choice
(
np
.
arange
(
intra_N
),
int
(
np
.
round
(
nb_edges
*
percentage_edge_within
)),
replace
=
False
)
final_edges
.
extend
(
inter_edges
[
index_inter
])
final_edges
.
extend
(
intra_edges
[
index_intra
])
if
verbose
:
print
(
len
(
final_edges
))
G2
=
nx
.
from_edgelist
(
final_edges
)
if
len
(
G2
)
!=
nb_nodes
:
equilibrate
(
G2
,
nb_nodes
,
percentage_edge_betw
,
percentage_edge_within
,
inter_edges
,
intra_edges
,
block_assign
)
for
n
in
list
(
G2
.
nodes
()):
G2
.
nodes
[
n
][
"
block
"
]
=
block_assign
[
n
]
return
G2
def
equilibrate
(
G
,
nb_nodes
,
percentage_edge_betw
,
percentage_edge_within
,
inter_edges
,
intra_edges
,
block_assign
):
"""
Sometimes the generated graph from the stochastic block model have some missing nodes due to the sampling method.
This function fix this issue.
Parameters
----------
G
nb_nodes
percentage_edge_betw
percentage_edge_within
inter_edges
intra_edges
block_assign
Returns
-------
"""
diff
=
nb_nodes
-
len
(
G
)
nodes_missing
=
list
(
set
(
list
(
range
(
nb_nodes
)))
-
set
(
list
(
G
.
nodes
())))
nb_edge_inter
=
int
(
np
.
round
(
len
(
nodes_missing
)
*
percentage_edge_betw
))
nodes_inter
=
np
.
random
.
choice
(
nodes_missing
,
nb_edge_inter
,
replace
=
False
)
nodes_intra
=
list
(
set
(
nodes_missing
)
-
set
(
nodes_inter
))
def
draw_
(
array
,
register
,
hash_func
=
lambda
x
,
y
:
"
_
"
.
join
(
sorted
([
str
(
x
),
str
(
y
)]))):
tries
=
0
while
tries
<
1000
:
index_array
=
np
.
random
.
choice
(
np
.
arange
(
len
(
array
)),
1
)
res
=
array
[
index_array
]
res
=
res
[
0
]
hash_
=
hash_func
(
res
[
0
],
res
[
1
])
if
not
hash_
in
register
:
register
.
add
(
hash_
)
return
index_array
tries
+=
1
raise
Exception
(
"
Error ! (TODO)
"
)
# Draw new edges
new_edges
=
[]
register
=
set
([])
for
node
in
nodes_inter
:
mask_inter
=
np
.
isin
(
inter_edges
,
node
).
sum
(
axis
=
1
).
astype
(
bool
)
index_inter
=
draw_
(
inter_edges
[
mask_inter
],
register
)
new_edges
.
extend
(
inter_edges
[
mask_inter
][
index_inter
])
for
node
in
nodes_intra
:
mask_intra
=
np
.
isin
(
intra_edges
,
node
).
sum
(
axis
=
1
).
astype
(
bool
)
index_intra
=
draw_
(
intra_edges
[
mask_intra
],
register
)
new_edges
.
extend
(
intra_edges
[
mask_intra
][
index_intra
])
# Draw existing edge to delete
edge_to_delete
=
[]
inter_edges_c
,
intra_edges_c
=
[],
[]
for
ed
in
list
(
G
.
edges
()):
if
G
.
degree
(
ed
[
0
])
>
1
and
G
.
degree
(
ed
[
1
])
>
1
:
# We don't want the nodes edge to be removed with the
# disappearance of the edge
if
block_assign
[
ed
[
0
]]
!=
block_assign
[
ed
[
1
]]:
inter_edges_c
.
append
([
ed
[
0
],
ed
[
1
]])
else
:
intra_edges_c
.
append
([
ed
[
0
],
ed
[
1
]])
index_inter
=
np
.
random
.
choice
(
np
.
arange
(
len
(
inter_edges_c
)),
int
(
np
.
round
(
diff
*
percentage_edge_betw
)),
replace
=
False
)
index_intra
=
np
.
random
.
choice
(
np
.
arange
(
len
(
intra_edges_c
)),
int
(
np
.
round
(
diff
*
percentage_edge_within
)),
replace
=
False
)
edge_to_delete
.
extend
(
np
.
asarray
(
inter_edges_c
)[
index_inter
])
edge_to_delete
.
extend
(
np
.
asarray
(
intra_edges_c
)[
index_intra
])
for
ed
in
edge_to_delete
:
G
.
remove_edge
(
*
ed
)
for
ed
in
new_edges
:
G
.
add_edge
(
*
ed
)
return
G
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment