Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
L
linkprediction_depo
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Jacques Fize
linkprediction_depo
Commits
ea7ea1d7
Commit
ea7ea1d7
authored
4 years ago
by
Fize Jacques
Browse files
Options
Downloads
Patches
Plain Diff
Debug powerlaw configuration model graph generation+ and stuff
parent
cadf30a2
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
generate_theoric_random_graph.py
+2
-2
2 additions, 2 deletions
generate_theoric_random_graph.py
lib/random.py
+86
-11
86 additions, 11 deletions
lib/random.py
run_eval.py
+1
-1
1 addition, 1 deletion
run_eval.py
with
89 additions
and
14 deletions
generate_theoric_random_graph.py
+
2
−
2
View file @
ea7ea1d7
...
...
@@ -41,9 +41,9 @@ parameters = {
"
exponent
"
:[
2
,
3
]
},
"
spatial_graph
"
:{
"
nb_nodes
"
:
GRAPH_SIZE
,
"
nb_nodes
"
:
[
100
,
150
]
,
"
nb_edges
"
:
EDGE_SIZE
,
"
coords
"
:[
"
random
"
],
"
coords
"
:[
"
random
"
,
"
country
"
],
}
}
...
...
This diff is collapsed.
Click to expand it.
lib/random.py
+
86
−
11
View file @
ea7ea1d7
...
...
@@ -4,7 +4,7 @@ from collections import Iterable
import
numpy
as
np
import
networkx
as
nx
import
pandas
as
pd
from
networkx.generators.degree_seq
import
_to_stublist
import
random
...
...
@@ -75,7 +75,33 @@ def get_countries_coords():
return
np
.
asarray
(
gdf
.
centroid
.
apply
(
lambda
x
:
[
x
.
x
,
x
.
y
]).
values
.
tolist
())
def
powerlaw_graph
(
nb_nodes
,
nb_edges
,
exponent
=
2
,
tries
=
100
,
min_deg
=
1
):
def _conf_model(degree_seq, max_tries=100):
    """
    Build a graph without parallel edges from a degree sequence.

    The degree sequence is expanded into a list of stubs (one entry per
    half-edge), shuffled, and consecutive stubs are paired into edges. A pair
    that would duplicate an already created edge is put back into the pool,
    which is reshuffled and paired again on the next pass.

    Parameters
    ----------
    degree_seq : sequence of int
        degree wanted for each node (node ``i`` gets ``degree_seq[i]`` stubs)
    max_tries : int
        maximum number of shuffle/pair passes before giving up on the
        remaining stubs

    Returns
    -------
    nx.Graph
        graph built from the accepted edges. Nodes that never obtained an
        edge are absent from the graph, so ``len(G)`` may be lower than
        ``len(degree_seq)``.
    """
    stubs = _to_stublist(degree_seq)
    random.shuffle(stubs)

    seen_pairs = set()  # undirected node pairs already turned into an edge
    edges = []
    tries = 0
    while stubs and tries < max_tries:
        leftover = []
        # Stop at len - 1 so that the last complete pair is examined as well
        # (the previous bound, len - 2, always skipped it and the stub list
        # could therefore never be emptied).
        for i in range(0, len(stubs) - 1, 2):
            u, v = stubs[i], stubs[i + 1]
            pair = (u, v) if u <= v else (v, u)
            if pair in seen_pairs:
                # Would create a parallel edge: keep both stubs for next pass.
                leftover.append(u)
                leftover.append(v)
                continue
            # NOTE(review): a pair with u == v is accepted and yields a
            # self-loop, as before — confirm whether that is intended.
            seen_pairs.add(pair)
            edges.append([u, v])
        if len(stubs) % 2:
            # Odd number of stubs: the last one has no partner in this pass.
            leftover.append(stubs[-1])
        stubs = leftover
        random.shuffle(stubs)
        tries += 1

    return nx.from_edgelist(edges)
def
powerlaw_graph
(
nb_nodes
,
nb_edges
,
exponent
=
2
,
tries
=
1000
,
min_deg
=
1
):
"""
Generate a graph with a defined number of vertices and edges, and a degree distribution that fits the power law.
Parameters
...
...
@@ -91,8 +117,36 @@ def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1):
nx.Graph
generated graph
"""
seq
=
powerlaw
(
nb_nodes
,
nb_edges
,
exponent
,
tries
,
min_deg
)
return
nx
.
configuration_model
(
seq
.
astype
(
int
))
G
=
_conf_model
(
powerlaw
(
nb_nodes
,
nb_edges
,
exponent
,
tries
,
min_deg
).
astype
(
int
))
tries_
=
0
while
len
(
G
)
!=
nb_nodes
and
tries_
<
tries
:
G
=
_conf_model
(
powerlaw
(
nb_nodes
,
nb_edges
,
exponent
,
tries
,
min_deg
).
astype
(
int
))
tries_
+=
1
if
len
(
G
)
!=
nb_nodes
:
print
(
nb_nodes
,
nb_edges
,
exponent
)
raise
Exception
(
"
Cant compute configuration model based on parameters
"
)
if
G
.
size
()
!=
nb_edges
:
diff
=
abs
(
G
.
size
()
-
nb_edges
)
signe
=
1
if
G
.
size
()
-
nb_edges
<
0
else
-
1
if
signe
:
for
n
in
list
(
G
.
nodes
()):
if
G
.
size
()
==
nb_edges
:
break
for
n2
in
list
(
G
.
nodes
()):
if
not
G
.
has_edge
(
n
,
n2
):
G
.
add_edge
(
n
,
n2
)
if
G
.
size
()
==
nb_edges
:
break
else
:
edges_
=
list
(
G
.
edges
())
random
.
shuffle
(
edges_
)
i
=
diff
for
ed
in
edges_
:
u
,
v
=
ed
[
0
],
ed
[
1
]
if
G
.
degree
(
u
)
>
1
and
G
.
degree
(
v
)
>
1
:
G
.
remove_edge
(
u
,
v
)
i
-=
1
return
G
def
spatial_graph
(
nb_nodes
,
nb_edges
,
coords
=
"
country
"
,
dist_func
=
lambda
a
,
b
:
np
.
linalg
.
norm
(
a
-
b
),
self_link
=
False
):
...
...
@@ -132,11 +186,32 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
if
i
==
j
and
not
self_link
:
continue
data
.
append
([
i
,
j
,
dist_func
(
coords
[
i
],
coords
[
j
])])
df
=
pd
.
DataFrame
(
data
,
columns
=
"
src tar weight
"
.
split
())
df
[
"
hash
"
]
=
df
.
apply
(
lambda
x
:
"
_
"
.
join
(
sorted
([
str
(
x
.
src
),
str
(
x
.
tar
)])),
axis
=
1
)
df
=
df
.
drop_duplicates
(
subset
=
[
"
hash
"
])
df
=
df
.
sample
(
nb_edges
,
weights
=
"
weight
"
)
G
=
nx
.
from_pandas_edgelist
(
df
,
source
=
"
src
"
,
target
=
"
tar
"
,
edge_attr
=
"
weight
"
)
df
=
pd
.
DataFrame
(
data
,
columns
=
"
src tar weight
"
.
split
()).
astype
({
"
src
"
:
int
,
"
tar
"
:
int
})
df
[
"
hash
"
]
=
df
.
apply
(
lambda
x
:
"
_
"
.
join
(
sorted
([
str
(
int
(
x
.
src
)),
str
(
int
(
x
.
tar
))])),
axis
=
1
)
df
=
df
.
drop_duplicates
(
subset
=
"
hash
"
)
register
=
set
([])
def
add_register
(
hashes
):
for
hash_
in
hashes
:
register
.
add
(
hash_
)
def
in_register
(
hashes
):
return
np
.
array
([
True
if
hash_
in
register
else
False
for
hash_
in
hashes
])
nodes
=
np
.
arange
(
nb_nodes
).
astype
(
int
)
sizes
=
[
len
(
x
)
for
x
in
np
.
array_split
(
np
.
arange
(
nb_edges
),
nb_nodes
)]
new_df
=
df
[(
df
.
src
==
nodes
[
0
])
|
(
df
.
tar
==
nodes
[
0
])].
sample
(
n
=
sizes
[
0
],
weights
=
"
weight
"
).
copy
()
add_register
(
new_df
.
hash
.
values
)
df
=
df
[
~
in_register
(
df
.
hash
.
values
)]
for
ix
,
node
in
enumerate
(
nodes
[
1
:]):
sample
=
df
[(
df
.
src
==
node
)
|
(
df
.
tar
==
node
)].
sample
(
n
=
sizes
[
ix
+
1
],
weights
=
"
weight
"
).
copy
()
new_df
=
pd
.
concat
((
new_df
,
sample
))
add_register
(
new_df
.
hash
.
values
)
df
=
df
[
~
in_register
(
df
.
hash
.
values
)]
G
=
nx
.
from_pandas_edgelist
(
new_df
,
source
=
"
src
"
,
target
=
"
tar
"
,
edge_attr
=
"
weight
"
)
for
n
in
list
(
G
.
nodes
()):
G
.
nodes
[
n
][
"
pos
"
]
=
coords
[
n
]
return
G
...
...
@@ -254,7 +329,7 @@ def equilibrate(G, nb_nodes, percentage_edge_betw, percentage_edge_within, inter
def
draw_
(
array
,
register
,
hash_func
=
lambda
x
,
y
:
"
_
"
.
join
(
sorted
([
str
(
x
),
str
(
y
)]))):
tries
=
0
while
tries
<
1000
:
while
tries
<
1000
:
index_array
=
np
.
random
.
choice
(
np
.
arange
(
len
(
array
)),
1
)
res
=
array
[
index_array
]
res
=
res
[
0
]
...
...
@@ -262,7 +337,7 @@ def equilibrate(G, nb_nodes, percentage_edge_betw, percentage_edge_within, inter
if
not
hash_
in
register
:
register
.
add
(
hash_
)
return
index_array
tries
+=
1
tries
+=
1
raise
Exception
(
"
Error ! (TODO)
"
)
# Draw new edges
...
...
This diff is collapsed.
Click to expand it.
run_eval.py
+
1
−
1
View file @
ea7ea1d7
...
...
@@ -42,7 +42,7 @@ for fn in pbar:
top10node
=
pd
.
DataFrame
(
list
(
G
.
degree
()),
columns
=
"
node degree
"
.
split
()).
sort_values
(
"
degree
"
,
ascending
=
False
).
head
(
10
).
node
.
values
df_results
[
"
nb_edge
"
]
=
len
(
list
(
G
.
edges
())
)
df_results
[
"
nb_edge
"
]
=
G
.
size
(
)
df_results
[
"
transitivity
"
]
=
nx
.
transitivity
(
G
)
df_results
[
"
density
"
]
=
nx
.
density
(
G
)
df_results
[
"
top10_node
"
]
=
"
|
"
.
join
(
top10node
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment