Skip to content
Snippets Groups Projects
Commit 8376d2c5 authored by Fize Jacques's avatar Fize Jacques
Browse files

debug and add helpers and utils

parent b9802aa3
No related branches found
No related tags found
No related merge requests found
# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
*.DS_Store*
.idea*
\ No newline at end of file
This diff is collapsed.
...@@ -3,25 +3,26 @@ ...@@ -3,25 +3,26 @@
import argparse import argparse
import networkx as nx import networkx as nx
import pandas as pd import pandas as pd
import joblib,json import joblib
import json
from lib.draw import draw from lib.draw import draw
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("input_file",help="edgelist format (sep = \",\" )") parser.add_argument("input_file",help="edgelist format (sep = \",\" )")
parser.add_argument("output_file")
parser.add_argument("--encoder-file",help="LabelEncoder instance that allows to obtain a label for each node") parser.add_argument("--encoder-file",help="LabelEncoder instance that allows to obtain a label for each node")
parser.add_argument("--country",help="if country node",action="store_true") parser.add_argument("--country",help="if country node",action="store_true")
parser.add_argument("-w",action="store_true") parser.add_argument("-w",action="store_true")
parser.add_argument("output_file")
args = parser.parse_args() args = parser.parse_args()
if args.w: if args.w:
df = pd.read_csv(args.input_file,header=None,names="source target weight".split()) df = pd.read_csv(args.input_file,header=None,names="source target weight".split())
G = nx.from_pandas_edgelist(df,edge_attr="weight") G = nx.from_pandas_edgelist(df,edge_attr="weight",create_using=nx.DiGraph())
else: else:
df = pd.read_csv(args.input_file, header=None, names="source target weight".split()) df = pd.read_csv(args.input_file, header=None, names="source target".split())
G = nx.from_pandas_edgelist(df, edge_attr="weight") G = nx.from_pandas_edgelist(df,create_using=nx.DiGraph())
encoder = None encoder = None
labels_dict = {} labels_dict = {}
...@@ -37,4 +38,4 @@ if args.encoder_file: ...@@ -37,4 +38,4 @@ if args.encoder_file:
labels_dict[node] = encoder.inverse_transform([node])[0] labels_dict[node] = encoder.inverse_transform([node])[0]
fig, ax = draw(G,labels_dict) fig, ax = draw(G,labels_dict)
fig.savefig("test.pdf") fig.savefig(args.output_file)
\ No newline at end of file \ No newline at end of file
...@@ -3,15 +3,18 @@ from evalne.evaluation.split import EvalSplit as LPEvalSplit ...@@ -3,15 +3,18 @@ from evalne.evaluation.split import EvalSplit as LPEvalSplit
from evalne.evaluation.score import Scoresheet from evalne.evaluation.score import Scoresheet
from evalne.utils import preprocess as pp from evalne.utils import preprocess as pp
from lib.utils import load_edgelist
import argparse import argparse
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("edgelist_graph_filename") parser.add_argument("edgelist_graph_filename")
parser.add_argument("-v","--verbose",action="store_true")
args = parser.parse_args()#("data/fb_country_country_sample_6_size1000.txt".split()) args = parser.parse_args()#("data/fb_country_country_sample_6_size1000.txt".split())
# Load and preprocess the network # Load and preprocess the network
G = pp.load_graph(args.edgelist_graph_filename,directed=True) G = load_edgelist(args.edgelist_graph_filename,is_directed=True,weighted=True)
G, _ = pp.prep_graph(G,maincc=True) G, _ = pp.prep_graph(G,maincc=True)
# Create an evaluator and generate train/test edge split # Create an evaluator and generate train/test edge split
...@@ -33,7 +36,7 @@ methods = ['random_prediction', ...@@ -33,7 +36,7 @@ methods = ['random_prediction',
# Evaluate baselines # Evaluate baselines
for method in methods: for method in methods:
result = nee.evaluate_baseline(method=method) result = nee.evaluate_baseline(method=method, )
scoresheet.log_results(result) scoresheet.log_results(result)
try: try:
...@@ -58,7 +61,7 @@ try: ...@@ -58,7 +61,7 @@ try:
for i in range(len(methods)): for i in range(len(methods)):
command = commands[i] + " --input {} --output {} --representation-size {}" command = commands[i] + " --input {} --output {} --representation-size {}"
results = nee.evaluate_cmd(method_name=methods[i], method_type='ne', command=command, results = nee.evaluate_cmd(method_name=methods[i], method_type='ne', command=command,
edge_embedding_methods=edge_emb, input_delim=' ', output_delim=' ') edge_embedding_methods=edge_emb, input_delim=' ', output_delim=' ', verbose=args.verbose)
scoresheet.log_results(results) scoresheet.log_results(results)
except ImportError: except ImportError:
...@@ -66,5 +69,6 @@ except ImportError: ...@@ -66,5 +69,6 @@ except ImportError:
pass pass
# Get output # Get output
scoresheet.print_tabular() if args.verbose:
scoresheet.print_tabular()
scoresheet.write_all(args.edgelist_graph_filename+"_results_lp") scoresheet.write_all(args.edgelist_graph_filename+"_results_lp")
\ No newline at end of file
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects import matplotlib.patheffects as path_effects
import seaborn as sns
import networkx as nx import networkx as nx
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from glob import glob
from fa2 import ForceAtlas2 from fa2 import ForceAtlas2
def get_force_atlas(weight_influence=0, scaling_ratio=3.0, gravity=5): def get_force_atlas(weight_influence=0, scaling_ratio=3.0, gravity=5):
"""
Return an instance of ForceAtlas with a specific configuration
Parameters
----------
weight_influence: float
between 0 and 1 (default 0)
scaling_ratio : float or int
see fa2 documentation(default 3)
gravity : float or int
see fa2 documentation (default 5)
Returns
-------
ForceAtlas2
instance of ForceAtlas2
"""
forceatlas2 = ForceAtlas2( forceatlas2 = ForceAtlas2(
# Behavior alternatives # Behavior alternatives
outboundAttractionDistribution=True, # Dissuade hubs outboundAttractionDistribution=True, # Dissuade hubs
...@@ -32,20 +50,29 @@ def get_force_atlas(weight_influence=0, scaling_ratio=3.0, gravity=5): ...@@ -32,20 +50,29 @@ def get_force_atlas(weight_influence=0, scaling_ratio=3.0, gravity=5):
def draw(G, labels_dict={}, iteration_force_atlase=2000, figsize=(40, 20), font_size=12, stroke_width=3, def draw(G, labels_dict={}, iteration_force_atlase=2000, figsize=(40, 20), font_size=12, stroke_width=3,
stroke_color="black", font_color="white", edge_cmap=plt.cm.viridis, weight = True): stroke_color="black", font_color="white", edge_cmap=plt.cm.viridis, weight=True):
""" """
Return a figure of the current graph Return a figure of a NetworkX graph
Parameters Parameters
---------- ----------
G G : nx.Graph
labels_dict graph instance
iteration_force_atlase labels_dict: dict
figsize label for each node id
font_size iteration_force_atlase: int
stroke_width nb of iteration for the Force Atlas algorithm
stroke_color figsize: tuple
font_color figure size (matplotlib)
edge_cmap font_size: int
font size
stroke_width : int
text contour size
stroke_color: str
text contour color
font_color : str
text color
edge_cmap: matplotlib.pyplot.cm
Matplotlib Colormap instance used when edges are associated with a weight
Returns Returns
------- -------
...@@ -63,8 +90,8 @@ def draw(G, labels_dict={}, iteration_force_atlase=2000, figsize=(40, 20), font_ ...@@ -63,8 +90,8 @@ def draw(G, labels_dict={}, iteration_force_atlase=2000, figsize=(40, 20), font_
fig, ax = plt.subplots(1, figsize=figsize) fig, ax = plt.subplots(1, figsize=figsize)
# Draw nodes # Draw nodes
nx.draw_networkx_nodes(G, positions, node_color='#999', ax=ax) nodes = nx.draw_networkx_nodes(G, positions, node_color='#999', ax=ax)
edges = None
# Draw edges # Draw edges
if weight: if weight:
weights_width = [G[u][v]['weight'] * 200 for u, v in list(G.edges())] weights_width = [G[u][v]['weight'] * 200 for u, v in list(G.edges())]
...@@ -72,7 +99,7 @@ def draw(G, labels_dict={}, iteration_force_atlase=2000, figsize=(40, 20), font_ ...@@ -72,7 +99,7 @@ def draw(G, labels_dict={}, iteration_force_atlase=2000, figsize=(40, 20), font_
edges = nx.draw_networkx_edges(G, positions, edge_color=colors, width=weights_width, edges = nx.draw_networkx_edges(G, positions, edge_color=colors, width=weights_width,
edge_cmap=edge_cmap, ax=ax) edge_cmap=edge_cmap, ax=ax)
else: else:
edges = nx.draw_networkx_edges(G, positions, ax=ax,edge_color="#999") edges = nx.draw_networkx_edges(G, positions, ax=ax, edge_color="#999")
# Plot nodes label # Plot nodes label
for node, pos in positions.items(): for node, pos in positions.items():
...@@ -87,6 +114,38 @@ def draw(G, labels_dict={}, iteration_force_atlase=2000, figsize=(40, 20), font_ ...@@ -87,6 +114,38 @@ def draw(G, labels_dict={}, iteration_force_atlase=2000, figsize=(40, 20), font_
path_effects.Normal()]) # effet de style path_effects.Normal()]) # effet de style
# Plot colorbar # Plot colorbar
if weight: if weight:
plt.colorbar(edges) sm = plt.cm.ScalarMappable(cmap=edge_cmap, norm=plt.Normalize(vmin=min(colors), vmax=max(colors)))
sm.set_array([])
fig.colorbar(sm)
plt.axis("off") plt.axis("off")
plt.tight_layout()
return fig, ax
def average_degree(graph_dir, ext=".txt"):
    """
    Produce a figure that shows the average degree against the number of edges
    for every graph file found in a dataset directory.

    Parameters
    ----------
    graph_dir: str
        graph dataset directory path
    ext : str
        extension of the graph files (must be edgelist format, no header row)

    Returns
    -------
    Figure, AxesSubplot
        the matplotlib figure and the axes holding the scatter plot
    """
    data = []
    for fn in glob(graph_dir + "/*" + ext):
        # Only the first two columns describe an edge. Restricting the read to
        # them prevents pandas from turning the first column into an index
        # when the file carries extra columns (e.g. a weight).
        df = pd.read_csv(fn, header=None, names="source target".split(), usecols=[0, 1])
        G = nx.from_pandas_edgelist(df, create_using=nx.DiGraph())
        # Keep the degrees as plain numbers: packing (node, degree) pairs into a
        # single array would coerce the degrees to strings for string node ids.
        degree_values = [degree for _, degree in G.degree()]
        avg_degree = np.mean(degree_values) if degree_values else float("nan")
        data.append([G.number_of_edges(), avg_degree])

    df = pd.DataFrame(data, columns="nb_edges avg_degree".split())
    fig, ax = plt.subplots(1, figsize=(10, 5))
    ax = sns.scatterplot(data=df, x="nb_edges", y="avg_degree", hue="nb_edges", legend=False, ax=ax)
    ax.set(xlabel="Number of edges", ylabel="Average Degree")
    return fig, ax
import pandas as pd import pandas as pd
import geopandas as gpd
import numpy as np import numpy as np
import networkx as nx import networkx as nx
import graph_tool as gt import os
try:
import graph_tool as gt
except:
pass
def parse_evalne_output(string):
    """
    Parse the textual results written by EvalNE into a DataFrame.

    Only the text after the last ``---------------------------`` separator is
    parsed. That text is split into one chunk per method on ``"\\n \\n"``. The
    first line of a chunk is the method name; every following line is a
    tab-separated ``key<TAB>value`` pair.

    Parameters
    ----------
    string: str
        content of an EvalNE results file

    Returns
    -------
    pd.DataFrame
        one row per chunk, one column per key, plus a ``name`` column holding
        the method name

    Raises
    ------
    IndexError
        if a line following a method name contains no tab-separated value
    """
    # Local import: `ast` is not part of the module-level imports.
    import ast

    def parse_value(raw):
        # Values are parsed as Python literals (numbers, lists, ...) instead of
        # being passed to `eval`, so that text from the file is never executed.
        # Anything that is not a literal is kept as the original string.
        try:
            return ast.literal_eval(raw)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            return raw

    modif = string.split("---------------------------")[-1]
    logs = []
    for chunk in modif.split("\n \n"):
        lines = chunk.strip().split("\n")
        name = lines[0].strip(":")
        record = {}
        for line in lines[1:]:
            fields = [field.strip().strip(":") for field in line.split("\t")]
            record[fields[0]] = parse_value(fields[1])
        # Set last so the method name wins over a key that is literally "name".
        record["name"] = name
        logs.append(record)
    return pd.DataFrame.from_records(logs)
def get_centroid(gdf,key_id): def get_centroid(gdf,key_id):
......
import pandas as pd import pandas as pd
from sklearn.preprocessing import LabelEncoder from sklearn.preprocessing import LabelEncoder
import numpy as np import numpy as np
import networkx as nx
def load_country_country_data(filename, self_link=False): def load_country_country_data(filename, self_link=False):
...@@ -70,3 +71,16 @@ def to_edgelist(sample, encoder, weight=False): ...@@ -70,3 +71,16 @@ def to_edgelist(sample, encoder, weight=False):
if not weight: if not weight:
del new_df["weight"] del new_df["weight"]
return new_df return new_df
def load_edgelist(path, weighted=False, is_directed=False, sep=","):
    """
    Load a graph stored as a header-less edgelist file.

    Parameters
    ----------
    path: str
        path of the edgelist file (columns: source, target and optionally weight)
    weighted: bool
        if True, the third column is read and stored as the ``weight`` edge attribute
    is_directed: bool
        if True, a ``nx.DiGraph`` is built, otherwise a ``nx.Graph``
    sep: str
        column separator used in the file (default ",")

    Returns
    -------
    nx.Graph or nx.DiGraph
        graph built from the edges listed in the file
    """
    template = nx.DiGraph() if is_directed else nx.Graph()
    if weighted:
        df = pd.read_csv(path, header=None, names="source target weight".split(), sep=sep)
        return nx.from_pandas_edgelist(df, edge_attr="weight", create_using=template)

    # Read only the first two columns. Without `usecols`, a file that also has
    # a weight column makes pandas promote the first column to the index, so
    # the edges would be built from (target, weight) instead of (source, target).
    df = pd.read_csv(path, header=None, names="source target".split(), usecols=[0, 1], sep=sep)
    return nx.from_pandas_edgelist(df, create_using=template)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment