Skip to content
Snippets Groups Projects
Commit 8376d2c5 authored by Fize Jacques's avatar Fize Jacques
Browse files

debug and add helpers and utils

parent b9802aa3
No related branches found
No related tags found
No related merge requests found
# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
*.DS_Store*
.idea*
\ No newline at end of file
source diff could not be displayed: it is too large. Options to address this: view the blob.
......@@ -3,25 +3,26 @@
import argparse
import networkx as nx
import pandas as pd
import joblib,json
import joblib
import json
from lib.draw import draw
parser = argparse.ArgumentParser()
parser.add_argument("input_file",help="edgelist format (sep = \",\" )")
parser.add_argument("output_file")
parser.add_argument("--encoder-file",help="LabelEncoder instance that allows to obtain a label for each node")
parser.add_argument("--country",help="if country node",action="store_true")
parser.add_argument("-w",action="store_true")
parser.add_argument("output_file")
args = parser.parse_args()
if args.w:
df = pd.read_csv(args.input_file,header=None,names="source target weight".split())
G = nx.from_pandas_edgelist(df,edge_attr="weight")
G = nx.from_pandas_edgelist(df,edge_attr="weight",create_using=nx.DiGraph())
else:
df = pd.read_csv(args.input_file, header=None, names="source target weight".split())
G = nx.from_pandas_edgelist(df, edge_attr="weight")
df = pd.read_csv(args.input_file, header=None, names="source target".split())
G = nx.from_pandas_edgelist(df,create_using=nx.DiGraph())
encoder = None
labels_dict = {}
......@@ -37,4 +38,4 @@ if args.encoder_file:
labels_dict[node] = encoder.inverse_transform([node])[0]
fig, ax = draw(G,labels_dict)
fig.savefig("test.pdf")
\ No newline at end of file
fig.savefig(args.output_file)
\ No newline at end of file
......@@ -3,15 +3,18 @@ from evalne.evaluation.split import EvalSplit as LPEvalSplit
from evalne.evaluation.score import Scoresheet
from evalne.utils import preprocess as pp
from lib.utils import load_edgelist
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("edgelist_graph_filename")
parser.add_argument("-v","--verbose",action="store_true")
args = parser.parse_args()#("data/fb_country_country_sample_6_size1000.txt".split())
# Load and preprocess the network
G = pp.load_graph(args.edgelist_graph_filename,directed=True)
G = load_edgelist(args.edgelist_graph_filename,is_directed=True,weighted=True)
G, _ = pp.prep_graph(G,maincc=True)
# Create an evaluator and generate train/test edge split
......@@ -33,7 +36,7 @@ methods = ['random_prediction',
# Evaluate baselines
for method in methods:
result = nee.evaluate_baseline(method=method)
result = nee.evaluate_baseline(method=method, )
scoresheet.log_results(result)
try:
......@@ -58,7 +61,7 @@ try:
for i in range(len(methods)):
command = commands[i] + " --input {} --output {} --representation-size {}"
results = nee.evaluate_cmd(method_name=methods[i], method_type='ne', command=command,
edge_embedding_methods=edge_emb, input_delim=' ', output_delim=' ')
edge_embedding_methods=edge_emb, input_delim=' ', output_delim=' ', verbose=args.verbose)
scoresheet.log_results(results)
except ImportError:
......@@ -66,5 +69,6 @@ except ImportError:
pass
# Get output
scoresheet.print_tabular()
if args.verbose:
scoresheet.print_tabular()
scoresheet.write_all(args.edgelist_graph_filename+"_results_lp")
\ No newline at end of file
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
import seaborn as sns
import networkx as nx
import pandas as pd
import numpy as np
from glob import glob
from fa2 import ForceAtlas2
def get_force_atlas(weight_influence=0, scaling_ratio=3.0, gravity=5):
"""
Return an instance of ForceAtlas with a specific configuration
Parameters
----------
weight_influence: float
between 0 and 1 (default 0)
scaling_ratio : float or int
see fa2 documentation(default 3)
gravity : float or int
see fa2 documentation (default 5)
Returns
-------
ForceAtlas2
instance of ForceAtlas2
"""
forceatlas2 = ForceAtlas2(
# Behavior alternatives
outboundAttractionDistribution=True, # Dissuade hubs
......@@ -32,20 +50,29 @@ def get_force_atlas(weight_influence=0, scaling_ratio=3.0, gravity=5):
def draw(G, labels_dict={}, iteration_force_atlase=2000, figsize=(40, 20), font_size=12, stroke_width=3,
stroke_color="black", font_color="white", edge_cmap=plt.cm.viridis, weight = True):
stroke_color="black", font_color="white", edge_cmap=plt.cm.viridis, weight=True):
"""
Return a figure of the current graph
Return a figure of a NetworkX graph
Parameters
----------
G
labels_dict
iteration_force_atlase
figsize
font_size
stroke_width
stroke_color
font_color
edge_cmap
G : nx.Graph
graph instance
labels_dict: dict
label for each node id
iteration_force_atlase: int
nb of iteration for the Force Atlas algorithm
figsize: tuple
figure size (matplotlib)
font_size: int
font size
stroke_width : int
text contour size
stroke_color: str
text contour color
font_color : str
text color
edge_cmap: matplotlib.pyplot.cm
Matplotlib Colormap instance used when edges are associated with a weight
Returns
-------
......@@ -63,8 +90,8 @@ def draw(G, labels_dict={}, iteration_force_atlase=2000, figsize=(40, 20), font_
fig, ax = plt.subplots(1, figsize=figsize)
# Draw nodes
nx.draw_networkx_nodes(G, positions, node_color='#999', ax=ax)
nodes = nx.draw_networkx_nodes(G, positions, node_color='#999', ax=ax)
edges = None
# Draw edges
if weight:
weights_width = [G[u][v]['weight'] * 200 for u, v in list(G.edges())]
......@@ -72,7 +99,7 @@ def draw(G, labels_dict={}, iteration_force_atlase=2000, figsize=(40, 20), font_
edges = nx.draw_networkx_edges(G, positions, edge_color=colors, width=weights_width,
edge_cmap=edge_cmap, ax=ax)
else:
edges = nx.draw_networkx_edges(G, positions, ax=ax,edge_color="#999")
edges = nx.draw_networkx_edges(G, positions, ax=ax, edge_color="#999")
# Plot nodes label
for node, pos in positions.items():
......@@ -87,6 +114,38 @@ def draw(G, labels_dict={}, iteration_force_atlase=2000, figsize=(40, 20), font_
path_effects.Normal()]) # effet de style
# Plot colorbar
if weight:
plt.colorbar(edges)
sm = plt.cm.ScalarMappable(cmap=edge_cmap, norm=plt.Normalize(vmin=min(colors), vmax=max(colors)))
sm.set_array([])
fig.colorbar(sm)
plt.axis("off")
plt.tight_layout()
return fig, ax
def average_degree(graph_dir, ext=".txt"):
    """
    Produce a figure that shows the average degree per number of edges in a graph dataset.

    Every file matching ``graph_dir/*ext`` is read as a headerless,
    comma-separated edgelist (columns ``source`` and ``target``) and loaded
    as a directed graph. One point is plotted per file.

    Parameters
    ----------
    graph_dir: str
        graph dataset directory path
    ext : str
        extension of the graph file (must be edgelist format)

    Returns
    -------
    Figure, Axes
        the matplotlib figure and the axes holding the scatter plot
    """
    # One [number of edges, average degree] row per graph file.
    data = []
    for fn in glob(graph_dir + "/*" + ext):
        df = pd.read_csv(fn, header=None, names="source target".split())
        G = nx.from_pandas_edgelist(df, create_using=nx.DiGraph())
        # Keep only the degree of each (node, degree) pair: putting the node
        # ids in the same array turns it into a string array when the ids
        # are not numeric, and the mean can then no longer be computed.
        degree_values = np.asarray([degree for _, degree in G.degree()])
        data.append([G.number_of_edges(), degree_values.mean()])

    df = pd.DataFrame(data, columns="nb_edges avg_degree".split())
    # plt.subplots creates the figure; no prior plt.gcf() call is needed
    # (it would create an extra, unused figure when none is open).
    fig, ax = plt.subplots(1, figsize=(10, 5))
    ax = sns.scatterplot(data=df, x="nb_edges", y="avg_degree", hue="nb_edges", legend=False, ax=ax)
    ax.set(xlabel="Number of edges", ylabel="Average Degree")
    return fig, ax
import pandas as pd
import geopandas as gpd
import numpy as np
import networkx as nx
import graph_tool as gt
import os
try:
    import graph_tool as gt
except ImportError:
    # graph_tool is optional: keep the module importable without it, and
    # bind the name so that callers can test `gt is None` instead of
    # hitting a NameError. Only ImportError is caught so that unrelated
    # failures (e.g. KeyboardInterrupt) are not silently swallowed.
    gt = None
def parse_evalne_output(string):
    """
    Parse a textual EvalNE result report into a DataFrame.

    Only the text after the last ``---------------------------`` separator
    is parsed. That text is split into sections on ``"\\n \\n"``; the first
    line of a section is the entry name (surrounding ``:`` removed) and each
    following line is a tab-separated ``key<TAB>value`` pair.

    Parameters
    ----------
    string : str
        raw content of the report

    Returns
    -------
    pd.DataFrame
        one row per section, one column per key, plus a ``name`` column
    """
    # Local import so the block does not depend on a file-level import.
    import ast

    def parse_value(raw):
        # Values are parsed as Python literals (numbers, booleans, lists...).
        # ast.literal_eval replaces the former eval() call: eval() executes
        # arbitrary expressions found in the report and resolves bare words
        # such as "max" or "pd" to live objects instead of keeping the text.
        try:
            return ast.literal_eval(raw)
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # Not a literal: keep the raw text.
            return raw

    report = string.split("---------------------------")[-1]
    records = []
    for section in report.split("\n \n"):
        lines = section.strip().split("\n")
        record = {}
        for line in lines[1:]:
            fields = [field.strip().strip(":") for field in line.split("\t")]
            record[fields[0]] = parse_value(fields[1])
        record["name"] = lines[0].strip(":")
        records.append(record)
    return pd.DataFrame.from_records(records)
def get_centroid(gdf,key_id):
......
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np
import networkx as nx
def load_country_country_data(filename, self_link=False):
......@@ -70,3 +71,16 @@ def to_edgelist(sample, encoder, weight=False):
if not weight:
del new_df["weight"]
return new_df
def load_edgelist(path, weighted=False, is_directed=False, sep=","):
    """
    Load a headerless edgelist file as a NetworkX graph.

    Parameters
    ----------
    path : str
        edgelist file path
    weighted : bool
        if True, a third column is read and stored as the ``weight``
        attribute of each edge
    is_directed : bool
        if True, build a ``nx.DiGraph``; otherwise a ``nx.Graph``
    sep : str
        column separator of the file

    Returns
    -------
    nx.Graph or nx.DiGraph
        graph built from the ``source`` and ``target`` columns
    """
    graph_type = nx.DiGraph() if is_directed else nx.Graph()

    # The weighted and unweighted cases differ only by the extra column and
    # the matching edge attribute.
    columns = ["source", "target"]
    build_options = {"create_using": graph_type}
    if weighted:
        columns.append("weight")
        build_options["edge_attr"] = "weight"

    edges = pd.read_csv(path, header=None, names=columns, sep=sep)
    return nx.from_pandas_edgelist(edges, **build_options)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment