Skip to content
Snippets Groups Projects
Commit d5e49b01 authored by Fize Jacques
Browse files

Create draw function

parent 7cfaeaa8
No related branches found
No related tags found
No related merge requests found
This diff is collapsed.
import numpy as np
from joblib import dump
from sklearn.preprocessing import LabelEncoder
from lib.utils import load_country_country_data, sample_with_pandas, to_edgelist
import argparse
import os
# Command-line interface: input TSV, output directory and sampling options.
parser = argparse.ArgumentParser()
parser.add_argument("input_tsv")
parser.add_argument("output_dir")
parser.add_argument('-d', '--dimensions', help='Size of generated graph',
                    type=lambda s: [int(item) for item in s.split(',')],
                    default=[50, 100, 200, 500, 1000])
parser.add_argument("-n", type=int, help="Number of graph generated per size", default=6)
parser.add_argument("-s", "--self-link", action="store_true")
args = parser.parse_args()

if not os.path.isdir(args.output_dir):
    # Abort instead of only printing: every output below is written inside
    # this directory, so continuing without it cannot produce any result.
    parser.error("Output Dir does not exists !")

# Load the data
df = load_country_country_data(args.input_tsv, self_link=args.self_link)

# Normalise the sci index so that it sums to 1 and can be used as sampling weights
df["norm_scaled_sci"] = df.scaled_sci / df.scaled_sci.sum()

# Fit one encoder on every location seen on either side of an edge
encoder = LabelEncoder()
encoder.fit(np.concatenate((df.user_loc.values, df.fr_loc.values)))

for i in range(args.n):  # For a number of graph
    for size in args.dimensions:  # Per size
        # Sample edges using the normalised FB social interconnectedness index
        test = sample_with_pandas(df, size)
        # Parse to edgelist format
        output_df = to_edgelist(test, encoder, weight=True)
        # Save the output
        output_fn = os.path.join(args.output_dir,
                                 "fb_country_country_sample_{0}_size{1}.txt".format(i, size))
        output_df.to_csv(output_fn, index=False, header=False, sep=",")

# Save encoder to reverse the label transformation
dump(encoder, os.path.join(args.output_dir, "encoder.joblib"))
\ No newline at end of file
import pandas as pd
import geopandas as gpd
import numpy as np
import networkx as nx
def get_centroid(gdf, key_id):
    """
    Map every value of the `key_id` column to the [x, y] coordinates of its row centroid.

    Side effect: a "centroid_" column holding the [x, y] lists is added to
    (or overwritten in) `gdf`.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        input frame; must expose a `centroid` attribute whose items have `x` and `y`
        (presumably a GeoDataFrame -- confirm against callers)
    key_id : str
        name of the column used as dictionary key

    Returns
    -------
    dict
        {key: [x, y]}; when a key appears several times the last row wins
    """
    gdf["centroid_"] = gdf.centroid.apply(lambda coord: [coord.x, coord.y])
    # Pair the two columns directly: selecting them through
    # `(key_id + " centroid_").split()` broke on column names containing whitespace.
    return dict(zip(gdf[key_id], gdf["centroid_"]))
def get_labels(gdf, key_id, key_label):
    """
    Map every value of the `key_id` column to the value of `key_label` on the same row.

    Parameters
    ----------
    gdf : pandas.DataFrame
        input frame (any frame supporting column access by name)
    key_id : str
        name of the column used as dictionary key
    key_label : str
        name of the column used as dictionary value

    Returns
    -------
    dict
        {key: label}; when a key appears several times the last row wins
    """
    # Zip the columns instead of selecting `(key_id + " " + key_label).split()`:
    # the split broke on column names containing whitespace, and going through
    # `.values` coerced both columns to a single common dtype.
    return dict(zip(gdf[key_id], gdf[key_label]))
def to_networkx(df, coords_dict=None, labels_dict=None):
    """
    Build a networkx graph from an edge dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        edges, with the columns "user_loc" (source), "fr_loc" (target) and "weight"
    coords_dict : dict, optional
        currently unused; kept so existing calls keep working
    labels_dict : dict, optional
        currently unused; kept so existing calls keep working

    Returns
    -------
    networkx.Graph
        graph whose edges carry a "weight" attribute
    """
    # The target column is "fr_loc"; the former "sfr_loc" was a typo.
    # The node list that used to be computed here was never used and is dropped.
    return nx.from_pandas_edgelist(df, source="user_loc", target="fr_loc", edge_attr="weight")
\ No newline at end of file
import matplotlib.pyplot as plt
import matplotlib.patheffects as path_effects
import networkx as nx
import pandas as pd
import numpy as np
from fa2 import ForceAtlas2
def get_force_atlas(weight_influence=0, scaling_ratio=3.0, gravity=5):
    """
    Build a configured ForceAtlas2 layout object.

    Parameters
    ----------
    weight_influence : number
        forwarded as `edgeWeightInfluence`
    scaling_ratio : float
        forwarded as `scalingRatio`
    gravity : number
        forwarded as `gravity`

    Returns
    -------
    fa2.ForceAtlas2
        layout object; every other option is fixed below
    """
    # Behavior alternatives
    behaviour = dict(
        outboundAttractionDistribution=True,  # Dissuade hubs
        linLogMode=False,  # NOT IMPLEMENTED
        adjustSizes=False,  # Prevent overlap (NOT IMPLEMENTED)
        edgeWeightInfluence=weight_influence,
    )
    # Performance
    performance = dict(
        jitterTolerance=1.0,  # Tolerance
        barnesHutOptimize=True,
        barnesHutTheta=1.2,
        multiThreaded=False,  # NOT IMPLEMENTED
    )
    # Tuning
    tuning = dict(
        scalingRatio=scaling_ratio,
        strongGravityMode=False,
        gravity=gravity,
    )
    # Log: verbose output is switched off
    return ForceAtlas2(**behaviour, **performance, **tuning, verbose=False)
def draw(G, labels_dict=None, iteration_force_atlase=2000, figsize=(40, 20), font_size=12, stroke_width=3,
         stroke_color="black", font_color="white", edge_cmap=plt.cm.viridis, weight=True):
    """
    Draw the graph on a new figure using a Force Atlas 2 layout.

    Parameters
    ----------
    G : networkx.Graph
        graph to draw; when `weight` is True every edge needs a "weight" attribute
    labels_dict : dict, optional
        node -> label mapping; nodes missing from it (or all nodes when it is
        empty/None) are labelled with the node itself
    iteration_force_atlase : int
        number of iterations of the layout algorithm
    figsize : tuple
        figure size forwarded to `plt.subplots`
    font_size : int
        size of the node labels
    stroke_width : number
        width of the outline drawn around each label
    stroke_color : str
        colour of the label outline
    font_color : str
        colour of the label text
    edge_cmap : matplotlib colormap
        colormap used for the edges when `weight` is True
    weight : bool
        if True, edge width and colour follow the "weight" attribute and a
        colorbar is added; otherwise edges are drawn in plain grey

    Returns
    -------
    matplotlib axes
        the axes the graph was drawn on
    """
    # None instead of a shared mutable `{}` default
    labels = labels_dict if labels_dict else {}

    # The former leading `plt.gcf()` call was removed: it cleared nothing and
    # `plt.subplots` below creates a fresh figure anyway.

    # Compute node position using the Force Atlas algorithm
    force_atlas = get_force_atlas()
    positions = force_atlas.forceatlas2_networkx_layout(G,
                                                        pos=None,
                                                        iterations=iteration_force_atlase)

    # Initialise the figure canvas
    fig, ax = plt.subplots(1, figsize=figsize)

    # Draw nodes
    nx.draw_networkx_nodes(G, positions, node_color='#999', ax=ax)

    # Draw edges
    if weight:
        # Read the weights once; they drive both the colour and the width
        edge_weights = [G[u][v]['weight'] for u, v in G.edges()]
        edges = nx.draw_networkx_edges(G, positions, edge_color=edge_weights,
                                       width=[w * 200 for w in edge_weights],
                                       edge_cmap=edge_cmap, ax=ax)
    else:
        edges = nx.draw_networkx_edges(G, positions, ax=ax, edge_color="#999")

    # Plot nodes label, outlined so the text stays readable over nodes and edges
    outline = [path_effects.Stroke(linewidth=stroke_width, foreground=stroke_color),
               path_effects.Normal()]
    for node, pos in positions.items():
        # Fall back to the node itself when no label is known for it
        text = ax.text(pos[0], pos[1], labels.get(node, node), color=font_color,
                       ha='center', va='center', size=font_size)
        text.set_path_effects(outline)

    # Plot colorbar
    if weight:
        fig.colorbar(edges, ax=ax)
    ax.axis("off")
    # The docstring always announced a returned axes; actually return it
    return ax
import pandas as pd
import geopandas as gpd
import numpy as np
import networkx as nx
import graph_tool as gt
def get_centroid(gdf, key_id):
    """
    Map every value of the `key_id` column to the [x, y] coordinates of its row centroid.

    Side effect: a "centroid_" column holding the [x, y] lists is added to
    (or overwritten in) `gdf`.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        input frame; must expose a `centroid` attribute whose items have `x` and `y`
        (presumably a GeoDataFrame -- confirm against callers)
    key_id : str
        name of the column used as dictionary key

    Returns
    -------
    dict
        {key: [x, y]}; when a key appears several times the last row wins
    """
    gdf["centroid_"] = gdf.centroid.apply(lambda coord: [coord.x, coord.y])
    # Pair the two columns directly: selecting them through
    # `(key_id + " centroid_").split()` broke on column names containing whitespace.
    return dict(zip(gdf[key_id], gdf["centroid_"]))
def get_labels(gdf, key_id, key_label):
    """
    Map every value of the `key_id` column to the value of `key_label` on the same row.

    Parameters
    ----------
    gdf : pandas.DataFrame
        input frame (any frame supporting column access by name)
    key_id : str
        name of the column used as dictionary key
    key_label : str
        name of the column used as dictionary value

    Returns
    -------
    dict
        {key: label}; when a key appears several times the last row wins
    """
    # Zip the columns instead of selecting `(key_id + " " + key_label).split()`:
    # the split broke on column names containing whitespace, and going through
    # `.values` coerced both columns to a single common dtype.
    return dict(zip(gdf[key_id], gdf[key_label]))
def to_networkx(df):
    """
    Build a networkx graph from an edge dataframe.

    Parameters
    ----------
    df : pandas.DataFrame
        edges, with the columns "user_loc" (source), "fr_loc" (target) and "weight"

    Returns
    -------
    networkx.Graph
        graph whose edges carry a "weight" attribute
    """
    # The node list that used to be built here from the two location columns
    # was never used, so it is no longer computed.
    return nx.from_pandas_edgelist(df, source="user_loc", target="fr_loc", edge_attr="weight")
def get_prop_type(value, key=None):
    """
    Performs typing and value conversion for the graph_tool PropertyMap class.

    Parameters
    ----------
    value : object
        attribute value to classify
    key : object, optional
        attribute name; returned unchanged

    Returns
    -------
    tuple
        (type name, value, key) where type name is one of 'bool', 'float',
        'object' or 'string'. Booleans stay booleans, any other real number
        becomes a float, dicts are kept as is, everything else is converted
        with str().
    """
    import numbers  # stdlib; imported locally so the file-level imports stay untouched

    # Deal with the value.
    # bool is tested first because it is itself an integer type; NumPy booleans
    # are listed explicitly since they do not derive from bool.
    if isinstance(value, (bool, np.bool_)):
        tname = 'bool'
        value = bool(value)
    # numbers.Real covers int and float as well as NumPy integer/floating
    # scalars, which used to fall through to 'string'.
    elif isinstance(value, numbers.Real):
        tname = 'float'
        value = float(value)
    elif isinstance(value, dict):
        tname = 'object'
    else:
        tname = 'string'
        value = str(value)
    return tname, value, key
def nx2gt(nxG):
    """
    Converts a networkx graph to a graph-tool graph.
    Code from http://bbengfort.github.io/snippets/2016/06/23/graph-tool-from-networkx.html

    Graph, node and edge attributes are copied into graph-tool property maps
    whose type is chosen by `get_prop_type` from the first value seen for each
    key. The networkx node is stored, as a string, in the vertex property 'id'.
    The input graph is left untouched.

    Parameters
    ----------
    nxG : networkx graph
        graph to convert

    Returns
    -------
    graph_tool.Graph
        converted graph
    """
    # Phase 0: Create a directed or undirected graph-tool Graph
    gtG = gt.Graph(directed=nxG.is_directed())

    # Add the Graph properties as "internal properties"
    for key, value in nxG.graph.items():
        # Convert the value and key into a type for graph-tool
        tname, value, key = get_prop_type(value, key)
        prop = gtG.new_graph_property(tname)  # Create the PropertyMap
        gtG.graph_properties[key] = prop  # Set the PropertyMap
        gtG.graph_properties[key] = value  # Set the actual value

    # Phase 1: Add the vertex and edge property maps
    # Go through all nodes and edges and add seen properties
    # Add the node properties first
    nprops = set()  # cache keys to only add properties once
    for node, data in nxG.nodes(data=True):
        # Go through all the properties if not seen and add them.
        for key, val in data.items():
            if key in nprops:
                continue  # Skip properties already added
            # Convert the value and key into a type for graph-tool
            tname, _, key = get_prop_type(val, key)
            prop = gtG.new_vertex_property(tname)  # Create the PropertyMap
            gtG.vertex_properties[key] = prop  # Set the PropertyMap
            # Add the key to the already seen properties
            nprops.add(key)

    # Also add the node id: in NetworkX a node can be any hashable type, but
    # in graph-tool node are defined as indices. So we capture any strings
    # in a special PropertyMap called 'id' -- modify as needed!
    gtG.vertex_properties['id'] = gtG.new_vertex_property('string')

    # Add the edge properties second
    eprops = set()  # cache keys to only add properties once
    for src, dst, data in nxG.edges(data=True):
        # Go through all the edge properties if not seen and add them.
        for key, val in data.items():
            if key in eprops:
                continue  # Skip properties already added
            # Convert the value and key into a type for graph-tool
            tname, _, key = get_prop_type(val, key)
            prop = gtG.new_edge_property(tname)  # Create the PropertyMap
            gtG.edge_properties[key] = prop  # Set the PropertyMap
            # Add the key to the already seen properties
            eprops.add(key)

    # Phase 2: Actually add all the nodes and vertices with their properties
    # Add the nodes
    vertices = {}  # vertex mapping for tracking edges later
    for node, data in nxG.nodes(data=True):
        # Create the vertex and annotate for our edges later
        v = gtG.add_vertex()
        vertices[node] = v
        # Set the vertex properties, storing each value in the form
        # get_prop_type normalises it to (the same form used to pick the type)
        for key, value in data.items():
            gtG.vp[key][v] = get_prop_type(value)[1]  # vp is short for vertex_properties
        # Write the id last and straight into the property map: the previous
        # `data['id'] = str(node)` added an 'id' attribute to the input graph.
        gtG.vp['id'][v] = str(node)

    # Add the edges
    for src, dst, data in nxG.edges(data=True):
        # Look up the vertex structs from our vertices mapping and add edge.
        e = gtG.add_edge(vertices[src], vertices[dst])
        # Add the edge properties
        for key, value in data.items():
            gtG.ep[key][e] = get_prop_type(value)[1]  # ep is short for edge_properties

    # Done, finally!
    return gtG
\ No newline at end of file
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np
def load_country_country_data(filename, self_link=False):
    """
    Load and preprocess data

    Parameters
    ----------
    filename: str
        input filename (tab-separated, with at least the columns
        "user_loc" and "fr_loc")
    self_link: bool
        use or not self link, i.e. rows where user_loc equals fr_loc

    Returns
    -------
    pandas.Dataframe
        data without rows that have a missing or ignored location
    """
    df = pd.read_csv(filename, sep="\t")
    # NOTE(review): with read_csv's default NA handling a literal "NA" location
    # code would be parsed as missing and removed by the filter below --
    # confirm whether the input can contain it.
    df = df[df.user_loc.notna() & df.fr_loc.notna()]
    ign = ["CW", "XK"]  # No coords for these two countries ... got to investigate!
    df = df[~df.user_loc.isin(ign) & ~df.fr_loc.isin(ign)]
    if not self_link:
        # Column-wise comparison; same mask as the former row-by-row apply
        df = df[df.user_loc != df.fr_loc]
    return df
def sample_with_pandas(df, N, random_state=None):
    """
    Return a sample of the available connections using Pandas Dataframe.sample() method

    Rows are drawn with a probability given by the "norm_scaled_sci" column.
    When that column is missing it is computed as scaled_sci / sum(scaled_sci)
    on a copy, so the caller's dataframe is never modified.

    Parameters
    ----------
    df : pandas.Dataframe
        input; needs "norm_scaled_sci" or "scaled_sci"
    N : int
        number of rows (edges) to draw
    random_state : optional
        forwarded to Dataframe.sample(); set it to get reproducible samples

    Returns
    -------
    pandas.Dataframe
        Selected edges, with "norm_scaled_sci" renamed to "weight"
    """
    if "norm_scaled_sci" not in df.columns:
        # assign() returns a new frame: the input is often a filtered slice and
        # writing a column into it in place would alter the caller's data.
        df = df.assign(norm_scaled_sci=df.scaled_sci / df.scaled_sci.sum())
    sampled = df.sample(n=N, weights="norm_scaled_sci", random_state=random_state)
    return sampled.rename(columns={"norm_scaled_sci": "weight"})
def to_edgelist(sample, encoder, weight=False):
    """
    Parse FB SCI dataframe to edgelist format

    Parameters
    ----------
    sample : pandas.Dataframe
        dataframe with the columns "user_loc", "fr_loc", "scaled_sci" and "weight"
    encoder : sklearn.preprocessing.LabelEncoder
        encoder whose transform() turns location values into labels
    weight : bool
        include (or not) FB SC index in output

    Returns
    -------
    pandas.Dataframe
        copy of `sample` with encoded "user_loc"/"fr_loc", without "scaled_sci"
        and, unless `weight` is True, without "weight"
    """
    # Work on a copy so the sampled dataframe handed in stays unchanged
    new_df = sample.copy()
    new_df["fr_loc"] = encoder.transform(new_df.fr_loc.values)
    new_df["user_loc"] = encoder.transform(new_df.user_loc.values)
    dropped = ["scaled_sci"] if weight else ["scaled_sci", "weight"]
    return new_df.drop(columns=dropped)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment