Skip to content
Snippets Groups Projects
generate_random_graph.py 1.87 KiB
Newer Older
Fize Jacques's avatar
Fize Jacques committed
import numpy as np

from joblib import dump
Fize Jacques's avatar
Fize Jacques committed
import networkx as nx
Fize Jacques's avatar
Fize Jacques committed
from sklearn.preprocessing import LabelEncoder

from lib.utils import load_country_country_data, sample_with_pandas, to_edgelist

import argparse
import os

# Command-line interface: two positional paths plus sampling options.
parser = argparse.ArgumentParser()

# Positional arguments: the input TSV file and the output directory.
parser.add_argument("input_tsv")
parser.add_argument("output_dir")

# Comma-separated list of graph sizes, e.g. "-d 50,100,200".
parser.add_argument(
    "-d",
    "--dimensions",
    type=lambda raw: list(map(int, raw.split(","))),
    default=[50, 100, 200, 500, 1000],
    help="Size of generated graph",
)

# How many graphs are generated for each size.
parser.add_argument(
    "-n",
    type=int,
    default=6,
    help="Number of graph generated per size",
)

# Boolean flag forwarded to the data loader as ``self_link``.
parser.add_argument("-s", "--self-link", action="store_true")

# Parse the command line (reads sys.argv).
args = parser.parse_args()

# Make sure the output directory exists *before* any expensive work is done.
# Previously the script only printed a message and carried on, so it failed
# much later when the first graph was written. ``isdir`` (rather than
# ``exists``) is used so that a path pointing at a regular file is not
# mistaken for a usable directory; ``os.makedirs`` then raises
# ``FileExistsError`` in that case instead of failing later at write time.
if not os.path.isdir(args.output_dir):
    print("Output Dir does not exists ! Creating it: {0}".format(args.output_dir))
    os.makedirs(args.output_dir, exist_ok=True)

# Read the input table; the --self-link flag is forwarded to the loader.
df = load_country_country_data(args.input_tsv, self_link=args.self_link)


def _pair_key(row):
    """Return an order-independent key for the (user_loc, fr_loc) pair of *row*."""
    endpoints = sorted((row.user_loc, row.fr_loc))
    return "_".join(endpoints)


# Keep a single row per unordered location pair: (A, B) and (B, A) share the
# same key, so only the first occurrence survives.
df["hash"] = df.apply(_pair_key, axis=1)
df = df.drop_duplicates(subset=["hash"])

# Normalise the sci index so that the column sums to 1.
sci_total = df["scaled_sci"].sum()
df["norm_scaled_sci"] = df["scaled_sci"] / sci_total

# Fit a label encoder on every location seen on either side of a pair.
all_locations = np.concatenate([df["user_loc"].values, df["fr_loc"].values])
encoder = LabelEncoder()
encoder.fit(all_locations)

# Produce args.n graphs for each requested size and write each one as GML.
for sample_idx in range(args.n):
    for size in args.dimensions:
        # Sample edges using the normalised FB social interconnectedness
        # index (sampling itself is implemented in lib.utils.sample_with_pandas).
        sampled_edges = sample_with_pandas(df, size)

        # Build an undirected graph; the "weight" column becomes the edge attribute.
        graph = nx.from_pandas_edgelist(
            sampled_edges,
            source="user_loc",
            target="fr_loc",
            edge_attr="weight",
            create_using=nx.Graph(),
        )

        # One file per (sample index, size) combination.
        gml_name = "/fb_country_country_sample_{0}_size{1}.gml".format(sample_idx, size)
        nx.write_gml(graph, args.output_dir + gml_name)

        # Disabled alternative export: encoded edgelist written as CSV.
        #output_df = to_edgelist(sampled_edges,encoder,weight=True) # Parse to edgelist format
        #output_df.to_csv(args.output_dir + "/fb_country_country_sample_{0}_size{1}.txt".format(sample_idx,size),index=False,header= False,sep=",") # Save the output
Fize Jacques's avatar
Fize Jacques committed

# Save encoder to reverse the label transformation
Fize Jacques's avatar
Fize Jacques committed
#dump(encoder,args.output_dir + "/encoder.joblib")