diff --git a/evalNE_script.py b/evalNE_script.py
index 4449d10d110e9244c98e53e6932fc38b289783c4..e36d76303dd489ad9f12c37c1bd2d18af33d84d4 100644
--- a/evalNE_script.py
+++ b/evalNE_script.py
@@ -41,9 +41,9 @@ log("Building link prediction dataset...")
 # Create an evaluator and generate train/test edge split
 traintest_split = LPEvalSplit()
 try:
-    traintest_split.compute_splits(G,split_alg="fast",train_frac=0.6,fe_ratio=1)
+    traintest_split.compute_splits(G, split_alg="spanning_tree", train_frac=0.9, fe_ratio=1)
 except ValueError:
-    traintest_split.compute_splits(G, split_alg="spanning_tree", train_frac=0.6, fe_ratio=1)
+    traintest_split.compute_splits(G, split_alg="fast", train_frac=0.9, fe_ratio=1)
 nee = LPEvaluator(traintest_split)
 
 log("Dataset Built !")
@@ -63,7 +63,7 @@ methods = ['random_prediction',
 "spatial_link_prediction"
 ]
 
-# Evaluate baselines
+# Evaluate heuristics
 pbar = tqdm(methods,disable= (not args.verbose))
 for method in pbar:
     pbar.set_description("Evaluate "+method)
diff --git a/generate_random_graph.py b/generate_random_graph.py
index 0f8b0dca1cd92a054aa593c84dfb617f560856e9..35a6e9356435029e7471d1c77c77485e60abadae 100644
--- a/generate_random_graph.py
+++ b/generate_random_graph.py
@@ -37,7 +37,7 @@ for i in range(args.n): # For a number of graphs
     for size in args.dimensions: # Per size
         test = sample_with_pandas(df,size) # sample edges using the normalised FB social interconnectedness index
         G = nx.from_pandas_edgelist(test, source="user_loc",target="fr_loc", edge_attr="weight", create_using=nx.Graph())
-        nx.write_gexf(G,args.output_dir + "/fb_country_country_sample_{0}_size{1}.gexf".format(i, size))
+        nx.write_gml(G,args.output_dir + "/fb_country_country_sample_{0}_size{1}.gml".format(i, size))
         #output_df = to_edgelist(test,encoder,weight=True) # Parse to edgelist format
         #output_df.to_csv(args.output_dir + "/fb_country_country_sample_{0}_size{1}.txt".format(i,size),index=False,header= False,sep=",") # Save the output
 
diff --git a/generate_theoric_random_graph.py b/generate_theoric_random_graph.py
index 8d6ef8e03167fd96648c2cb14c3db34f2f090342..1777fa706aed8383e2bd4343d0d14da23ff7abb0 100644
--- a/generate_theoric_random_graph.py
+++ b/generate_theoric_random_graph.py
@@ -19,6 +19,7 @@ args = parser.parse_args()
 
 GRAPH_SIZE = [80,800]
 EDGE_SIZE = [2,3]
+sample_per_params = 1
 
 OUTPUT_DIR = args.output_dir
 if not os.path.exists(OUTPUT_DIR):
@@ -50,29 +51,24 @@ parameters = {
 }
 
 
 
-#getattr(nx,"geographical_threshold_graph")(**dict(n=20,theta=0.4))
-
-
-
 def get_params(inp):
     return (dict(zip(inp.keys(), values)) for values in itertools.product(*inp.values()))
 
 
-
-
 pbar = tqdm(parameters.items(),total=len(parameters))
 for method,args in pbar:
     pbar.set_description("Generating graphs using : " + method)
     list_of_params = get_params(parameters[method])
     func = getattr(ra,method)
     for ix,params in enumerate(list_of_params):
-        params["nb_edges"] = params["nb_edges"]*params["nb_nodes"]
-        print(params)
-        try:
-            G = func(**params)
-            G.graph.update(params)
-            nx.write_gml(G, OUTPUT_DIR+"/graph_{method}_{ix}.gml".format(method=method,ix=ix),stringizer=str)
-        except Exception as e:
-            print(e)
-            print("Can't generate graphs using these parameters")
+        params["nb_edges"] = params["nb_edges"] * params["nb_nodes"]
+        print("Generating graph with the following parameters: ", params)
+        for sp_id in range(sample_per_params):
+            try:
+                G = func(**params)
+                G.graph.update(params)
+                nx.write_gml(G, OUTPUT_DIR+"/graph_{method}_{ix}_{sp_id}.gml".format(method=method,ix=ix,sp_id=sp_id),stringizer=str)
+            except Exception as e:
+                print(e)
+                print("Can't generate graphs using these parameters")
diff --git a/lib/random.py b/lib/random.py
index afe6a83960360b138abaf4eac2d22830c387b5c7..9b95de8f02c38da8e84c6c5d8b3f525e3a5279f0 100644
--- a/lib/random.py
+++ b/lib/random.py
@@ -186,7 +186,7 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n
         for j in range(nb_nodes):
             if i == j and not self_link:
                 continue
-            data.append([i, j, 1/(1+(dist_func(coords[i], coords[j])**2))])
+            data.append([i, j, 1/(1+dist_func(coords[i], coords[j])**4)])
     df = pd.DataFrame(data, columns="src tar weight".split()).astype({"src": int, "tar": int})
     df["hash"] = df.apply(lambda x: "_".join(sorted([str(int(x.src)), str(int(x.tar))])), axis=1)
     df = df.drop_duplicates(subset="hash")
diff --git a/requirements.txt b/requirements.txt
index 62b71368cfa795b096c4df754dd1cad28a47d66f..939f6cf2ec7faac059bc68cc69375de12e54b128 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,5 @@ pandas
 numpy
 sklearn
 seaborn
+haversine
+geopandas
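
A note on the reordered split fallback in `evalNE_script.py`: `spanning_tree` is now tried first, since (as documented by EvalNE) it keeps the training graph connected, with `fast` as the fallback when the first algorithm raises `ValueError`; `fe_ratio=1` samples one non-edge per true edge. A minimal self-contained sketch, assuming EvalNE 0.4 import paths (`LPEvalSplit` and `LPEvaluator` under `evalne.evaluation`) and a stand-in random graph:

```python
import networkx as nx
from evalne.evaluation.split import LPEvalSplit
from evalne.evaluation.evaluator import LPEvaluator

G = nx.erdos_renyi_graph(200, 0.05, seed=42)  # stand-in for the loaded graph

traintest_split = LPEvalSplit()
try:
    # Preferred: preserves connectivity of the training graph.
    traintest_split.compute_splits(G, split_alg="spanning_tree", train_frac=0.9, fe_ratio=1)
except ValueError:
    # Fallback: faster edge sampling, without the connectivity guarantee.
    traintest_split.compute_splits(G, split_alg="fast", train_frac=0.9, fe_ratio=1)

nee = LPEvaluator(traintest_split)
```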
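On the GEXF to GML switch in `generate_random_graph.py`: GML stores only a limited set of native value types, which is why the theoric generator passes `stringizer=str` when attaching generator parameters via `G.graph.update(params)`. A round-trip sketch with a hypothetical toy graph and file name:

```python
import networkx as nx

G = nx.path_graph(3)  # hypothetical stand-in for a generated sample
G.graph.update({"nb_nodes": 3, "bbox": (0.0, 1.0)})  # a tuple is not a native GML type

# stringizer=str converts values GML cannot store natively;
# without it, write_gml raises ValueError on the tuple attribute.
nx.write_gml(G, "example.gml", stringizer=str)

H = nx.read_gml("example.gml")
print(H.graph)  # {'nb_nodes': 3, 'bbox': '(0.0, 1.0)'} -- stringized values come back as strings
```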
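`get_params` in `generate_theoric_random_graph.py` expands a dict of parameter lists into one concrete dict per point of the cartesian product; the generation loop then rescales `nb_edges` from edges-per-node to a total edge count. A usage example with a hypothetical grid:

```python
import itertools

def get_params(inp):
    # One dict per combination in the cartesian product of the value lists.
    return (dict(zip(inp.keys(), values)) for values in itertools.product(*inp.values()))

grid = {"nb_nodes": [80, 800], "nb_edges": [2, 3]}  # hypothetical grid
for params in get_params(grid):
    params["nb_edges"] = params["nb_edges"] * params["nb_nodes"]  # edges-per-node -> total edges
    print(params)
# {'nb_nodes': 80, 'nb_edges': 160}
# {'nb_nodes': 80, 'nb_edges': 240}
# {'nb_nodes': 800, 'nb_edges': 1600}
# {'nb_nodes': 800, 'nb_edges': 2400}
```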
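The `lib/random.py` change steepens the distance deterrence in `spatial_graph` from 1/(1+d^2) to 1/(1+d^4), so long-range pairs lose weight much faster relative to short-range ones. A quick comparison, assuming geographic `(lat, lon)` coordinates for illustration (the actual `dist_func` default is truncated in the hunk header) and using `haversine`, newly added to `requirements.txt`, which returns great-circle distance in km:

```python
from haversine import haversine  # now in requirements.txt

paris, lyon, sydney = (48.86, 2.35), (45.76, 4.84), (-33.87, 151.21)  # (lat, lon)

for a, b in [(paris, lyon), (paris, sydney)]:
    d = haversine(a, b)      # great-circle distance in km
    old_w = 1 / (1 + d**2)   # previous deterrence
    new_w = 1 / (1 + d**4)   # steeper quartic decay
    print(f"d={d:.0f} km  old={old_w:.3e}  new={new_w:.3e}")
# The short pair keeps proportionally far more weight under the quartic decay.
```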