Commit 0ccb0c49 authored by Fize Jacques
Add multithreaded run_eval script + fix graph split error

parent 2fcbb8a4
@@ -40,7 +40,10 @@ log("Density "+ str(len(G)/len(list(G.edges()))))
 log("Building link prediction dataset...")
 # Create an evaluator and generate train/test edge split
 traintest_split = LPEvalSplit()
-traintest_split.compute_splits(G,split_alg="fast",train_frac=0.6,fe_ratio=1)
+try:
+    traintest_split.compute_splits(G,split_alg="fast",train_frac=0.6,fe_ratio=1)
+except ValueError:
+    traintest_split.compute_splits(G, split_alg="spanning_tree", train_frac=0.6, fe_ratio=1)
 nee = LPEvaluator(traintest_split)
 log("Dataset Built !")
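The hunk above makes the split construction fall back to EvalNE's "spanning_tree" split algorithm whenever the "fast" algorithm raises a ValueError on a given graph. As a sketch only (the helper name and import path are assumptions, not part of this commit), the same fallback could be factored out like this:

# Sketch: fallback split helper, not part of this commit.
# Import path assumed from recent EvalNE releases; adjust to the one used by evalNE_script.py.
from evalne.evaluation.split import LPEvalSplit

def build_split(G, train_frac=0.6, fe_ratio=1):
    # Try the fast edge-split algorithm first; fall back to the spanning-tree
    # based split when the fast algorithm cannot handle the graph.
    split = LPEvalSplit()
    try:
        split.compute_splits(G, split_alg="fast", train_frac=train_frac, fe_ratio=fe_ratio)
    except ValueError:
        split.compute_splits(G, split_alg="spanning_tree", train_frac=train_frac, fe_ratio=fe_ratio)
    return split
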
# coding = utf-8
import glob
import subprocess
from lib.helpers import parse_evalne_output
from lib.utils import load_edgelist
import os
import pandas as pd
from tqdm import tqdm
import networkx as nx
from joblib import Parallel, delayed
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("dataset_dir")
parser.add_argument("output_filename")
parser.add_argument("-f", "--format", default="gexf", choices=["gexf", "gml", "txt"])
args = parser.parse_args()
fns = sorted(glob.glob(args.dataset_dir + "/*." + args.format))
def run_eval(fn):
    # Evaluate a single graph file with the (single-process) evalNE script
    command = "python evalNE_script.py {0} -f {1} -n".format(fn, args.format).split()
    output = subprocess.run(command)
    if output.returncode != 0:
        print("Error for the command:", " ".join(command))
all_res = []
# Run link prediction on every graph file, four processes in parallel
Parallel(n_jobs=4, backend="multiprocessing")(delayed(run_eval)(fn) for fn in tqdm(fns))
# Compile the per-graph evaluation outputs into one dataframe
pbar = tqdm(fns)
for fn in pbar:
    pbar.set_description("compile eval from " + fn)
    if os.path.exists(fn + "_results_lp"):
        df_results = parse_evalne_output(open(fn + "_results_lp").read())
        name = os.path.basename(fn)
        if args.format == "txt":  # plain edge-list files
            G = load_edgelist(path=fn)
        elif args.format == "gml":
            G = nx.read_gml(fn)
        else:
            G = nx.read_gexf(fn)
        # Ten highest-degree nodes, stored as a "|"-separated string
        top10node = (pd.DataFrame(list(G.degree()), columns=["node", "degree"])
                     .sort_values("degree", ascending=False)
                     .head(10).node.astype(str).values)
        df_results["nb_edge"] = G.size()
        df_results["transitivity"] = nx.transitivity(G)
        df_results["density"] = nx.density(G)
        df_results["top10_node"] = "|".join(top10node)
        df_results["size"] = len(G)
        df_results["filename"] = name
        all_res.append(df_results)
pd.concat(all_res).to_csv(args.output_filename, sep="\t", index=False)
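Assuming the new file above is saved as run_eval.py (its path is not shown on this page), a typical invocation would be: python run_eval.py datasets/ results.tsv -f gexf, where datasets/ and results.tsv are placeholder names. Every *.gexf graph in the directory is evaluated through evalNE_script.py, four processes at a time, and the per-graph *_results_lp outputs are then compiled into a single tab-separated table.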