diff --git a/evalNE_script.py b/evalNE_script.py
index 7c1ceeff24d0dd3b4f329c684e2e2f38a3ba4270..7f29050eadea07598623b40fd70c1d4a2daac370 100644
--- a/evalNE_script.py
+++ b/evalNE_script.py
@@ -40,7 +40,10 @@ log("Density "+ str(len(G)/len(list(G.edges()))))
 log("Building link prediction dataset...")
 # Create an evaluator and generate train/test edge split
 traintest_split = LPEvalSplit()
-traintest_split.compute_splits(G,split_alg="fast",train_frac=0.6,fe_ratio=1)
+try:
+    traintest_split.compute_splits(G,split_alg="fast",train_frac=0.6,fe_ratio=1)
+except ValueError:
+    traintest_split.compute_splits(G, split_alg="spanning_tree", train_frac=0.6, fe_ratio=1)
 nee = LPEvaluator(traintest_split)
 log("Dataset Built !")
 
diff --git a/run_eval_par.py b/run_eval_par.py
new file mode 100644
index 0000000000000000000000000000000000000000..16e4a1f2faf27bc6cd20d83642b7cf07b20e265c
--- /dev/null
+++ b/run_eval_par.py
@@ -0,0 +1,59 @@
+# coding = utf-8
+
+import glob
+import subprocess
+from lib.helpers import parse_evalne_output
+from lib.utils import load_edgelist
+import os
+import pandas as pd
+from tqdm import tqdm
+import networkx as nx
+from joblib import Parallel,delayed
+
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("dataset_dir")
+parser.add_argument("output_filename")
+parser.add_argument("-f", "--format", default="gexf", choices=["gexf", "gml", "txt"])
+
+args = parser.parse_args()
+fns = sorted(glob.glob(args.dataset_dir + "/*." + args.format))
+
+def run_eval(fn):
+    command = "python evalNE_script.py {0} -f {1} -n".format(fn, args.format).split()
+    output = subprocess.run(command)
+    if not output.returncode == 0:
+        print("Error! for the command :", " ".join(command))
+
+all_res = []
+
+# Run link prediction
+Parallel(n_jobs=4,backend="multiprocessing")(delayed(run_eval)(fn) for fn in tqdm(fns))
+
+pbar = tqdm(fns)
+for fn in pbar:
+    pbar.set_description("compile eval from "+ fn)
+
+    if os.path.exists(fn + "_results_lp"):
+        df_results = parse_evalne_output(open(fn + "_results_lp").read())
+        name = os.path.basename(fn)
+        G = None
+        if args.format == "edgelist":
+            G = load_edgelist(path=fn)
+        elif args.format == "gml":
+            G = nx.read_gml(fn)
+        else:
+            G = nx.read_gexf(fn)
+
+        top10node = pd.DataFrame(list(G.degree()), columns="node degree".split()).sort_values("degree",
+                                                                                              ascending=False).head(10).node.values
+        df_results["nb_edge"] = G.size()
+        df_results["transitivity"] = nx.transitivity(G)
+        df_results["density"] = nx.density(G)
+        df_results["top10_node"] = "|".join(top10node)
+        df_results["size"] = len(G)
+        df_results["filename"] = name
+        all_res.append(df_results)
+
+pd.concat(all_res).to_csv(args.output_filename, sep="\t", index=False)
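
A minimal usage sketch for the new parallel driver, based only on the argparse definitions above; the directory and output file names are illustrative placeholders, not taken from the patch:

    # illustrative invocation; "path/to/graphs" and "results_lp.tsv" are placeholders
    python run_eval_par.py path/to/graphs results_lp.tsv -f gexf

With this invocation the script runs evalNE_script.py on every *.gexf file in the directory through joblib (4 multiprocessing workers), then collects each per-graph *_results_lp output together with a few graph statistics (size, number of edges, density, transitivity, top-10 degree nodes) into a single tab-separated file.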