Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# -*- coding: utf-8 -*-
import glob
import subprocess
from lib.helpers import parse_evalne_output
from lib.utils import load_edgelist
import os
import pandas as pd
from tqdm import tqdm
import networkx as nx
from joblib import Parallel,delayed
import argparse
# Command-line interface: two required positionals (dataset directory and
# output file name) plus an optional graph-format switch.
parser = argparse.ArgumentParser()
parser.add_argument("dataset_dir")
parser.add_argument("output_filename")
parser.add_argument(
    "-f",
    "--format",
    choices=["gexf", "gml", "txt"],
    default="gexf",
)
args = parser.parse_args()

# Every file directly inside the dataset directory whose extension equals the
# selected format, in sorted order.
fns = glob.glob("{0}/*.{1}".format(args.dataset_dir, args.format))
fns.sort()
def run_eval(fn: str) -> None:
    """Run ``evalNE_script.py`` on one graph file in a child process.

    The graph format passed to the script is read from the module-level
    ``args.format``. A non-zero exit status is reported on stdout and is not
    raised, so one failing graph does not abort the other evaluations.

    Parameters
    ----------
    fn : str
        Path of the graph file handed to ``evalNE_script.py``.
    """
    # Build argv as a list instead of splitting a formatted string, so a path
    # containing whitespace stays a single argument.
    command = ["python", "evalNE_script.py", fn, "-f", args.format, "-n"]
    output = subprocess.run(command)
    if output.returncode != 0:
        print("Error! for the command :", " ".join(command))
all_res = []

# Run link prediction: one run_eval() call per graph file, dispatched to four
# worker processes through joblib's multiprocessing backend.
Parallel(n_jobs=4, backend="multiprocessing")(
    delayed(run_eval)(fn) for fn in tqdm(fns)
)

# Compile the per-graph result files into one table, adding a few descriptive
# statistics of each graph as extra columns.
pbar = tqdm(fns)
for fn in pbar:
    pbar.set_description("compile eval from " + fn)

    results_path = fn + "_results_lp"
    if not os.path.exists(results_path):
        # No results file for this graph (presumably its evaluation failed --
        # TODO confirm that evalNE_script.py is what writes this file).
        continue

    # Context manager so the file handle is closed as soon as it is read.
    with open(results_path) as results_file:
        # NOTE(review): assumed to return a pandas DataFrame, since columns
        # are assigned on it and it is passed to pd.concat below.
        df_results = parse_evalne_output(results_file.read())

    # The CLI exposes the edge-list format as "txt"; "edgelist" is still
    # accepted here so the historical spelling keeps working.
    if args.format in ("txt", "edgelist"):
        G = load_edgelist(path=fn)
    elif args.format == "gml":
        G = nx.read_gml(fn)
    else:
        G = nx.read_gexf(fn)

    # Identifiers of the ten highest-degree nodes, highest first.
    degrees = pd.DataFrame(list(G.degree()), columns=["node", "degree"])
    top10node = (
        degrees.sort_values("degree", ascending=False).head(10)["node"].values
    )

    df_results["nb_edge"] = G.size()
    df_results["transitivity"] = nx.transitivity(G)
    df_results["density"] = nx.density(G)
    # Node ids are not guaranteed to be strings, so convert before joining.
    df_results["top10_node"] = "|".join(map(str, top10node))
    df_results["size"] = len(G)
    df_results["filename"] = os.path.basename(fn)
    all_res.append(df_results)

if not all_res:
    # pd.concat() on an empty list raises an opaque "No objects to
    # concatenate" ValueError; fail with an actionable message instead.
    raise SystemExit(
        "No '*_results_lp' file found for any graph in {0}; "
        "nothing to write.".format(args.dataset_dir)
    )

pd.concat(all_res).to_csv(args.output_filename, sep="\t", index=False)