Review notes (free text ahead of the first file header; patch tools skip it).

Scope of the patch:
  * evalNE_script.py: adds -f/--format (gexf, gml, txt), a log() helper that
    prints only with --verbose, and tqdm progress bars around both
    evaluation loops.
  * generate_theoric_random_graph.py: shrinks the parameter grid and moves
    the output directory to data/theoric_graph_1/.
  * run_eval.py: adds -f/--format, globs on that extension and forwards the
    format to evalNE_script.py.

Corrections applied to the added lines (line counts per hunk are unchanged):
  1. run_eval.py compared args.format with "edgelist", a value the argparse
     choices (gexf, gml, txt) can never produce, so txt inputs fell through
     to nx.read_gexf. The branch now tests "txt" and passes weighted=True,
     the same call evalNE_script.py uses on the same file.
  2. evalNE_script.py logged len(G) / edge_count under the label "Density".
     That is a node/edge ratio and raises ZeroDivisionError on an edgeless
     graph; it now logs nx.density(G). The edge count uses
     G.number_of_edges() instead of materialising the edge list.
  3. tqdm(enumerate(methods)) has no length, so total=len(methods) is passed.
  4. run_eval.py builds the subprocess command as a list instead of
     formatting and splitting a string, so paths with spaces stay intact.

NOTE(review): run_eval.py now calls load_edgelist; its import block is
outside the visible hunks - confirm the name is imported there.
NOTE(review): the reviewed text had its whitespace collapsed. Indentation of
context lines and the position of blank context lines are reconstructed to
fit the hunk counts, and the index lines are carried over unchanged.
Regenerate with git diff against the tree before applying.

diff --git a/evalNE_script.py b/evalNE_script.py
index f11e56f1cb79f55d2eda0f253f9558b677e6a1e2..29fdf12ee5638f4522d1f835550b896689b093f6 100644
--- a/evalNE_script.py
+++ b/evalNE_script.py
@@ -3,27 +3,48 @@ from evalne.evaluation.split import EvalSplit as LPEvalSplit
 from evalne.evaluation.score import Scoresheet
 from evalne.utils import preprocess as pp
 import networkx as nx
+from tqdm import tqdm
 from lib.utils import load_edgelist
 import argparse
 
 
 parser = argparse.ArgumentParser()
-parser.add_argument("edgelist_graph_filename")
-parser.add_argument("--ne","--network-embedding",action="store_true",help="If you want to use neural network embedding for link prediction")
+parser.add_argument("graph_filename")
+parser.add_argument("-n","--network-embedding",action="store_true",help="If you want to use neural network embedding for link prediction")
 parser.add_argument("-v","--verbose",action="store_true")
+parser.add_argument("-f","--format",default="gexf",choices=["gexf","gml","txt"])
 
-args = parser.parse_args()#("data/fb_country_country_sample_6_size1000.txt".split())
+args = parser.parse_args()
+
+def log(x):
+    if args.verbose:
+        print(x)
 
 
 # Load and preprocess the network
-G = nx.read_gexf(args.edgelist_graph_filename)#load_edgelist(args.edgelist_graph_filename,is_directed=True,weighted=True)
-G, _ = pp.prep_graph(G,maincc=True)
+log("Load Input Graph...")
+G = None
+if args.format == "txt":
+    G = load_edgelist(path=args.graph_filename,weighted=True)
+elif args.format == "gml":
+    G = nx.read_gml(args.graph_filename)
+else:
+    G = nx.read_gexf(args.graph_filename)
 
+G, _ = pp.prep_graph(G,maincc=True)
+log("Graph Loaded !")
+log("Size "+str(len(G)))
+log("Nb of Edges "+str(G.number_of_edges()))
+log("Density "+ str(nx.density(G)))
+log("Building link prediction dataset...")
 # Create an evaluator and generate train/test edge split
 traintest_split = LPEvalSplit()
 traintest_split.compute_splits(G,split_alg="spanning_tree",train_frac=0.8,fe_ratio=1)
 nee = LPEvaluator(traintest_split)
 
+
+log("Dataset Built !")
+
 # Create a Scoresheet to store the results
 scoresheet = Scoresheet()
 
@@ -40,7 +61,9 @@ methods = ['random_prediction',
 ]
 
 # Evaluate baselines
-for method in methods:
+pbar = tqdm(methods,disable= (not args.verbose))
+for method in pbar:
+    pbar.set_description("Evaluate "+method)
     result = nee.evaluate_baseline(method=method, )
     scoresheet.log_results(result)
 
@@ -63,7 +86,9 @@ if args.network_embedding:
     edge_emb = ['average', 'hadamard']
 
     # Evaluate embedding methods
-    for i in range(len(methods)):
+    pbar = tqdm(enumerate(methods), total=len(methods), disable=(not args.verbose))
+    for i,method in pbar:
+        pbar.set_description("Evaluate "+method)
         command = commands[i] + " --input {} --output {} --representation-size {}"
         results = nee.evaluate_cmd(method_name=methods[i], method_type='ne', command=command,
                                    edge_embedding_methods=edge_emb, input_delim=' ', output_delim=' ', verbose=args.verbose)
@@ -76,4 +101,6 @@ if args.network_embedding:
 # Get output
 if args.verbose:
     scoresheet.print_tabular()
-scoresheet.write_all(args.edgelist_graph_filename+"_results_lp")
\ No newline at end of file
+log("Saving Output in " + args.graph_filename + "_results_lp")
+scoresheet.write_all(args.graph_filename+"_results_lp")
+log("Output Saved !")
\ No newline at end of file
diff --git a/generate_theoric_random_graph.py b/generate_theoric_random_graph.py
index cf1e588d14cd988a58e1cc5549fc41bc889a8511..b6f83f2d01b716f364683a884e9bb0282a5309b9 100644
--- a/generate_theoric_random_graph.py
+++ b/generate_theoric_random_graph.py
@@ -19,13 +19,13 @@ def generate_sbm_prob_matrix(nb_of_blocks,prob_btw_block=0.1):
             M[j,i] = prob_btw_block
     return M
 
-GRAPH_SIZE = [50,100,200,500]
-OUTPUT_DIR = "test_dataset/"
+GRAPH_SIZE = [50,75,100]
+OUTPUT_DIR = "data/theoric_graph_1/"
 
 parameters = {
     "planted_partition_graph": {
         "l": [3,5,8],
-        "k": [10,20,30],
+        "k": [10,20],
         "p_in": [0.2,0.5,0.7],
         "p_out": [0.1,0.2,0.3]
     },
@@ -52,7 +52,7 @@ parameters = {
     },
     "geographical_threshold_graph": {
         "n": GRAPH_SIZE,
-        "theta": [0.1,0.2,0.4,0.6]
+        "theta": [0.1,0.2,0.3]
     },
 }
 # Generating transition matrices for stochastic block model
diff --git a/run_eval.py b/run_eval.py
index 29bbd10ade84af934527440c76bebf3d194bb76e..163f01737b01a01a336e7637848b85a467cfaf9d 100644
--- a/run_eval.py
+++ b/run_eval.py
@@ -14,22 +14,32 @@ import argparse
 parser = argparse.ArgumentParser()
 parser.add_argument("dataset_dir")
 parser.add_argument("output_filename")
+parser.add_argument("-f", "--format", default="gexf", choices=["gexf", "gml", "txt"])
 
 args = parser.parse_args()
 
-fns = glob.glob(args.dataset_dir + "/*.gexf")
+fns = glob.glob(args.dataset_dir + "/*." + args.format)
 all_res = []
-for fn in tqdm(fns):
-    print("run eval on ", fn)
-    command = "python evalNE_script.py {0} -v".format(fn).split()
+pbar = tqdm(fns)
+for fn in pbar:
+    pbar.set_description("run eval on "+ fn)
+    command = ["python", "evalNE_script.py", fn, "-f", args.format, "-n"]
     output = subprocess.run(command)
     if not output.returncode == 0:
         print("Error! for the command :", " ".join(command))
         continue
     df_results = parse_evalne_output(open(fn + "_results_lp").read())
     name = os.path.basename(fn)
-    G = nx.read_gexf(fn)
-    top10node = pd.DataFrame(list(G.degree()), columns="node degree".split()).sort_values("degree",ascending=False).head(10).node.values
+    G = None
+    if args.format == "txt":
+        G = load_edgelist(path=fn, weighted=True)
+    elif args.format == "gml":
+        G = nx.read_gml(fn)
+    else:
+        G = nx.read_gexf(fn)
+
+    top10node = pd.DataFrame(list(G.degree()), columns="node degree".split()).sort_values("degree",
+                                                                                           ascending=False).head(10).node.values
     df_results["nb_edge"] = len(list(G.edges()))
     df_results["transitivity"] = nx.transitivity(G)
     df_results["density"] = nx.density(G)
@@ -38,4 +48,4 @@ for fn in tqdm(fns):
     df_results["filename"] = name
     all_res.append(df_results)
 
-pd.concat(all_res).to_csv(args.output_filename,sep="\t",index=None)
\ No newline at end of file
+pd.concat(all_res).to_csv(args.output_filename, sep="\t", index=False)