"""Benchmark link-prediction methods on a single graph with EvalNE.

Usage:
    python <this script> GRAPH_FILENAME [-f {gexf,gml,txt}] [-n] [-v]

The script loads the graph, builds a train/test edge split, evaluates a fixed
list of baseline heuristics and, when ``--network-embedding`` is given and the
``openne`` package is importable, a list of OpenNE embedding methods. All
results are written to ``GRAPH_FILENAME + "_results_lp"``.
"""
import argparse

import networkx as nx
from evalne.evaluation.evaluator import LPEvaluator
from evalne.evaluation.score import Scoresheet
from evalne.evaluation.split import EvalSplit as LPEvalSplit
from evalne.utils import preprocess as pp
from tqdm import tqdm

from lib.utils import load_edgelist

parser = argparse.ArgumentParser()
parser.add_argument("graph_filename")
parser.add_argument("-n", "--network-embedding", action="store_true",
                    help="If you want to use neural network embedding for link prediction")
parser.add_argument("-v", "--verbose", action="store_true")
parser.add_argument("-f", "--format", default="gexf", choices=["gexf", "gml", "txt"])

args = parser.parse_args()


def log(x):
    """Print ``x`` only when the script was started with ``--verbose``."""
    if args.verbose:
        print(x)


# Load and preprocess the network
log("Load Input Graph...")
G = None
if args.format == "txt":
    G = load_edgelist(path=args.graph_filename, weighted=True)
elif args.format == "gml":
    G = nx.read_gml(args.graph_filename)
else:
    G = nx.read_gexf(args.graph_filename)

# NOTE(review): presumably maincc=True restricts the graph to its main
# connected component -- confirm against the EvalNE documentation.
G, _ = pp.prep_graph(G, maincc=True)

log("Graph Loaded !")
log("Size " + str(len(G)))
log("Nb of Edges " + str(G.number_of_edges()))
# nx.density reports the actual graph density (and returns 0 for a graph
# without edges); the former nodes/edges ratio was not a density and divided
# by zero on edgeless graphs, even when --verbose was off.
log("Density " + str(nx.density(G)))

log("Building link prediction dataset...")
# Create an evaluator and generate train/test edge split
traintest_split = LPEvalSplit()
try:
    traintest_split.compute_splits(G, split_alg="spanning_tree", train_frac=0.9, fe_ratio=1)
except ValueError:
    # Fall back to the "fast" split algorithm when "spanning_tree" rejects the graph.
    traintest_split.compute_splits(G, split_alg="fast", train_frac=0.9, fe_ratio=1)

nee = LPEvaluator(traintest_split)
log("Dataset Built !")

# Create a Scoresheet to store the results
scoresheet = Scoresheet()

# Set the baselines
methods = [
    'random_prediction',
    'common_neighbours',
    'jaccard_coefficient',
    "adamic_adar_index",
    "preferential_attachment",
    "resource_allocation_index",
    "stochastic_block_model",
    "stochastic_block_model_degree_corrected",
    "spatial_link_prediction",
]

# Evaluate heuristics
pbar = tqdm(methods, disable=(not args.verbose))
for method in pbar:
    pbar.set_description("Evaluate " + method)
    result = nee.evaluate_baseline(method=method)
    scoresheet.log_results(result)

if args.network_embedding:
    try:
        # Check if OpenNE is installed. Only the import is guarded so that an
        # ImportError raised later, during evaluation, is not misreported as
        # "OpenNE is not installed".
        import openne  # noqa: F401
    except ImportError:
        print("The OpenNE library is not installed. Reporting results only for the baselines...")
    else:
        # Set embedding methods from OpenNE. `methods` and `commands` are
        # parallel lists: commands[i] is the command line for methods[i].
        # NOTE: the "lap" method (lap-opne) is currently disabled.
        methods = "node2vec hope-opne gf sdne deepWalk line grarep".split()
        commands = [
            "python -m openne --method node2vec --graph-format edgelist --epochs 100 --number-walks 10 --walk-length 80 --window-size 10",
            "python -m openne --method hope --epochs 100",
            "python -m openne --method gf --epochs 100",
            "python -m openne --method sdne --epochs 100 --encoder-list [1024,128] --beta 5 --bs 500",
            "python -m openne --method deepWalk --graph-format edgelist --epochs 100 --number-walks 10 --walk-length 80 --window-size 10",
            "python -m openne --method line --graph-format edgelist --epochs 10",
            "python -m openne --method grarep --epochs 100",
        ]
        edge_emb = ['average', 'hadamard']

        # The graph does not change inside the loop, so test for weights once.
        write_weights = nx.is_weighted(G)

        # Evaluate embedding methods
        pbar = tqdm(zip(methods, commands), total=len(methods), disable=(not args.verbose))
        for method, base_command in pbar:
            pbar.set_description("Evaluate " + method)
            # The three "{}" placeholders are left unformatted here and passed
            # on to evaluate_cmd as part of the command template.
            command = base_command + " --input {} --output {} --representation-size {}"
            results = nee.evaluate_cmd(method_name=method, method_type='ne', command=command,
                                       edge_embedding_methods=edge_emb, input_delim=' ',
                                       output_delim=' ', verbose=args.verbose,
                                       write_weights=write_weights)
            scoresheet.log_results(results)

# Get output
if args.verbose:
    scoresheet.print_tabular()

log("Saving Output in " + args.graph_filename + "_results_lp")
scoresheet.write_all(args.graph_filename + "_results_lp")
log("Output Saved !")