Skip to content
Snippets Groups Projects
evalNE_script.py 4.31 KiB
Newer Older
Fize Jacques's avatar
Fize Jacques committed
from evalne.evaluation.evaluator import LPEvaluator
from evalne.evaluation.split import EvalSplit as LPEvalSplit
from evalne.evaluation.score import Scoresheet
from evalne.utils import preprocess as pp
Fize Jacques's avatar
Fize Jacques committed
import networkx as nx
Fize Jacques's avatar
Fize Jacques committed
from tqdm import tqdm
Fize Jacques's avatar
Fize Jacques committed

from lib.utils import load_edgelist

Fize Jacques's avatar
Fize Jacques committed
import argparse

# Command-line interface of the script. Every option keeps its original
# flags, default and type; only help texts were added.
parser = argparse.ArgumentParser()
parser.add_argument("graph_filename", help="Path of the input graph file")
parser.add_argument(
    "-n", "--network-embedding", action="store_true",
    help="If you want to use neural network embedding for link prediction",
)
parser.add_argument(
    "-v", "--verbose", action="store_true",
    help="Print progress messages and the final results table",
)
parser.add_argument(
    "-f", "--format", default="gexf", choices=["gexf", "gml", "txt"],
    help="Format of the input graph file",
)
parser.add_argument(
    "-t", "--train-frac", default=0.9, type=float,
    help="Value passed as train_frac when computing the train/test split",
)
Fize Jacques's avatar
Fize Jacques committed

Fize Jacques's avatar
Fize Jacques committed
# Parse the command line once; the resulting namespace is read by the rest
# of the script (including log()).
args = parser.parse_args()

def log(x):
    """Print ``x`` when the script runs in verbose mode, otherwise do nothing.

    The verbosity flag is read from the module-level ``args`` namespace.
    """
    if not args.verbose:
        return
    print(x)
Fize Jacques's avatar
Fize Jacques committed

# Load and preprocess the network
Fize Jacques's avatar
Fize Jacques committed
# Read the input graph with the loader that matches the --format option.
log("Load Input Graph...")
if args.format == "txt":
    # Text input goes through the project's own edge-list loader.
    G = load_edgelist(path=args.graph_filename, weighted=True)
else:
    # "gml" uses the GML reader; any other value falls back to GEXF.
    reader = nx.read_gml if args.format == "gml" else nx.read_gexf
    G = reader(args.graph_filename)
Fize Jacques's avatar
Fize Jacques committed

Fize Jacques's avatar
Fize Jacques committed
# Preprocess the graph with EvalNE (maincc=True presumably keeps only the main
# connected component -- confirm against the EvalNE docs). The second return
# value is not used by this script.
G, _ = pp.prep_graph(G, maincc=True)
log("Graph Loaded !")
log("Size " + str(G.number_of_nodes()))
log("Nb of Edges " + str(G.number_of_edges()))
# Use the real graph density. The previous value was nodes / edges, which is
# not a density and raised ZeroDivisionError on a graph without edges (even
# when not verbose, since the argument is evaluated before log() is called).
log("Density " + str(nx.density(G)))
Fize Jacques's avatar
Fize Jacques committed

Fize Jacques's avatar
Fize Jacques committed
log("Building link prediction dataset...")
# Create the object that will hold the train/test edge split; the evaluator
# itself is built later from this split.
traintest_split = LPEvalSplit()
Fize Jacques's avatar
Fize Jacques committed
    # NOTE(review): this call is indented, but the statement that guards it is
    # missing from this copy of the file -- restore the conditional that selects
    # the "spanning_tree" split algorithm before running the script.
    traintest_split.compute_splits(G, split_alg="spanning_tree", train_frac=args.train_frac, fe_ratio=1)
Fize Jacques's avatar
Fize Jacques committed
    # NOTE(review): this call is indented, but the statement that guards it is
    # missing from this copy of the file -- restore the branch that selects the
    # "fast" split algorithm before running the script.
    traintest_split.compute_splits(G, split_alg="fast", train_frac=args.train_frac, fe_ratio=1)
Fize Jacques's avatar
Fize Jacques committed
# Log the number of test edges before and after the evaluator is built, so
# any change made to the split by LPEvaluator is visible in verbose mode.
log("BEFORE"+ str(len(traintest_split.test_edges)))
nee = LPEvaluator(traintest_split)
log("AFTER " +str(len(traintest_split.test_edges)))
log("Dataset Built !")

Fize Jacques's avatar
Fize Jacques committed
# Create a Scoresheet to store the results of every evaluated method
# (baselines and, when requested, the embedding methods).
scoresheet = Scoresheet()

# Set the baselines: names of the link-prediction methods passed, in this
# order, to the evaluator's evaluate_baseline().
methods = [
    "random_prediction",
    "common_neighbours",
    "jaccard_coefficient",
    "adamic_adar_index",
    "preferential_attachment",
    "resource_allocation_index",
    "stochastic_block_model",
    "stochastic_block_model_degree_corrected",
    "spatial_link_prediction",
]
Fize Jacques's avatar
Fize Jacques committed

Fize Jacques's avatar
Fize Jacques committed
# Evaluate every baseline and record its result in the scoresheet. The
# progress bar is only displayed in verbose mode.
pbar = tqdm(methods, disable=(not args.verbose))
for method in pbar:
    pbar.set_description("Evaluate " + method)
    result = nee.evaluate_baseline(method=method)
    scoresheet.log_results(result)

if args.network_embedding:
    # Only the import is guarded: an ImportError raised later, during the
    # evaluation itself, must not be reported as "OpenNE is not installed".
    try:
        # Check if OpenNE is installed
        import openne  # noqa: F401 -- imported only to detect availability
    except ImportError:
        print("The OpenNE library is not installed. Reporting results only for the baselines...")
    else:
        # Set embedding methods from OpenNE; methods[k] is evaluated with commands[k].
        methods = "node2vec hope-opne gf sdne deepWalk line grarep".split()  # lap-opne (disabled)
        commands = [
            "python -m openne --method node2vec --graph-format edgelist --epochs 100 --number-walks 10 --walk-length 80 --window-size 10",
            "python -m openne --method hope --epochs 100",
            "python -m openne --method gf --epochs 100",
            "python -m openne --method sdne --epochs 100 --encoder-list [1024,128] --beta 5 --bs 500",
            "python -m openne --method deepWalk --graph-format edgelist --epochs 100 --number-walks 10 --walk-length 80 --window-size 10",
            "python -m openne --method line --graph-format edgelist --epochs 10",
            "python -m openne --method grarep --epochs 100",
            # "python -m openne --method lap --epochs 100",  (disabled)
        ]
        edge_emb = ['hadamard']  # 'average' is currently disabled

        # The graph does not change between methods, so compute this once.
        is_weighted = nx.is_weighted(G)
        # The {} placeholders are left unformatted here; they are presumably
        # filled in by evaluate_cmd -- confirm against the EvalNE docs.
        command_suffix = " --input {} --output {} --representation-size {}"
        if is_weighted:
            command_suffix = command_suffix + " --weighted"

        # Evaluate embedding methods. `total` is given explicitly because zip()
        # has no length, and tqdm needs it to show progress.
        pbar = tqdm(zip(methods, commands), total=len(methods), disable=(not args.verbose))
        for method, base_command in pbar:
            pbar.set_description("Evaluate " + method)
            results = nee.evaluate_cmd(method_name=method, method_type='ne', command=base_command + command_suffix,
                                       edge_embedding_methods=edge_emb, input_delim=' ', output_delim=' ',
                                       verbose=args.verbose, write_weights=is_weighted)
            scoresheet.log_results(results)
Fize Jacques's avatar
Fize Jacques committed

# Get output: show the results table in verbose mode, then write all results
# next to the input file, using the input path suffixed with "_results_lp".
if args.verbose:
    scoresheet.print_tabular()
output_path = args.graph_filename + "_results_lp"
log("Saving Output in " + output_path)
scoresheet.write_all(output_path)
log("Output Saved !")