From 786f1c6c921b58d3d07469a81a4577c85b43ec84 Mon Sep 17 00:00:00 2001 From: Fize Jacques <jacques.fize@cirad.fr> Date: Wed, 5 May 2021 10:16:12 +0200 Subject: [PATCH] change in the documentation --- doc/source/graph_generator.rst | 13 +++++--- doc/source/link_pred_eval.rst | 59 ++++++++++++++++++++++++++++++++++ evalNE_script.py | 11 +++---- 3 files changed, 73 insertions(+), 10 deletions(-) diff --git a/doc/source/graph_generator.rst b/doc/source/graph_generator.rst index 52128c3..8273369 100644 --- a/doc/source/graph_generator.rst +++ b/doc/source/graph_generator.rst @@ -13,8 +13,11 @@ For example, if you want to generate a graph following the stochastic block mode from lib.random import stochastic_block_model_graph G = stochastic_block_model_graph(nb_nodes=300,nb_edges=1200,nb_com=5,percentage_edge_betw=0.01) +Graph Models +------------ + Stochastic Block Model ----------------------- +^^^^^^^^^^^^^^^^^^^^^^ This model partitions :math:`n` vertices in :math:`k` blocks, and places edges between pairs of nodes with a probability that depends on the vertices membership. The probability of a pair of nodes is computed based four parameter, the number of vertices :math:`|V|` and edges :math:`|E|`, the number of blocks :math:`|B|` and finally, the percentage of interlinks :math:`per_{betw}`, i.e number of selected pairs of nodes that belongs to different blocks. First, we assign randomly a block :math:`b_i \in B` for each node :math:`v \in V`. In this model, all blocks are associated with the same number of nodes :math:`\frac{|V|}{|B|}`. Second, to compute the probability of a pair of nodes to be connected, we use the function :math:`f(u,v)` defined as follows: @@ -46,7 +49,7 @@ To compute the :math:`p_{in}` and :math:`p_{out}`, we do the following: Spatial Model -------------- +^^^^^^^^^^^^^ This model generate a graph with $n$ vertices and $e$ edges selected randomly. Edges are selected using the deterrence function defined in the following formula: .. 
math:: @@ -57,18 +60,20 @@ where :math:`u` and :math:`v` are two vertices and :math:`p_u` and :math:`p_v` c Nodes coordinates can be generated randomly or randomly selected from existing places (here countries centroids). ER Random Graph ---------------- +^^^^^^^^^^^^^^^ The model returns a Erdős-Rényi graph or a binomial graph. Basically, the model generates a graph :math:`G_{n,m}` where :math:`n` corresponds to the number of vertices and :math:`m` the number of edges in :math:`G_{n,m}`. In this model, each pair of nodes has the same probability to be connected. Configuration model -------------------- +^^^^^^^^^^^^^^^^^^^ The configuration model generates a graph (graph with parallel edges and self loops) by randomly assigning edges to match a given degree distribution. In our case, generated graph degree distribution follows a powerlaw. We use the Molloy-Reed approach [molloy1995critical]_ to generate the graph. .. [molloy1995critical] Molloy, M., & Reed, B. (1995). A critical point for random graphs with a given degree sequence. Random structures & algorithms, 6(2‐3), 161-180. + + Generate a graph dataset based on different models and configurations --------------------------------------------------------------------- diff --git a/doc/source/link_pred_eval.rst b/doc/source/link_pred_eval.rst index 82573ee..2d75de6 100644 --- a/doc/source/link_pred_eval.rst +++ b/doc/source/link_pred_eval.rst @@ -1,2 +1,61 @@ Link Prediction Evaluation ========================== + +Link Prediction with state of the art methods +--------------------------------------------- + +In order to evaluate link prediction methods on a graph dataset, we use the `run_eval.py` script. Here +a graph dataset is a directory that contains a file for each graph. For now, the accepted graph formats are +: edgelist (txt), gephi (.gexf) and graphml (.gml). + +To launch the evaluation, just run the following command in your terminal: + +.. 
code-block:: shell + + python run_eval.py <dataset_dir_path> <output_filename> [-f graph_format] [-t edge fraction used] [-v verbose] + + +Select which link prediction methods to evaluate +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +You can change the methods evaluated in the `evalNE_script.py` file by changing different variables. +To change the heuristics evaluated, change the following : + +.. code-block:: python + + methods_heuristics = ['random_prediction', + 'common_neighbours', + 'jaccard_coefficient', + "adamic_adar_index", + "preferential_attachment", + "resource_allocation_index", + "stochastic_block_model", + "stochastic_block_model_degree_corrected", + "spatial_link_prediction" + ] + +Each string corresponds to the name of a function available in the `evalne.methods.similarity` module. + +Thanks to **OpenNE**, we can also evaluate methods based on network embedding. To remove or add such methods +modify the following variables in the same file : + +.. code-block:: python + + methods_ne = ["node2vec" ,"hope-opne", "gf", "sdne", "deepWalk", "line", "grarep"] + commands = [ + "python -m openne --method node2vec --graph-format edgelist --epochs 100 --number-walks 10 --walk-length 80 --window-size 10", + "python -m openne --method hope --epochs 100", + "python -m openne --method gf --epochs 100", + "python -m openne --method sdne --epochs 100 --encoder-list [1024,128] --beta 5 --bs 500", + "python -m openne --method deepWalk --graph-format edgelist --epochs 100 --number-walks 10 --walk-length 80 --window-size 10", + "python -m openne --method line --graph-format edgelist --epochs 10", + "python -m openne --method grarep --epochs 100" + ] + +More specifically, in `methods_ne` you add the name of the method you want to use; and in `commands`, at the same index, you add the OpenNE +command required to compute the embedding using this method. 
+ +Link prediction based on the erosion model +------------------------------------------ + +To run the evaluation of our link prediction method based on erosion, you must use the `eval_mixed_model.py` script. diff --git a/evalNE_script.py b/evalNE_script.py index ceef756..0e62d47 100644 --- a/evalNE_script.py +++ b/evalNE_script.py @@ -58,7 +58,7 @@ log("Dataset Built !") scoresheet = Scoresheet() # Set the baselines -methods = ['random_prediction', +methods_heuristics = ['random_prediction', 'common_neighbours', 'jaccard_coefficient', "adamic_adar_index", @@ -70,7 +70,7 @@ methods = ['random_prediction', ] # Evaluate heristics -pbar = tqdm(methods,disable= (not args.verbose)) +pbar = tqdm(methods_heuristics,disable= (not args.verbose)) for method in pbar: pbar.set_description("Evaluate "+method) result = nee.evaluate_baseline(method=method,) @@ -81,7 +81,7 @@ if args.network_embedding: # Check if OpenNE is installed import openne # Set embedding methods from OpenNE - methods = "node2vec hope-opne gf sdne deepWalk line grarep".split() #lap-opne + methods_ne = ["node2vec" ,"hope-opne", "gf", "sdne", "deepWalk", "line", "grarep"] commands = [ "python -m openne --method node2vec --graph-format edgelist --epochs 100 --number-walks 10 --walk-length 80 --window-size 10", "python -m openne --method hope --epochs 100", @@ -90,19 +90,18 @@ if args.network_embedding: "python -m openne --method deepWalk --graph-format edgelist --epochs 100 --number-walks 10 --walk-length 80 --window-size 10", "python -m openne --method line --graph-format edgelist --epochs 10", "python -m openne --method grarep --epochs 100" - # "python -m openne --method lap --epochs 100", ] edge_emb = [ 'hadamard'] #'average', # Evaluate embedding methods - pbar = tqdm(enumerate(methods), disable=(not args.verbose)) + pbar = tqdm(enumerate(methods_ne), disable=(not args.verbose)) for i,method in pbar: pbar.set_description("Evaluate "+method) is_weighted = nx.is_weighted(G) command = commands[i] + " --input 
{} --output {} --representation-size {}" if is_weighted: command = command + " --weighted" - results = nee.evaluate_cmd(method_name=methods[i], method_type='ne', command=command, + results = nee.evaluate_cmd(method_name=methods_ne[i], method_type='ne', command=command, edge_embedding_methods=edge_emb, input_delim=' ', output_delim=' ', verbose=args.verbose,write_weights=is_weighted) scoresheet.log_results(results) -- GitLab