diff --git a/.gitignore b/.gitignore index 30fca9137a69c533e197b568a14895feeec3d5c6..02bc55d7f3f7ffc75a668ac79b0b6a935683ccb5 100644 --- a/.gitignore +++ b/.gitignore @@ -210,7 +210,7 @@ instance/ .scrapy # Sphinx documentation -docs/_build/ +doc/build/ # PyBuilder .pybuilder/ @@ -286,4 +286,5 @@ visualisation results outputs notebooks -.idea/ \ No newline at end of file +.idea/ +erosion_test.ipynb \ No newline at end of file diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d0c3cbf1020d5c292abdedf27627c6abe25e2293 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/logo.drawio b/doc/logo.drawio new file mode 100644 index 0000000000000000000000000000000000000000..da3edc2b0eb8a4fed67eadd2fcc30dc07d4b348c --- /dev/null +++ b/doc/logo.drawio @@ -0,0 +1 @@ +<mxfile host="Electron" modified="2021-05-04T12:27:55.299Z" agent="5.0 (Macintosh; Intel Mac OS X 10_16_0) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/12.9.13 Chrome/80.0.3987.163 Electron/8.2.1 Safari/537.36" etag="0mYTPV5g55D7n4PHjvws" version="12.9.13" type="device"><diagram id="Whi_XeIdqUOBZHivEhyE" name="Page-1">zVVNb9swDP01PrawreWjxyVpNwwbNjSHfty0iLE02JYhM7GdXz/JpmJ73oIVKNCeLD6SEvmeKQVsndWfDC/kNy0gDeJQ1AHbBHEcReHcfhzSdMhiueyAxChBQT2wVScgMCT0oASUo0DUOkVVjMGdznPY4QjjxuhqHLbX6fjUgicwAbY7nk7RByVQdugyXvT4Z1CJ9CdH85vOk3EfTJ2UkgtdDSB2G7C10Rq7VVavIXXkeV66vLt/eM+FGcjxfxLqx3n2pazu5bO8P+HTcXP18P3qA9WGjW/Y6EMuwOWEAVtpg1InOufpV60LC0YW/AWIDUnFD6gtJDFLyQu1wsfB+om2cutNPTQab+RoGpcSXoeLmQfavOtFD/TJreWzBS9lW2/kXT/AqAwQDEV0PYKYKN1TR1CpD2YHF/iK6RfkJgG8EMfOAtvJAG2rMY3NM5ByVMdxHZx+0eQc16toFyTkC0SlIo88PdBJE5XtLnaCrLGqpELYFrxtu7JDPFaTl0U3VntVO5aJzCMYhPoyndP2fcKcRoLuBOZHpBpMGEFyMFwee3XC2DsnLF6MCYvfmrDZlDB2N+HMNox/kJOqJLfrnW3cjefK0aLsbfuRHJkSwqWvDJTqxH+2W7kZLrTKsW1jtgpmG7eXvXnK7hJyW+91jls6+2yT++aVZPC3vpdhOZWB/UUG9nIZrNm/DK1v8L6y298=</diagram></mxfile> \ No newline at end of file diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 0000000000000000000000000000000000000000..9534b018135ed7d5caed6298980c55e8b1d2ec82 --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/doc/source/_static/css/custom.css b/doc/source/_static/css/custom.css new file mode 100644 index 0000000000000000000000000000000000000000..ae8f9d1401f4c3ea41b09e7758bc7ac1167f1423 --- /dev/null +++ b/doc/source/_static/css/custom.css @@ -0,0 +1,6 @@ +dl{ + margin-top: 0.5em !important; +} +div.sphinxsidebarwrapper p.logo { + padding:2em !important; +} \ No newline at end of file diff --git a/doc/source/conf.py b/doc/source/conf.py new file mode 100644 index 0000000000000000000000000000000000000000..3bde0b973a7c965fff7d2e069aa66024b93a3275 --- /dev/null +++ b/doc/source/conf.py @@ -0,0 +1,76 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +sys.path.insert(0, os.path.abspath('../..')) + + +# -- Project information ----------------------------------------------------- + +project = 'Link Prediction' +copyright = '2021, Jacques Fize' +author = 'Jacques Fize' + +# The full version, including alpha/beta/rc tags +release = '0.6' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.coverage', + 'sphinx.ext.napoleon', + 'sphinx.ext.autosectionlabel' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = 'en' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ['_static'] +html_css_files = [ + 'css/custom.css', +] +html_logo = "logo.png" + + +napoleon_google_docstring = False +napoleon_use_param = False +napoleon_use_ivar = True + diff --git a/doc/source/get_started.rst b/doc/source/get_started.rst new file mode 100644 index 0000000000000000000000000000000000000000..531109f4bc6a44e1d41d9619c7d571839752730c --- /dev/null +++ b/doc/source/get_started.rst @@ -0,0 +1,32 @@ +Get Started +=========== + +Installation +------------ + +See :ref:`Installation` + +Generate a graph with a model +----------------------------- + +We developed a series of functions for generating graphs based on the following models: + +* Spatial model +* Stochastic Block Model +* Configuration Model +* Random (ER) +* Mixed Model (Spatial + SBM) + +For example, if you want to generate a graph based on a stochastic block model, use the following code: + +.. code-block:: python + + from lib.random import stochastic_block_model_graph + G = stochastic_block_model_graph(nb_nodes=300,nb_edges=1200,nb_com=5,percentage_edge_betw=0.01) + +If you wish to generate a dataset containing graphs generated with different configurations, +you can use the script `generate_theoric_random_graph.py` with the following command: + +.. code-block:: shell + + python generate_theoric_random_graph.py <output_dir> \ No newline at end of file diff --git a/doc/source/graph_generator.rst b/doc/source/graph_generator.rst new file mode 100644 index 0000000000000000000000000000000000000000..52128c3204fe942f64880cfaaa6c5855b3649227 --- /dev/null +++ b/doc/source/graph_generator.rst @@ -0,0 +1,114 @@ + + +Graph Generators +=================== + +All graph generators can be found in the module `lib.random`. For every graph generator, you can set the number of edges and nodes +in the resulting graph. + +For example, if you want to generate a graph following the stochastic block model, use the following code: + +.. code-block:: python + + from lib.random import stochastic_block_model_graph + G = stochastic_block_model_graph(nb_nodes=300,nb_edges=1200,nb_com=5,percentage_edge_betw=0.01) + +Stochastic Block Model +---------------------- +This model partitions :math:`n` vertices into :math:`k` blocks, and places edges between pairs of nodes with a probability that depends on the vertices' block membership. The probability for a pair of nodes to be connected is computed based on four parameters: the number of vertices :math:`|V|` and edges :math:`|E|`, the number of blocks :math:`|B|`, and finally the percentage of inter-block links :math:`per_{betw}`, i.e. the number of selected pairs of nodes that belong to different blocks. + +First, we randomly assign a block :math:`b_i \in B` to each node :math:`v \in V`. In this model, all blocks are associated with the same number of nodes :math:`\frac{|V|}{|B|}`. Second, to compute the probability for a pair of nodes to be connected, we use the function :math:`f(u,v)` defined as follows: + +.. math:: + + f(u,v)=\begin{cases} + p_{in}, & \text{if } b_u = b_v\\ + p_{out}, & \text{otherwise} + \end{cases} + + +where :math:`p_{in}` is the probability for a pair of nodes from the same block to be connected, and :math:`p_{out}` is the probability for a pair of nodes with different block memberships to be connected. :math:`b_u` and :math:`b_v` correspond to the block membership of the nodes :math:`u` and :math:`v`. + + +To compute :math:`p_{in}` and :math:`p_{out}`, we do the following: + +..
math:: + + \begin{eqnarray} + U_{in} = \sum\limits_{b\in B} \frac{N_b\times(N_b-1)}{2}\\ + U = \frac{|V| \times(|V| -1)}{2}\\ + U_{out} = U-U_{in}\\ + L_{out} = |E| \times per_{betw}\\ + L_{in} = |E| - L_{out}\\ + p_{in} = L_{in}/U_{in}\\ + p_{out} = L_{out}/U_{out} + \end{eqnarray} + + +Spatial Model +------------- +This model generates a graph with :math:`n` vertices and :math:`e` edges selected randomly. Edges are selected using the deterrence function defined in the following formula: + +.. math:: + err(u,v) = \frac{1}{|p_u - p_v|^2 +\epsilon} + +where :math:`u` and :math:`v` are two vertices and :math:`p_u` and :math:`p_v` correspond to their positions. + +Node coordinates can be generated randomly or randomly selected from existing places (here, country centroids). + +ER Random Graph +--------------- +The model returns an Erdős-Rényi graph or a binomial graph. Basically, +the model generates a graph :math:`G_{n,m}` where :math:`n` corresponds to the number of vertices +and :math:`m` to the number of edges in :math:`G_{n,m}`. In this model, each pair of nodes has the same +probability of being connected. + +Configuration model +------------------- + +The configuration model generates a multigraph (a graph with parallel edges and self-loops) by randomly assigning edges to match a given degree distribution. In our case, the degree distribution of the generated graph follows a power law. We use the Molloy-Reed approach [molloy1995critical]_ to generate the graph. + +.. [molloy1995critical] Molloy, M., & Reed, B. (1995). A critical point for random graphs with a given degree sequence. Random Structures & Algorithms, 6(2-3), 161-180. + +Generate a graph dataset based on different models and configurations +--------------------------------------------------------------------- + +If you wish to generate a dataset containing graphs generated with different configurations, +you can use the script `generate_theoric_random_graph.py` with the following command: + +.. code-block:: shell + + python generate_theoric_random_graph.py <output_dir> + + +You can modify the parameters of each configuration for each model in the script source code. + +Graph parameters +^^^^^^^^^^^^^^^^ + +.. code-block:: python + + GRAPH_SIZE = [80,800,5000] + EDGE_FACTOR = [2,4,5,10] + sample_per_params = 10 + +Model parameters +^^^^^^^^^^^^^^^^ + +.. code-block:: python + + parameters = { + "stochastic_block_model_graph": { + "nb_nodes":GRAPH_SIZE, + "nb_edges":EDGE_FACTOR, + "nb_com" :[2,5,8,16,10,25], + "percentage_edge_betw":[0.1,0.01], + }, + #... + } + + +API Reference +------------- +.. automodule:: lib.random + :members: \ No newline at end of file diff --git a/doc/source/index.rst b/doc/source/index.rst new file mode 100644 index 0000000000000000000000000000000000000000..13a6a5c047e4bae3876cbd53fbdcbded87b55ad8 --- /dev/null +++ b/doc/source/index.rst @@ -0,0 +1,29 @@ +.. Link Prediction documentation master file, created by + sphinx-quickstart on Tue May 4 13:26:48 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Link Prediction's documentation! +=========================================== + +Summary +------- +..
toctree:: + :maxdepth: 2 + + installation + get_started + graph_generator + link_pred_eval + + + + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/doc/source/installation.rst b/doc/source/installation.rst new file mode 100644 index 0000000000000000000000000000000000000000..984e0c3c06318e46df1bcfb8648d567dd7ee6ddc --- /dev/null +++ b/doc/source/installation.rst @@ -0,0 +1,20 @@ +Installation +============ + +To use the code in this repo, first you'll need to install Python requirements using the following command + +.. code-block:: shell + + pip install -r requirements.txt + + +Then, install our custom version of EvalNE using pip + +.. code-block:: shell + + pip install git+https://github.com/thunlp/OpenNE.git + pip install git+https://github.com/Jacobe2169/EvalNE + + +Finally, install the Python library `graph-tool +<https://graph-tool.skewed.de>`_. \ No newline at end of file diff --git a/doc/source/link_pred_eval.rst b/doc/source/link_pred_eval.rst new file mode 100644 index 0000000000000000000000000000000000000000..82573ee20705d9c16d79bfe682747960fc836c8e --- /dev/null +++ b/doc/source/link_pred_eval.rst @@ -0,0 +1,2 @@ +Link Prediction Evaluation +========================== diff --git a/doc/source/logo.png b/doc/source/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..23d8b54dc7122fc7bdb43e2a0507706aba0dca57 Binary files /dev/null and b/doc/source/logo.png differ diff --git a/evalNE_script.py b/evalNE_script.py index faa3c6e3341f5f71e342a6981afe7dfbdb9baa8a..ceef7564c427a328b9a9a3ae53c0ef6545e69592 100644 --- a/evalNE_script.py +++ b/evalNE_script.py @@ -1,13 +1,17 @@ +import argparse + +from lib.utils import load_edgelist + from evalne.evaluation.evaluator import LPEvaluator from evalne.evaluation.split import EvalSplit as LPEvalSplit from evalne.evaluation.score import Scoresheet from evalne.utils import preprocess as pp + import networkx as nx from tqdm import tqdm -from lib.utils import load_edgelist -import argparse + parser = argparse.ArgumentParser() parser.add_argument("graph_filename") diff --git a/eval_mixed_model.py b/eval_mixed_model.py index 22c8b7bc689d5c3f6a29296350e1212448646ee3..aaa89da1873224c8500549132de5b7b9ff5e494c 100644 --- a/eval_mixed_model.py +++ b/eval_mixed_model.py @@ -1,17 +1,17 @@ # coding = utf-8 import argparse import time +import glob + +from lib.erosion_model import eval_erosion_model +from lib.utils import load_edgelist import pandas as pd +import networkx as nx -from lib.random import mixed_model_spat_sbm -from lib.erosion_model import eval_erosion_model from joblib import Parallel,delayed -import networkx as nx -import glob from tqdm import tqdm -from lib.utils import load_edgelist parser = argparse.ArgumentParser() parser.add_argument("graph_dir") diff --git a/generate_graph_atlas.py b/generate_graph_atlas.py index 1c90fe92fd289c9c1dfe233a1b0a6c84c2d79e50..844edb1d62d5bd1579ba16833477b1ab492b4b0e 100644 --- a/generate_graph_atlas.py +++ b/generate_graph_atlas.py @@ -1,15 +1,17 @@ # coding = utf-8 - -import networkx as nx -import matplotlib.pyplot as plt -import seaborn as sns import glob -import numpy as np import re -import pandas as pd import argparse import os +import pandas as pd +import numpy as np +import networkx as nx + +import matplotlib.pyplot as plt +import seaborn as sns + + parser = argparse.ArgumentParser() parser.add_argument("graph_directory") parser.add_argument("output_directory") diff --git a/generate_mixed_model_graph.py 
b/generate_mixed_model_graph.py index 6a6d80c5f624b51325168c2d7a821337bec7c3ba..2eb32e9976850eb21ff0bf7644ba0f850ed00aea 100644 --- a/generate_mixed_model_graph.py +++ b/generate_mixed_model_graph.py @@ -1,11 +1,12 @@ # coding = utf-8 import itertools import os +import argparse + +import lib.random as ra import networkx as nx -import argparse from tqdm import tqdm -import lib.random as ra # COMMAND PARSING parser = argparse.ArgumentParser() diff --git a/generate_theoric_random_graph.py b/generate_theoric_random_graph.py index 51ca442da1585eb8a584de318966febf2f94fb44..fba4cc234617d1c1cca511440b9ced836243b02f 100644 --- a/generate_theoric_random_graph.py +++ b/generate_theoric_random_graph.py @@ -1,11 +1,13 @@ # coding = utf-8 import itertools import os +import argparse + +import lib.random as ra import networkx as nx -import argparse from tqdm import tqdm -import lib.random as ra + # COMMAND PARSING parser = argparse.ArgumentParser() diff --git a/lib/draw.py b/lib/draw.py index a1f8d6b01f917c1af163d1e3fc1d069a77b3e28c..5f13a181988d3f1d2e9eeff118c04976583bb4b1 100644 --- a/lib/draw.py +++ b/lib/draw.py @@ -1,13 +1,13 @@ -import matplotlib.pyplot as plt -import matplotlib.patheffects as path_effects -import seaborn as sns -import networkx as nx +from glob import glob + import pandas as pd import numpy as np -from glob import glob +import networkx as nx from fa2 import ForceAtlas2 - +import matplotlib.pyplot as plt +import matplotlib.patheffects as path_effects +import seaborn as sns def get_force_atlas(weight_influence=0, scaling_ratio=3.0, gravity=5): """ diff --git a/lib/erosion_model.py b/lib/erosion_model.py index 48eb731473891ac9edc82570336532d1bcb5798b..74c330649ca0e4e738b4a9f9a95d6ab8f68e6337 100644 --- a/lib/erosion_model.py +++ b/lib/erosion_model.py @@ -1,8 +1,5 @@ # coding = utf-8 -from sklearn.linear_model import LogisticRegression -from sklearn.metrics import roc_auc_score - from .link_prediction_eval import get_auc_heuristics, split_train_test, get_all_possible_edges from .lambda_func import euclid_dist as dist from .lambda_func import hash_func @@ -15,6 +12,9 @@ import networkx as nx import numpy as np float_epsilon = np.finfo(float).eps +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import roc_auc_score + VERBOSE = True def log(x): if VERBOSE: diff --git a/lib/helpers.py b/lib/helpers.py index e5db269b09e8292ab00595ad05d369c72e10442f..a71fe46c3cb3b84fa1fcefa2b0f5a1c4f12dac8c 100644 --- a/lib/helpers.py +++ b/lib/helpers.py @@ -1,14 +1,14 @@ +from .link_prediction_eval import get_all_possible_edges + import numpy as np import networkx as nx import pandas as pd -from .link_prediction_eval import get_all_possible_edges - - try: import graph_tool as gt except: - pass + raise ImportError("Graph-tool must be installed !") + def parse_evalne_output(string): def foo(x): diff --git a/lib/link_prediction_eval.py b/lib/link_prediction_eval.py index c022e833c213aa480effdc3956074d0a7f3f0289..0b69165f04fc47fde8cb7797da984c4c09d33b0f 100644 --- a/lib/link_prediction_eval.py +++ b/lib/link_prediction_eval.py @@ -1,10 +1,11 @@ # coding = utf-8 +from .lambda_func import hash_func from evalne.evaluation.evaluator import LPEvaluator from evalne.evaluation.split import EvalSplit as LPEvalSplit from evalne.utils import preprocess as pp -from .lambda_func import hash_func + def get_auc_heuristics(G,timeout=60): H, _ = pp.prep_graph(G.copy(),maincc=True,relabel=False) diff --git a/lib/random.py b/lib/random.py index 
5b4f1c4ec451fcbdbc13440a1e7734103f4da649..70cdf8df0934d659df3de909a4a1eb892c229dec 100644 --- a/lib/random.py +++ b/lib/random.py @@ -1,19 +1,20 @@ # coding = utf-8 from collections import Iterable +import random import numpy as np -import networkx as nx import pandas as pd +import networkx as nx from networkx.generators.degree_seq import _to_stublist -import random -float_epsilon = np.finfo(float).eps - +float_epsilon = np.finfo(float).eps def powerlaw(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1): """ Return a degree distribution that fit the power law and specified number of edges and vertices. + ddd + Parameters ---------- nb_nodes : int @@ -64,6 +65,7 @@ def powerlaw(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1): def get_countries_coords(): """ Return the coordinates of each country in the world. + Returns ------- np.ndarray @@ -108,6 +110,7 @@ def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=1000, min_deg=0): """ Generate a graph with a defined number of vertices, edges, and a degree distribution that fit the power law. Using the Molloy-Reed algorithm to + Parameters ---------- nb_nodes : int @@ -157,6 +160,7 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n self_link=False, weighted=False): """ Generate a spatial graph with a specific number of vertices and edges + Parameters ---------- nb_nodes : int @@ -253,6 +257,7 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n def ER_graph(nb_nodes, nb_edges): """ Generate a random graph with a specific nb of nodes and edges. + Parameters ---------- nb_nodes : int @@ -269,6 +274,7 @@ def ER_graph(nb_nodes, nb_edges): def stochastic_block_model_graph(nb_nodes, nb_edges, nb_com, percentage_edge_betw, verbose=False): """ Generate a stochastic block model graph with defined number of vertices and edges. + Parameters ---------- nb_nodes : int @@ -352,6 +358,7 @@ def equilibrate(G, nb_nodes, percentage_edge_betw, percentage_edge_within, inter """ Sometimes the generated graph from the stochastic block model have some missing nodes due to the sampling method. This function fix this issue. 
+ Parameters ---------- G @@ -512,57 +519,3 @@ def mixed_model_spat_sbm(nb_nodes, nb_edges, nb_com, alpha, percentage_edge_betw return G2#,all_probs_sbm,all_probs_spa - -def get_sbm_probs(G, percentage_edge_betw, verbose=False): - hash_func = lambda x: "_".join(sorted([str(x[0]), str(x[1])])) - def nb_of_pair(N): - return (N*(N-1))/2 - - block_assign = nx.get_node_attributes(G, "block") - nb_com = len(set(block_assign.values())) - nb_nodes=len(G) - nb_edges = G.size() - b_assign_array = np.asarray(list(nx.get_node_attributes(G,"block").values())) - - - - u_in = sum([nb_of_pair((b_assign_array==b).sum()) for b in range(nb_com)]) - u_out = nb_of_pair(len(G)) - u_in - l_out = nb_edges*percentage_edge_betw - p_out = l_out/u_out - l_in = nb_edges - l_out - - p_in = l_in / u_in - - inter_edges, intra_edges = get_inter_intra_edges(G,G.is_directed()) - inter_edges = np.asarray(inter_edges) - intra_edges = np.asarray(intra_edges) - inter_N, intra_N = len(inter_edges), len(intra_edges) - probs_inter = np.ones(inter_N) * p_out - probs_intra = np.ones(intra_N) * p_in - - all_edges = np.concatenate((inter_edges, intra_edges)) - all_probs = np.concatenate((probs_inter, probs_intra)) - del probs_inter - del probs_intra - return all_edges,all_probs - - -def get_spat_probs(G,dist = lambda a,b : np.linalg.norm(a-b)**2): - hash_func = lambda x: "_".join(sorted([str(x[0]), str(x[1])])) - pos = nx.get_node_attributes(G, "pos") - spat_model = lambda u, v: 1 / (float_epsilon + dist(pos[u], pos[v])) - register = set([]) - edges, probs = [], [] - for n1 in list(G.nodes()): - for n2 in list(G.nodes()): - if n1 != n2 and hash_func((n1, n2)) not in register: - edges.append([n1, n2]) - probs.append(spat_model(n1, n2)) - register.add(hash_func((n1, n2))) - - return edges, probs - - - - diff --git a/lib/visualisation.py b/lib/visualisation.py index ed5d6f45019642453fefadd22d49577ed93bbca3..9e8bdab8cbaee7a616d5af788ae608c2b420ee95 100644 --- a/lib/visualisation.py +++ b/lib/visualisation.py @@ -1,13 +1,12 @@ # coding = utf-8 - -import pandas as pd -import seaborn as sns -import matplotlib.pyplot as plt import re import os +import pandas as pd import networkx as nx +import seaborn as sns +import matplotlib.pyplot as plt def get_graph_attr(fn, graph_dir): g_fn = os.path.join(graph_dir, fn) diff --git a/requirements.txt b/requirements.txt index 4d553c63be14c3096820ec8f69537515720c856a..fd69055a7735a5490422fd5c1764c32be2d04590 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,5 @@ seaborn joblib tqdm scikit-learn +fa2 +numpydoc