From 3a26b7ed46d6c94034d5593d692d067caa97620d Mon Sep 17 00:00:00 2001 From: Fize Jacques <jacques.fize@cirad.fr> Date: Tue, 4 May 2021 16:33:25 +0200 Subject: [PATCH] Add sphinx documentation + Change documentation + clean import+ debug --- .gitignore | 5 +- doc/Makefile | 20 ++++++ doc/logo.drawio | 1 + doc/make.bat | 35 +++++++++ doc/source/_static/css/custom.css | 6 ++ doc/source/conf.py | 76 ++++++++++++++++++++ doc/source/get_started.rst | 32 +++++++++ doc/source/graph_generator.rst | 113 ++++++++++++++++++++++++++++++ doc/source/index.rst | 29 ++++++++ doc/source/installation.rst | 20 ++++++ doc/source/link_pred_eval.rst | 2 + doc/source/logo.png | Bin 0 -> 5836 bytes evalNE_script.py | 8 ++- eval_mixed_model.py | 10 +-- generate_graph_atlas.py | 14 ++-- generate_mixed_model_graph.py | 5 +- generate_theoric_random_graph.py | 6 +- lib/draw.py | 12 ++-- lib/erosion_model.py | 6 +- lib/helpers.py | 8 +-- lib/link_prediction_eval.py | 3 +- lib/random.py | 69 +++--------------- lib/visualisation.py | 7 +- requirements.txt | 2 + 24 files changed, 394 insertions(+), 95 deletions(-) create mode 100644 doc/Makefile create mode 100644 doc/logo.drawio create mode 100644 doc/make.bat create mode 100644 doc/source/_static/css/custom.css create mode 100644 doc/source/conf.py create mode 100644 doc/source/get_started.rst create mode 100644 doc/source/graph_generator.rst create mode 100644 doc/source/index.rst create mode 100644 doc/source/installation.rst create mode 100644 doc/source/link_pred_eval.rst create mode 100644 doc/source/logo.png diff --git a/.gitignore b/.gitignore index 30fca91..02bc55d 100644 --- a/.gitignore +++ b/.gitignore @@ -210,7 +210,7 @@ instance/ .scrapy # Sphinx documentation -docs/_build/ +doc/build/ # PyBuilder .pybuilder/ @@ -286,4 +286,5 @@ visualisation results outputs notebooks -.idea/ \ No newline at end of file +.idea/ +erosion_test.ipynb \ No newline at end of file diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 
0000000..d0c3cbf --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/logo.drawio b/doc/logo.drawio new file mode 100644 index 0000000..da3edc2 --- /dev/null +++ b/doc/logo.drawio @@ -0,0 +1 @@ +<mxfile host="Electron" modified="2021-05-04T12:27:55.299Z" agent="5.0 (Macintosh; Intel Mac OS X 10_16_0) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/12.9.13 Chrome/80.0.3987.163 Electron/8.2.1 Safari/537.36" etag="0mYTPV5g55D7n4PHjvws" version="12.9.13" type="device"><diagram id="Whi_XeIdqUOBZHivEhyE" name="Page-1">zVVNb9swDP01PrawreWjxyVpNwwbNjSHfty0iLE02JYhM7GdXz/JpmJ73oIVKNCeLD6SEvmeKQVsndWfDC/kNy0gDeJQ1AHbBHEcReHcfhzSdMhiueyAxChBQT2wVScgMCT0oASUo0DUOkVVjMGdznPY4QjjxuhqHLbX6fjUgicwAbY7nk7RByVQdugyXvT4Z1CJ9CdH85vOk3EfTJ2UkgtdDSB2G7C10Rq7VVavIXXkeV66vLt/eM+FGcjxfxLqx3n2pazu5bO8P+HTcXP18P3qA9WGjW/Y6EMuwOWEAVtpg1InOufpV60LC0YW/AWIDUnFD6gtJDFLyQu1wsfB+om2cutNPTQab+RoGpcSXoeLmQfavOtFD/TJreWzBS9lW2/kXT/AqAwQDEV0PYKYKN1TR1CpD2YHF/iK6RfkJgG8EMfOAtvJAG2rMY3NM5ByVMdxHZx+0eQc16toFyTkC0SlIo88PdBJE5XtLnaCrLGqpELYFrxtu7JDPFaTl0U3VntVO5aJzCMYhPoyndP2fcKcRoLuBOZHpBpMGEFyMFwee3XC2DsnLF6MCYvfmrDZlDB2N+HMNox/kJOqJLfrnW3cjefK0aLsbfuRHJkSwqWvDJTqxH+2W7kZLrTKsW1jtgpmG7eXvXnK7hJyW+91jls6+2yT++aVZPC3vpdhOZWB/UUG9nIZrNm/DK1v8L6y298=</diagram></mxfile> \ No newline at end of file diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 
0000000..9534b01 --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/doc/source/_static/css/custom.css b/doc/source/_static/css/custom.css new file mode 100644 index 0000000..ae8f9d1 --- /dev/null +++ b/doc/source/_static/css/custom.css @@ -0,0 +1,6 @@ +dl{ + margin-top: 0.5em !important; +} +div.sphinxsidebarwrapper p.logo { + padding:2em !important; +} \ No newline at end of file diff --git a/doc/source/conf.py b/doc/source/conf.py new file mode 100644 index 0000000..3bde0b9 --- /dev/null +++ b/doc/source/conf.py @@ -0,0 +1,76 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# +import os +import sys +sys.path.insert(0, os.path.abspath('../..')) + + +# -- Project information ----------------------------------------------------- + +project = 'Link Prediction' +copyright = '2021, Jacques Fize' +author = 'Jacques Fize' + +# The full version, including alpha/beta/rc tags +release = '0.6' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.coverage', + 'sphinx.ext.napoleon', + 'sphinx.ext.autosectionlabel' +] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# The language for content autogenerated by Sphinx. Refer to documentation +# for a list of supported languages. +# +# This is also used if you do content translation via gettext catalogs. +# Usually you set "language" from the command line for these cases. +language = 'en' + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = [] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". 
+html_static_path = ['_static'] +html_css_files = [ + 'css/custom.css', +] +html_logo = "logo.png" + + +napoleon_google_docstring = False +napoleon_use_param = False +napoleon_use_ivar = True + diff --git a/doc/source/get_started.rst b/doc/source/get_started.rst new file mode 100644 index 0000000..531109f --- /dev/null +++ b/doc/source/get_started.rst @@ -0,0 +1,32 @@ +Get Started +=========== + +Installation +------------ + +See :ref:`Installation` + +Generate a graph with a model +----------------------------- + +We developed a series of functions for generating graphs based on the following models: + +* Spatial model +* Stochastic Block Model +* Configuration Model +* Random (ER) +* Mixed Model (Spatial + SBM) + +For example, if you want to generate a graph based on a stochastic block model, use the following code : + +.. code-block:: python + + from lib.random import stochastic_block_model_graph + G = stochastic_block_model_graph(nb_nodes=300,nb_edges=1200,nb_com=5,percentage_edge_betw=0.01) + +If you wish to generate a dataset containing graphs generated with different configurations, +you can use the script `generate_theoric_random_graph.py` using the following command : + +.. code-block:: shell + + python generate_theoric_random_graph.py <output_dir> \ No newline at end of file diff --git a/doc/source/graph_generator.rst b/doc/source/graph_generator.rst new file mode 100644 index 0000000..52128c3 --- /dev/null +++ b/doc/source/graph_generator.rst @@ -0,0 +1,113 @@ + + +Graph Generators +=================== + +All graph generators can be found in the module `lib.random`. For every graph generator, you can set the number of edges and nodes +in the resulting graph. + +For example, if you want to generate a graph following the stochastic block model, use the following code : + +.. 
code-block:: python + + from lib.random import stochastic_block_model_graph + G = stochastic_block_model_graph(nb_nodes=300,nb_edges=1200,nb_com=5,percentage_edge_betw=0.01) + +Stochastic Block Model +---------------------- +This model partitions :math:`n` vertices in :math:`k` blocks, and places edges between pairs of nodes with a probability that depends on the vertices' membership. The probability of a pair of nodes is computed based on four parameters: the number of vertices :math:`|V|` and edges :math:`|E|`, the number of blocks :math:`|B|` and finally, the percentage of interlinks :math:`per_{betw}`, i.e. the fraction of selected pairs of nodes that belong to different blocks. + +First, we randomly assign a block :math:`b_i \in B` to each node :math:`v \in V`. In this model, all blocks are associated with the same number of nodes :math:`\frac{|V|}{|B|}`. Second, to compute the probability of a pair of nodes to be connected, we use the function :math:`f(u,v)` defined as follows: + +.. math:: + + f(u,v)=\begin{cases} + p_{in}, & \text{if } b_u = b_v\\ + p_{out}, & \text{otherwise} + \end{cases} + + +where :math:`p_{in}` is the probability of a pair of nodes from the same block to be connected, and :math:`p_{out}` is the probability of a pair of nodes with different block membership to be connected. :math:`b_u` and :math:`b_v` correspond to the block membership of the nodes :math:`u` and :math:`v`. + + +To compute :math:`p_{in}` and :math:`p_{out}`, we do the following, where :math:`N_b` is the number of nodes in block :math:`b`: + +.. math:: + + \begin{eqnarray} + U_{in} = \sum\limits_{b\in B} \frac{N_b\times(N_b-1)}{2}\\ + U = \frac{|V| \times(|V| -1)}{2}\\ + U_{out} = U-U_{in}\\ + L_{out} = |E| * per_{betw}\\ + L_{in} = |E| - L_{out}\\ + \\p_{in} = L_{in}/U_{in}\\ + p_{out} = L_{out}/U_{out} + \end{eqnarray} + + +Spatial Model +------------- +This model generates a graph with :math:`n` vertices and :math:`e` edges selected randomly. Edges are selected using the deterrence function defined in the following formula: + +.. 
math:: + err(u,v) = \frac{1}{|p_u - p_v|^2 +\epsilon} + +where :math:`u` and :math:`v` are two vertices and :math:`p_u` and :math:`p_v` correspond to their position. + +Node coordinates can be generated randomly or randomly selected from existing places (here, country centroids). + +ER Random Graph +--------------- +The model returns an Erdős-Rényi graph or a binomial graph. Basically, +the model generates a graph :math:`G_{n,m}` where :math:`n` corresponds to the number of vertices +and :math:`m` the number of edges in :math:`G_{n,m}`. In this model, each pair of nodes has the same +probability to be connected. + +Configuration model +------------------- + +The configuration model generates a multigraph (a graph with parallel edges and self loops) by randomly assigning edges to match a given degree distribution. In our case, the degree distribution of the generated graph follows a power law. We use the Molloy-Reed approach [molloy1995critical]_ to generate the graph. + +.. [molloy1995critical] Molloy, M., & Reed, B. (1995). A critical point for random graphs with a given degree sequence. Random structures & algorithms, 6(2-3), 161-180. +Generate a graph dataset based on different models and configurations +--------------------------------------------------------------------- + +If you wish to generate a dataset containing graphs generated with different configurations, +you can use the script `generate_theoric_random_graph.py` using the following command : + +.. code-block:: shell + + python generate_theoric_random_graph.py <output_dir> + + +You can modify the parameters of each configuration for each model in the script source code. + +Graph parameters +^^^^^^^^^^^^^^^^ + +.. code-block:: python + + GRAPH_SIZE = [80,800,5000] + EDGE_FACTOR = [2,4,5,10] + sample_per_params = 10 + +Model parameters +^^^^^^^^^^^^^^^^ + +.. 
code-block:: python + + parameters = { + "stochastic_block_model_graph": { + "nb_nodes":GRAPH_SIZE, + "nb_edges":EDGE_FACTOR, + "nb_com" :[2,5,8,16,10,25], + "percentage_edge_betw":[0.1,0.01], + }, + #... + } + + +API Reference +------------- +.. automodule:: lib.random + :members: \ No newline at end of file diff --git a/doc/source/index.rst b/doc/source/index.rst new file mode 100644 index 0000000..13a6a5c --- /dev/null +++ b/doc/source/index.rst @@ -0,0 +1,29 @@ +.. Link Prediction documentation master file, created by + sphinx-quickstart on Tue May 4 13:26:48 2021. + You can adapt this file completely to your liking, but it should at least + contain the root `toctree` directive. + +Welcome to Link Prediction's documentation! +=========================================== + +Summary +------- +.. toctree:: + :maxdepth: 2 + + installation + get_started + graph_generator + link_pred_eval + + + + + + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/doc/source/installation.rst b/doc/source/installation.rst new file mode 100644 index 0000000..984e0c3 --- /dev/null +++ b/doc/source/installation.rst @@ -0,0 +1,20 @@ +Installation +============ + +To use the code in this repo, first you'll need to install Python requirements using the following command + +.. code-block:: shell + + pip install -r requirements.txt + + +Then, install our custom version of EvalNE using pip + +.. code-block:: shell + + pip install git+https://github.com/thunlp/OpenNE.git + pip install git+https://github.com/Jacobe2169/EvalNE + + +Finally, install the Python library `graph-tool +<https://graph-tool.skewed.de>`_. 
\ No newline at end of file diff --git a/doc/source/link_pred_eval.rst b/doc/source/link_pred_eval.rst new file mode 100644 index 0000000..82573ee --- /dev/null +++ b/doc/source/link_pred_eval.rst @@ -0,0 +1,2 @@ +Link Prediction Evaluation +========================== diff --git a/doc/source/logo.png b/doc/source/logo.png new file mode 100644 index 0000000000000000000000000000000000000000..23d8b54dc7122fc7bdb43e2a0507706aba0dca57 GIT binary patch literal 5836 zcmZ`-2RIyU+Z}85UZVFdAqWw@3!-<?RtXVZ1gjI#Npy)C1X&_l^xlQ&y~Jt>5p~s# z4gchQ-|xHr?=N#*bMIX9%>CRm&&-~4?h~u0qeeo=KnMT;iH5q00r=Yi?sfRMU?lar zZ36%_sTwMZMsVvrt5Ch^IbZpz-Ct(+s(yx8RB(yv4QknoF1%33_w84Udfr+vY~#ut z`Kd@nU^bfj5!1A=|LcYK)A#8UIcG9we4!Q0oDME(Hrw7gFM9=2TzT$J5k@}goj7Z$ zivLMzMk6)Q@Ms+W@^@Ve^1yE_cR@Zaa8&l(Tki7fW!w3(H)R+DI~#F06Hv8%8h~kK zBF4=pgF=Q&N=h<xbah=GJ33NQQBgfkNlAf7<?bh&lrzsbiTV8wxo!yQK;i%wUWAP1 z3<&6x>7)z^1vW+?jy}pi(A9N!=H%oVOh@M>kF@ksQhGX$Q0pN?0M^cwaGw(_H0~ru zPK?`ia2_$R5spX3(s_;yCBw$X9!DY%yrpux*4AtzK0o@E<*+b@wR#gcQKM4h2o;+! zSuqoWbV}bZI<jiB&abiU!Ts9o^E2avRSQjC;J)NUt^dU#yJbfRvy`+nuaH%<*the` z0a=!@$ABj(uph(1mH^=heMezZ5)#xsr>mva`taeyH+uT|)m-=P-CGxJ@;~PJR9~;N zxOcX&xw$E1mf=SDn?vQ1D@Uk`jq!&`$|zeo9c^vx%F0UJin6k}1r?oqITG-Mb>*D& zmzrHXz*E)PJ_cjJS*8_C6J?u4g4^RFo-J5cSg0gk>*VGZ85M<FIkt5f=diFi9$&+C zMuFxI*PnQB=2ExV7EQ`LIXg4+*_T7@B~!R;;29JW-l+&&q*F&xqKg&u-$>PrI373M zb(wvaE8)d~<L9!u;@RM|HJZC|&wZKBtL5uK%vWX()bQ7h<Z2H_AV<z0#bsn_s^<6t z-`VZSlW=Bf-&Onz1C&dFG6g!4qg1a6e8{3(CLi9n!jJ5Rc4o?NND+m#x692pdTm_% zzC7Op-~0i=8~kJ#zB6IENcPAcO+0r;TbrzLT<|Zmp0F*bgM)*0=CIGmo^R$+UG(4> ztw`kAW4iD6A|oTqmlhX^33I!jJ7#8PUN*P1T$*JB$B!+?V1mXCoM2q-GtR+1?hQY0 zNJUmTnwpsWVkRBP3~C2duBxhTwqW1(iHnOPoWAG{T@<|_8lRb&>G2^@*)$bzpsizp zxgVoY^=Bt1@5<m=x>^-)-*zr7Ed@pmYYHdB+CI!@Bqtxm4u5-~-!m*|dT&j$s?Uy| zm4P8Zep#F^IsaDK&`$K)KtqlWFQ31(A>U>rNmgpa9LefwA8B_CMu{S$I%mPP-K}=P z+F%_k!>DRq(M(yn$IrbXa7IZg7GqE#$SQkTVdCPZrluAbq!pLQU0hteK^k4;Utq?@ z5$cMeMCc2eSIkOENW3>z`k?bp;q_E}wZfY>Z|I1lUD#U1(JGuMwMA_G=~tKN!1Mi) 
zOz&ro)N61V>iZEaGVsI2+Oz9e%*5IK&Hh^&y1E|B3=DDEZrUI993MS`M@L5=rF2u# z+QncjDboDqqioyV-QDX4e{jTEvh@&&)YaA9&TxR`dQc_9ZnIE6R`YUn{2mz|juo&Y z6lQN?lHup+={bdw?otWydY2k0_VJXANg|1LSh}GGu%qBS)H!SqzRWYbq(Xn&uRA<z zYkc-&(j!O?Io9o$c))@iu@<fNilyyNCQamEWsux%B?5tv_uZYR{HDuYBRXH_GPilO zF(AS`v?1N8ay7Wl?6n8}UH50oO{WTzO?lT-V-JDXwC@`m;k_G2GP;|ugy;43^4p$% z8*<^~<XoRBRqZm9Bu>uAI6GWv3q;c$E9-_x!c`hcE}fm7<=%0V`2otB`LBRgAUR*! zz2@g<VJ+lB?PHh_KYwzr+ta(-L+Q7w9ELM21cpEIpoV8OnRX|g#HQ@+?X%!<b6dc0 zp_T9o^3fffh%Ot-WSBr!b?VsXFJE>`A_T8U@tNT_DA8zpUfNi<Y;iZKfGdpujDvlQ zSZDS<k29-A&ox;PKayW2g|u0RwpLdkWw!Voyd1~hF%G;qv~_ZI#iR{Z>ap6Z_aF>Q zQFE2B%5nPM(cR%pp`Nq7rAi94{T8-9XSP;}w3U_B!y~wwDYfU}l2Np(sw%chNVR&u z`ihNkqX)9REy3OO$PZXKG_P}>0Y6!J-<qWHAJ62g1X^CyOQKt_8>H!@-f*Fg2XtsC zV`F2xz_+0CAwWHm6=KG1P&ze2jz+KXl&;i*+tV5~{dz7gE<K~tLK*Qfvj7y)Ig>0p z(w>({!~B@Y54+;;<HbFSeOxfS%oX5zk`HTSbjpWGfOX$dS+BXj1;hMjr4+ls7cJpn z0GCK3ZijlC01Ws!3B7!Iu(zUMVDo%RS(I&wX;Zw{bYah<NzmLgj1+tpohk45H2v)^ ziHxlB0wWy4v$3Dn5MAoc=Rv01xw)Ksr{3$F)WLP*Wx7Ft!AG5!d4-m_OyLH4_l73F zOgv00e#5EvVCOy>-4hs(I*uMtN65avqe(ZW??6**kR9>WDAmO_={*lE9)!P@$k^Vp zGu$Xqf9=b6JGq&)-~11ZqBDuSrzfIsR*fBV8rzuSA(}?lxq<mdb>=S8Qc?zugorxW zlP5nKd)A!t%DYZaWcYnzQL@*8Kotd0kEOr^V3u9QuiHi(0};*5=IB}~iHM908Yd^G zi_tybN^^CbWEfkc2#qEw#g!ERrviAe0K@caRseGwc!b?4SUN>;^m>@;VEg;#=G%Dd zfkg|IE4o_a?zL+wpaxjQ1IFcx&kq2beXkBdPR<Rg%;9uOU49Y~YouLu0vQQO8yV1H zP3#wd(i|M(L;RuzI`dICfH3~0@9jWQ;|gBbzNB<YYU-=+-@du+{rI7gnBpeFTJjGl zI@;N-w|VxHqkGu7kR2U3z#b6RS9-L87gnsB4YJcxJczT4i;G6EuHx3#<v9>(H+)-$ z8ygb`$8fmagN5x~lPpXp+9m;zh;wkl$gluE-`iN7`Er_~`_~o~v-fs=FQ<mNCZYya z6&)Okk!zkopKAo<s3q=l$n(CY*?nOVe*h?WZF3)oVy*H3OgLf47(-^0UnIZ<C7^@V z2?HMoyHj3}D-ssg-Q7LojKS&T4e|5yV`W|E)B)*$$)YNP0BFYw8z|n*pYX-%^jh1= z=a1E-MH+^pRbx&%umP(0#Ar>n_`Ve6%1T~YRHjW(li;me$IX@>T`V)Dj*r+;M31*r zfs5Lt3uQn88wh0zpwFF(8%_=Ej)p54Fq-e2U@5fe<z7&>+V#Z^#nTFv@cA=RH;97F zDiLyh6)c^Zx%7BVb(6NABWP!1f5_4T7qBmYO<un_cRAz*d4HZ<KKYj^a3KV|Pgh`N ziOK!LzI;b)#AH<<I))1yMJ|n665rzJ5ay|XhV>_H0{~`3=I%8q5K8^)TngBx6z|o2 
zCx-<rw`7z#WRHCJcA5R4A?>xU5=ZGAX!~JwG*^1SW4Tde-M8h)%B2fi;5Hity6J6i z8=2e>=Hq7Yo(g|@?w4`SdUu$Ve+}FoXLkcZrWco@oUTape0PBg9}2;vjhk7M#!b!_ zvLiZ25&~!bp&zlrbXCg+&SQ=#uL1-fy^OSd?HJK@m^I?4S@Hfo`IGu*>u`$Vf`YD} z$LMyuhcuZ%w-KMH(8Y?BuxFt%4{-nt$0|$f>siXUDSwLu%DftG;+~+KtgI|+7%bo1 zI8t+GUaZx7X?l9|G@z|VU^s+rt!3&J6$eUl!y<vbi%rzkfSk3c<}kNVh2a?P!tY2| z5(0j)BSS+&FF*yr-PF|7ntphrvsJ31q9WC|C0JZi5>1rhkgX(b8D#9Wqr15UHJsO> z^AJ@tVg{hy4_+LES!5e*6Ua|?%Ubs;%M?vqe6+6AU8u5+a0k_*=Ilo+B7+>NRk`1% zKj#W+YrRpE#hQj89iht`M?tv27O?t~uBT~`5?ovAcC!B2FE@Jo=okz3iu4+v>!H<$ zY+s9AakR>ci=R0M1T+ir@~Wlg=DH0I4MCO#&D(<{=@Rdc|2z#l4X@rQKpx<|8o(xF zkpY$~=;`RfuC7Azr+yHVl94fL@l4HutHBb`9lzMNChgB559`hQ%6h&k;>)z;3|v@; znOn|%B#V4d1=%bTe6A@PZ(TVsHr!=%h*n^$as|oo$H~bjUq?qC5fKqD9)5oQw^Kh@ zp42VsC*Jp-J2(&hd473Rd_#WJiBUFGhNDAlyNDA;faqQ_x*jnU+(&v4P=vlH74V^r z0YLEb)Q$a0h*Q57dMyVK6X?amvJ|nNa&eGjadarGa#8kBt?I-+OJ>N!CrA53cx?S- z@&=M{h@-C_D7C$d<AkV$yj0LwCMi-pas^_Mx2oL@1}LyN<kjAC?VsHNY%sy^gj&^I z>zk)3&@<n3vq;FGF&LRn%7xRdf=?XTsypmnvE*zhP3R-KyRXd%g>pRbfL5&g+PGl3 zq7A6UHqETO%3}$JNFuP(Z9aWgV-AJQS2-4<O6+9UOik?SwRGTwcH<UiGzS;!46K=w zp^R_X)yvKPoTb1ZS|wG2w74&-Pj)nljKDdIl~5Y8uT|-5rSwuoz$t&^q(XtINd%{M z^8K(uA?9v)I)!q|wYgOKJN}eE*DKPBa&7FDcck5CeiDldW<1ez2LPGD!V)&<rb0*` z62~~H<MXsXe(!UP^Jj=BCGZieQ=YOYEmq;vU}Zf<$Si1`3;?`03=R1p$vhnlqM>W5 z$5RCsW`=~P(r=i%h6J3)&@p$b`ePM`?knu@c4XE5-p2v7UTJG{<Ev|g+}-jY$olv? zhDIm{X&ig75yD`Q1lzkIcSQ(PIb&1iulpFCq*G{K!4Z2W(ZC_=O_a2@>K7spCZp3O zC_o#-y`!oyo+?ke(`3i(EkN+%2h`*J-0ve<(UncpK<<Ol(b0m534NK)?z>kVefMOi zWdp4@)z<$8cWD!PBZnoh&r(rS8^{GOi@(XMp8AUdYjks<SVv!9U(v!MTXF9q{g2FV zZVq^n_Q_OQ+&PPt7qv0$W^KJPo0peobXPm1x`~>a8Xg!JIEF9i#wsMi@1JKm_;1GJ zP^Gw292VI(DL^xPF+NUiqTQ7+G;I{6Ac3;KIQqk(<)-!s#D#NkaB$Kexy~}T6z5$1 z=|V|Lyw)lW<5-KX|C^wt#AiR-OoIgd0l3Ot+Oy`y_8x>1P?#lP{t@kX_l5g?ZV3u9 z`$;g30Egv`xJnMGuow(Yg~?AQCZ=h-;2F4wnY_5Ds3<u;)RW{zIxPYLgg;JUJwHC* z|C$XmjXtuRCJ*OSd)e6)fmc~Aa7n{nND9qUy-GPbIhideC}>=m-P{>UNFY8qJoLl? 
zEZ1%-|K5!6)EyiX{xUZ*GLjKTcp_4V#s}L?B{#Q<c!vd30{U;9hhpe>u&NL-G4Z%z zC>GMu!NDI74^L#Bs#e(0P#B_{#5(s2kCbV!vaHPDFpb0I*N>@ETV#9ir?XcAU-T#B z@-afUt(s35^T*C>-zR)nRV2pkA$!Fl8-N4s^T8RsJbneC4*sRtQySH{iGdd1y)2m! z44i83@>Vv#d<xP@Y^5#W)Y;tK{bS8zYMh@QE3E{5eSMrPueQ*9#A?ba_~DYM$CZ{% zsKvp-L2n8MH6Q1x$w`)#7C)hUyO6Y>i;}2=as&dMlp(XsOmF@x2nygeYU=AsvLC+2 z5cizqg3aw|N(!aq0*{NuN6RLNMZIfNUyUm!vSnydbU86lD<tv0C<w77v8C|feRA0r z-u_EaHQNZ&^*&XZrj7wp<atF!YqLq~F9L<0ts3J4I#-ii6fV#t%#yg<;<FPB`naqk z`_|q0e{?&RA^oJb5TYybc{}vKOZRTi9ftM8wVv_$`3nT{pi+PC%euWd0o|nM&NKo8 z62!nj0(Z{0Z^N0kmuGv_J8bMF6GrJaci!wj^YL-$+hDe;R@h)h78VwUS67RCkWtpQ z#pj|U#y#I#YRH^O6uipoVYDtcsbDxo2RZ(Ii<Hl#zLE(H-!5>|x%$|-rxRt%Q(98O zN0%fYB$BDFuBAl`S`6IIVp*>li^Q;>cd>0hYX&<<dWi0mQKDkL{M_7J<F`Lh!jkN3 zN0PRU7&Vk|#s@eAD*zSG29-sthVf$Sn;*w}LMr|~1{M7p6bx8-#44bye?bMxdMNx3 zc`=nWg$^j|DONu#q}c_kNPx0l<xeuG=+~fN_>;h&2PS~B{;z~=OGvK5y(TDxms6y< z=his=vuIN`&pVVwyCjfY#RbN{28Nb6$LZ4=)czVsaD(J1(l0^7c$6uIX81e#)5;A2 zr#sXI0wa(drH5HKe+`U{P4TDY=kwa!+1E*b2$zb!S$hc8>i>%1p(WeW)4t7O!~gb{ zvHj*6S6TlcwgAqvVyRZ9TooU<B&7JQyHR*<MD2TsG+q+*yhHT|?0+}rkS%V+=YJaW zH_o0Kgf3sw1+$*^xQZj|7hHvmO-%=u$Ia-+Cnq5!BqYeDulvT;HR1h<#E>=A$p4h> zY^X}Zy^29{;SBShWLCGOe>n`XrD;)gl^)e*{n1fTE2Wsr>UU0?{oj>B)OB>^vNAIp z>Pky_?>;RV)bv(V3~y*?IRDby3#G|_TcQM&NM_w~`+o(v(YAM%0sbG5y)ql@cCKr; zZCE;WR;{Y3nLO~6Xn*wZOt5Y=`TMB_6rLQqPo!sN)@YvC0bKMR?C<Y$t`7{%MchMg zWhng1vLO-bx_D|~V)EUm$#TvKLB-ljd$O4yfeU+X`oT=!|7g9Bn%g5{-S>NcBICdT zvcsb^bLoP;oevdlvxp0n>gLH1JdnZf!T#{aMlvdMk``@`5yC<1tAUFt*Wfsk^@)mC z=N(@`J#GKa?sAhi2Aqe}z2TBssxwZ0T~qZ+a`QiIE*kBG(dF95x@_e5Oz+H?7n=ER zMiB(8S@qr_>Jc6ReTKW)BN^uU`V{FJp7c<mLNPHhc(5GORA6`(5*8oNbgC}|F-Q$1 zMf|5L*UjzY%COSp=Io3LXj`$(aF7SWZdF&mxS!4lN#?j-mWij2Lgc@G4X>@Kxl>SB zXr}?XC%q#7TVDdJ&b@dgSk#dU?>@`p;hOM;4mgRmm(Tq)6nC9}Yti8TH3c*~o>Ol@ z8+Ai}|MJ=1-OXtQy^=KK0qZ$jCLgO)5@;;qdzDnJugqq?nJ>>yoZ1h2(J;{8Z>_DX zd;V>6)2XJmmISmQ!&+29ivX`^0zaUQrAkiNME2zJ_v#UR6(pCimb?GAFRHA2arK^E z9h;vf+$`z(NtLk8O5xnkqvNN-bU||D_yXBkS>{r*vK6^+-e_oPY2|1<ct8k3iJEzZ 
zuTjy&^6BMo<gec$K~MT#8CJ8f_1H&sUSoUX*^}{=`J90+HK3uYqw-P7=J|gCt7kZn literal 0 HcmV?d00001 diff --git a/evalNE_script.py b/evalNE_script.py index faa3c6e..ceef756 100644 --- a/evalNE_script.py +++ b/evalNE_script.py @@ -1,13 +1,17 @@ +import argparse + +from lib.utils import load_edgelist + from evalne.evaluation.evaluator import LPEvaluator from evalne.evaluation.split import EvalSplit as LPEvalSplit from evalne.evaluation.score import Scoresheet from evalne.utils import preprocess as pp + import networkx as nx from tqdm import tqdm -from lib.utils import load_edgelist -import argparse + parser = argparse.ArgumentParser() parser.add_argument("graph_filename") diff --git a/eval_mixed_model.py b/eval_mixed_model.py index 22c8b7b..aaa89da 100644 --- a/eval_mixed_model.py +++ b/eval_mixed_model.py @@ -1,17 +1,17 @@ # coding = utf-8 import argparse import time +import glob + +from lib.erosion_model import eval_erosion_model +from lib.utils import load_edgelist import pandas as pd +import networkx as nx -from lib.random import mixed_model_spat_sbm -from lib.erosion_model import eval_erosion_model from joblib import Parallel,delayed -import networkx as nx -import glob from tqdm import tqdm -from lib.utils import load_edgelist parser = argparse.ArgumentParser() parser.add_argument("graph_dir") diff --git a/generate_graph_atlas.py b/generate_graph_atlas.py index 1c90fe9..844edb1 100644 --- a/generate_graph_atlas.py +++ b/generate_graph_atlas.py @@ -1,15 +1,17 @@ # coding = utf-8 - -import networkx as nx -import matplotlib.pyplot as plt -import seaborn as sns import glob -import numpy as np import re -import pandas as pd import argparse import os +import pandas as pd +import numpy as np +import networkx as nx + +import matplotlib.pyplot as plt +import seaborn as sns + + parser = argparse.ArgumentParser() parser.add_argument("graph_directory") parser.add_argument("output_directory") diff --git a/generate_mixed_model_graph.py b/generate_mixed_model_graph.py index 
6a6d80c..2eb32e9 100644 --- a/generate_mixed_model_graph.py +++ b/generate_mixed_model_graph.py @@ -1,11 +1,12 @@ # coding = utf-8 import itertools import os +import argparse + +import lib.random as ra import networkx as nx -import argparse from tqdm import tqdm -import lib.random as ra # COMMAND PARSING parser = argparse.ArgumentParser() diff --git a/generate_theoric_random_graph.py b/generate_theoric_random_graph.py index 51ca442..fba4cc2 100644 --- a/generate_theoric_random_graph.py +++ b/generate_theoric_random_graph.py @@ -1,11 +1,13 @@ # coding = utf-8 import itertools import os +import argparse + +import lib.random as ra import networkx as nx -import argparse from tqdm import tqdm -import lib.random as ra + # COMMAND PARSING parser = argparse.ArgumentParser() diff --git a/lib/draw.py b/lib/draw.py index a1f8d6b..5f13a18 100644 --- a/lib/draw.py +++ b/lib/draw.py @@ -1,13 +1,13 @@ -import matplotlib.pyplot as plt -import matplotlib.patheffects as path_effects -import seaborn as sns -import networkx as nx +from glob import glob + import pandas as pd import numpy as np -from glob import glob +import networkx as nx from fa2 import ForceAtlas2 - +import matplotlib.pyplot as plt +import matplotlib.patheffects as path_effects +import seaborn as sns def get_force_atlas(weight_influence=0, scaling_ratio=3.0, gravity=5): """ diff --git a/lib/erosion_model.py b/lib/erosion_model.py index 48eb731..74c3306 100644 --- a/lib/erosion_model.py +++ b/lib/erosion_model.py @@ -1,8 +1,5 @@ # coding = utf-8 -from sklearn.linear_model import LogisticRegression -from sklearn.metrics import roc_auc_score - from .link_prediction_eval import get_auc_heuristics, split_train_test, get_all_possible_edges from .lambda_func import euclid_dist as dist from .lambda_func import hash_func @@ -15,6 +12,9 @@ import networkx as nx import numpy as np float_epsilon = np.finfo(float).eps +from sklearn.linear_model import LogisticRegression +from sklearn.metrics import roc_auc_score + VERBOSE = True 
def log(x): if VERBOSE: diff --git a/lib/helpers.py b/lib/helpers.py index e5db269..a71fe46 100644 --- a/lib/helpers.py +++ b/lib/helpers.py @@ -1,14 +1,14 @@ +from .link_prediction_eval import get_all_possible_edges + import numpy as np import networkx as nx import pandas as pd -from .link_prediction_eval import get_all_possible_edges - - try: import graph_tool as gt except: - pass + raise ImportError("Graph-tool must be installed !") + def parse_evalne_output(string): def foo(x): diff --git a/lib/link_prediction_eval.py b/lib/link_prediction_eval.py index c022e83..0b69165 100644 --- a/lib/link_prediction_eval.py +++ b/lib/link_prediction_eval.py @@ -1,10 +1,11 @@ # coding = utf-8 +from .lambda_func import hash_func from evalne.evaluation.evaluator import LPEvaluator from evalne.evaluation.split import EvalSplit as LPEvalSplit from evalne.utils import preprocess as pp -from .lambda_func import hash_func + def get_auc_heuristics(G,timeout=60): H, _ = pp.prep_graph(G.copy(),maincc=True,relabel=False) diff --git a/lib/random.py b/lib/random.py index 5b4f1c4..70cdf8d 100644 --- a/lib/random.py +++ b/lib/random.py @@ -1,19 +1,20 @@ # coding = utf-8 from collections import Iterable +import random import numpy as np -import networkx as nx import pandas as pd +import networkx as nx from networkx.generators.degree_seq import _to_stublist -import random -float_epsilon = np.finfo(float).eps - +float_epsilon = np.finfo(float).eps def powerlaw(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1): """ Return a degree distribution that fit the power law and specified number of edges and vertices. + ddd + Parameters ---------- nb_nodes : int @@ -64,6 +65,7 @@ def powerlaw(nb_nodes, nb_edges, exponent=2, tries=100, min_deg=1): def get_countries_coords(): """ Return the coordinates of each country in the world. 
+ Returns ------- np.ndarray @@ -108,6 +110,7 @@ def powerlaw_graph(nb_nodes, nb_edges, exponent=2, tries=1000, min_deg=0): """ Generate a graph with a defined number of vertices, edges, and a degree distribution that fit the power law. Using the Molloy-Reed algorithm to + Parameters ---------- nb_nodes : int @@ -157,6 +160,7 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n self_link=False, weighted=False): """ Generate a spatial graph with a specific number of vertices and edges + Parameters ---------- nb_nodes : int @@ -253,6 +257,7 @@ def spatial_graph(nb_nodes, nb_edges, coords="country", dist_func=lambda a, b: n def ER_graph(nb_nodes, nb_edges): """ Generate a random graph with a specific nb of nodes and edges. + Parameters ---------- nb_nodes : int @@ -269,6 +274,7 @@ def ER_graph(nb_nodes, nb_edges): def stochastic_block_model_graph(nb_nodes, nb_edges, nb_com, percentage_edge_betw, verbose=False): """ Generate a stochastic block model graph with defined number of vertices and edges. + Parameters ---------- nb_nodes : int @@ -352,6 +358,7 @@ def equilibrate(G, nb_nodes, percentage_edge_betw, percentage_edge_within, inter """ Sometimes the generated graph from the stochastic block model have some missing nodes due to the sampling method. This function fix this issue. 
+ Parameters ---------- G @@ -512,57 +519,3 @@ def mixed_model_spat_sbm(nb_nodes, nb_edges, nb_com, alpha, percentage_edge_betw return G2#,all_probs_sbm,all_probs_spa - -def get_sbm_probs(G, percentage_edge_betw, verbose=False): - hash_func = lambda x: "_".join(sorted([str(x[0]), str(x[1])])) - def nb_of_pair(N): - return (N*(N-1))/2 - - block_assign = nx.get_node_attributes(G, "block") - nb_com = len(set(block_assign.values())) - nb_nodes=len(G) - nb_edges = G.size() - b_assign_array = np.asarray(list(nx.get_node_attributes(G,"block").values())) - - - - u_in = sum([nb_of_pair((b_assign_array==b).sum()) for b in range(nb_com)]) - u_out = nb_of_pair(len(G)) - u_in - l_out = nb_edges*percentage_edge_betw - p_out = l_out/u_out - l_in = nb_edges - l_out - - p_in = l_in / u_in - - inter_edges, intra_edges = get_inter_intra_edges(G,G.is_directed()) - inter_edges = np.asarray(inter_edges) - intra_edges = np.asarray(intra_edges) - inter_N, intra_N = len(inter_edges), len(intra_edges) - probs_inter = np.ones(inter_N) * p_out - probs_intra = np.ones(intra_N) * p_in - - all_edges = np.concatenate((inter_edges, intra_edges)) - all_probs = np.concatenate((probs_inter, probs_intra)) - del probs_inter - del probs_intra - return all_edges,all_probs - - -def get_spat_probs(G,dist = lambda a,b : np.linalg.norm(a-b)**2): - hash_func = lambda x: "_".join(sorted([str(x[0]), str(x[1])])) - pos = nx.get_node_attributes(G, "pos") - spat_model = lambda u, v: 1 / (float_epsilon + dist(pos[u], pos[v])) - register = set([]) - edges, probs = [], [] - for n1 in list(G.nodes()): - for n2 in list(G.nodes()): - if n1 != n2 and hash_func((n1, n2)) not in register: - edges.append([n1, n2]) - probs.append(spat_model(n1, n2)) - register.add(hash_func((n1, n2))) - - return edges, probs - - - - diff --git a/lib/visualisation.py b/lib/visualisation.py index ed5d6f4..9e8bdab 100644 --- a/lib/visualisation.py +++ b/lib/visualisation.py @@ -1,13 +1,12 @@ # coding = utf-8 - -import pandas as pd -import 
seaborn as sns -import matplotlib.pyplot as plt import re import os +import pandas as pd import networkx as nx +import seaborn as sns +import matplotlib.pyplot as plt def get_graph_attr(fn, graph_dir): g_fn = os.path.join(graph_dir, fn) diff --git a/requirements.txt b/requirements.txt index 4d553c6..fd69055 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,3 +10,5 @@ seaborn joblib tqdm scikit-learn +fa2 +numpydoc -- GitLab