From cb9be46fbe07cec3ca789cc3e0f9e51c2b43b8f9 Mon Sep 17 00:00:00 2001
From: Ludovic Moncla <moncla.ludovic@gmail.com>
Date: Sat, 10 Dec 2022 17:59:29 +0100
Subject: [PATCH] Update Normclass2graph.ipynb

---
 notebooks/Normclass2graph.ipynb | 153 +++++++-------------------------
 1 file changed, 33 insertions(+), 120 deletions(-)

diff --git a/notebooks/Normclass2graph.ipynb b/notebooks/Normclass2graph.ipynb
index 6b62725..47e4f3f 100644
--- a/notebooks/Normclass2graph.ipynb
+++ b/notebooks/Normclass2graph.ipynb
@@ -9,7 +9,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 7,
+      "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -19,7 +19,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 79,
+      "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -28,27 +28,16 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 80,
+      "execution_count": null,
       "metadata": {},
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "2908"
-            ]
-          },
-          "execution_count": 80,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
+      "outputs": [],
       "source": [
         "len(df['edda_class'].unique())"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 83,
+      "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -57,27 +46,16 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 84,
+      "execution_count": null,
       "metadata": {},
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "2905"
-            ]
-          },
-          "execution_count": 84,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
+      "outputs": [],
       "source": [
         "len(df['edda_class'].unique())"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 85,
+      "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -86,42 +64,18 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 86,
+      "execution_count": null,
       "metadata": {},
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "2904"
-            ]
-          },
-          "execution_count": 86,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
+      "outputs": [],
       "source": [
         "len(df['edda_class'].unique())"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 87,
+      "execution_count": null,
       "metadata": {},
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "array(['Grammaire', 'Ecrivains modernes', 'Calendrier Julien', ...,\n",
-              "       'Jeux militaires franÃ§ois', 'Gravure antique sur mÃ©tal',\n",
-              "       'Inscription | MÃ©dailles | PoÃ©sie'], dtype=object)"
-            ]
-          },
-          "execution_count": 87,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
+      "outputs": [],
       "source": [
         "normclasses = df['edda_class'].unique()\n",
         "normclasses"
@@ -129,7 +83,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 144,
+      "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -138,55 +92,33 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 145,
+      "execution_count": null,
       "metadata": {},
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "edda_class\n",
-              "Abus des langues                     1\n",
-              "Accord de sons                       1\n",
-              "Acoustique                           6\n",
-              "Agonistique                          1\n",
-              "Agriculture                        127\n",
-              "                                  ... \n",
-              "terme usitÃ© parmi les MarÃ©chaux      1\n",
-              "vaisselle d'Ã©tain                    1\n",
-              "Ã‰bÃ©nisterie                          3\n",
-              "Ã‰bÃ©nisterie | Tapisserie             1\n",
-              "Ã‰conomie rustique                    3\n",
-              "Length: 2904, dtype: int64"
-            ]
-          },
-          "execution_count": 145,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
+      "outputs": [],
       "source": [
         "df_group"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 151,
+      "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
         "d = {}\n",
         "for normclass, freq in df_group.items():\n",
-        "    #print(normclass, freq)\n",
+        "    print(normclass, freq)\n",
         "\n",
         "    try:        \n",
         "        n = [x.strip() for x in normclass.split('|')] # corrige le probleme du normclass 'GÃ©ographie moderne  |'\n",
         "        \n",
-        "        for i in range(1,len(n)):\n",
-        "            t = (n[0],n[i])\n",
-        "            if t not in d:\n",
-        "                d[t] = freq\n",
-        "            else:\n",
-        "                d[t] += freq\n",
+        "        for i in range(0,len(n)):\n",
+        "            for j in range(i+1,len(n)):\n",
+        "                t = (n[i],n[j])\n",
+        "                if t not in d:\n",
+        "                    d[t] = freq\n",
+        "                else:\n",
+        "                    d[t] += freq\n",
         "\n",
         "    except AttributeError:\n",
         "        print(normclass)"
@@ -194,31 +126,20 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 152,
+      "execution_count": null,
       "metadata": {},
-      "outputs": [
-        {
-          "data": {
-            "text/plain": [
-              "1257"
-            ]
-          },
-          "execution_count": 152,
-          "metadata": {},
-          "output_type": "execute_result"
-        }
-      ],
+      "outputs": [],
       "source": [
         "len(d.keys())"
       ]
     },
     {
       "cell_type": "code",
-      "execution_count": 162,
+      "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
-        "G = nx.DiGraph()\n",
+        "G = nx.Graph()\n",
         "content = 'node1,node2,freq\\n'\n",
         "for key, val in d.items():\n",
         "    G.add_weighted_edges_from([(key[0], key[1], val)])\n",
@@ -227,17 +148,9 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 156,
+      "execution_count": null,
       "metadata": {},
-      "outputs": [
-        {
-          "name": "stdout",
-          "output_type": "stream",
-          "text": [
-            "Nodes: 576, Edges: 1257\n"
-          ]
-        }
-      ],
+      "outputs": [],
       "source": [
         "print(f\"Nodes: {G.number_of_nodes()}, Edges: {G.number_of_edges()}\")"
       ]
@@ -253,7 +166,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 164,
+      "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -263,7 +176,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 158,
+      "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
@@ -272,7 +185,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 159,
+      "execution_count": null,
       "metadata": {},
       "outputs": [],
       "source": [
-- 
GitLab