From cb9be46fbe07cec3ca789cc3e0f9e51c2b43b8f9 Mon Sep 17 00:00:00 2001 From: Ludovic Moncla <moncla.ludovic@gmail.com> Date: Sat, 10 Dec 2022 17:59:29 +0100 Subject: [PATCH] Update Normclass2graph.ipynb --- notebooks/Normclass2graph.ipynb | 153 +++++++------------------------- 1 file changed, 33 insertions(+), 120 deletions(-) diff --git a/notebooks/Normclass2graph.ipynb b/notebooks/Normclass2graph.ipynb index 6b62725..47e4f3f 100644 --- a/notebooks/Normclass2graph.ipynb +++ b/notebooks/Normclass2graph.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 79, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -28,27 +28,16 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2908" - ] - }, - "execution_count": 80, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "len(df['edda_class'].unique())" ] }, { "cell_type": "code", - "execution_count": 83, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -57,27 +46,16 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2905" - ] - }, - "execution_count": 84, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "len(df['edda_class'].unique())" ] }, { "cell_type": "code", - "execution_count": 85, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -86,42 +64,18 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "2904" - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "len(df['edda_class'].unique())" ] }, { "cell_type": "code", - "execution_count": 87, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array(['Grammaire', 'Ecrivains modernes', 'Calendrier Julien', ...,\n", - " 'Jeux militaires françois', 'Gravure antique sur métal',\n", - " 'Inscription | Médailles | Poésie'], dtype=object)" - ] - }, - "execution_count": 87, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "normclasses = df['edda_class'].unique()\n", "normclasses" @@ -129,7 +83,7 @@ }, { "cell_type": "code", - "execution_count": 144, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -138,55 +92,33 @@ }, { "cell_type": "code", - "execution_count": 145, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "edda_class\n", - "Abus des langues 1\n", - "Accord de sons 1\n", - "Acoustique 6\n", - "Agonistique 1\n", - "Agriculture 127\n", - " ... \n", - "terme usité parmi les Maréchaux 1\n", - "vaisselle d'étain 1\n", - "Ébénisterie 3\n", - "Ébénisterie | Tapisserie 1\n", - "Économie rustique 3\n", - "Length: 2904, dtype: int64" - ] - }, - "execution_count": 145, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_group" ] }, { "cell_type": "code", - "execution_count": 151, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "d = {}\n", "for normclass, freq in df_group.items():\n", - " #print(normclass, freq)\n", + " print(normclass, freq)\n", "\n", " try: \n", " n = [x.strip() for x in normclass.split('|')] # corrige le probleme du normclass 'Géographie moderne |'\n", " \n", - " for i in range(1,len(n)):\n", - " t = (n[0],n[i])\n", - " if t not in d:\n", - " d[t] = freq\n", - " else:\n", - " d[t] += freq\n", + " for i in range(0,len(n)):\n", + " for j in range(i+1,len(n)):\n", + " t = (n[i],n[j])\n", + " if t not in d:\n", + " d[t] = freq\n", + " else:\n", + " d[t] += freq\n", "\n", " except AttributeError:\n", " print(normclass)" @@ -194,31 +126,20 @@ }, { "cell_type": "code", - "execution_count": 152, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "1257" - ] - }, - "execution_count": 152, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "len(d.keys())" ] }, { "cell_type": "code", - "execution_count": 162, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "G = nx.DiGraph()\n", + "G = nx.Graph()\n", "content = 'node1,node2,freq\\n'\n", "for key, val in d.items():\n", " G.add_weighted_edges_from([(key[0], key[1], val)])\n", @@ -227,17 +148,9 @@ }, { "cell_type": "code", - "execution_count": 156, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Nodes: 576, Edges: 1257\n" - ] - } - ], + "outputs": [], "source": [ "print(f\"Nodes: {G.number_of_nodes()}, Edges: {G.number_of_edges()}\")" ] @@ -253,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 164, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -263,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 158, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -272,7 +185,7 @@ }, { "cell_type": "code", - "execution_count": 159, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ -- GitLab