From 062a8fdb69667981aa54e1fd5607072041e19574 Mon Sep 17 00:00:00 2001 From: Fize Jacques <jacques.fize@cirad.fr> Date: Wed, 24 Mar 2021 14:28:05 +0100 Subject: [PATCH] Made a nice script for baseline model training --- notebooks/baseline.ipynb | 2242 ++++++++++++++------------------------ train_baseline.py | 119 ++ 2 files changed, 916 insertions(+), 1445 deletions(-) create mode 100644 train_baseline.py diff --git a/notebooks/baseline.ipynb b/notebooks/baseline.ipynb index acdeb08..2fdffd4 100644 --- a/notebooks/baseline.ipynb +++ b/notebooks/baseline.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -37,42 +37,30 @@ "#PROGRESS BAR\n", "from tqdm import tqdm\n", "\n", - "import joblib\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ + "import joblib\n", "from ngram import NGram" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[31mFR_adjacent.csv\u001b[m\u001b[m* \u001b[31mGB_cooc.csv\u001b[m\u001b[m* TX_IDF_cooc.csv\n", - "\u001b[31mFR_cooc.csv\u001b[m\u001b[m* \u001b[31mGB_cooc_perm.csv\u001b[m\u001b[m* TX_IDF_inclusion.csv\n", - "\u001b[31mFR_inclusion.csv\u001b[m\u001b[m* \u001b[31mGB_inclusion.csv\u001b[m\u001b[m* \u001b[31mUS_adjacent.csv\u001b[m\u001b[m*\n", - "\u001b[31mGB_adjacent.csv\u001b[m\u001b[m* \u001b[31mGB_inclusion_perm.csv\u001b[m\u001b[m* \u001b[31mUS_cooc.csv\u001b[m\u001b[m*\n", - "\u001b[31mGB_adjacent_perm.csv\u001b[m\u001b[m* TX_IDF_adjacent.csv \u001b[31mUS_inclusion.csv\u001b[m\u001b[m*\n" + "/Users/jacquesfize/Dropbox/Projets/2020/toponym-geocoding\n" ] } ], "source": [ - "ls data_new_/" + "cd .." 
] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -97,7 +85,27 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['FR_per_pair_cooc.csv']\n" + ] + } + ], + "source": [ + "from glob import glob\n", + "fns = glob(\"FR_per_pair_*.csv\")\n", + "fns= [fns[0]]\n", + "print(fns)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -133,52 +141,52 @@ " <tbody>\n", " <tr>\n", " <th>0</th>\n", - " <td>3019599</td>\n", - " <td>Essonne</td>\n", - " <td>Île-de-France</td>\n", - " <td>48.50000</td>\n", - " <td>2.25000</td>\n", - " <td>24422</td>\n", + " <td>12105028</td>\n", + " <td>Auvergne</td>\n", + " <td>France 3</td>\n", + " <td>45.700000</td>\n", + " <td>3.300000</td>\n", + " <td>27616</td>\n", " <td>train</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", - " <td>3013657</td>\n", - " <td>Hauts-de-Seine</td>\n", - " <td>Île-de-France</td>\n", - " <td>48.85000</td>\n", - " <td>2.19293</td>\n", - " <td>23982</td>\n", - " <td>train</td>\n", + " <td>12105028</td>\n", + " <td>Auvergne</td>\n", + " <td>Château de Chavaniac</td>\n", + " <td>45.700000</td>\n", + " <td>3.300000</td>\n", + " <td>27616</td>\n", + " <td>test</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", - " <td>2975246</td>\n", - " <td>Seine-Saint-Denis</td>\n", - " <td>Île-de-France</td>\n", - " <td>48.91421</td>\n", - " <td>2.47604</td>\n", - " <td>23983</td>\n", + " <td>12105028</td>\n", + " <td>Auvergne</td>\n", + " <td>Parc animalier d'Auvergne</td>\n", + " <td>45.700000</td>\n", + " <td>3.300000</td>\n", + " <td>27616</td>\n", " <td>test</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", - " <td>2968815</td>\n", - " <td>Paris</td>\n", - " <td>Île-de-France</td>\n", - " <td>48.85340</td>\n", - " <td>2.34860</td>\n", - " <td>23982</td>\n", - " <td>train</td>\n", + " <td>12105028</td>\n", + " <td>Auvergne</td>\n", + " <td>Dognon</td>\n", + " <td>45.700000</td>\n", + " <td>3.300000</td>\n", + " <td>27616</td>\n", + " <td>test</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", - " <td>2971090</td>\n", - " <td>Val-de-Marne</td>\n", - " <td>Île-de-France</td>\n", - " <td>48.78149</td>\n", - " <td>2.49331</td>\n", - " <td>24423</td>\n", + " <td>12105029</td>\n", + " <td>Alpes-de-Haute-Provence</td>\n", + " <td>Chaudon-Norante</td>\n", + " <td>44.095278</td>\n", + " <td>6.240000</td>\n", + " <td>29532</td>\n", " <td>train</td>\n", " </tr>\n", " <tr>\n", @@ -192,124 +200,150 @@ " <td>...</td>\n", " </tr>\n", " <tr>\n", - " <th>7017</th>\n", - " <td>2988500</td>\n", - " <td>Paris Orly Airport</td>\n", - " <td>Paris</td>\n", - " <td>48.72528</td>\n", - " <td>2.35944</td>\n", - " <td>23982</td>\n", - " <td>test</td>\n", + " <th>376083</th>\n", + " <td>12221787</td>\n", + " <td>Church of Saint-Médard, Tremblay-en-France</td>\n", + " <td>Basilica of Saint-Denis</td>\n", + " <td>48.979600</td>\n", + " <td>2.558030</td>\n", + " <td>23983</td>\n", + " <td>train</td>\n", " </tr>\n", " <tr>\n", - " <th>7018</th>\n", - " <td>11983675</td>\n", - " <td>Opera Royal</td>\n", - " <td>Château de Versailles</td>\n", - " <td>48.80600</td>\n", - " <td>2.12290</td>\n", - " <td>23982</td>\n", - " <td>test</td>\n", + " <th>376084</th>\n", + " <td>12221787</td>\n", + " <td>Church of Saint-Médard, Tremblay-en-France</td>\n", + " <td>Seine-Saint-Denis</td>\n", + " <td>48.979600</td>\n", + " <td>2.558030</td>\n", + " <td>23983</td>\n", + " 
<td>train</td>\n", " </tr>\n", " <tr>\n", - " <th>7019</th>\n", - " <td>11983678</td>\n", - " <td>Chapelle royale</td>\n", - " <td>Château de Versailles</td>\n", - " <td>48.80503</td>\n", - " <td>2.12225</td>\n", - " <td>23982</td>\n", - " <td>train</td>\n", + " <th>376085</th>\n", + " <td>12221787</td>\n", + " <td>Church of Saint-Médard, Tremblay-en-France</td>\n", + " <td>Tremblay-en-France</td>\n", + " <td>48.979600</td>\n", + " <td>2.558030</td>\n", + " <td>23983</td>\n", + " <td>test</td>\n", " </tr>\n", " <tr>\n", - " <th>7020</th>\n", - " <td>6284982</td>\n", - " <td>Petit Trianon</td>\n", - " <td>Château de Versailles</td>\n", - " <td>48.81545</td>\n", - " <td>2.10976</td>\n", - " <td>23982</td>\n", - " <td>train</td>\n", + " <th>376086</th>\n", + " <td>12221788</td>\n", + " <td>Church of Saint-Éloi, Dunkirk</td>\n", + " <td>Dunkirk</td>\n", + " <td>51.035556</td>\n", + " <td>2.376944</td>\n", + " <td>21426</td>\n", + " <td>test</td>\n", " </tr>\n", " <tr>\n", - " <th>7021</th>\n", - " <td>6284981</td>\n", - " <td>Grand Trianon</td>\n", - " <td>Château de Versailles</td>\n", - " <td>48.81449</td>\n", - " <td>2.10478</td>\n", - " <td>23982</td>\n", + " <th>376087</th>\n", + " <td>12221788</td>\n", + " <td>Church of Saint-Éloi, Dunkirk</td>\n", + " <td>Dunkirk</td>\n", + " <td>51.035556</td>\n", + " <td>2.376944</td>\n", + " <td>21426</td>\n", " <td>test</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", - "<p>7022 rows × 7 columns</p>\n", + "<p>376088 rows × 7 columns</p>\n", "</div>" ], "text/plain": [ - " ID toponym toponym_context latitude \\\n", - "0 3019599 Essonne Île-de-France 48.50000 \n", - "1 3013657 Hauts-de-Seine Île-de-France 48.85000 \n", - "2 2975246 Seine-Saint-Denis Île-de-France 48.91421 \n", - "3 2968815 Paris Île-de-France 48.85340 \n", - "4 2971090 Val-de-Marne Île-de-France 48.78149 \n", - "... ... ... ... ... \n", - "7017 2988500 Paris Orly Airport Paris 48.72528 \n", - "7018 11983675 Opera Royal Château de Versailles 48.80600 \n", - "7019 11983678 Chapelle royale Château de Versailles 48.80503 \n", - "7020 6284982 Petit Trianon Château de Versailles 48.81545 \n", - "7021 6284981 Grand Trianon Château de Versailles 48.81449 \n", + " ID toponym \\\n", + "0 12105028 Auvergne \n", + "1 12105028 Auvergne \n", + "2 12105028 Auvergne \n", + "3 12105028 Auvergne \n", + "4 12105029 Alpes-de-Haute-Provence \n", + "... ... ... \n", + "376083 12221787 Church of Saint-Médard, Tremblay-en-France \n", + "376084 12221787 Church of Saint-Médard, Tremblay-en-France \n", + "376085 12221787 Church of Saint-Médard, Tremblay-en-France \n", + "376086 12221788 Church of Saint-Éloi, Dunkirk \n", + "376087 12221788 Church of Saint-Éloi, Dunkirk \n", "\n", - " longitude hp_split split \n", - "0 2.25000 24422 train \n", - "1 2.19293 23982 train \n", - "2 2.47604 23983 test \n", - "3 2.34860 23982 train \n", - "4 2.49331 24423 train \n", - "... ... ... ... \n", - "7017 2.35944 23982 test \n", - "7018 2.12290 23982 test \n", - "7019 2.12225 23982 train \n", - "7020 2.10976 23982 train \n", - "7021 2.10478 23982 test \n", + " toponym_context latitude longitude hp_split split \n", + "0 France 3 45.700000 3.300000 27616 train \n", + "1 Château de Chavaniac 45.700000 3.300000 27616 test \n", + "2 Parc animalier d'Auvergne 45.700000 3.300000 27616 test \n", + "3 Dognon 45.700000 3.300000 27616 test \n", + "4 Chaudon-Norante 44.095278 6.240000 29532 train \n", + "... ... ... ... ... ... 
\n", + "376083 Basilica of Saint-Denis 48.979600 2.558030 23983 train \n", + "376084 Seine-Saint-Denis 48.979600 2.558030 23983 train \n", + "376085 Tremblay-en-France 48.979600 2.558030 23983 test \n", + "376086 Dunkirk 51.035556 2.376944 21426 test \n", + "376087 Dunkirk 51.035556 2.376944 21426 test \n", "\n", - "[7022 rows x 7 columns]" + "[376088 rows x 7 columns]" ] }, - "execution_count": 18, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "files = [\"TX_IDF_inclusion.csv\"]#[\"TX_IDF_adjacent.csv\",\"TX_IDF_cooc.csv\",\"TX_IDF_inclusion.csv\"]\n", - "basedir= \"data_new_/\"\n", - "df = pd.read_csv(basedir+files[0],sep=\"\\t\",index_col = 0)\n", - "if not len(files)<2:\n", - " for fn in files[1:]:\n", - " df = pd.concat((df,pd.read_csv(basedir + fn,sep=\"\\t\",index_col = 0)))\n", + "df = pd.read_csv(fns[0],sep=\"\\t\",index_col = 0)\n", + "if not len(fns)<2:\n", + " for fn in fns[1:]:\n", + " df = pd.concat((df,pd.read_csv(fn,sep=\"\\t\",index_col = 0)))\n", "df" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "index = NGram(n=4)" + "index = NGram(n=4)\n", + "data_vectorizer = Pipeline([\n", + " ('vect', CountVectorizer(tokenizer=index.split)),\n", + " ('tfidf', TfidfTransformer()),\n", + "])" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ - "data_vectorizer = Pipeline([\n", - " ('vect', CountVectorizer(tokenizer=index.split)),\n", - " ('tfidf', TfidfTransformer()),\n", - "])" + "from joblib import dump,load" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['test.pkl']" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dump(data_vectorizer,\"test.pkl\")" ] }, { @@ -323,7 +357,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -332,7 +366,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -343,7 +377,7 @@ " ('tfidf', TfidfTransformer())])" ] }, - "execution_count": 23, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -354,14 +388,7 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": 24, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -371,7 +398,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -385,550 +412,27 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/site-packages/sklearn/model_selection/_split.py:670: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.\n", - " warnings.warn((\"The least populated class in y has only %d\"\n", - "/usr/local/lib/python3.8/site-packages/sklearn/naive_bayes.py:511: UserWarning: alpha too small will result in numeric errors, setting alpha = 1.0e-10\n", - " warnings.warn('alpha too small will result in numeric errors, '\n", - "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision and F-score are ill-defined and being 
set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n", - "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n", - "/usr/local/lib/python3.8/site-packages/sklearn/model_selection/_split.py:670: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.\n", - " warnings.warn((\"The least populated class in y has only %d\"\n" + "/Users/jacquesfize/opt/anaconda3/envs/my_env/lib/python3.7/site-packages/sklearn/model_selection/_split.py:672: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.\n", + " % (min_groups, self.n_splits)), UserWarning)\n", + "/Users/jacquesfize/opt/anaconda3/envs/my_env/lib/python3.7/site-packages/sklearn/naive_bayes.py:512: UserWarning: alpha too small will result in numeric errors, setting alpha = 1.0e-10\n", + " 'setting alpha = %.1e' % _ALPHA_MIN)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Best Parameters : {'alpha': 0}\n", - " precision recall f1-score support\n", - "\n", - " 23545 0.00 0.00 0.00 10\n", - " 23546 0.50 0.13 0.21 31\n", - " 23547 1.00 0.17 0.29 6\n", - " 23981 0.00 0.00 0.00 11\n", - " 23982 0.62 0.96 0.75 557\n", - " 23983 0.74 0.51 0.61 111\n", - " 23984 0.00 0.00 0.00 2\n", - " 24422 0.62 0.35 0.45 71\n", - " 24423 0.70 0.59 0.64 105\n", - " 24424 0.33 0.07 0.11 15\n", - " 24867 0.80 0.40 0.53 30\n", - " 24868 0.00 0.00 0.00 6\n", - " 39537 1.00 1.00 1.00 3\n", - " 40045 0.00 0.00 0.00 1\n", - " 40047 0.00 0.00 0.00 1\n", - " 40558 1.00 0.50 0.67 4\n", - " 40559 1.00 0.50 0.67 2\n", - " 40561 0.00 0.00 0.00 1\n", - " 41070 0.00 0.00 0.00 1\n", - " 41071 1.00 0.75 0.86 4\n", - " 41072 0.00 0.00 0.00 4\n", - " 41073 0.67 1.00 0.80 2\n", - " 41583 0.85 0.58 0.69 19\n", - " 41584 0.00 0.00 0.00 2\n", - " 41585 0.00 0.00 0.00 1\n", - " 42094 0.00 0.00 0.00 0\n", - " 42095 1.00 0.67 0.80 3\n", - " 42096 1.00 1.00 1.00 2\n", - " 42097 1.00 1.00 1.00 4\n", - " 42606 0.00 0.00 0.00 3\n", - " 42607 1.00 1.00 1.00 4\n", - " 42608 0.00 0.00 0.00 0\n", - " 42609 0.00 0.00 0.00 3\n", - " 43117 0.00 0.00 0.00 2\n", - " 43118 0.50 0.50 0.50 2\n", - " 43119 1.00 1.00 1.00 1\n", - " 43120 0.00 0.00 0.00 3\n", - " 43121 1.00 1.00 1.00 3\n", - " 43122 0.00 0.00 0.00 2\n", - " 43124 0.00 0.00 0.00 0\n", - " 43630 0.00 0.00 0.00 2\n", - " 43631 0.67 0.50 0.57 4\n", - " 43632 0.00 0.00 0.00 1\n", - " 43633 0.00 0.00 0.00 1\n", - " 43634 0.00 0.00 0.00 1\n", - " 43635 0.00 0.00 0.00 2\n", - " 43636 1.00 0.55 0.71 11\n", - " 43637 0.00 0.00 0.00 1\n", - " 43638 0.00 0.00 0.00 2\n", - " 43639 1.00 0.33 0.50 3\n", - " 43640 0.00 0.00 0.00 2\n", - " 43641 0.00 0.00 0.00 1\n", - " 44142 0.67 0.67 0.67 3\n", - " 44143 0.73 0.80 0.76 10\n", - " 44144 1.00 1.00 1.00 1\n", - " 44146 0.00 0.00 0.00 1\n", - " 44147 0.00 0.00 0.00 2\n", - " 44148 0.00 0.00 0.00 0\n", - " 44149 0.00 0.00 0.00 2\n", - " 44150 0.69 0.56 0.62 16\n", - " 44151 1.00 0.29 0.44 7\n", - " 44152 0.75 0.75 0.75 4\n", - " 44153 0.00 0.00 0.00 1\n", - " 44154 0.00 0.00 0.00 1\n", - " 44654 0.00 0.00 0.00 1\n", - " 44655 1.00 0.50 0.67 4\n", - " 44656 0.00 0.00 0.00 2\n", - " 44657 0.00 0.00 0.00 2\n", - " 44658 1.00 0.50 0.67 2\n", - " 44660 0.00 
0.00 0.00 4\n", - " 44661 1.00 0.67 0.80 9\n", - " 44662 0.57 0.72 0.64 64\n", - " 44663 0.50 0.50 0.50 12\n", - " 44664 0.50 0.67 0.57 3\n", - " 44665 1.00 0.67 0.80 3\n", - " 44666 1.00 0.33 0.50 3\n", - " 45165 0.50 1.00 0.67 1\n", - " 45168 1.00 1.00 1.00 2\n", - " 45169 0.50 1.00 0.67 1\n", - " 45170 0.67 1.00 0.80 2\n", - " 45171 0.00 0.00 0.00 2\n", - " 45172 1.00 0.56 0.71 9\n", - " 45173 0.58 0.84 0.69 57\n", - " 45174 0.50 0.86 0.63 99\n", - " 45175 0.60 0.43 0.50 7\n", - " 45176 0.75 1.00 0.86 3\n", - " 45177 1.00 0.50 0.67 6\n", - " 45678 0.00 0.00 0.00 3\n", - " 45679 1.00 0.29 0.44 7\n", - " 45680 0.00 0.00 0.00 1\n", - " 45681 0.00 0.00 0.00 1\n", - " 45682 0.80 0.50 0.62 8\n", - " 45683 1.00 0.25 0.40 4\n", - " 45684 0.00 0.00 0.00 0\n", - " 45685 0.80 0.73 0.76 11\n", - " 45686 0.67 0.43 0.53 23\n", - " 45687 0.73 0.67 0.70 12\n", - " 45688 0.50 0.29 0.36 7\n", - " 45689 0.71 0.94 0.81 16\n", - " 45690 1.00 0.40 0.57 5\n", - " 46184 0.00 0.00 0.00 3\n", - " 46189 0.00 0.00 0.00 1\n", - " 46190 0.83 0.62 0.71 8\n", - " 46191 0.00 0.00 0.00 1\n", - " 46192 0.00 0.00 0.00 1\n", - " 46193 1.00 1.00 1.00 2\n", - " 46194 0.33 1.00 0.50 1\n", - " 46195 0.00 0.00 0.00 4\n", - " 46196 0.33 0.33 0.33 3\n", - " 46197 0.25 0.14 0.18 7\n", - " 46198 0.33 0.20 0.25 5\n", - " 46199 0.50 0.20 0.29 5\n", - " 46200 0.83 0.31 0.45 16\n", - " 46201 1.00 0.56 0.71 9\n", - " 46697 0.71 0.38 0.50 13\n", - " 46701 0.50 0.50 0.50 2\n", - " 46702 0.00 0.00 0.00 1\n", - " 46703 0.00 0.00 0.00 1\n", - " 46707 1.00 0.25 0.40 4\n", - " 46709 1.00 0.17 0.29 6\n", - " 46710 0.29 0.14 0.19 14\n", - " 46711 0.40 0.33 0.36 6\n", - " 46712 0.00 0.00 0.00 4\n", - " 46713 0.60 1.00 0.75 3\n", - " 46714 0.00 0.00 0.00 1\n", - " 47212 0.33 0.50 0.40 2\n", - " 47213 0.00 0.00 0.00 1\n", - " 47214 0.00 0.00 0.00 1\n", - " 47217 1.00 0.25 0.40 4\n", - " 47218 1.00 1.00 1.00 1\n", - " 47219 0.00 0.00 0.00 2\n", - " 47220 1.00 0.75 0.86 4\n", - " 47221 0.40 0.62 0.48 13\n", - " 47222 1.00 0.33 0.50 6\n", - " 47223 1.00 0.50 0.67 4\n", - " 47224 0.50 1.00 0.67 1\n", - " 47225 1.00 0.67 0.80 9\n", - " 47226 1.00 1.00 1.00 1\n", - " 47723 0.00 0.00 0.00 2\n", - " 47726 0.00 0.00 0.00 1\n", - " 47730 1.00 1.00 1.00 1\n", - " 47733 0.75 0.50 0.60 12\n", - " 47734 1.00 0.50 0.67 4\n", - " 47735 0.33 0.50 0.40 2\n", - " 47736 0.00 0.00 0.00 1\n", - " 47737 0.50 0.50 0.50 2\n", - " 47738 0.33 0.33 0.33 3\n", - " 48235 0.00 0.00 0.00 1\n", - " 48236 0.00 0.00 0.00 1\n", - " 48237 1.00 0.50 0.67 2\n", - " 48239 0.00 0.00 0.00 1\n", - " 48240 1.00 1.00 1.00 1\n", - " 48243 0.00 0.00 0.00 1\n", - " 48244 0.80 0.67 0.73 6\n", - " 48245 0.75 0.30 0.43 10\n", - " 48246 0.50 0.40 0.44 5\n", - " 48247 0.50 0.50 0.50 2\n", - " 48248 0.50 0.33 0.40 3\n", - " 48249 0.33 0.50 0.40 2\n", - " 48250 0.00 0.00 0.00 3\n", - " 48748 0.00 0.00 0.00 1\n", - " 48749 0.00 0.00 0.00 1\n", - " 48753 0.00 0.00 0.00 3\n", - " 48754 1.00 0.50 0.67 4\n", - " 48755 1.00 1.00 1.00 1\n", - " 48756 1.00 0.67 0.80 3\n", - " 48757 0.66 0.75 0.70 68\n", - " 48758 0.00 0.00 0.00 2\n", - " 48759 0.56 0.42 0.48 12\n", - " 48760 0.88 0.44 0.58 16\n", - " 48761 0.60 0.43 0.50 7\n", - " 48762 0.00 0.00 0.00 7\n", - " 49265 0.00 0.00 0.00 1\n", - " 49266 0.00 0.00 0.00 3\n", - " 49267 1.00 0.50 0.67 2\n", - " 49268 0.75 0.38 0.50 16\n", - " 49269 0.67 0.40 0.50 5\n", - " 49270 0.50 0.33 0.40 3\n", - " 49271 0.00 0.00 0.00 2\n", - " 49272 0.52 0.62 0.57 40\n", - " 49273 0.00 0.00 0.00 2\n", - " 49274 0.71 0.45 0.56 11\n", - " 49773 0.00 0.00 0.00 0\n", - " 49776 0.00 
0.00 0.00 1\n", - " 49778 0.00 0.00 0.00 1\n", - " 49779 1.00 0.33 0.50 3\n", - " 49780 0.60 0.86 0.71 70\n", - " 49781 0.00 0.00 0.00 4\n", - " 49782 0.50 0.25 0.33 4\n", - " 49783 0.00 0.00 0.00 0\n", - " 49784 0.76 0.63 0.69 65\n", - " 49785 0.59 0.82 0.68 33\n", - " 50283 0.00 0.00 0.00 1\n", - " 50288 0.00 0.00 0.00 2\n", - " 50290 0.00 0.00 0.00 1\n", - " 50291 0.38 0.45 0.42 11\n", - " 50292 0.80 0.57 0.67 7\n", - " 50293 0.67 1.00 0.80 2\n", - " 50295 0.00 0.00 0.00 3\n", - " 50296 0.44 0.40 0.42 10\n", - " 50297 1.00 0.30 0.46 10\n", - " 50801 1.00 0.25 0.40 4\n", - " 50802 0.00 0.00 0.00 2\n", - " 50804 1.00 0.50 0.67 2\n", - " 50805 1.00 1.00 1.00 3\n", - " 50806 0.00 0.00 0.00 2\n", - " 50807 0.00 0.00 0.00 3\n", - " 50808 0.25 0.17 0.20 6\n", - " 50809 0.00 0.00 0.00 0\n", - " 51313 0.00 0.00 0.00 0\n", - " 51315 0.00 0.00 0.00 1\n", - " 51316 1.00 0.50 0.67 2\n", - " 51317 0.00 0.00 0.00 1\n", - " 51318 1.00 0.50 0.67 2\n", - " 51320 0.50 1.00 0.67 1\n", - " 51827 0.00 0.00 0.00 1\n", - " 51828 0.00 0.00 0.00 1\n", - " 51829 0.00 0.00 0.00 1\n", - " 51830 0.80 1.00 0.89 4\n", - " 52340 0.40 0.80 0.53 5\n", - " 52341 0.89 0.73 0.80 22\n", - " 52851 0.00 0.00 0.00 1\n", - " 52852 1.00 0.33 0.50 3\n", - " 52853 0.83 0.71 0.77 7\n", - " 52854 0.00 0.00 0.00 4\n", - " 53362 1.00 0.25 0.40 8\n", - " 53364 0.00 0.00 0.00 1\n", - " 53875 0.00 0.00 0.00 2\n", - " 54387 0.00 0.00 0.00 0\n", - " 54388 0.00 0.00 0.00 1\n", - " 54389 0.00 0.00 0.00 0\n", - " 54899 0.00 0.00 0.00 1\n", - " 54900 0.78 0.41 0.54 17\n", - " 54901 0.67 0.36 0.47 11\n", - " 54902 1.00 0.88 0.93 8\n", - " 55412 0.33 1.00 0.50 1\n", - " 55413 1.00 0.44 0.62 9\n", - "\n", - " accuracy 0.63 2318\n", - " macro avg 0.44 0.34 0.36 2318\n", - "weighted avg 0.62 0.63 0.59 2318\n", - "\n", - "TRAIN AND EVAL sgd\n", - "Best Parameters : {'loss': 'hinge', 'penalty': 'l2'}\n", - " precision recall f1-score support\n", - "\n", - " 23545 1.00 0.10 0.18 10\n", - " 23546 0.62 0.81 0.70 31\n", - " 23547 0.00 0.00 0.00 6\n", - " 23981 0.00 0.00 0.00 11\n", - " 23982 0.91 0.96 0.93 557\n", - " 23983 0.81 0.77 0.79 111\n", - " 23984 0.00 0.00 0.00 2\n", - " 24422 0.79 0.75 0.77 71\n", - " 24423 0.71 0.70 0.70 105\n", - " 24424 0.33 0.27 0.30 15\n", - " 24867 0.76 0.87 0.81 30\n", - " 24868 1.00 0.17 0.29 6\n", - " 39537 1.00 1.00 1.00 3\n", - " 40045 0.00 0.00 0.00 1\n", - " 40047 0.00 0.00 0.00 1\n", - " 40558 0.60 0.75 0.67 4\n", - " 40559 0.67 1.00 0.80 2\n", - " 40561 0.00 0.00 0.00 1\n", - " 41070 0.00 0.00 0.00 1\n", - " 41071 0.75 0.75 0.75 4\n", - " 41072 1.00 0.25 0.40 4\n", - " 41073 0.33 1.00 0.50 2\n", - " 41582 0.00 0.00 0.00 0\n", - " 41583 0.95 1.00 0.97 19\n", - " 41584 1.00 0.50 0.67 2\n", - " 41585 0.00 0.00 0.00 1\n", - " 42095 0.67 0.67 0.67 3\n", - " 42096 1.00 1.00 1.00 2\n", - " 42097 1.00 1.00 1.00 4\n", - " 42606 0.00 0.00 0.00 3\n", - " 42607 1.00 1.00 1.00 4\n", - " 42608 0.00 0.00 0.00 0\n", - " 42609 0.00 0.00 0.00 3\n", - " 43117 0.00 0.00 0.00 2\n", - " 43118 0.67 1.00 0.80 2\n", - " 43119 1.00 1.00 1.00 1\n", - " 43120 1.00 0.33 0.50 3\n", - " 43121 1.00 1.00 1.00 3\n", - " 43122 1.00 1.00 1.00 2\n", - " 43630 0.00 0.00 0.00 2\n", - " 43631 0.50 0.25 0.33 4\n", - " 43632 0.00 0.00 0.00 1\n", - " 43633 0.00 0.00 0.00 1\n", - " 43634 1.00 1.00 1.00 1\n", - " 43635 0.00 0.00 0.00 2\n", - " 43636 1.00 0.64 0.78 11\n", - " 43637 0.50 1.00 0.67 1\n", - " 43638 1.00 0.50 0.67 2\n", - " 43639 0.00 0.00 0.00 3\n", - " 43640 0.50 0.50 0.50 2\n", - " 43641 0.00 0.00 0.00 1\n", - " 44141 0.00 0.00 0.00 0\n", - " 
44142 0.67 0.67 0.67 3\n", - " 44143 0.73 0.80 0.76 10\n", - " 44144 0.33 1.00 0.50 1\n", - " 44146 0.00 0.00 0.00 1\n", - " 44147 0.00 0.00 0.00 2\n", - " 44148 0.00 0.00 0.00 0\n", - " 44149 0.00 0.00 0.00 2\n", - " 44150 0.75 0.75 0.75 16\n", - " 44151 0.50 0.57 0.53 7\n", - " 44152 0.80 1.00 0.89 4\n", - " 44153 0.00 0.00 0.00 1\n", - " 44154 0.00 0.00 0.00 1\n", - " 44654 0.00 0.00 0.00 1\n", - " 44655 1.00 1.00 1.00 4\n", - " 44656 1.00 0.50 0.67 2\n", - " 44657 0.00 0.00 0.00 2\n", - " 44658 1.00 1.00 1.00 2\n", - " 44660 0.50 0.50 0.50 4\n", - " 44661 1.00 0.89 0.94 9\n", - " 44662 0.80 0.86 0.83 64\n", - " 44663 0.53 0.75 0.62 12\n", - " 44664 0.67 0.67 0.67 3\n", - " 44665 0.40 0.67 0.50 3\n", - " 44666 0.50 0.67 0.57 3\n", - " 45165 1.00 1.00 1.00 1\n", - " 45168 1.00 1.00 1.00 2\n", - " 45169 0.50 1.00 0.67 1\n", - " 45170 0.67 1.00 0.80 2\n", - " 45171 1.00 0.50 0.67 2\n", - " 45172 0.83 0.56 0.67 9\n", - " 45173 0.76 0.88 0.81 57\n", - " 45174 0.86 0.89 0.88 99\n", - " 45175 0.75 0.43 0.55 7\n", - " 45176 1.00 1.00 1.00 3\n", - " 45177 0.50 0.17 0.25 6\n", - " 45678 0.00 0.00 0.00 3\n", - " 45679 1.00 0.43 0.60 7\n", - " 45680 0.00 0.00 0.00 1\n", - " 45681 0.00 0.00 0.00 1\n", - " 45682 0.75 0.75 0.75 8\n", - " 45683 0.00 0.00 0.00 4\n", - " 45684 0.00 0.00 0.00 0\n", - " 45685 0.64 0.82 0.72 11\n", - " 45686 0.71 0.65 0.68 23\n", - " 45687 0.92 0.92 0.92 12\n", - " 45688 0.67 0.57 0.62 7\n", - " 45689 0.68 0.94 0.79 16\n", - " 45690 1.00 0.60 0.75 5\n", - " 46184 0.00 0.00 0.00 3\n", - " 46189 0.00 0.00 0.00 1\n", - " 46190 0.67 1.00 0.80 8\n", - " 46191 0.00 0.00 0.00 1\n", - " 46192 0.00 0.00 0.00 1\n", - " 46193 1.00 1.00 1.00 2\n", - " 46194 0.33 1.00 0.50 1\n", - " 46195 1.00 0.75 0.86 4\n", - " 46196 0.33 0.67 0.44 3\n", - " 46197 0.50 0.57 0.53 7\n", - " 46198 0.71 1.00 0.83 5\n", - " 46199 0.62 1.00 0.77 5\n", - " 46200 0.79 0.69 0.73 16\n", - " 46201 1.00 0.44 0.62 9\n", - " 46697 0.81 1.00 0.90 13\n", - " 46701 1.00 0.50 0.67 2\n", - " 46702 1.00 1.00 1.00 1\n", - " 46703 0.00 0.00 0.00 1\n", - " 46707 1.00 0.75 0.86 4\n", - " 46709 0.67 0.33 0.44 6\n", - " 46710 0.71 0.36 0.48 14\n", - " 46711 0.83 0.83 0.83 6\n", - " 46712 1.00 0.75 0.86 4\n", - " 46713 1.00 0.67 0.80 3\n", - " 46714 0.50 1.00 0.67 1\n", - " 47212 0.50 1.00 0.67 2\n", - " 47213 0.33 1.00 0.50 1\n", - " 47214 0.00 0.00 0.00 1\n", - " 47217 1.00 0.75 0.86 4\n", - " 47218 1.00 1.00 1.00 1\n", - " 47219 0.50 0.50 0.50 2\n", - " 47220 1.00 0.75 0.86 4\n", - " 47221 0.38 0.69 0.49 13\n", - " 47222 1.00 0.17 0.29 6\n", - " 47223 0.60 0.75 0.67 4\n", - " 47224 0.33 1.00 0.50 1\n", - " 47225 1.00 1.00 1.00 9\n", - " 47226 1.00 1.00 1.00 1\n", - " 47723 1.00 1.00 1.00 2\n", - " 47726 0.00 0.00 0.00 1\n", - " 47727 0.00 0.00 0.00 0\n", - " 47730 1.00 1.00 1.00 1\n", - " 47733 0.75 0.75 0.75 12\n", - " 47734 0.50 0.50 0.50 4\n", - " 47735 0.25 0.50 0.33 2\n", - " 47736 0.00 0.00 0.00 1\n", - " 47737 0.50 0.50 0.50 2\n", - " 47738 0.25 0.33 0.29 3\n", - " 48235 1.00 1.00 1.00 1\n", - " 48236 0.00 0.00 0.00 1\n", - " 48237 1.00 1.00 1.00 2\n", - " 48239 0.00 0.00 0.00 1\n", - " 48240 1.00 1.00 1.00 1\n", - " 48243 0.00 0.00 0.00 1\n", - " 48244 1.00 0.67 0.80 6\n", - " 48245 0.57 0.40 0.47 10\n", - " 48246 0.50 0.40 0.44 5\n", - " 48247 1.00 0.50 0.67 2\n", - " 48248 0.67 0.67 0.67 3\n", - " 48249 0.20 0.50 0.29 2\n", - " 48250 1.00 0.33 0.50 3\n", - " 48748 0.00 0.00 0.00 1\n", - " 48749 1.00 1.00 1.00 1\n", - " 48753 1.00 1.00 1.00 3\n", - " 48754 1.00 1.00 1.00 4\n", - " 48755 0.20 1.00 0.33 1\n", - " 
48756 0.50 0.67 0.57 3\n", - " 48757 0.89 0.99 0.94 68\n", - " 48758 0.00 0.00 0.00 2\n", - " 48759 0.85 0.92 0.88 12\n", - " 48760 0.62 0.50 0.55 16\n", - " 48761 1.00 0.57 0.73 7\n", - " 48762 0.00 0.00 0.00 7\n", - " 49259 0.00 0.00 0.00 0\n", - " 49265 0.00 0.00 0.00 1\n", - " 49266 0.00 0.00 0.00 3\n", - " 49267 1.00 0.50 0.67 2\n", - " 49268 0.94 0.94 0.94 16\n", - " 49269 0.67 0.40 0.50 5\n", - " 49270 0.50 0.33 0.40 3\n", - " 49271 1.00 0.50 0.67 2\n", - " 49272 0.59 0.80 0.68 40\n", - " 49273 0.11 0.50 0.18 2\n", - " 49274 0.75 0.82 0.78 11\n", - " 49773 0.00 0.00 0.00 0\n", - " 49776 0.00 0.00 0.00 1\n", - " 49778 0.50 1.00 0.67 1\n", - " 49779 1.00 0.33 0.50 3\n", - " 49780 0.85 1.00 0.92 70\n", - " 49781 1.00 0.75 0.86 4\n", - " 49782 1.00 0.25 0.40 4\n", - " 49783 0.00 0.00 0.00 0\n", - " 49784 0.88 0.71 0.79 65\n", - " 49785 0.74 0.97 0.84 33\n", - " 50283 0.00 0.00 0.00 1\n", - " 50288 0.50 1.00 0.67 2\n", - " 50289 0.00 0.00 0.00 0\n", - " 50290 0.00 0.00 0.00 1\n", - " 50291 0.75 0.55 0.63 11\n", - " 50292 0.83 0.71 0.77 7\n", - " 50293 0.50 0.50 0.50 2\n", - " 50295 0.00 0.00 0.00 3\n", - " 50296 0.50 0.50 0.50 10\n", - " 50297 1.00 0.90 0.95 10\n", - " 50801 1.00 0.50 0.67 4\n", - " 50802 0.50 0.50 0.50 2\n", - " 50803 0.00 0.00 0.00 0\n", - " 50804 0.67 1.00 0.80 2\n", - " 50805 0.67 0.67 0.67 3\n", - " 50806 1.00 1.00 1.00 2\n", - " 50807 0.00 0.00 0.00 3\n", - " 50808 0.40 0.33 0.36 6\n", - " 50809 0.00 0.00 0.00 0\n", - " 51313 0.00 0.00 0.00 0\n", - " 51314 0.00 0.00 0.00 0\n", - " 51315 0.00 0.00 0.00 1\n", - " 51316 1.00 1.00 1.00 2\n", - " 51317 1.00 1.00 1.00 1\n", - " 51318 1.00 1.00 1.00 2\n", - " 51319 0.00 0.00 0.00 0\n", - " 51320 0.00 0.00 0.00 1\n", - " 51827 1.00 1.00 1.00 1\n", - " 51828 0.00 0.00 0.00 1\n", - " 51829 0.33 1.00 0.50 1\n", - " 51830 0.75 0.75 0.75 4\n", - " 52340 0.44 0.80 0.57 5\n", - " 52341 0.86 0.82 0.84 22\n", - " 52851 0.00 0.00 0.00 1\n", - " 52852 1.00 0.67 0.80 3\n", - " 52853 1.00 0.71 0.83 7\n", - " 52854 1.00 0.50 0.67 4\n", - " 53362 0.80 1.00 0.89 8\n", - " 53364 0.00 0.00 0.00 1\n", - " 53875 0.00 0.00 0.00 2\n", - " 54387 0.00 0.00 0.00 0\n", - " 54388 0.00 0.00 0.00 1\n", - " 54899 0.00 0.00 0.00 1\n", - " 54900 0.80 0.71 0.75 17\n", - " 54901 0.82 0.82 0.82 11\n", - " 54902 1.00 1.00 1.00 8\n", - " 55412 0.25 1.00 0.40 1\n", - " 55413 1.00 0.78 0.88 9\n", - "\n", - " accuracy 0.78 2318\n", - " macro avg 0.54 0.52 0.50 2318\n", - "weighted avg 0.78 0.78 0.77 2318\n", - "\n", - "TRAIN AND EVAL knn\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n", - "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. 
Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n", - "/usr/local/lib/python3.8/site-packages/sklearn/model_selection/_split.py:670: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.\n", - " warnings.warn((\"The least populated class in y has only %d\"\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Best Parameters : {'n_neighbors': 4, 'p': 2}\n" + "Best Parameters : {'alpha': 0}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + "/Users/jacquesfize/opt/anaconda3/envs/my_env/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n", - "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n", - "/usr/local/lib/python3.8/site-packages/sklearn/model_selection/_split.py:670: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.\n", - " warnings.warn((\"The least populated class in y has only %d\"\n" + "/Users/jacquesfize/opt/anaconda3/envs/my_env/lib/python3.7/site-packages/sklearn/model_selection/_split.py:672: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.\n", + " % (min_groups, self.n_splits)), UserWarning)\n" ] }, { @@ -937,795 +441,643 @@ "text": [ " precision recall f1-score support\n", "\n", - " 23545 0.00 0.00 0.00 10\n", - " 23546 0.54 0.81 0.65 31\n", - " 23547 0.00 0.00 0.00 6\n", - " 23981 0.00 0.00 0.00 11\n", - " 23982 0.86 0.95 0.90 557\n", - " 23983 0.81 0.75 0.78 111\n", - " 23984 0.00 0.00 0.00 2\n", - " 24422 0.80 0.72 0.76 71\n", - " 24423 0.71 0.60 0.65 105\n", - " 24424 0.32 0.47 0.38 15\n", - " 24867 0.76 0.73 0.75 30\n", - " 24868 0.00 0.00 0.00 6\n", - " 39537 1.00 0.33 0.50 3\n", - " 40045 0.00 0.00 0.00 1\n", - " 40046 0.00 0.00 0.00 0\n", - " 40047 0.00 0.00 0.00 1\n", - " 40049 0.00 0.00 0.00 0\n", - " 40558 0.60 0.75 0.67 4\n", - " 40559 0.25 0.50 0.33 2\n", - " 40560 0.00 0.00 0.00 0\n", - " 40561 0.00 0.00 0.00 1\n", - " 41070 0.00 0.00 0.00 1\n", - " 41071 0.50 0.75 0.60 4\n", - " 41072 1.00 0.50 0.67 4\n", - " 41073 0.17 0.50 0.25 2\n", - " 41583 0.82 0.95 0.88 19\n", - " 41584 0.20 0.50 0.29 2\n", - " 41585 0.00 0.00 0.00 1\n", - " 42094 0.00 0.00 0.00 0\n", - " 42095 0.33 0.33 0.33 3\n", - " 42096 1.00 1.00 1.00 2\n", - " 42097 1.00 0.75 0.86 4\n", - " 42606 0.00 0.00 0.00 3\n", - " 42607 1.00 1.00 1.00 4\n", - " 42608 0.00 0.00 0.00 0\n", - " 42609 0.08 0.33 0.13 3\n", - " 43117 0.00 0.00 0.00 2\n", - " 43118 0.40 1.00 0.57 2\n", - " 43119 1.00 1.00 1.00 1\n", - " 43120 0.00 0.00 0.00 3\n", - " 43121 1.00 0.67 0.80 3\n", - " 43122 1.00 1.00 1.00 2\n", - " 43123 0.00 0.00 0.00 0\n", - " 43124 0.00 0.00 0.00 0\n", - " 43630 0.00 0.00 0.00 2\n", - " 43631 0.29 0.50 
0.36 4\n", - " 43632 0.00 0.00 0.00 1\n", - " 43633 0.00 0.00 0.00 1\n", - " 43634 0.50 1.00 0.67 1\n", - " 43635 0.00 0.00 0.00 2\n", - " 43636 0.83 0.45 0.59 11\n", - " 43637 0.50 1.00 0.67 1\n", - " 43638 0.50 0.50 0.50 2\n", - " 43639 0.00 0.00 0.00 3\n", - " 43640 0.14 0.50 0.22 2\n", - " 43641 0.00 0.00 0.00 1\n", - " 44141 0.00 0.00 0.00 0\n", - " 44142 0.67 0.67 0.67 3\n", - " 44143 0.53 0.80 0.64 10\n", - " 44144 0.50 1.00 0.67 1\n", - " 44146 0.00 0.00 0.00 1\n", - " 44147 0.00 0.00 0.00 2\n", - " 44148 0.00 0.00 0.00 0\n", - " 44149 0.00 0.00 0.00 2\n", - " 44150 0.69 0.69 0.69 16\n", - " 44151 0.56 0.71 0.63 7\n", - " 44152 0.75 0.75 0.75 4\n", - " 44153 0.00 0.00 0.00 1\n", - " 44154 0.00 0.00 0.00 1\n", - " 44654 0.00 0.00 0.00 1\n", - " 44655 0.75 0.75 0.75 4\n", - " 44656 0.00 0.00 0.00 2\n", - " 44657 0.00 0.00 0.00 2\n", - " 44658 1.00 1.00 1.00 2\n", - " 44660 0.00 0.00 0.00 4\n", - " 44661 0.47 0.78 0.58 9\n", - " 44662 0.64 0.91 0.75 64\n", - " 44663 0.38 0.42 0.40 12\n", - " 44664 0.33 0.33 0.33 3\n", - " 44665 0.33 0.33 0.33 3\n", - " 44666 0.25 0.67 0.36 3\n", - " 45165 1.00 1.00 1.00 1\n", - " 45168 1.00 0.50 0.67 2\n", - " 45169 0.50 1.00 0.67 1\n", - " 45170 1.00 0.50 0.67 2\n", - " 45171 0.00 0.00 0.00 2\n", - " 45172 0.86 0.67 0.75 9\n", - " 45173 0.71 0.84 0.77 57\n", - " 45174 0.67 0.81 0.73 99\n", - " 45175 0.00 0.00 0.00 7\n", - " 45176 0.75 1.00 0.86 3\n", - " 45177 0.00 0.00 0.00 6\n", - " 45678 0.00 0.00 0.00 3\n", - " 45679 1.00 0.14 0.25 7\n", - " 45680 0.00 0.00 0.00 1\n", - " 45681 0.00 0.00 0.00 1\n", - " 45682 0.67 0.75 0.71 8\n", - " 45683 1.00 0.25 0.40 4\n", - " 45684 0.00 0.00 0.00 0\n", - " 45685 0.57 0.36 0.44 11\n", - " 45686 0.59 0.43 0.50 23\n", - " 45687 0.75 0.50 0.60 12\n", - " 45688 0.38 0.43 0.40 7\n", - " 45689 0.68 0.94 0.79 16\n", - " 45690 1.00 0.60 0.75 5\n", - " 46184 0.20 0.67 0.31 3\n", - " 46189 0.00 0.00 0.00 1\n", - " 46190 0.70 0.88 0.78 8\n", - " 46191 0.00 0.00 0.00 1\n", - " 46192 0.00 0.00 0.00 1\n", - " 46193 0.67 1.00 0.80 2\n", - " 46194 0.50 1.00 0.67 1\n", - " 46195 0.75 0.75 0.75 4\n", - " 46196 0.40 0.67 0.50 3\n", - " 46197 0.17 0.14 0.15 7\n", - " 46198 0.80 0.80 0.80 5\n", - " 46199 0.00 0.00 0.00 5\n", - " 46200 0.62 0.50 0.55 16\n", - " 46201 0.43 0.33 0.38 9\n", - " 46697 0.86 0.46 0.60 13\n", - " 46701 1.00 1.00 1.00 2\n", - " 46702 0.00 0.00 0.00 1\n", - " 46703 0.00 0.00 0.00 1\n", - " 46705 0.00 0.00 0.00 0\n", - " 46707 0.67 0.50 0.57 4\n", - " 46708 0.00 0.00 0.00 0\n", - " 46709 0.50 0.33 0.40 6\n", - " 46710 0.57 0.57 0.57 14\n", - " 46711 0.71 0.83 0.77 6\n", - " 46712 0.00 0.00 0.00 4\n", - " 46713 1.00 0.33 0.50 3\n", - " 46714 1.00 1.00 1.00 1\n", - " 47212 0.67 1.00 0.80 2\n", - " 47213 0.50 1.00 0.67 1\n", - " 47214 0.00 0.00 0.00 1\n", - " 47217 1.00 0.50 0.67 4\n", - " 47218 1.00 1.00 1.00 1\n", - " 47219 0.50 0.50 0.50 2\n", - " 47220 0.50 0.50 0.50 4\n", - " 47221 0.70 0.54 0.61 13\n", - " 47222 1.00 0.17 0.29 6\n", - " 47223 0.67 0.50 0.57 4\n", - " 47224 0.00 0.00 0.00 1\n", - " 47225 0.90 1.00 0.95 9\n", - " 47226 0.50 1.00 0.67 1\n", - " 47723 1.00 1.00 1.00 2\n", - " 47726 0.00 0.00 0.00 1\n", - " 47730 0.00 0.00 0.00 1\n", - " 47733 0.75 0.75 0.75 12\n", - " 47734 0.40 0.50 0.44 4\n", - " 47735 0.25 0.50 0.33 2\n", - " 47736 0.00 0.00 0.00 1\n", - " 47737 0.00 0.00 0.00 2\n", - " 47738 0.33 0.33 0.33 3\n", - " 48235 0.00 0.00 0.00 1\n", - " 48236 0.00 0.00 0.00 1\n", - " 48237 1.00 1.00 1.00 2\n", - " 48239 0.00 0.00 0.00 1\n", - " 48240 1.00 1.00 1.00 1\n", - " 48243 0.00 0.00 0.00 
1\n", - " 48244 0.75 0.50 0.60 6\n", - " 48245 0.50 0.40 0.44 10\n", - " 48246 0.00 0.00 0.00 5\n", - " 48247 0.00 0.00 0.00 2\n", - " 48248 1.00 0.67 0.80 3\n", - " 48249 0.00 0.00 0.00 2\n", - " 48250 0.00 0.00 0.00 3\n", - " 48748 0.00 0.00 0.00 1\n", - " 48749 0.00 0.00 0.00 1\n", - " 48753 1.00 1.00 1.00 3\n", - " 48754 1.00 0.50 0.67 4\n", - " 48755 0.33 1.00 0.50 1\n", - " 48756 0.50 0.67 0.57 3\n", - " 48757 0.94 0.94 0.94 68\n", - " 48758 0.00 0.00 0.00 2\n", - " 48759 0.92 0.92 0.92 12\n", - " 48760 0.73 0.69 0.71 16\n", - " 48761 0.83 0.71 0.77 7\n", - " 48762 0.00 0.00 0.00 7\n", - " 49265 0.00 0.00 0.00 1\n", - " 49266 0.00 0.00 0.00 3\n", - " 49267 1.00 0.50 0.67 2\n", - " 49268 1.00 0.88 0.93 16\n", - " 49269 0.50 0.40 0.44 5\n", - " 49270 0.40 0.67 0.50 3\n", - " 49271 0.25 0.50 0.33 2\n", - " 49272 0.53 0.72 0.61 40\n", - " 49273 0.00 0.00 0.00 2\n", - " 49274 0.73 0.73 0.73 11\n", - " 49776 0.00 0.00 0.00 1\n", - " 49778 0.00 0.00 0.00 1\n", - " 49779 0.00 0.00 0.00 3\n", - " 49780 0.86 0.94 0.90 70\n", - " 49781 0.67 1.00 0.80 4\n", - " 49782 0.00 0.00 0.00 4\n", - " 49783 0.00 0.00 0.00 0\n", - " 49784 0.85 0.63 0.73 65\n", - " 49785 0.78 0.94 0.85 33\n", - " 50283 0.00 0.00 0.00 1\n", - " 50288 1.00 1.00 1.00 2\n", - " 50289 0.00 0.00 0.00 0\n", - " 50290 0.00 0.00 0.00 1\n", - " 50291 0.44 0.36 0.40 11\n", - " 50292 1.00 0.29 0.44 7\n", - " 50293 0.50 1.00 0.67 2\n", - " 50295 0.50 0.33 0.40 3\n", - " 50296 0.50 0.60 0.55 10\n", - " 50297 1.00 0.70 0.82 10\n", - " 50801 1.00 0.75 0.86 4\n", - " 50802 0.00 0.00 0.00 2\n", - " 50804 0.00 0.00 0.00 2\n", - " 50805 1.00 0.33 0.50 3\n", - " 50806 1.00 1.00 1.00 2\n", - " 50807 0.33 0.33 0.33 3\n", - " 50808 1.00 0.17 0.29 6\n", - " 50809 0.00 0.00 0.00 0\n", - " 51315 0.00 0.00 0.00 1\n", - " 51316 1.00 1.00 1.00 2\n", - " 51317 0.00 0.00 0.00 1\n", - " 51318 1.00 1.00 1.00 2\n", - " 51320 0.00 0.00 0.00 1\n", - " 51827 0.00 0.00 0.00 1\n", - " 51828 0.00 0.00 0.00 1\n", - " 51829 0.00 0.00 0.00 1\n", - " 51830 0.75 0.75 0.75 4\n", - " 52340 0.50 1.00 0.67 5\n", - " 52341 0.68 0.77 0.72 22\n", - " 52851 0.00 0.00 0.00 1\n", - " 52852 1.00 0.33 0.50 3\n", - " 52853 1.00 0.43 0.60 7\n", - " 52854 0.00 0.00 0.00 4\n", - " 53362 0.89 1.00 0.94 8\n", - " 53364 0.00 0.00 0.00 1\n", - " 53875 0.00 0.00 0.00 2\n", - " 54387 0.00 0.00 0.00 0\n", - " 54388 0.00 0.00 0.00 1\n", - " 54899 0.00 0.00 0.00 1\n", - " 54900 0.76 0.76 0.76 17\n", - " 54901 0.78 0.64 0.70 11\n", - " 54902 1.00 1.00 1.00 8\n", - " 55412 0.00 0.00 0.00 1\n", - " 55413 1.00 0.67 0.80 9\n", - "\n", - " accuracy 0.72 2318\n", - " macro avg 0.41 0.41 0.39 2318\n", - "weighted avg 0.70 0.72 0.70 2318\n", - "\n", - "TRAIN AND EVAL decision-tree\n", - "Best Parameters : {'criterion': 'gini'}\n", - " precision recall f1-score support\n", - "\n", - " 23545 0.00 0.00 0.00 10\n", - " 23546 0.50 0.71 0.59 31\n", - " 23547 0.00 0.00 0.00 6\n", - " 23981 0.00 0.00 0.00 11\n", - " 23982 0.89 0.88 0.88 557\n", - " 23983 0.65 0.66 0.65 111\n", - " 23984 0.00 0.00 0.00 2\n", - " 24422 0.65 0.73 0.69 71\n", - " 24423 0.52 0.49 0.50 105\n", - " 24424 0.31 0.33 0.32 15\n", - " 24866 0.00 0.00 0.00 0\n", - " 24867 0.71 0.73 0.72 30\n", - " 24868 0.14 0.17 0.15 6\n", - " 39537 1.00 1.00 1.00 3\n", - " 40045 0.00 0.00 0.00 1\n", - " 40047 0.00 0.00 0.00 1\n", - " 40558 0.67 0.50 0.57 4\n", - " 40559 0.40 1.00 0.57 2\n", - " 40560 0.00 0.00 0.00 0\n", - " 40561 0.00 0.00 0.00 1\n", - " 41070 0.00 0.00 0.00 1\n", - " 41071 1.00 0.75 0.86 4\n", - " 41072 0.00 0.00 0.00 4\n", - " 41073 
0.00 0.00 0.00 2\n", - " 41582 0.00 0.00 0.00 0\n", - " 41583 0.83 1.00 0.90 19\n", - " 41584 0.00 0.00 0.00 2\n", - " 41585 0.00 0.00 0.00 1\n", - " 42095 0.20 0.33 0.25 3\n", - " 42096 1.00 0.50 0.67 2\n", - " 42097 0.00 0.00 0.00 4\n", - " 42606 0.00 0.00 0.00 3\n", - " 42607 1.00 0.50 0.67 4\n", - " 42608 0.00 0.00 0.00 0\n", - " 42609 0.33 0.33 0.33 3\n", - " 43117 0.00 0.00 0.00 2\n", - " 43118 0.33 0.50 0.40 2\n", - " 43119 0.33 1.00 0.50 1\n", - " 43120 0.00 0.00 0.00 3\n", - " 43121 0.50 0.33 0.40 3\n", - " 43122 0.00 0.00 0.00 2\n", - " 43630 0.00 0.00 0.00 2\n", - " 43631 1.00 0.25 0.40 4\n", - " 43632 0.00 0.00 0.00 1\n", - " 43633 0.00 0.00 0.00 1\n", - " 43634 0.00 0.00 0.00 1\n", - " 43635 0.00 0.00 0.00 2\n", - " 43636 0.80 0.36 0.50 11\n", - " 43637 0.00 0.00 0.00 1\n", - " 43638 1.00 0.50 0.67 2\n", - " 43639 0.50 0.33 0.40 3\n", - " 43640 0.00 0.00 0.00 2\n", - " 43641 0.00 0.00 0.00 1\n", - " 44142 0.67 0.67 0.67 3\n", - " 44143 0.70 0.70 0.70 10\n", - " 44144 0.00 0.00 0.00 1\n", - " 44146 0.00 0.00 0.00 1\n", - " 44147 0.00 0.00 0.00 2\n", - " 44148 0.00 0.00 0.00 0\n", - " 44149 0.00 0.00 0.00 2\n", - " 44150 0.53 0.50 0.52 16\n", - " 44151 1.00 0.57 0.73 7\n", - " 44152 0.57 1.00 0.73 4\n", - " 44153 0.00 0.00 0.00 1\n", - " 44154 0.00 0.00 0.00 1\n", - " 44654 0.33 1.00 0.50 1\n", - " 44655 0.75 0.75 0.75 4\n", - " 44656 0.00 0.00 0.00 2\n", - " 44657 0.00 0.00 0.00 2\n", - " 44658 1.00 1.00 1.00 2\n", - " 44660 0.40 0.50 0.44 4\n", - " 44661 0.80 0.44 0.57 9\n", - " 44662 0.65 0.70 0.68 64\n", - " 44663 0.37 0.58 0.45 12\n", - " 44664 0.50 0.33 0.40 3\n", - " 44665 0.33 0.33 0.33 3\n", - " 44666 0.40 0.67 0.50 3\n", - " 45165 1.00 1.00 1.00 1\n", - " 45167 0.00 0.00 0.00 0\n", - " 45168 0.00 0.00 0.00 2\n", - " 45169 1.00 1.00 1.00 1\n", - " 45170 0.33 0.50 0.40 2\n", - " 45171 0.00 0.00 0.00 2\n", - " 45172 0.57 0.44 0.50 9\n", - " 45173 0.68 0.70 0.69 57\n", - " 45174 0.78 0.82 0.80 99\n", - " 45175 0.29 0.29 0.29 7\n", - " 45176 0.17 0.33 0.22 3\n", - " 45177 1.00 0.17 0.29 6\n", - " 45678 0.00 0.00 0.00 3\n", - " 45679 0.67 0.29 0.40 7\n", - " 45680 0.00 0.00 0.00 1\n", - " 45681 0.00 0.00 0.00 1\n", - " 45682 0.62 0.62 0.62 8\n", - " 45683 0.00 0.00 0.00 4\n", - " 45684 0.00 0.00 0.00 0\n", - " 45685 0.36 0.36 0.36 11\n", - " 45686 0.46 0.48 0.47 23\n", - " 45687 0.50 0.42 0.45 12\n", - " 45688 0.75 0.43 0.55 7\n", - " 45689 0.72 0.81 0.76 16\n", - " 45690 0.83 1.00 0.91 5\n", - " 46184 0.25 0.33 0.29 3\n", - " 46189 0.00 0.00 0.00 1\n", - " 46190 0.75 0.75 0.75 8\n", - " 46191 0.00 0.00 0.00 1\n", - " 46192 0.00 0.00 0.00 1\n", - " 46193 1.00 1.00 1.00 2\n", - " 46194 0.33 1.00 0.50 1\n", - " 46195 0.00 0.00 0.00 4\n", - " 46196 0.00 0.00 0.00 3\n", - " 46197 0.00 0.00 0.00 7\n", - " 46198 0.25 0.20 0.22 5\n", - " 46199 0.11 0.20 0.14 5\n", - " 46200 0.47 0.56 0.51 16\n", - " 46201 0.80 0.44 0.57 9\n", - " 46697 0.69 0.69 0.69 13\n", - " 46701 0.00 0.00 0.00 2\n", - " 46702 0.00 0.00 0.00 1\n", - " 46703 0.00 0.00 0.00 1\n", - " 46704 0.00 0.00 0.00 0\n", - " 46705 0.00 0.00 0.00 0\n", - " 46707 1.00 0.50 0.67 4\n", - " 46709 0.33 0.17 0.22 6\n", - " 46710 0.42 0.36 0.38 14\n", - " 46711 0.43 0.50 0.46 6\n", - " 46712 0.00 0.00 0.00 4\n", - " 46713 0.00 0.00 0.00 3\n", - " 46714 0.33 1.00 0.50 1\n", - " 47212 0.67 1.00 0.80 2\n", - " 47213 0.00 0.00 0.00 1\n", - " 47214 0.00 0.00 0.00 1\n", - " 47215 0.00 0.00 0.00 0\n", - " 47217 1.00 0.25 0.40 4\n", - " 47218 1.00 1.00 1.00 1\n", - " 47219 1.00 0.50 0.67 2\n", - " 47220 1.00 0.50 0.67 4\n", - " 47221 0.40 
0.62 0.48 13\n", - " 47222 0.50 0.17 0.25 6\n", - " 47223 0.50 0.50 0.50 4\n", - " 47224 1.00 1.00 1.00 1\n", - " 47225 1.00 0.56 0.71 9\n", - " 47226 0.00 0.00 0.00 1\n", - " 47723 0.00 0.00 0.00 2\n", - " 47726 0.00 0.00 0.00 1\n", - " 47729 0.00 0.00 0.00 0\n", - " 47730 0.00 0.00 0.00 1\n", - " 47731 0.00 0.00 0.00 0\n", - " 47732 0.00 0.00 0.00 0\n", - " 47733 0.56 0.42 0.48 12\n", - " 47734 0.00 0.00 0.00 4\n", - " 47735 0.33 0.50 0.40 2\n", - " 47736 0.00 0.00 0.00 1\n", - " 47737 0.00 0.00 0.00 2\n", - " 47738 0.00 0.00 0.00 3\n", - " 48235 0.50 1.00 0.67 1\n", - " 48236 0.00 0.00 0.00 1\n", - " 48237 1.00 0.50 0.67 2\n", - " 48239 0.00 0.00 0.00 1\n", - " 48240 0.00 0.00 0.00 1\n", - " 48243 0.00 0.00 0.00 1\n", - " 48244 0.38 0.50 0.43 6\n", - " 48245 0.40 0.20 0.27 10\n", - " 48246 0.50 0.20 0.29 5\n", - " 48247 1.00 0.50 0.67 2\n", - " 48248 0.29 0.67 0.40 3\n", - " 48249 0.50 0.50 0.50 2\n", - " 48250 0.00 0.00 0.00 3\n", - " 48748 0.00 0.00 0.00 1\n", - " 48749 0.00 0.00 0.00 1\n", - " 48753 0.00 0.00 0.00 3\n", - " 48754 0.00 0.00 0.00 4\n", - " 48755 0.00 0.00 0.00 1\n", - " 48756 0.17 0.67 0.27 3\n", - " 48757 0.93 0.94 0.93 68\n", - " 48758 0.00 0.00 0.00 2\n", - " 48759 0.71 0.83 0.77 12\n", - " 48760 0.83 0.31 0.45 16\n", - " 48761 0.20 0.14 0.17 7\n", - " 48762 0.50 0.29 0.36 7\n", - " 49259 0.00 0.00 0.00 0\n", - " 49265 0.00 0.00 0.00 1\n", - " 49266 0.00 0.00 0.00 3\n", - " 49267 0.00 0.00 0.00 2\n", - " 49268 0.81 0.81 0.81 16\n", - " 49269 0.00 0.00 0.00 5\n", - " 49270 0.00 0.00 0.00 3\n", - " 49271 1.00 0.50 0.67 2\n", - " 49272 0.54 0.62 0.58 40\n", - " 49273 0.07 0.50 0.12 2\n", - " 49274 0.71 0.45 0.56 11\n", - " 49776 0.00 0.00 0.00 1\n", - " 49778 0.00 0.00 0.00 1\n", - " 49779 0.00 0.00 0.00 3\n", - " 49780 0.89 0.89 0.89 70\n", - " 49781 0.60 0.75 0.67 4\n", - " 49782 0.00 0.00 0.00 4\n", - " 49783 0.00 0.00 0.00 0\n", - " 49784 0.68 0.63 0.66 65\n", - " 49785 0.60 0.55 0.57 33\n", - " 50283 0.00 0.00 0.00 1\n", - " 50288 1.00 1.00 1.00 2\n", - " 50289 0.00 0.00 0.00 0\n", - " 50290 0.00 0.00 0.00 1\n", - " 50291 0.44 0.36 0.40 11\n", - " 50292 0.80 0.57 0.67 7\n", - " 50293 0.00 0.00 0.00 2\n", - " 50295 0.50 0.33 0.40 3\n", - " 50296 0.27 0.30 0.29 10\n", - " 50297 1.00 0.80 0.89 10\n", - " 50801 0.00 0.00 0.00 4\n", - " 50802 0.00 0.00 0.00 2\n", - " 50803 0.00 0.00 0.00 0\n", - " 50804 0.50 1.00 0.67 2\n", - " 50805 0.67 0.67 0.67 3\n", - " 50806 1.00 1.00 1.00 2\n", - " 50807 0.50 0.67 0.57 3\n", - " 50808 0.33 0.17 0.22 6\n", - " 50809 0.00 0.00 0.00 0\n", - " 51314 0.00 0.00 0.00 0\n", - " 51315 0.00 0.00 0.00 1\n", - " 51316 1.00 0.50 0.67 2\n", - " 51317 0.00 0.00 0.00 1\n", - " 51318 0.00 0.00 0.00 2\n", - " 51320 0.00 0.00 0.00 1\n", - " 51826 0.00 0.00 0.00 0\n", - " 51827 0.00 0.00 0.00 1\n", - " 51828 0.00 0.00 0.00 1\n", - " 51829 0.00 0.00 0.00 1\n", - " 51830 0.50 0.25 0.33 4\n", - " 52339 0.00 0.00 0.00 0\n", - " 52340 0.67 0.80 0.73 5\n", - " 52341 0.56 0.68 0.61 22\n", - " 52851 0.00 0.00 0.00 1\n", - " 52852 1.00 0.33 0.50 3\n", - " 52853 1.00 0.57 0.73 7\n", - " 52854 0.00 0.00 0.00 4\n", - " 53362 0.73 1.00 0.84 8\n", - " 53363 0.00 0.00 0.00 0\n", - " 53364 0.00 0.00 0.00 1\n", - " 53875 0.00 0.00 0.00 2\n", - " 53876 0.00 0.00 0.00 0\n", - " 53877 0.00 0.00 0.00 0\n", - " 54387 0.00 0.00 0.00 0\n", - " 54388 0.00 0.00 0.00 1\n", - " 54389 0.00 0.00 0.00 0\n", - " 54899 0.00 0.00 0.00 1\n", - " 54900 0.65 0.65 0.65 17\n", - " 54901 0.57 0.36 0.44 11\n", - " 54902 1.00 1.00 1.00 8\n", - " 55412 0.11 1.00 0.20 1\n", - " 55413 0.78 
0.78 0.78 9\n", + " 21425 1.00 0.67 0.80 9\n", + " 21426 0.91 0.77 0.84 150\n", + " 21841 0.87 0.60 0.71 225\n", + " 21842 0.84 0.77 0.80 741\n", + " 21843 0.79 0.72 0.75 561\n", + " 22261 0.93 0.51 0.66 151\n", + " 22262 0.66 0.75 0.70 866\n", + " 22263 0.78 0.68 0.72 1667\n", + " 22264 0.67 0.33 0.44 6\n", + " 22684 0.94 0.72 0.82 101\n", + " 22685 0.89 0.61 0.72 370\n", + " 22686 0.72 0.70 0.71 1118\n", + " 22687 0.69 0.68 0.69 1064\n", + " 22688 0.84 0.62 0.71 641\n", + " 22689 0.98 0.52 0.68 85\n", + " 23112 0.80 0.80 0.80 609\n", + " 23113 0.78 0.77 0.78 725\n", + " 23114 0.73 0.65 0.69 749\n", + " 23115 0.66 0.65 0.66 899\n", + " 23116 0.59 0.59 0.59 510\n", + " 23117 0.71 0.65 0.68 613\n", + " 23118 0.86 0.56 0.68 176\n", + " 23541 0.94 0.81 0.87 36\n", + " 23542 0.79 0.57 0.66 140\n", + " 23543 0.83 0.42 0.56 12\n", + " 23544 0.77 0.62 0.68 759\n", + " 23545 0.71 0.59 0.64 1101\n", + " 23546 0.53 0.63 0.58 931\n", + " 23547 0.56 0.59 0.58 648\n", + " 23548 0.68 0.62 0.65 1056\n", + " 23549 0.80 0.65 0.72 384\n", + " 23550 0.67 0.71 0.69 419\n", + " 23551 0.83 0.73 0.77 760\n", + " 23552 0.91 0.75 0.82 158\n", + " 23553 1.00 0.33 0.50 3\n", + " 23977 0.81 0.62 0.70 114\n", + " 23978 0.82 0.63 0.72 545\n", + " 23979 0.77 0.67 0.72 1018\n", + " 23980 0.72 0.63 0.68 605\n", + " 23981 0.73 0.54 0.62 484\n", + " 23982 0.42 0.94 0.58 12576\n", + " 23983 0.69 0.63 0.66 1060\n", + " 23984 0.77 0.56 0.65 400\n", + " 23985 0.74 0.60 0.66 397\n", + " 23986 0.68 0.63 0.65 340\n", + " 23987 0.71 0.66 0.69 863\n", + " 23988 0.85 0.77 0.81 606\n", + " 23989 0.81 0.80 0.80 542\n", + " 24415 0.87 0.73 0.79 113\n", + " 24416 0.91 0.74 0.82 188\n", + " 24417 0.96 0.58 0.72 38\n", + " 24418 0.76 0.69 0.72 678\n", + " 24419 0.73 0.62 0.67 802\n", + " 24420 0.78 0.57 0.66 415\n", + " 24421 0.78 0.61 0.68 381\n", + " 24422 0.70 0.68 0.69 736\n", + " 24423 0.65 0.66 0.66 1154\n", + " 24424 0.72 0.59 0.65 397\n", + " 24425 0.77 0.51 0.61 345\n", + " 24426 0.73 0.59 0.66 377\n", + " 24427 0.76 0.63 0.69 780\n", + " 24428 0.79 0.59 0.68 528\n", + " 24429 0.84 0.87 0.85 1290\n", + " 24858 0.87 0.78 0.82 312\n", + " 24859 0.83 0.79 0.81 467\n", + " 24860 0.86 0.77 0.81 394\n", + " 24861 0.86 0.74 0.79 544\n", + " 24862 0.81 0.67 0.73 580\n", + " 24863 0.76 0.57 0.65 419\n", + " 24864 0.77 0.63 0.69 360\n", + " 24865 0.85 0.66 0.74 328\n", + " 24866 0.83 0.61 0.70 316\n", + " 24867 0.81 0.62 0.71 443\n", + " 24868 0.74 0.48 0.58 312\n", + " 24869 0.73 0.56 0.63 483\n", + " 24870 0.78 0.69 0.73 259\n", + " 24871 0.74 0.66 0.70 502\n", + " 24872 0.82 0.67 0.74 722\n", + " 24873 0.84 0.77 0.80 749\n", + " 25306 0.86 0.77 0.82 273\n", + " 25307 0.84 0.67 0.75 323\n", + " 25308 0.78 0.67 0.72 418\n", + " 25309 0.83 0.64 0.72 691\n", + " 25310 0.75 0.67 0.71 456\n", + " 25311 0.78 0.63 0.70 470\n", + " 25312 0.77 0.61 0.68 484\n", + " 25313 0.82 0.61 0.70 333\n", + " 25314 0.83 0.70 0.76 395\n", + " 25315 0.85 0.65 0.74 271\n", + " 25316 0.73 0.63 0.68 478\n", + " 25317 0.82 0.60 0.70 324\n", + " 25318 0.82 0.52 0.64 286\n", + " 25319 0.75 0.66 0.70 522\n", + " 25320 0.85 0.66 0.74 680\n", + " 25321 0.87 0.78 0.82 585\n", + " 25758 0.95 0.73 0.82 125\n", + " 25759 0.86 0.67 0.76 328\n", + " 25760 0.75 0.76 0.76 789\n", + " 25761 0.84 0.56 0.68 505\n", + " 25762 0.81 0.64 0.72 351\n", + " 25763 0.73 0.63 0.68 538\n", + " 25764 0.73 0.58 0.65 577\n", + " 25765 0.81 0.61 0.70 360\n", + " 25766 0.85 0.67 0.75 229\n", + " 25767 0.85 0.66 0.74 305\n", + " 25768 0.83 0.58 0.68 305\n", + " 25769 0.72 0.54 0.61 343\n", + " 
25770 0.78 0.68 0.72 666\n", + " 25771 0.69 0.67 0.68 883\n", + " 25772 0.69 0.68 0.68 334\n", + " 25773 0.93 0.79 0.85 122\n", + " 26215 1.00 0.65 0.79 20\n", + " 26216 0.90 0.63 0.74 218\n", + " 26217 0.77 0.67 0.72 314\n", + " 26218 0.80 0.68 0.73 1548\n", + " 26219 0.69 0.76 0.72 823\n", + " 26220 0.78 0.62 0.69 366\n", + " 26221 0.82 0.66 0.73 279\n", + " 26222 0.80 0.66 0.72 242\n", + " 26223 0.80 0.67 0.73 361\n", + " 26224 0.78 0.53 0.63 291\n", + " 26225 0.78 0.58 0.67 426\n", + " 26226 0.74 0.61 0.67 456\n", + " 26227 0.63 0.62 0.63 454\n", + " 26228 0.90 0.57 0.70 107\n", + " 26676 0.92 0.73 0.81 77\n", + " 26677 0.86 0.57 0.69 322\n", + " 26678 0.83 0.66 0.73 437\n", + " 26679 0.86 0.71 0.78 435\n", + " 26680 0.76 0.63 0.69 367\n", + " 26681 0.80 0.58 0.67 263\n", + " 26682 0.82 0.61 0.70 280\n", + " 26683 0.76 0.59 0.66 348\n", + " 26684 0.82 0.48 0.60 283\n", + " 26685 0.77 0.61 0.68 374\n", + " 26686 0.75 0.69 0.72 673\n", + " 26687 0.77 0.62 0.69 674\n", + " 26688 1.00 0.33 0.50 6\n", + " 27141 0.85 0.59 0.70 76\n", + " 27142 0.77 0.70 0.73 290\n", + " 27143 0.80 0.68 0.74 304\n", + " 27144 0.83 0.58 0.69 231\n", + " 27145 0.80 0.65 0.72 313\n", + " 27146 0.67 0.72 0.70 374\n", + " 27147 0.83 0.56 0.67 332\n", + " 27148 0.83 0.64 0.72 405\n", + " 27149 0.83 0.64 0.72 374\n", + " 27150 0.70 0.63 0.66 817\n", + " 27151 0.73 0.61 0.67 722\n", + " 27152 0.80 0.70 0.74 647\n", + " 27609 0.73 0.67 0.70 12\n", + " 27610 0.85 0.66 0.74 311\n", + " 27611 0.81 0.67 0.73 418\n", + " 27612 0.80 0.64 0.71 448\n", + " 27613 0.80 0.67 0.73 442\n", + " 27614 0.79 0.66 0.72 290\n", + " 27615 0.83 0.63 0.72 303\n", + " 27616 0.76 0.68 0.72 591\n", + " 27617 0.69 0.64 0.66 321\n", + " 27618 0.71 0.68 0.70 1901\n", + " 27619 0.66 0.69 0.68 878\n", + " 27620 0.81 0.66 0.73 580\n", + " 28082 0.87 0.69 0.77 313\n", + " 28083 0.70 0.57 0.62 469\n", + " 28084 0.76 0.70 0.73 625\n", + " 28085 0.73 0.66 0.70 391\n", + " 28086 0.78 0.62 0.69 258\n", + " 28087 0.83 0.59 0.69 283\n", + " 28088 0.76 0.70 0.73 478\n", + " 28089 0.76 0.57 0.65 396\n", + " 28090 0.64 0.59 0.61 461\n", + " 28091 0.79 0.69 0.74 865\n", + " 28092 0.76 0.67 0.71 469\n", + " 28093 0.77 0.42 0.55 57\n", + " 28558 0.88 0.62 0.73 192\n", + " 28559 0.72 0.70 0.71 511\n", + " 28560 0.74 0.70 0.72 377\n", + " 28561 0.77 0.70 0.73 565\n", + " 28562 0.76 0.57 0.65 416\n", + " 28563 0.75 0.62 0.68 335\n", + " 28564 0.79 0.59 0.67 253\n", + " 28565 0.83 0.66 0.73 357\n", + " 28566 0.79 0.61 0.69 420\n", + " 28567 0.82 0.65 0.72 409\n", + " 28568 0.87 0.66 0.75 356\n", + " 28569 1.00 0.64 0.78 56\n", + " 29038 0.83 0.66 0.73 109\n", + " 29039 0.71 0.64 0.68 793\n", + " 29040 0.72 0.70 0.71 444\n", + " 29041 0.70 0.67 0.69 513\n", + " 29042 0.75 0.63 0.68 358\n", + " 29043 0.79 0.63 0.70 275\n", + " 29044 0.79 0.65 0.71 351\n", + " 29045 0.76 0.66 0.70 419\n", + " 29046 0.78 0.65 0.71 385\n", + " 29047 0.86 0.61 0.71 230\n", + " 29048 0.87 0.68 0.76 301\n", + " 29049 0.93 0.57 0.70 44\n", + " 29522 0.94 0.59 0.73 110\n", + " 29523 0.63 0.70 0.66 595\n", + " 29524 0.80 0.63 0.71 491\n", + " 29525 0.75 0.63 0.68 419\n", + " 29526 0.82 0.67 0.74 347\n", + " 29527 0.88 0.69 0.77 297\n", + " 29528 0.80 0.60 0.69 282\n", + " 29529 0.75 0.68 0.72 438\n", + " 29530 0.79 0.64 0.70 564\n", + " 29531 0.83 0.60 0.69 294\n", + " 29532 0.79 0.68 0.73 292\n", + " 29533 0.82 0.70 0.76 262\n", + " 30010 0.90 0.77 0.83 103\n", + " 30011 0.86 0.56 0.68 156\n", + " 30012 0.81 0.67 0.73 248\n", + " 30013 0.75 0.64 0.69 990\n", + " 30014 0.79 0.57 0.66 273\n", + 
" 30015 0.81 0.57 0.67 169\n", + " 30016 0.79 0.68 0.73 304\n", + " 30017 0.84 0.68 0.75 407\n", + " 30018 0.79 0.67 0.72 680\n", + " 30019 0.81 0.65 0.72 395\n", + " 30020 0.77 0.68 0.72 318\n", + " 30021 0.82 0.67 0.74 778\n", + " 30022 0.94 0.73 0.82 119\n", + " 30502 0.79 0.69 0.74 269\n", + " 30503 0.84 0.63 0.72 346\n", + " 30504 0.82 0.70 0.75 391\n", + " 30505 0.79 0.67 0.73 332\n", + " 30506 0.80 0.65 0.72 579\n", + " 30507 0.82 0.63 0.71 358\n", + " 30508 0.85 0.69 0.76 360\n", + " 30509 0.86 0.65 0.74 335\n", + " 30510 0.90 0.64 0.75 123\n", + " 30511 0.85 0.70 0.76 936\n", + " 30512 0.82 0.63 0.71 213\n", + " 30513 0.85 0.69 0.76 242\n", + " 30514 0.93 0.77 0.84 202\n", + " 30997 0.91 0.68 0.78 247\n", + " 30998 0.87 0.79 0.83 537\n", + " 30999 0.78 0.73 0.76 585\n", + " 31000 0.75 0.77 0.76 714\n", + " 31001 0.74 0.59 0.66 367\n", + " 31002 0.82 0.66 0.73 364\n", + " 31003 0.76 0.62 0.68 485\n", + " 31004 0.80 0.65 0.72 148\n", + " 31005 0.00 0.00 0.00 1\n", + " 31007 0.94 0.94 0.94 16\n", + " 31008 1.00 0.47 0.64 17\n", + " 31009 0.94 0.72 0.81 61\n", + " 31012 1.00 0.73 0.84 11\n", + " 31497 1.00 0.55 0.71 40\n", + " 31498 0.93 0.80 0.86 269\n", + " 31499 0.86 0.68 0.76 392\n", + " 31500 0.88 0.72 0.79 195\n", + " 31501 0.90 0.60 0.72 43\n", + " 31502 0.85 0.76 0.80 539\n", + " 31503 0.80 0.80 0.80 693\n", + " 31504 0.89 0.69 0.78 235\n", + " 31512 0.94 0.82 0.88 213\n", + " 31513 0.95 0.74 0.83 125\n", + " 32002 1.00 0.50 0.67 12\n", + " 32003 0.89 0.77 0.83 139\n", + " 32006 0.89 0.73 0.80 98\n", + " 32007 0.88 0.63 0.74 73\n", + " 32008 0.94 0.76 0.84 59\n", + " 32016 0.87 0.84 0.86 223\n", + " 32017 0.86 0.87 0.87 206\n", + " 32524 0.89 0.80 0.84 164\n", + " 32525 0.93 0.79 0.85 67\n", + " 33037 0.84 0.80 0.82 81\n", + " 88755 0.76 0.68 0.72 19\n", + " 88756 0.00 0.00 0.00 1\n", + " 89267 1.00 0.25 0.40 4\n", + " 89268 1.00 0.89 0.94 9\n", + " 89781 0.90 0.82 0.86 22\n", + " 90291 1.00 1.00 1.00 3\n", + " 90292 1.00 0.50 0.67 2\n", + " 90293 1.00 0.72 0.84 64\n", + " 90805 1.00 1.00 1.00 1\n", + " 90806 0.92 0.85 0.88 13\n", + " 91314 1.00 1.00 1.00 2\n", + " 91315 1.00 1.00 1.00 1\n", + " 91316 0.00 0.00 0.00 1\n", + " 91317 1.00 0.50 0.67 2\n", + " 91830 1.00 1.00 1.00 1\n", + " 92339 0.00 0.00 0.00 1\n", + " 92340 0.67 1.00 0.80 2\n", + " 92851 1.00 1.00 1.00 5\n", + " 93363 0.00 0.00 0.00 1\n", + " 93364 0.00 0.00 0.00 1\n", + " 93365 1.00 0.75 0.86 4\n", + " 93876 1.00 1.00 1.00 1\n", + " 94386 0.00 0.00 0.00 1\n", + " 94388 1.00 1.00 1.00 1\n", "\n", - " accuracy 0.63 2318\n", - " macro avg 0.32 0.30 0.29 2318\n", - "weighted avg 0.65 0.63 0.63 2318\n", + " accuracy 0.69 124110\n", + " macro avg 0.80 0.65 0.71 124110\n", + "weighted avg 0.74 0.69 0.70 124110\n", "\n", - "TRAIN AND EVAL random-forest\n" + "TRAIN AND EVAL sgd\n", + "Best Parameters : {'loss': 'hinge', 'penalty': 'l2'}\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", + "/Users/jacquesfize/opt/anaconda3/envs/my_env/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. 
Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n", - "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n", - "/usr/local/lib/python3.8/site-packages/sklearn/model_selection/_split.py:670: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.\n", - " warnings.warn((\"The least populated class in y has only %d\"\n" + "/Users/jacquesfize/opt/anaconda3/envs/my_env/lib/python3.7/site-packages/sklearn/model_selection/_split.py:672: UserWarning: The least populated class in y has only 1 members, which is less than n_splits=5.\n", + " % (min_groups, self.n_splits)), UserWarning)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Best Parameters : {'criterion': 'gini', 'n_estimators': 100}\n", " precision recall f1-score support\n", "\n", - " 23545 0.33 0.10 0.15 10\n", - " 23546 0.68 0.68 0.68 31\n", - " 23547 0.00 0.00 0.00 6\n", - " 23981 0.00 0.00 0.00 11\n", - " 23982 0.82 0.97 0.89 557\n", - " 23983 0.79 0.64 0.71 111\n", - " 23984 0.00 0.00 0.00 2\n", - " 24422 0.82 0.75 0.78 71\n", - " 24423 0.65 0.64 0.64 105\n", - " 24424 0.40 0.13 0.20 15\n", - " 24867 0.80 0.80 0.80 30\n", - " 24868 1.00 0.33 0.50 6\n", - " 39537 1.00 0.67 0.80 3\n", - " 40045 0.00 0.00 0.00 1\n", - " 40046 0.00 0.00 0.00 0\n", - " 40047 0.00 0.00 0.00 1\n", - " 40049 0.00 0.00 0.00 0\n", - " 40558 1.00 0.50 0.67 4\n", - " 40559 0.67 1.00 0.80 2\n", - " 40561 0.00 0.00 0.00 1\n", - " 41070 0.00 0.00 0.00 1\n", - " 41071 1.00 0.75 0.86 4\n", - " 41072 0.00 0.00 0.00 4\n", - " 41073 0.25 0.50 0.33 2\n", - " 41582 0.00 0.00 0.00 0\n", - " 41583 0.90 1.00 0.95 19\n", - " 41584 0.33 0.50 0.40 2\n", - " 41585 0.00 0.00 0.00 1\n", - " 42094 0.00 0.00 0.00 0\n", - " 42095 0.25 0.33 0.29 3\n", - " 42096 1.00 1.00 1.00 2\n", - " 42097 0.00 0.00 0.00 4\n", - " 42606 0.00 0.00 0.00 3\n", - " 42607 1.00 0.25 0.40 4\n", - " 42609 0.50 0.33 0.40 3\n", - " 43117 0.00 0.00 0.00 2\n", - " 43118 0.50 0.50 0.50 2\n", - " 43119 0.00 0.00 0.00 1\n", - " 43120 0.00 0.00 0.00 3\n", - " 43121 1.00 0.67 0.80 3\n", - " 43122 0.33 0.50 0.40 2\n", - " 43123 0.00 0.00 0.00 0\n", - " 43630 0.00 0.00 0.00 2\n", - " 43631 0.20 0.25 0.22 4\n", - " 43632 0.00 0.00 0.00 1\n", - " 43633 0.00 0.00 0.00 1\n", - " 43634 0.00 0.00 0.00 1\n", - " 43635 0.00 0.00 0.00 2\n", - " 43636 0.78 0.64 0.70 11\n", - " 43637 0.00 0.00 0.00 1\n", - " 43638 1.00 0.50 0.67 2\n", - " 43639 0.00 0.00 0.00 3\n", - " 43640 0.00 0.00 0.00 2\n", - " 43641 0.00 0.00 0.00 1\n", - " 44142 0.40 0.67 0.50 3\n", - " 44143 0.73 0.80 0.76 10\n", - " 44144 0.50 1.00 0.67 1\n", - " 44146 0.00 0.00 0.00 1\n", - " 44147 0.00 0.00 0.00 2\n", - " 44149 0.00 0.00 0.00 2\n", - " 44150 0.68 0.81 0.74 16\n", - " 44151 0.57 0.57 0.57 7\n", - " 44152 0.57 1.00 0.73 4\n", - " 44153 0.00 0.00 0.00 1\n", - " 44154 0.00 0.00 0.00 1\n", - " 44654 0.00 0.00 0.00 1\n", - " 44655 0.75 0.75 0.75 4\n", - " 44656 0.00 0.00 0.00 2\n", - " 44657 0.00 0.00 0.00 2\n", - " 44658 1.00 1.00 1.00 2\n", - " 44660 0.00 0.00 0.00 4\n", - " 44661 0.83 0.56 0.67 9\n", - " 44662 0.81 0.78 0.79 64\n", - " 44663 0.33 0.50 0.40 12\n", - " 44664 0.50 0.33 0.40 3\n", - " 44665 0.33 0.33 0.33 3\n", - " 44666 0.40 0.67 0.50 3\n", - " 45165 1.00 1.00 1.00 
1\n", - " 45168 1.00 0.50 0.67 2\n", - " 45169 0.50 1.00 0.67 1\n", - " 45170 0.50 0.50 0.50 2\n", - " 45171 0.00 0.00 0.00 2\n", - " 45172 0.60 0.33 0.43 9\n", - " 45173 0.78 0.86 0.82 57\n", - " 45174 0.67 0.84 0.75 99\n", - " 45175 0.33 0.14 0.20 7\n", - " 45176 0.50 1.00 0.67 3\n", - " 45177 0.50 0.17 0.25 6\n", - " 45678 0.00 0.00 0.00 3\n", - " 45679 1.00 0.29 0.44 7\n", - " 45680 0.00 0.00 0.00 1\n", - " 45681 0.00 0.00 0.00 1\n", - " 45682 0.78 0.88 0.82 8\n", - " 45683 0.00 0.00 0.00 4\n", - " 45684 0.00 0.00 0.00 0\n", - " 45685 0.70 0.64 0.67 11\n", - " 45686 0.50 0.48 0.49 23\n", - " 45687 0.64 0.75 0.69 12\n", - " 45688 0.33 0.29 0.31 7\n", - " 45689 0.62 0.81 0.70 16\n", - " 45690 0.80 0.80 0.80 5\n", - " 46184 0.00 0.00 0.00 3\n", - " 46189 0.00 0.00 0.00 1\n", - " 46190 0.56 0.62 0.59 8\n", - " 46191 0.00 0.00 0.00 1\n", - " 46192 0.00 0.00 0.00 1\n", - " 46193 1.00 1.00 1.00 2\n", - " 46194 0.00 0.00 0.00 1\n", - " 46195 0.00 0.00 0.00 4\n", - " 46196 1.00 0.33 0.50 3\n", - " 46197 0.17 0.14 0.15 7\n", - " 46198 0.60 0.60 0.60 5\n", - " 46199 0.50 0.40 0.44 5\n", - " 46200 0.57 0.75 0.65 16\n", - " 46201 1.00 0.33 0.50 9\n", - " 46697 0.80 0.92 0.86 13\n", - " 46701 1.00 1.00 1.00 2\n", - " 46702 0.00 0.00 0.00 1\n", - " 46703 0.00 0.00 0.00 1\n", - " 46707 0.67 0.50 0.57 4\n", - " 46709 0.33 0.17 0.22 6\n", - " 46710 0.42 0.36 0.38 14\n", - " 46711 0.60 0.50 0.55 6\n", - " 46712 0.00 0.00 0.00 4\n", - " 46713 0.00 0.00 0.00 3\n", - " 46714 0.50 1.00 0.67 1\n", - " 47210 0.00 0.00 0.00 0\n", - " 47211 0.00 0.00 0.00 0\n", - " 47212 0.33 0.50 0.40 2\n", - " 47213 0.50 1.00 0.67 1\n", - " 47214 0.00 0.00 0.00 1\n", - " 47217 1.00 0.50 0.67 4\n", - " 47218 0.00 0.00 0.00 1\n", - " 47219 1.00 0.50 0.67 2\n", - " 47220 0.75 0.75 0.75 4\n", - " 47221 0.40 0.62 0.48 13\n", - " 47222 0.50 0.33 0.40 6\n", - " 47223 0.50 0.50 0.50 4\n", - " 47224 0.50 1.00 0.67 1\n", - " 47225 0.89 0.89 0.89 9\n", - " 47226 0.50 1.00 0.67 1\n", - " 47723 1.00 0.50 0.67 2\n", - " 47726 0.00 0.00 0.00 1\n", - " 47730 0.00 0.00 0.00 1\n", - " 47731 0.00 0.00 0.00 0\n", - " 47733 0.55 0.50 0.52 12\n", - " 47734 0.25 0.25 0.25 4\n", - " 47735 0.25 0.50 0.33 2\n", - " 47736 0.00 0.00 0.00 1\n", - " 47737 0.00 0.00 0.00 2\n", - " 47738 0.20 0.33 0.25 3\n", - " 48235 0.00 0.00 0.00 1\n", - " 48236 0.00 0.00 0.00 1\n", - " 48237 1.00 1.00 1.00 2\n", - " 48239 0.00 0.00 0.00 1\n", - " 48240 0.00 0.00 0.00 1\n", - " 48242 0.00 0.00 0.00 0\n", - " 48243 0.00 0.00 0.00 1\n", - " 48244 1.00 0.50 0.67 6\n", - " 48245 0.40 0.20 0.27 10\n", - " 48246 0.00 0.00 0.00 5\n", - " 48247 1.00 0.50 0.67 2\n", - " 48248 0.67 0.67 0.67 3\n", - " 48249 0.25 0.50 0.33 2\n", - " 48250 0.00 0.00 0.00 3\n", - " 48748 0.00 0.00 0.00 1\n", - " 48749 0.00 0.00 0.00 1\n", - " 48753 0.00 0.00 0.00 3\n", - " 48754 0.00 0.00 0.00 4\n", - " 48755 0.00 0.00 0.00 1\n", - " 48756 0.40 0.67 0.50 3\n", - " 48757 0.85 0.99 0.91 68\n", - " 48758 0.00 0.00 0.00 2\n", - " 48759 0.71 0.83 0.77 12\n", - " 48760 0.50 0.31 0.38 16\n", - " 48761 0.50 0.29 0.36 7\n", - " 48762 1.00 0.43 0.60 7\n", - " 49265 0.00 0.00 0.00 1\n", - " 49266 0.00 0.00 0.00 3\n", - " 49267 0.00 0.00 0.00 2\n", - " 49268 0.88 0.94 0.91 16\n", - " 49269 0.00 0.00 0.00 5\n", - " 49270 0.40 0.67 0.50 3\n", - " 49271 0.25 0.50 0.33 2\n", - " 49272 0.59 0.68 0.63 40\n", - " 49273 0.17 0.50 0.25 2\n", - " 49274 0.80 0.73 0.76 11\n", - " 49773 0.00 0.00 0.00 0\n", - " 49776 0.00 0.00 0.00 1\n", - " 49778 0.00 0.00 0.00 1\n", - " 49779 1.00 0.33 0.50 3\n", - " 49780 0.85 0.99 0.91 
70\n", - " 49781 0.60 0.75 0.67 4\n", - " 49782 0.00 0.00 0.00 4\n", - " 49783 0.00 0.00 0.00 0\n", - " 49784 0.79 0.74 0.76 65\n", - " 49785 0.64 0.88 0.74 33\n", - " 50283 0.00 0.00 0.00 1\n", - " 50288 1.00 1.00 1.00 2\n", - " 50289 0.00 0.00 0.00 0\n", - " 50290 0.00 0.00 0.00 1\n", - " 50291 0.50 0.27 0.35 11\n", - " 50292 0.67 0.29 0.40 7\n", - " 50293 0.33 0.50 0.40 2\n", - " 50294 0.00 0.00 0.00 0\n", - " 50295 0.50 0.33 0.40 3\n", - " 50296 0.27 0.30 0.29 10\n", - " 50297 1.00 0.70 0.82 10\n", - " 50801 1.00 0.50 0.67 4\n", - " 50802 0.00 0.00 0.00 2\n", - " 50804 0.00 0.00 0.00 2\n", - " 50805 0.50 0.67 0.57 3\n", - " 50806 0.67 1.00 0.80 2\n", - " 50807 0.50 0.33 0.40 3\n", - " 50808 0.20 0.17 0.18 6\n", - " 50809 0.00 0.00 0.00 0\n", - " 51315 0.00 0.00 0.00 1\n", - " 51316 1.00 1.00 1.00 2\n", - " 51317 0.00 0.00 0.00 1\n", - " 51318 1.00 1.00 1.00 2\n", - " 51319 0.00 0.00 0.00 0\n", - " 51320 0.00 0.00 0.00 1\n", - " 51827 0.00 0.00 0.00 1\n", - " 51828 0.00 0.00 0.00 1\n", - " 51829 0.00 0.00 0.00 1\n", - " 51830 0.50 0.25 0.33 4\n", - " 52340 0.50 0.80 0.62 5\n", - " 52341 0.65 0.68 0.67 22\n", - " 52851 0.00 0.00 0.00 1\n", - " 52852 1.00 0.33 0.50 3\n", - " 52853 0.80 0.57 0.67 7\n", - " 52854 1.00 0.50 0.67 4\n", - " 53362 0.80 1.00 0.89 8\n", - " 53364 0.00 0.00 0.00 1\n", - " 53875 0.00 0.00 0.00 2\n", - " 54387 0.00 0.00 0.00 0\n", - " 54388 0.00 0.00 0.00 1\n", - " 54389 0.00 0.00 0.00 0\n", - " 54899 0.00 0.00 0.00 1\n", - " 54900 0.76 0.76 0.76 17\n", - " 54901 0.70 0.64 0.67 11\n", - " 54902 1.00 0.88 0.93 8\n", - " 55412 0.20 1.00 0.33 1\n", - " 55413 0.67 0.67 0.67 9\n", + " 21425 1.00 0.67 0.80 9\n", + " 21426 0.74 0.78 0.76 150\n", + " 21841 0.62 0.64 0.63 225\n", + " 21842 0.67 0.80 0.73 741\n", + " 21843 0.73 0.71 0.72 561\n", + " 22261 0.78 0.58 0.66 151\n", + " 22262 0.70 0.59 0.64 866\n", + " 22263 0.64 0.69 0.66 1667\n", + " 22264 0.33 0.33 0.33 6\n", + " 22684 0.84 0.51 0.64 101\n", + " 22685 0.72 0.57 0.63 370\n", + " 22686 0.66 0.70 0.68 1118\n", + " 22687 0.62 0.65 0.63 1064\n", + " 22688 0.70 0.61 0.65 641\n", + " 22689 0.73 0.65 0.69 85\n", + " 23112 0.69 0.75 0.72 609\n", + " 23113 0.73 0.69 0.71 725\n", + " 23114 0.72 0.58 0.64 749\n", + " 23115 0.68 0.59 0.63 899\n", + " 23116 0.64 0.58 0.61 510\n", + " 23117 0.67 0.68 0.67 613\n", + " 23118 0.68 0.56 0.62 176\n", + " 23541 0.85 0.81 0.83 36\n", + " 23542 0.58 0.59 0.58 140\n", + " 23543 0.80 0.67 0.73 12\n", + " 23544 0.70 0.59 0.64 759\n", + " 23545 0.68 0.58 0.63 1101\n", + " 23546 0.65 0.53 0.58 931\n", + " 23547 0.59 0.59 0.59 648\n", + " 23548 0.62 0.63 0.63 1056\n", + " 23549 0.63 0.59 0.61 384\n", + " 23550 0.65 0.62 0.63 419\n", + " 23551 0.68 0.72 0.70 760\n", + " 23552 0.74 0.81 0.77 158\n", + " 23553 0.00 0.00 0.00 3\n", + " 23977 0.78 0.58 0.66 114\n", + " 23978 0.70 0.60 0.64 545\n", + " 23979 0.64 0.75 0.69 1018\n", + " 23980 0.68 0.61 0.64 605\n", + " 23981 0.62 0.55 0.59 484\n", + " 23982 0.65 0.87 0.74 12576\n", + " 23983 0.68 0.56 0.61 1060\n", + " 23984 0.65 0.54 0.59 400\n", + " 23985 0.59 0.57 0.58 397\n", + " 23986 0.63 0.57 0.60 340\n", + " 23987 0.64 0.53 0.58 863\n", + " 23988 0.69 0.66 0.67 606\n", + " 23989 0.69 0.69 0.69 542\n", + " 24415 0.81 0.60 0.69 113\n", + " 24416 0.76 0.69 0.72 188\n", + " 24417 0.68 0.71 0.69 38\n", + " 24418 0.63 0.68 0.65 678\n", + " 24419 0.66 0.57 0.61 802\n", + " 24420 0.61 0.63 0.62 415\n", + " 24421 0.70 0.62 0.66 381\n", + " 24422 0.66 0.59 0.63 736\n", + " 24423 0.67 0.57 0.62 1154\n", + " 24424 0.62 0.45 0.52 397\n", + " 24425 
0.64 0.57 0.60 345\n", + " 24426 0.68 0.60 0.64 377\n", + " 24427 0.66 0.56 0.61 780\n", + " 24428 0.64 0.51 0.57 528\n", + " 24429 0.64 0.87 0.74 1290\n", + " 24858 0.70 0.77 0.73 312\n", + " 24859 0.71 0.76 0.73 467\n", + " 24860 0.69 0.77 0.73 394\n", + " 24861 0.70 0.74 0.72 544\n", + " 24862 0.69 0.70 0.69 580\n", + " 24863 0.65 0.56 0.60 419\n", + " 24864 0.64 0.58 0.61 360\n", + " 24865 0.69 0.59 0.64 328\n", + " 24866 0.72 0.53 0.61 316\n", + " 24867 0.72 0.63 0.67 443\n", + " 24868 0.57 0.39 0.47 312\n", + " 24869 0.59 0.52 0.55 483\n", + " 24870 0.74 0.52 0.61 259\n", + " 24871 0.68 0.60 0.64 502\n", + " 24872 0.70 0.70 0.70 722\n", + " 24873 0.62 0.62 0.62 749\n", + " 25306 0.69 0.73 0.71 273\n", + " 25307 0.77 0.69 0.73 323\n", + " 25308 0.75 0.64 0.69 418\n", + " 25309 0.70 0.66 0.68 691\n", + " 25310 0.66 0.68 0.67 456\n", + " 25311 0.70 0.58 0.63 470\n", + " 25312 0.62 0.59 0.61 484\n", + " 25313 0.62 0.64 0.63 333\n", + " 25314 0.71 0.68 0.69 395\n", + " 25315 0.70 0.56 0.63 271\n", + " 25316 0.74 0.62 0.68 478\n", + " 25317 0.66 0.45 0.54 324\n", + " 25318 0.63 0.56 0.60 286\n", + " 25319 0.67 0.61 0.63 522\n", + " 25320 0.60 0.66 0.63 680\n", + " 25321 0.66 0.53 0.59 585\n", + " 25758 0.81 0.80 0.80 125\n", + " 25759 0.74 0.64 0.69 328\n", + " 25760 0.66 0.83 0.74 789\n", + " 25761 0.75 0.53 0.62 505\n", + " 25762 0.77 0.58 0.66 351\n", + " 25763 0.69 0.54 0.61 538\n", + " 25764 0.63 0.63 0.63 577\n", + " 25765 0.71 0.56 0.63 360\n", + " 25766 0.67 0.60 0.63 229\n", + " 25767 0.69 0.64 0.66 305\n", + " 25768 0.70 0.53 0.60 305\n", + " 25769 0.63 0.47 0.54 343\n", + " 25770 0.63 0.68 0.65 666\n", + " 25771 0.57 0.62 0.60 883\n", + " 25772 0.66 0.54 0.60 334\n", + " 25773 0.77 0.61 0.68 122\n", + " 26215 1.00 0.20 0.33 20\n", + " 26216 0.67 0.57 0.62 218\n", + " 26217 0.63 0.59 0.61 314\n", + " 26218 0.60 0.66 0.63 1548\n", + " 26219 0.65 0.74 0.69 823\n", + " 26220 0.65 0.58 0.61 366\n", + " 26221 0.75 0.56 0.64 279\n", + " 26222 0.70 0.61 0.65 242\n", + " 26223 0.65 0.67 0.66 361\n", + " 26224 0.59 0.58 0.58 291\n", + " 26225 0.64 0.62 0.63 426\n", + " 26226 0.59 0.58 0.59 456\n", + " 26227 0.77 0.55 0.64 454\n", + " 26228 0.70 0.66 0.68 107\n", + " 26676 0.70 0.62 0.66 77\n", + " 26677 0.67 0.50 0.57 322\n", + " 26678 0.68 0.68 0.68 437\n", + " 26679 0.71 0.66 0.69 435\n", + " 26680 0.69 0.70 0.70 367\n", + " 26681 0.67 0.59 0.63 263\n", + " 26682 0.66 0.55 0.60 280\n", + " 26683 0.67 0.59 0.63 348\n", + " 26684 0.58 0.53 0.55 283\n", + " 26685 0.67 0.57 0.62 374\n", + " 26686 0.61 0.65 0.63 673\n", + " 26687 0.70 0.62 0.65 674\n", + " 26688 1.00 0.50 0.67 6\n", + " 27141 0.61 0.62 0.61 76\n", + " 27142 0.70 0.64 0.67 290\n", + " 27143 0.58 0.62 0.60 304\n", + " 27144 0.59 0.55 0.57 231\n", + " 27145 0.76 0.60 0.67 313\n", + " 27146 0.64 0.70 0.67 374\n", + " 27147 0.64 0.53 0.58 332\n", + " 27148 0.70 0.63 0.66 405\n", + " 27149 0.65 0.53 0.58 374\n", + " 27150 0.65 0.62 0.63 817\n", + " 27151 0.70 0.57 0.63 722\n", + " 27152 0.69 0.74 0.71 647\n", + " 27609 0.36 0.75 0.49 12\n", + " 27610 0.66 0.64 0.65 311\n", + " 27611 0.70 0.66 0.68 418\n", + " 27612 0.69 0.59 0.64 448\n", + " 27613 0.63 0.72 0.67 442\n", + " 27614 0.70 0.63 0.66 290\n", + " 27615 0.65 0.69 0.67 303\n", + " 27616 0.69 0.67 0.68 591\n", + " 27617 0.59 0.58 0.58 321\n", + " 27618 0.64 0.70 0.67 1901\n", + " 27619 0.63 0.70 0.66 878\n", + " 27620 0.67 0.62 0.64 580\n", + " 28082 0.80 0.62 0.70 313\n", + " 28083 0.71 0.51 0.59 469\n", + " 28084 0.63 0.70 0.66 625\n", + " 28085 0.68 0.62 0.65 391\n", + " 
28086 0.58 0.66 0.62 258\n", + " 28087 0.73 0.54 0.62 283\n", + " 28088 0.72 0.61 0.66 478\n", + " 28089 0.64 0.61 0.62 396\n", + " 28090 0.57 0.49 0.53 461\n", + " 28091 0.58 0.66 0.62 865\n", + " 28092 0.70 0.66 0.68 469\n", + " 28093 0.65 0.35 0.45 57\n", + " 28558 0.66 0.65 0.65 192\n", + " 28559 0.70 0.70 0.70 511\n", + " 28560 0.71 0.64 0.67 377\n", + " 28561 0.68 0.63 0.65 565\n", + " 28562 0.67 0.62 0.64 416\n", + " 28563 0.62 0.63 0.63 335\n", + " 28564 0.72 0.56 0.63 253\n", + " 28565 0.64 0.61 0.63 357\n", + " 28566 0.73 0.57 0.64 420\n", + " 28567 0.68 0.56 0.61 409\n", + " 28568 0.70 0.73 0.71 356\n", + " 28569 0.75 0.77 0.76 56\n", + " 29038 0.71 0.71 0.71 109\n", + " 29039 0.65 0.64 0.65 793\n", + " 29040 0.69 0.59 0.63 444\n", + " 29041 0.68 0.60 0.64 513\n", + " 29042 0.63 0.68 0.66 358\n", + " 29043 0.68 0.59 0.63 275\n", + " 29044 0.66 0.67 0.67 351\n", + " 29045 0.62 0.67 0.64 419\n", + " 29046 0.68 0.62 0.65 385\n", + " 29047 0.65 0.65 0.65 230\n", + " 29048 0.74 0.63 0.68 301\n", + " 29049 0.57 0.52 0.55 44\n", + " 29522 0.74 0.74 0.74 110\n", + " 29523 0.73 0.67 0.70 595\n", + " 29524 0.66 0.60 0.63 491\n", + " 29525 0.64 0.62 0.63 419\n", + " 29526 0.62 0.62 0.62 347\n", + " 29527 0.63 0.73 0.68 297\n", + " 29528 0.65 0.57 0.61 282\n", + " 29529 0.64 0.65 0.64 438\n", + " 29530 0.68 0.59 0.63 564\n", + " 29531 0.71 0.47 0.57 294\n", + " 29532 0.61 0.62 0.61 292\n", + " 29533 0.71 0.57 0.63 262\n", + " 30010 0.84 0.69 0.76 103\n", + " 30011 0.64 0.54 0.58 156\n", + " 30012 0.63 0.65 0.64 248\n", + " 30013 0.54 0.58 0.56 990\n", + " 30014 0.74 0.53 0.62 273\n", + " 30015 0.66 0.54 0.59 169\n", + " 30016 0.72 0.63 0.67 304\n", + " 30017 0.69 0.61 0.65 407\n", + " 30018 0.63 0.68 0.66 680\n", + " 30019 0.75 0.58 0.66 395\n", + " 30020 0.63 0.70 0.67 318\n", + " 30021 0.63 0.69 0.66 778\n", + " 30022 0.71 0.54 0.61 119\n", + " 30502 0.67 0.67 0.67 269\n", + " 30503 0.65 0.61 0.63 346\n", + " 30504 0.73 0.56 0.63 391\n", + " 30505 0.67 0.56 0.61 332\n", + " 30506 0.66 0.53 0.59 579\n", + " 30507 0.63 0.63 0.63 358\n", + " 30508 0.70 0.70 0.70 360\n", + " 30509 0.60 0.70 0.65 335\n", + " 30510 0.74 0.59 0.65 123\n", + " 30511 0.57 0.71 0.63 936\n", + " 30512 0.68 0.57 0.62 213\n", + " 30513 0.69 0.64 0.67 242\n", + " 30514 0.62 0.74 0.68 202\n", + " 30997 0.70 0.71 0.71 247\n", + " 30998 0.65 0.80 0.72 537\n", + " 30999 0.68 0.67 0.68 585\n", + " 31000 0.64 0.76 0.69 714\n", + " 31001 0.64 0.56 0.60 367\n", + " 31002 0.61 0.60 0.61 364\n", + " 31003 0.68 0.62 0.65 485\n", + " 31004 0.78 0.68 0.72 148\n", + " 31005 0.00 0.00 0.00 1\n", + " 31007 0.78 0.44 0.56 16\n", + " 31008 0.75 0.53 0.62 17\n", + " 31009 0.72 0.77 0.75 61\n", + " 31012 1.00 0.64 0.78 11\n", + " 31497 0.44 0.65 0.53 40\n", + " 31498 0.73 0.76 0.74 269\n", + " 31499 0.71 0.68 0.69 392\n", + " 31500 0.71 0.61 0.65 195\n", + " 31501 0.61 0.58 0.60 43\n", + " 31502 0.75 0.72 0.73 539\n", + " 31503 0.64 0.83 0.72 693\n", + " 31504 0.65 0.69 0.67 235\n", + " 31512 0.76 0.79 0.77 213\n", + " 31513 0.76 0.70 0.73 125\n", + " 32002 0.70 0.58 0.64 12\n", + " 32003 0.61 0.60 0.60 139\n", + " 32006 0.67 0.70 0.69 98\n", + " 32007 0.80 0.59 0.68 73\n", + " 32008 0.79 0.58 0.67 59\n", + " 32016 0.75 0.70 0.72 223\n", + " 32017 0.75 0.84 0.79 206\n", + " 32524 0.78 0.74 0.76 164\n", + " 32525 0.85 0.66 0.74 67\n", + " 33037 0.69 0.69 0.69 81\n", + " 88755 0.67 0.84 0.74 19\n", + " 88756 0.00 0.00 0.00 1\n", + " 89267 1.00 0.25 0.40 4\n", + " 89268 0.60 1.00 0.75 9\n", + " 89781 0.76 0.73 0.74 22\n", + " 90291 0.29 0.67 
0.40 3\n", + " 90292 0.67 1.00 0.80 2\n", + " 90293 0.79 0.84 0.82 64\n", + " 90805 0.00 0.00 0.00 1\n", + " 90806 0.71 0.92 0.80 13\n", + " 91314 1.00 1.00 1.00 2\n", + " 91315 1.00 1.00 1.00 1\n", + " 91316 0.00 0.00 0.00 1\n", + " 91317 0.00 0.00 0.00 2\n", + " 91830 0.00 0.00 0.00 1\n", + " 92339 0.00 0.00 0.00 1\n", + " 92340 0.00 0.00 0.00 2\n", + " 92851 0.44 0.80 0.57 5\n", + " 93363 0.00 0.00 0.00 1\n", + " 93364 0.00 0.00 0.00 1\n", + " 93365 0.80 1.00 0.89 4\n", + " 93876 1.00 1.00 1.00 1\n", + " 94386 0.00 1.00 0.01 1\n", + " 94388 0.00 0.00 0.00 1\n", + "\n", + " accuracy 0.66 124110\n", + " macro avg 0.65 0.61 0.62 124110\n", + "weighted avg 0.66 0.66 0.66 124110\n", "\n", - " accuracy 0.70 2318\n", - " macro avg 0.37 0.35 0.34 2318\n", - "weighted avg 0.68 0.70 0.68 2318\n", - "\n" + "TRAIN AND EVAL knn\n" ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n", - "/usr/local/lib/python3.8/site-packages/sklearn/metrics/_classification.py:1221: UndefinedMetricWarning: Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.\n", - " _warn_prf(average, modifier, msg_start, len(result))\n" + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001B[0;31m---------------------------------------------------------------------------\u001B[0m", + "\u001B[0;31mKeyboardInterrupt\u001B[0m Traceback (most recent call last)", + "\u001B[0;32m<ipython-input-16-495ee00830c8>\u001B[0m in \u001B[0;36m<module>\u001B[0;34m\u001B[0m\n\u001B[1;32m 4\u001B[0m \u001B[0mclassifier_dict\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mCLASSIFIER\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mparameters\u001B[0m\u001B[0;34m[\u001B[0m\u001B[0mCLASSIFIER\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mscoring\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;34m'f1_weighted'\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0mn_jobs\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0;34m-\u001B[0m\u001B[0;36m1\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 5\u001B[0m )\n\u001B[0;32m----> 6\u001B[0;31m \u001B[0mclf\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mfit\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mX_train\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0my_train\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 7\u001B[0m \u001B[0mprint\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m\"Best Parameters : \"\u001B[0m\u001B[0;34m,\u001B[0m\u001B[0mclf\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mbest_params_\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 8\u001B[0m \u001B[0my_pred\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mclf\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mbest_estimator_\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mpredict\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mX_test\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/opt/anaconda3/envs/my_env/lib/python3.7/site-packages/sklearn/utils/validation.py\u001B[0m in \u001B[0;36minner_f\u001B[0;34m(*args, **kwargs)\u001B[0m\n\u001B[1;32m 70\u001B[0m 
FutureWarning)\n\u001B[1;32m 71\u001B[0m \u001B[0mkwargs\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mupdate\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m{\u001B[0m\u001B[0mk\u001B[0m\u001B[0;34m:\u001B[0m \u001B[0marg\u001B[0m \u001B[0;32mfor\u001B[0m \u001B[0mk\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0marg\u001B[0m \u001B[0;32min\u001B[0m \u001B[0mzip\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0msig\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mparameters\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0margs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m}\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m---> 72\u001B[0;31m \u001B[0;32mreturn\u001B[0m \u001B[0mf\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m**\u001B[0m\u001B[0mkwargs\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 73\u001B[0m \u001B[0;32mreturn\u001B[0m \u001B[0minner_f\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 74\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/opt/anaconda3/envs/my_env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001B[0m in \u001B[0;36mfit\u001B[0;34m(self, X, y, groups, **fit_params)\u001B[0m\n\u001B[1;32m 734\u001B[0m \u001B[0;32mreturn\u001B[0m \u001B[0mresults\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 735\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 736\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_run_search\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mevaluate_candidates\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 737\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 738\u001B[0m \u001B[0;31m# For multi-metric evaluation, store the best_index_, best_params_ and\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/opt/anaconda3/envs/my_env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001B[0m in \u001B[0;36m_run_search\u001B[0;34m(self, evaluate_candidates)\u001B[0m\n\u001B[1;32m 1186\u001B[0m \u001B[0;32mdef\u001B[0m \u001B[0m_run_search\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mevaluate_candidates\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 1187\u001B[0m \u001B[0;34m\"\"\"Search all candidates in param_grid\"\"\"\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m-> 1188\u001B[0;31m \u001B[0mevaluate_candidates\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mParameterGrid\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mparam_grid\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 1189\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 1190\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/opt/anaconda3/envs/my_env/lib/python3.7/site-packages/sklearn/model_selection/_search.py\u001B[0m in \u001B[0;36mevaluate_candidates\u001B[0;34m(candidate_params)\u001B[0m\n\u001B[1;32m 713\u001B[0m \u001B[0;32mfor\u001B[0m \u001B[0mparameters\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m(\u001B[0m\u001B[0mtrain\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mtest\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 714\u001B[0m in product(candidate_params,\n\u001B[0;32m--> 715\u001B[0;31m cv.split(X, y, 
groups)))\n\u001B[0m\u001B[1;32m 716\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 717\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mlen\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mout\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;34m<\u001B[0m \u001B[0;36m1\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/opt/anaconda3/envs/my_env/lib/python3.7/site-packages/joblib/parallel.py\u001B[0m in \u001B[0;36m__call__\u001B[0;34m(self, iterable)\u001B[0m\n\u001B[1;32m 1052\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 1053\u001B[0m \u001B[0;32mwith\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_backend\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mretrieval_context\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m-> 1054\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mretrieve\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 1055\u001B[0m \u001B[0;31m# Make sure that we get a last message telling us we are done\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 1056\u001B[0m \u001B[0melapsed_time\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0mtime\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtime\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m \u001B[0;34m-\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_start_time\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/opt/anaconda3/envs/my_env/lib/python3.7/site-packages/joblib/parallel.py\u001B[0m in \u001B[0;36mretrieve\u001B[0;34m(self)\u001B[0m\n\u001B[1;32m 931\u001B[0m \u001B[0;32mtry\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 932\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mgetattr\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_backend\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;34m'supports_timeout'\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0;32mFalse\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 933\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_output\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mextend\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mjob\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mget\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mtimeout\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mtimeout\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 934\u001B[0m \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 935\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_output\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mextend\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mjob\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mget\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/opt/anaconda3/envs/my_env/lib/python3.7/site-packages/joblib/_parallel_backends.py\u001B[0m in \u001B[0;36mwrap_future_result\u001B[0;34m(future, timeout)\u001B[0m\n\u001B[1;32m 540\u001B[0m AsyncResults.get from multiprocessing.\"\"\"\n\u001B[1;32m 541\u001B[0m 
\u001B[0;32mtry\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 542\u001B[0;31m \u001B[0;32mreturn\u001B[0m \u001B[0mfuture\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mresult\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mtimeout\u001B[0m\u001B[0;34m=\u001B[0m\u001B[0mtimeout\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 543\u001B[0m \u001B[0;32mexcept\u001B[0m \u001B[0mCfTimeoutError\u001B[0m \u001B[0;32mas\u001B[0m \u001B[0me\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 544\u001B[0m \u001B[0;32mraise\u001B[0m \u001B[0mTimeoutError\u001B[0m \u001B[0;32mfrom\u001B[0m \u001B[0me\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/opt/anaconda3/envs/my_env/lib/python3.7/concurrent/futures/_base.py\u001B[0m in \u001B[0;36mresult\u001B[0;34m(self, timeout)\u001B[0m\n\u001B[1;32m 428\u001B[0m \u001B[0;32mreturn\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m__get_result\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 429\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 430\u001B[0;31m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_condition\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0mwait\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0mtimeout\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 431\u001B[0m \u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 432\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mself\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0m_state\u001B[0m \u001B[0;32min\u001B[0m \u001B[0;34m[\u001B[0m\u001B[0mCANCELLED\u001B[0m\u001B[0;34m,\u001B[0m \u001B[0mCANCELLED_AND_NOTIFIED\u001B[0m\u001B[0;34m]\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;32m~/opt/anaconda3/envs/my_env/lib/python3.7/threading.py\u001B[0m in \u001B[0;36mwait\u001B[0;34m(self, timeout)\u001B[0m\n\u001B[1;32m 294\u001B[0m \u001B[0;32mtry\u001B[0m\u001B[0;34m:\u001B[0m \u001B[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 295\u001B[0m \u001B[0;32mif\u001B[0m \u001B[0mtimeout\u001B[0m \u001B[0;32mis\u001B[0m \u001B[0;32mNone\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0;32m--> 296\u001B[0;31m \u001B[0mwaiter\u001B[0m\u001B[0;34m.\u001B[0m\u001B[0macquire\u001B[0m\u001B[0;34m(\u001B[0m\u001B[0;34m)\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[0m\u001B[1;32m 297\u001B[0m \u001B[0mgotit\u001B[0m \u001B[0;34m=\u001B[0m \u001B[0;32mTrue\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n\u001B[1;32m 298\u001B[0m \u001B[0;32melse\u001B[0m\u001B[0;34m:\u001B[0m\u001B[0;34m\u001B[0m\u001B[0;34m\u001B[0m\u001B[0m\n", + "\u001B[0;31mKeyboardInterrupt\u001B[0m: " ] } ], @@ -1751,9 +1103,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3.7.5 64-bit", + "display_name": "Python (my_env)", "language": "python", - "name": "python37564bitdc8b0e1290e74b85b0e630c435ea2fe8" + "name": "my_env" }, "language_info": { "codemirror_mode": { @@ -1765,9 +1117,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/train_baseline.py b/train_baseline.py new 
file mode 100644
index 0000000..7fb6f3d
--- /dev/null
+++ b/train_baseline.py
@@ -0,0 +1,121 @@
+# -*- coding: utf-8 -*-
+
+import os
+import argparse
+
+# BASIC
+import pandas as pd
+import numpy as np
+
+# MACHINE LEARNING
+from sklearn.feature_extraction.text import CountVectorizer
+from sklearn.feature_extraction.text import TfidfTransformer
+from sklearn.pipeline import Pipeline
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.svm import SVC
+from sklearn.linear_model import SGDClassifier
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.ensemble import RandomForestClassifier
+
+from sklearn.model_selection import GridSearchCV
+
+# ML HELPERS
+from sklearn import preprocessing
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import classification_report
+
+from glob import glob
+from joblib import dump, load
+
+# PROGRESS BAR
+from tqdm import tqdm
+
+from ngram import NGram
+
+parser = argparse.ArgumentParser()
+parser.add_argument("dataset_name")
+parser.add_argument("inclusion_fn")
+parser.add_argument("adjacency_fn")
+parser.add_argument("cooc_fn")
+
+args = parser.parse_args()
+
+DATASET_NAME = args.dataset_name
+I_FN = args.inclusion_fn
+A_FN = args.adjacency_fn
+C_FN = args.cooc_fn
+
+OUTPUT_DIR = "outputs"
+
+verbose = False
+
+# Abort early if one of the input files is missing
+for fn in [I_FN, A_FN, C_FN]:
+    if not os.path.exists(fn):
+        raise FileNotFoundError("{0} does not exist!".format(fn))
+
+# Baseline classifiers and their grid-search spaces
+classifier_dict = {
+    "naive-bayes": MultinomialNB(),
+    "svm": SVC(),
+    "sgd": SGDClassifier(),
+    "knn": KNeighborsClassifier(),
+    "decision-tree": DecisionTreeClassifier(),
+    "random-forest": RandomForestClassifier()
+}
+
+parameters = {
+    "naive-bayes": [{"alpha": [0, 1]}],
+    "svm": [{"kernel": ["rbf", "poly"], "gamma": [1e-1, 1e-2, 1e-3, 1, 10, 100]}],
+    "sgd": [{"penalty": ["l1", "l2"], "loss": ["hinge", "modified_huber", "log"]}],
+    "knn": [{"n_neighbors": list(range(4, 8)), "p": [1, 2]}],
+    "decision-tree": [{"criterion": ["gini", "entropy"]}],
+    "random-forest": [{"criterion": ["gini", "entropy"], "n_estimators": [10, 50, 100]}]
+}
+
+# Combinations of the input relations (Inclusion, Co-occurrence, Adjacency) used for training
+combinaison = [
+    [I_FN, C_FN, A_FN],
+    [I_FN, C_FN],
+    [C_FN],
+    [C_FN, A_FN]
+]
+combinaison_label = [
+    "PIC",
+    "IC",
+    "C",
+    "PC"
+]
+
+for ix, comb in enumerate(combinaison):
+    # Only the first 500 rows of each input file are kept
+    df = pd.concat([pd.read_csv(fn, sep="\t").head(500) for fn in comb])
+
+    # TF-IDF over 4-grams of the toponym and its context toponym
+    index = NGram(n=4)
+    data_vectorizer = Pipeline([
+        ('vect', CountVectorizer(tokenizer=index.split)),
+        ('tfidf', TfidfTransformer()),
+    ])
+
+    X_train, y_train = (df[df.split == "train"].toponym + " " + df[df.split == "train"].toponym_context).values, df[df.split == "train"].hp_split
+    X_test, y_test = (df[df.split == "test"].toponym + " " + df[df.split == "test"].toponym_context).values, df[df.split == "test"].hp_split
+    data_vectorizer.fit((df.toponym + " " + df.toponym_context).values)
+
+    dump(data_vectorizer, "{2}/{0}_{1}_vectorizer.pkl".format(DATASET_NAME, combinaison_label[ix], OUTPUT_DIR))
+
+    X_train = data_vectorizer.transform(X_train)
+    X_test = data_vectorizer.transform(X_test)
+
+    # Grid-search, evaluate and save each baseline classifier
+    for CLASSIFIER in tqdm(classifier_dict):
+        if verbose: print("TRAIN AND EVAL {0}".format(CLASSIFIER))
+        clf = GridSearchCV(
+            classifier_dict[CLASSIFIER], parameters[CLASSIFIER], scoring='f1_weighted', n_jobs=-1
+        )
+        clf.fit(X_train, y_train)
+        if verbose: print("Best Parameters : ", clf.best_params_)
+        y_pred = clf.best_estimator_.predict(X_test)
+        if verbose: print(classification_report(y_test, y_pred))
+        dump(clf.best_estimator_, "{0}/{1}_{2}_{3}.pkl".format(OUTPUT_DIR, DATASET_NAME, combinaison_label[ix], CLASSIFIER))
\ No newline at end of file
-- GitLab
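For reference, a minimal sketch of how the script above might be used once merged. The dataset name FR, the input CSV names and the PIC/svm artifact picked below are purely illustrative, not part of the patch; the only assumptions taken from train_baseline.py itself are the outputs/ directory and its file-naming scheme.

    # Hypothetical invocation (file names are illustrative):
    #   python train_baseline.py FR FR_inclusion.csv FR_adjacent.csv FR_cooc.csv
    #
    # Reloading one of the saved artifacts afterwards:
    from joblib import load

    vectorizer = load("outputs/FR_PIC_vectorizer.pkl")  # fitted n-gram TF-IDF pipeline (illustrative path)
    classifier = load("outputs/FR_PIC_svm.pkl")         # best estimator found by the grid search (illustrative path)

    # Predict the hp_split label for a toponym paired with one of its context toponyms
    X = vectorizer.transform(["Paris Île-de-France"])
    print(classifier.predict(X))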