Skip to content
Snippets Groups Projects
EDdA_Classification_Analyses_predictions_proba.ipynb 4.6 MiB
Newer Older
lmoncla's avatar
lmoncla committed
{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "EDdA-Classification_Analyses_predictions_proba.ipynb",
      "provenance": [],
      "collapsed_sections": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "M-41ZfqIHyi2"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "import csv"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Download the predictions CSV used by the rest of the notebook.\n",
        "# -nc (no-clobber) skips the download when the file is already present, so re-running\n",
        "# this cell does not leave a duplicate '.csv.1' copy next to the file read below.\n",
        "!wget -nc https://projet.liris.cnrs.fr/geode/EDdA-Classification/predictions/dataset_test_predictions_sgd_tfidf.csv"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gVaa01O5IQke",
        "outputId": "054b0d9d-148a-4cc6-8616-b9e704eab6ea"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "--2022-02-17 15:51:03--  https://projet.liris.cnrs.fr/geode/EDdA-Classification/predictions/dataset_test_predictions_sgd_tfidf.csv\n",
            "Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28\n",
            "Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.\n",
            "HTTP request sent, awaiting response... 200 OK\n",
            "Length: 49747105 (47M) [text/csv]\n",
            "Saving to: ‘dataset_test_predictions_sgd_tfidf.csv’\n",
            "\n",
            "dataset_test_predic 100%[===================>]  47.44M  18.8MB/s    in 2.5s    \n",
            "\n",
            "2022-02-17 15:51:06 (18.8 MB/s) - ‘dataset_test_predictions_sgd_tfidf.csv’ saved [49747105/49747105]\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Load the downloaded predictions CSV from the working directory into a DataFrame.\n",
        "df = pd.read_csv(filepath_or_buffer='dataset_test_predictions_sgd_tfidf.csv')\n",
        "\n",
        "# Display the table dimensions as (rows, columns).\n",
        "df.shape"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "dYVLgduMIQm4",
        "outputId": "4e35f288-f81a-428b-8b9e-035c3a1d3c7a"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(11702, 20)"
            ]
          },
          "metadata": {},
          "execution_count": 7
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 479
        },
        "id": "Bp50IA0qIQpf",
        "outputId": "c4efa4c8-4fac-4349-cc12-a331f89850ad"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/html": [
              "\n",
              "  <div id=\"df-f1cebb7e-2ff0-4e07-8b97-8782c768d931\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Unnamed: 0</th>\n",
              "      <th>volume</th>\n",
              "      <th>numero</th>\n",
              "      <th>head</th>\n",
              "      <th>normClass</th>\n",
              "      <th>classEDdA</th>\n",
              "      <th>author</th>\n",
              "      <th>id_enccre</th>\n",
              "      <th>domaine_enccre</th>\n",
              "      <th>ensemble_domaine_enccre</th>\n",
              "      <th>content</th>\n",
              "      <th>contentWithoutClass</th>\n",
              "      <th>firstParagraph</th>\n",
              "      <th>nb_words</th>\n",
              "      <th>predict_proba</th>\n",
              "      <th>predict_proba2</th>\n",
              "      <th>predict1</th>\n",
              "      <th>proba1</th>\n",
              "      <th>predict2</th>\n",
              "      <th>proba2</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>0</td>\n",
              "      <td>8</td>\n",
              "      <td>1249</td>\n",
              "      <td>HOCKERLAND</td>\n",
              "      <td>Géographie</td>\n",
              "      <td>Géograp.</td>\n",
              "      <td>Jaucourt</td>\n",
              "      <td>v8-965-0</td>\n",
              "      <td>géographie</td>\n",
              "      <td>Géographie</td>\n",
              "      <td>HOCKERLAND, (Géograp.) petite contrée,\\n&amp; l'un...</td>\n",
              "      <td>hockerland   petit contrée \\n cercle prusse du...</td>\n",
              "      <td>hockerland   petit contrée \\n cercle prusse du...</td>\n",
              "      <td>43</td>\n",
              "      <td>[0.0, 0.0031502678656972283, 0.000415623529573...</td>\n",
              "      <td>[('Géographie', 0.7849115661795937), ('Métiers...</td>\n",
              "      <td>Géographie</td>\n",
              "      <td>0.784912</td>\n",
              "      <td>Métiers</td>\n",
              "      <td>0.108011</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>14</td>\n",
              "      <td>2370</td>\n",
              "      <td>Rotonde la</td>\n",
              "      <td>Architecture romaine</td>\n",
              "      <td>Archit. rom.</td>\n",
              "      <td>Jaucourt</td>\n",
              "      <td>v14-1352-1</td>\n",
              "      <td>architecture</td>\n",
              "      <td>Architecture</td>\n",
              "      <td>Rotonde la, (Archit. rom.) nom moderne de\\nl'a...</td>\n",
              "      <td>rotonde   nom moderne \\n ancien panthéon bâti ...</td>\n",
              "      <td>rotonde   nom moderne \\n ancien panthéon bâti ...</td>\n",
              "      <td>777</td>\n",
              "      <td>[0.0, 0.0, 0.22553458272522398, 0.689266306891...</td>\n",
              "      <td>[('Architecture', 0.6892663068918098), ('Antiq...</td>\n",
              "      <td>Architecture</td>\n",
              "      <td>0.689266</td>\n",
              "      <td>Antiquité</td>\n",
              "      <td>0.225535</td>\n",
              "    </tr>\n",
Loading
Loading full blame...