diff --git a/notebooks/Predicted_super_domains_to_normclass.ipynb b/notebooks/Predicted_super_domains_to_normclass.ipynb index d0d76dc7fbc9ad6355b4af1975e4f41496a42df9..46a0ffad1239d1c55f3ef42c1ddba7f990727f12 100644 --- a/notebooks/Predicted_super_domains_to_normclass.ipynb +++ b/notebooks/Predicted_super_domains_to_normclass.ipynb @@ -360,7 +360,65 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Économie rustique</td>\n", + " <td>Droit Jurisprudence</td>\n", + " <td>Agriculture</td>\n", + " <td>Histoire naturelle</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "</div>" + ], + "text/plain": [ + " 0 1 2 3\n", + "0 Économie rustique Droit Jurisprudence Agriculture Histoire naturelle" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.DataFrame([np.insert(g['super_domain_bert'].values,0,n)])" + ] + }, + { + "cell_type": "code", + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -1034,7 +1092,21 @@ "Droit saxon Histoire\n", "Droit | Droit ecclésiastique Droit Jurisprudence\n", "Droit | Histoire moderne Droit Jurisprudence\n", - "Droit | Littérature | Commerce Belles-lettres\n", + "Droit | Littérature | Commerce Belles-lettres\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/qv/x3jfzb6x2s5cpb2_8lw4g06m0000gn/T/ipykernel_73946/3752465403.py:5: FutureWarning: The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.\n", + " df2 = df2.append(pd.DataFrame([np.insert(g['super_domain_bert'].values,0,n)]), ignore_index=True)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "Droit | Médecine Philosophie\tDroit Jurisprudence\n", "Droits honorifiques Droit Jurisprudence\n", "Dynamique Physique\n", @@ -2856,9 +2928,237 @@ } ], "source": [ + "df2 = pd.DataFrame()\n", + "\n", "for n, g in dfg:\n", " print(n, '\\t'.join(g['super_domain_bert'].values))\n", - " " + " df2 = df2.append(pd.DataFrame([np.insert(g['super_domain_bert'].values,0,n)]), ignore_index=True)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "<div>\n", + "<style scoped>\n", + " .dataframe tbody tr th:only-of-type {\n", + " vertical-align: middle;\n", + " }\n", + "\n", + " .dataframe tbody tr th {\n", + " vertical-align: top;\n", + " }\n", + "\n", + " .dataframe thead th {\n", + " text-align: right;\n", + " }\n", + "</style>\n", + "<table border=\"1\" class=\"dataframe\">\n", + " <thead>\n", + " <tr style=\"text-align: right;\">\n", + " <th></th>\n", + " <th>0</th>\n", + " <th>1</th>\n", + " <th>2</th>\n", + " <th>3</th>\n", + " <th>4</th>\n", + " <th>5</th>\n", + " <th>6</th>\n", + " <th>7</th>\n", + " <th>8</th>\n", + " <th>9</th>\n", + " </tr>\n", + " </thead>\n", + " <tbody>\n", + " <tr>\n", + " <th>0</th>\n", + " <td>Abus des langues</td>\n", + " <td>Philosophie</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>1</th>\n", + " <td>Accord de sons</td>\n", + " <td>Belles-lettres</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2</th>\n", + " <td>Acoustique</td>\n", + " <td>Physique</td>\n", + " <td>Belles-lettres</td>\n", + " <td>Médecine</td>\n", + " <td>Médecine</td>\n", + " <td>Physique</td>\n", + " <td>Médecine</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>3</th>\n", + " <td>Agonistique</td>\n", + " <td>Histoire</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>4</th>\n", + " <td>Agriculture décorative</td>\n", + " <td>Agriculture</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>...</th>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " <td>...</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2480</th>\n", + " <td>terme usité parmi les Maréchaux</td>\n", + " <td>Chasse</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2481</th>\n", + " <td>vaisselle d'étain</td>\n", + " <td>Métiers</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2482</th>\n", + " <td>Ébénisterie</td>\n", + " <td>Métiers</td>\n", + " <td>Métiers</td>\n", + " <td>Musique</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2483</th>\n", + " <td>Ébénisterie | Tapisserie</td>\n", + " <td>Métiers</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " <tr>\n", + " <th>2484</th>\n", + " <td>Économie rustique</td>\n", + " <td>Droit Jurisprudence</td>\n", + " <td>Agriculture</td>\n", + " <td>Histoire naturelle</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " <td>NaN</td>\n", + " </tr>\n", + " </tbody>\n", + "</table>\n", + "<p>2485 rows × 10 columns</p>\n", + "</div>" + ], + "text/plain": [ + " 0 1 2 \\\n", + "0 Abus des langues Philosophie NaN \n", + "1 Accord de sons Belles-lettres NaN \n", + "2 Acoustique Physique Belles-lettres \n", + "3 Agonistique Histoire NaN \n", + "4 Agriculture décorative Agriculture NaN \n", + "... ... ... ... \n", + "2480 terme usité parmi les Maréchaux Chasse NaN \n", + "2481 vaisselle d'étain Métiers NaN \n", + "2482 Ébénisterie Métiers Métiers \n", + "2483 Ébénisterie | Tapisserie Métiers NaN \n", + "2484 Économie rustique Droit Jurisprudence Agriculture \n", + "\n", + " 3 4 5 6 7 8 9 \n", + "0 NaN NaN NaN NaN NaN NaN NaN \n", + "1 NaN NaN NaN NaN NaN NaN NaN \n", + "2 Médecine Médecine Physique Médecine NaN NaN NaN \n", + "3 NaN NaN NaN NaN NaN NaN NaN \n", + "4 NaN NaN NaN NaN NaN NaN NaN \n", + "... ... ... ... ... ... ... ... \n", + "2480 NaN NaN NaN NaN NaN NaN NaN \n", + "2481 NaN NaN NaN NaN NaN NaN NaN \n", + "2482 Musique NaN NaN NaN NaN NaN NaN \n", + "2483 NaN NaN NaN NaN NaN NaN NaN \n", + "2484 Histoire naturelle NaN NaN NaN NaN NaN NaN \n", + "\n", + "[2485 rows x 10 columns]" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2" ] }, {