diff --git a/notebooks/Predict_XAI.ipynb b/notebooks/Predict_XAI.ipynb index 8079d32ba6c8ea20ae9ebce65375d12711245e16..347b3d0760c5efe64b1fb3c423220a66abf85a9c 100644 --- a/notebooks/Predict_XAI.ipynb +++ b/notebooks/Predict_XAI.ipynb @@ -740,14 +740,19 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "edda_par_path = \"/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Corpus/EDdA/EDdA_dataset_articles.tsv\"\n", + "df_EDdA_par = pd.read_csv(edda_par_path, sep=\"\\t\")" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "df_EDdA_par.head()" + ] }, { "cell_type": "markdown", @@ -844,6 +849,16 @@ "data_loader_EDdA = generate_dataloader(tokenizer, df_EDdA.content.values)" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# EDdA parallel\n", + "data_loader_EDdA_par = generate_dataloader(tokenizer, df_EDdA_par.content.values)" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -887,6 +902,16 @@ "df_EDdA['class_pred'] = list(encoder.inverse_transform(pred_EDdA))" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pred_EDdA_par = predict(model, data_loader_EDdA_par, device)\n", + "df_EDdA_par['class_pred'] = list(encoder.inverse_transform(pred_EDdA_par))" + ] + }, { "cell_type": "code", "execution_count": 20, @@ -994,7 +1019,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### 4.3 Save" + "### 4.4 Save" ] }, { @@ -1029,7 +1054,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 54, "metadata": {}, "outputs": [], "source": [ @@ -1042,7 +1067,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 55, "metadata": {}, "outputs": [ { @@ -1051,7 +1076,7 @@ "\"\\nLYON, (Géogr.) grande, riche, belle, ancienne\\n& celebre ville de France, la plus considérable du\\nroyaume après Paris, & la capitale du Lyonnois.\\nElle se nomme en latin Lugdunum, Lugudunum, Lugdumum Segusianorum, Lugdumum Celtarum, &c.\\nVoyez Lugdunum.\\n\\nLyon fut fondée l'an de Rome 712, quarante-un\\nans avant l'ere chrétienne, par Lucius Munatius\\nPlancus, qui étoit consul avec AEmilius Lepidus. Il\\nla bâtit sur la Sône, au lieu où cette riviere se jette\\ndans le Rhône, & il la peupla des citoyens romains \\nqui a\"" ] }, - "execution_count": 53, + "execution_count": 55, "metadata": {}, "output_type": "execute_result" } @@ -1063,7 +1088,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 56, "metadata": {}, "outputs": [ { @@ -1231,7 +1256,7 @@ " ('[SEP]', 0.0)]" ] }, - "execution_count": 50, + "execution_count": 56, "metadata": {}, "output_type": "execute_result" } @@ -1241,6 +1266,186 @@ "word_attributions" ] }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('ville', 0.478071716663547),\n", + " ('capitale', 0.2983988672217172),\n", + " ('royaume', 0.24665610131446675),\n", + " ('G', 0.24402357535335403),\n", + " ('##éo', 0.23393328870446992),\n", + " ('grande', 0.21832893139528123),\n", + " (',', 0.19909154256915337),\n", + " ('##gr', 0.1695800465119405),\n", + " ('##Y', 0.1456759996705617),\n", + " ('##ON', 0.14307146561933012),\n", + " ('.', 0.14162802579543046),\n", + " ('France', 0.13688799086603975),\n", + " ('latin', 0.13303588704102381),\n", + " ('chrétienne', 0.1311835388990743),\n", + " ('lieu', 0.13020947076813982),\n", + " ('ancienne', 0.12958979621300132),\n", + " ('celebre', 0.12947489123965564),\n", + " (',', 0.11257940886969105),\n", + " ('rivier', 0.11169096058453537),\n", + " ('nomme', 0.10000471924693329),\n", + " ('(', 0.09932002907423143),\n", + " ('la', 0.08253583803987206),\n", + " ('riche', 0.07913704700022943),\n", + " ('.', 0.0760972913677917),\n", + " ('du', 0.07376998774114908),\n", + " ('Rhône', 0.07226400802922804),\n", + " ('Elle', 0.0693630173969722),\n", + " (',', 0.06873738399629244),\n", + " (\"'\", 0.06774875716439344),\n", + " ('S', 0.06575101714951456),\n", + " ('.', 0.06146632041097513),\n", + " ('plus', 0.05840061099213507),\n", + " ('qui', 0.05804189959646576),\n", + " (\"'\", 0.05723696778164145),\n", + " (',', 0.05662853362544685),\n", + " ('712', 0.054693452829347115),\n", + " ('##érable', 0.05297839086419718),\n", + " ('fondée', 0.04704211890403151),\n", + " ('l', 0.04620483463390136),\n", + " ('.', 0.04544699824023643),\n", + " ('l', 0.0451974674122074),\n", + " ('la', 0.04410484491168233),\n", + " ('fut', 0.0438798787047486),\n", + " ('##e', 0.04378867745175019),\n", + " ('b', 0.042870227388534604),\n", + " ('sur', 0.04260004363332922),\n", + " ('la', 0.04198219592000479),\n", + " ('se', 0.04164162356829115),\n", + " ('peu', 0.040981027718879084),\n", + " ('c', 0.0403850871592572),\n", + " ('Lyon', 0.04007542253467923),\n", + " ('##um', 0.039922520378568),\n", + " ('##ye', 0.03940461731845493),\n", + " ('##rum', 0.03820084664850618),\n", + " ('qui', 0.03778469886529954),\n", + " ('##m', 0.03770363967219936),\n", + " ('Se', 0.037503453809376),\n", + " ('cette', 0.03737564027887762),\n", + " ('la', 0.03714972247323993),\n", + " ('Rome', 0.03688032185991681),\n", + " ('##umu', 0.03658546160187376),\n", + " ('##um', 0.03484266276127894),\n", + " ('##umu', 0.03456580806237662),\n", + " ('de', 0.03403811335226887),\n", + " ('Paris', 0.03310146903416289),\n", + " ('##gus', 0.03294045015997047),\n", + " ('##sid', 0.03255043778254519),\n", + " ('jet', 0.031575857152632385),\n", + " ('un', 0.03117251985912735),\n", + " ('le', 0.03105610182850656),\n", + " ('Lu', 0.0309274199183622),\n", + " ('Plan', 0.030820184404097863),\n", + " ('en', 0.03010674205624715),\n", + " ('##z', 0.029871874749211054),\n", + " ('##mil', 0.02963200210194755),\n", + " ('##unum', 0.029477331874186236),\n", + " ('Vo', 0.02935262786796574),\n", + " (',', 0.02919256859997905),\n", + " ('.', 0.029180628239546275),\n", + " ('##noi', 0.02909189419875202),\n", + " ('des', 0.028996120278423045),\n", + " ('quarante', 0.028231791558966633),\n", + " ('Lu', 0.02694302543021504),\n", + " ('##m', 0.026521004509341334),\n", + " ('Lu', 0.026337930390794705),\n", + " ('##s', 0.02625525527522554),\n", + " ('con', 0.026165582559808873),\n", + " ('##unum', 0.025898349689579492),\n", + " ('##dun', 0.025699022336446258),\n", + " ('##tar', 0.025188870477124894),\n", + " ('é', 0.024089543382319098),\n", + " ('Lu', 0.02366442497712222),\n", + " ('an', 0.023585319400848195),\n", + " ('##gu', 0.023084632572130535),\n", + " ('Lyon', 0.022692171217471906),\n", + " (',', 0.022664305461904344),\n", + " ('Il', 0.021736540370470812),\n", + " ('de', 0.021013220187771894),\n", + " ('##us', 0.020667475964218647),\n", + " ('##te', 0.020236291895152022),\n", + " ('avec', 0.019039309232488966),\n", + " ('du', 0.018572791985543135),\n", + " ('dans', 0.01852231748257226),\n", + " ('##cus', 0.01828726599412002),\n", + " ('##pid', 0.01787476167297771),\n", + " ('après', 0.01785470962170739),\n", + " ('avant', 0.017626577836139475),\n", + " ('##nati', 0.01752347206998558),\n", + " ('##iano', 0.017089445343453365),\n", + " ('Le', 0.016166723086828174),\n", + " ('##us', 0.015200983089939281),\n", + " ('##s', 0.01484737615013025),\n", + " ('##toi', 0.01483008688193065),\n", + " ('ans', 0.014472180695321534),\n", + " ('Cel', 0.014027086848242715),\n", + " (',', 0.013178253982938232),\n", + " ('où', 0.013157964330803138),\n", + " ('##ât', 0.011332787999157318),\n", + " ('##ôn', 0.011241165099203603),\n", + " ('##e', 0.010537012868472688),\n", + " ('##it', 0.009763016011555254),\n", + " ('Lu', 0.008645628419735481),\n", + " (',', 0.007825484996566502),\n", + " ('ere', 0.007590037219544403),\n", + " ('L', 0.007399733805079844),\n", + " ('par', 0.007011176299182855),\n", + " ('##gd', 0.006578965732858923),\n", + " ('Mu', 0.006340399133187405),\n", + " ('consul', 0.006312700914285012),\n", + " ('il', 0.00609352197030786),\n", + " ('se', 0.0058271154715995995),\n", + " ('##gd', 0.005721331572683938),\n", + " ('##pla', 0.005560350755837545),\n", + " ('##gd', 0.00518317960511743),\n", + " ('Lucius', 0.0044462351021057325),\n", + " ('##t', 0.0036622619849812073),\n", + " (',', 0.0030423079199119554),\n", + " ('[CLS]', 0.0),\n", + " ('[SEP]', 0.0),\n", + " ('-', -0.0003846539976056082),\n", + " ('##ius', -0.00048531039895657175),\n", + " (',', -0.0010714894154601323),\n", + " ('au', -0.0018304190363696647),\n", + " ('##gd', -0.00442376201350928),\n", + " (',', -0.0062768408260973066),\n", + " ('&', -0.006321268573570221),\n", + " (',', -0.006856821180122214),\n", + " ('la', -0.006879341345145134),\n", + " (',', -0.007011581545450849),\n", + " ('romain', -0.008083189911088765),\n", + " (',', -0.008296981653008715),\n", + " ('AE', -0.010990138859793724),\n", + " (',', -0.011642202072501788),\n", + " ('a', -0.022083265525204197),\n", + " ('belle', -0.029909244412604778),\n", + " ('citoyens', -0.03278504989463669),\n", + " ('&', -0.038176803729996794),\n", + " ('&', -0.0494132018474461),\n", + " ('&', -0.08418116246612357),\n", + " (')', -0.13544847084394057)]" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "word_attributions.sort(key=lambda a: a[1], reverse = True)\n", + "word_attributions" + ] + }, { "cell_type": "code", "execution_count": 51,