Skip to content
Snippets Groups Projects
Commit a36157a7 authored by Ludovic Moncla's avatar Ludovic Moncla
Browse files

Update Predict_XAI.ipynb

parent 18a8d3f2
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
# BERT Predict classification
## 1. Setup the environment
### 1.1 Setup colab environment
#### 1.1.1 Install packages
%% Cell type:code id: tags:
``` python
# Notebook dependencies: transformers is pinned to 4.10.3, the other two
# packages are installed without a version pin.
!pip install transformers==4.10.3
!pip install sentencepiece
!pip install transformers_interpret
```
%% Cell type:markdown id: tags:
#### 1.1.2 Use more RAM
%% Cell type:code id: tags:
``` python
from psutil import virtual_memory

# Total memory reported by psutil, expressed in decimal gigabytes (1e9 bytes).
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the threshold this notebook uses to call a runtime "high-RAM".
is_high_ram = ram_gb >= 20
print('You are using a high-RAM runtime!' if is_high_ram else 'Not using a high-RAM runtime')
```
%% Cell type:markdown id: tags:
#### 1.1.3 Mount GoogleDrive
%% Cell type:code id: tags:
``` python
from google.colab import drive
# Mount Google Drive under /content/drive.
# NOTE(review): the later cells read from local paths (path = "../" and
# /Users/... corpus files), so this mount looks unnecessary for local runs — confirm.
drive.mount('/content/drive')
```
%% Cell type:markdown id: tags:
### 1.2 Import libraries
%% Cell type:code id: tags:
``` python
import pickle
import torch
from transformers import BertTokenizer, BertForSequenceClassification
from transformers_interpret import SequenceClassificationExplainer
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
import pandas as pd
```
%% Cell type:markdown id: tags:
### 1.3 Setup GPU
%% Cell type:code id: tags:
``` python
# Select the compute device: CUDA first, then Apple MPS, then CPU.
# `device` is the torch.device later passed to predict(); `gpu_name` is the
# matching device string used when the model is moved with .to(gpu_name).

# Older PyTorch builds have no `torch.backends.mps` attribute, so look it up
# defensively instead of raising AttributeError when CUDA is unavailable.
mps_backend = getattr(torch.backends, "mps", None)

if torch.cuda.is_available():
    # Tell PyTorch to use the GPU.
    device = torch.device("cuda")
    gpu_name = "cuda"
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
# for MacOS
elif mps_backend is not None and mps_backend.is_available() and mps_backend.is_built():
    device = torch.device("mps")
    gpu_name = "mps"
    print('We will use the GPU')
else:
    device = torch.device("cpu")
    gpu_name = "cpu"
    print('No GPU available, using the CPU instead.')
```
%% Output
We will use the GPU
%% Cell type:code id: tags:
``` python
# Force CPU execution: this overrides whatever device the detection cell
# above selected, for both the torch.device and its string name.
device = torch.device("cpu")
gpu_name = "cpu"
```
%% Cell type:markdown id: tags:
## 2. Utils
%% Cell type:code id: tags:
``` python
def generate_dataloader(tokenizer, sentences, batch_size = 8, max_len = 512):
    """Tokenize `sentences` and wrap them in a sequential DataLoader.

    Each sentence is encoded with `tokenizer.encode(..., add_special_tokens=True)`
    (which adds the '[CLS]' and '[SEP]' tokens), then cut to `max_len` ids or
    right-padded with 0 up to `max_len`.

    Args:
        tokenizer: object exposing `encode(text, add_special_tokens=True)` and
            returning a list of token ids.
        sentences: iterable of texts to encode.
        batch_size: number of sequences per batch.
        max_len: fixed length of every encoded sequence.

    Returns:
        A DataLoader yielding `(input_ids, attention_mask)` batches in input
        order. `input_ids` is int64 with shape (batch, max_len);
        `attention_mask` is float32 with 1.0 for real tokens and 0.0 for padding.
    """
    pad_id = 0

    # Truncate right after encoding so over-long articles are not kept in full.
    # Plain slicing is kept on purpose: sequences longer than max_len lose
    # their trailing tokens, including the final '[SEP]'.
    # NOTE(review): presumably this matches the preprocessing used for
    # fine-tuning — confirm before switching to the tokenizer's own truncation.
    encoded = [
        tokenizer.encode(sent, add_special_tokens=True)[:max_len]
        for sent in sentences
    ]

    # Fill a preallocated id matrix row by row instead of building padded
    # Python lists; the explicit dtype keeps ids int64 on every platform.
    input_ids = np.full((len(encoded), max_len), pad_id, dtype=np.int64)
    for row, ids in enumerate(encoded):
        input_ids[row, :len(ids)] = ids

    inputs = torch.tensor(input_ids)
    # Vectorized mask: a per-token Python loop over floats is very
    # memory-hungry on large corpora.
    masks = (inputs > pad_id).to(torch.float32)

    data = TensorDataset(inputs, masks)
    prediction_sampler = SequentialSampler(data)
    return DataLoader(data, sampler=prediction_sampler, batch_size=batch_size)
def predict(model, dataloader, device):
    """Run `model` over `dataloader` and return the predicted class indices.

    Args:
        model: callable model; it is put in evaluation mode and called as
            `model(input_ids, token_type_ids=None, attention_mask=mask)`.
            The first element of its output is used as the logits.
        dataloader: iterable of `(input_ids, attention_mask)` tensor batches.
        device: torch device the batch tensors are moved to before the call.

    Returns:
        A flat list with one class index per input row, in dataloader order.
    """
    # Put model in evaluation mode
    model.eval()

    pred_labels_ = []
    for batch in dataloader:
        # Move the batch to the target device and unpack it.
        b_input_ids, b_input_mask = (t.to(device) for t in batch)

        # No gradients are needed for prediction: saves memory and time.
        with torch.no_grad():
            outputs = model(b_input_ids, token_type_ids=None,
                            attention_mask=b_input_mask)

        # Bring the logits back to the CPU as a numpy array.
        logits = outputs[0].detach().cpu().numpy()

        # Keep only the index of the highest logit per row, so the full
        # logit matrices need not be retained until the end.
        pred_labels_.extend(np.argmax(logits, axis=1).flatten())

    return pred_labels_
```
%% Cell type:markdown id: tags:
## 3. Load Data
!! À modifier : charger le corpus parallèle : EDdA et LGE
%% Cell type:markdown id: tags:
### 3.1 LGE (Nakala)
%% Cell type:code id: tags:
``` python
# Absolute path on the author's machine; adjust it before running elsewhere.
lge_path = "/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Corpus/LGE/LGE_dataset_articles.tsv"
# Tab-separated file, loaded into df_LGE (one row per article, text in `content`).
df_LGE = pd.read_csv(lge_path, sep="\t")
```
%% Cell type:code id: tags:
``` python
df_LGE.head()
```
%% Output
id tome filename \
0 T1article_1 T1 article_1
1 T1article_10 T1 article_10
2 T1article_100 T1 article_100
3 T1article_1000 T1 article_1000
4 T1article_1001 T1 article_1001
content nb_words
0 F.-Camille DREYFUS, député de la Seine.\n 6
1 quimarque un mouvement en avant de l’esprit hu... 212
2 ABACUS. L’abacus ou abaque était un instrument... 1345
3 H6SS6)\n1780-1793 Choiseul-Goufficr\n1780-1793... 218
4 1803Le Brun.\n 2
%% Cell type:code id: tags:
``` python
df_LGE.shape
```
%% Output
(229475, 5)
%% Cell type:markdown id: tags:
### 3.2 LGE Parallel
%% Cell type:code id: tags:
``` python
# Absolute path on the author's machine; adjust it before running elsewhere.
lge_par_path = "/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Corpus/LGE/LGE_parallel_dataset_articles.tsv"
# Tab-separated file holding the LGE side of the parallel corpus.
df_LGE_par = pd.read_csv(lge_par_path, sep="\t")
```
%% Cell type:code id: tags:
``` python
df_LGE_par.head()
```
%% Output
id tome filename \
0 T1aam-0 T1 aam-0
1 T1abaco-0 T1 abaco-0
2 T1abacot-0 T1 abacot-0
3 T1abaddon-0 T1 abaddon-0
4 T1abandonnement-0 T1 abandonnement-0
content nb_words
0 AAM. Mesure de capacité pour les liquides en u... 38
1 ABACO, architecte italien du xvi siècle (V. La... 8
2 ABACOT. Double couronne que portaient autrefoi... 33
3 ABADDONou APOLYON le Destructeur. « Elles\nava... 109
4 ABANDONNEMENT. I. Droit civil. — Ce mot est un... 76
%% Cell type:markdown id: tags:
### 3.3 EDdA (ARTFL)
%% Cell type:code id: tags:
``` python
# Absolute path on the author's machine; adjust it before running elsewhere.
edda_path = "/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Corpus/EDdA/EDdA_dataset_articles.tsv"
# Tab-separated file, loaded into df_EDdA (article text in `content`, entry name in `head`).
df_EDdA = pd.read_csv(edda_path, sep="\t")
```
%% Cell type:code id: tags:
``` python
df_EDdA.head()
```
%% Output
volume numero head author \
0 1 1 Title Page unsigned
1 1 2 A MONSEIGNEUR LE COMTE D'ARGENSON Diderot & d'Alembert
2 1 3 DISCOURS PRÉLIMINAIRE DES EDITEURS d'Alembert
3 1 5 A, a & a Dumarsais5
4 1 6 A Dumarsais5
edda_class enccre_id enccre_class \
0 unclassified NaN NaN
1 unclassified NaN NaN
2 unclassified NaN NaN
3 Grammaire v1-1-0 Grammaire
4 unclassified v1-1-1 Grammaire
content \
0 \n\nENCYCLOPÉDIE,\nDICTIONNAIRE RAISONNÉ\nDES ...
1 \n\nA MONSEIGNEUR\nLE COMTE D'ARGENSON,\nMINIS...
2 \n\nDISCOURS PRÉLIMINAIRE\nDES EDITEURS.\n\n\n...
3 \nA, a & a s.m. (ordre Encyclopéd.\nEntend. Sc...
4 \nA, mot, est 1. la troisieme personne du prés...
content_without_designant \
0 \n\nENCYCLOPÉDIE,\nDICTIONNAIRE RAISONNÉ\nDES ...
1 \n\nA MONSEIGNEUR\nLE COMTE D'ARGENSON,\nMINIS...
2 \n\nDISCOURS PRÉLIMINAIRE\nDES EDITEURS.\n\n\n...
3 \nA, a & a s.m. (ordre Encyclopéd.\nEntend. Sc...
4 \nA, mot, est 1. la troisieme personne du prés...
first_paragraph nb_words
0 \n\nENCYCLOPÉDIE,\nDICTIONNAIRE RAISONNÉ\nDES ... 151
1 \n\nA MONSEIGNEUR\nLE COMTE D'ARGENSON,\nMINIS... 208
2 \n\nDISCOURS PRÉLIMINAIRE\nDES EDITEURS.\n\n 44669
3 \nA, a & a s.m. (ordre Encyclopéd.\nEntend. Sc... 711
4 \nA, mot, est 1. la troisieme personne du prés... 238
%% Cell type:markdown id: tags:
### 3.4 EDdA Parallel
%% Cell type:code id: tags:
``` python
# NOTE(review): this path is identical to `edda_path`, so df_EDdA_par is loaded
# from the full EDdA file rather than a parallel subset. The LGE counterpart reads
# "LGE_parallel_dataset_articles.tsv" — presumably a matching EDdA parallel
# file was intended here; confirm the file name and fix the path.
edda_par_path = "/Users/lmoncla/Nextcloud-LIRIS/GEODE/GEODE - Partage consortium/Corpus/EDdA/EDdA_dataset_articles.tsv"
df_EDdA_par = pd.read_csv(edda_par_path, sep="\t")
```
%% Cell type:code id: tags:
``` python
df_EDdA_par.head()
```
%% Cell type:markdown id: tags:
## 4. Load model and predict
### 4.1 Load BERT model
%% Cell type:code id: tags:
``` python
#path = "drive/MyDrive/Classification-EDdA/"
# Root directory, relative to the notebook, under which models/ is read and
# results_LGE/ is written.
path = "../"
# Name passed to BertTokenizer.from_pretrained and embedded in the checkpoint name.
model_name = "bert-base-multilingual-cased"
# Resolves to ../models/model_bert-base-multilingual-cased_s10000.pt
model_path = path + "models/model_" + model_name + "_s10000.pt"
```
%% Cell type:code id: tags:
``` python
# Load the label encoder that maps predicted class indices back to class names.
# NOTE(review): pickle.load can execute arbitrary code from the file — only
# load artifacts from a trusted source.
encoder_filename = "models/label_encoder.pkl"
encoder_path = path + encoder_filename
with open(encoder_path, 'rb') as handle:
    encoder = pickle.load(handle)
```
%% Cell type:code id: tags:
``` python
tokenizer = BertTokenizer.from_pretrained(model_name)
```
%% Cell type:code id: tags:
``` python
# The model is moved using the device string `gpu_name`, while predict() is
# later given `device`; both are set together and must stay in sync.
model = BertForSequenceClassification.from_pretrained(model_path).to(gpu_name) #.to("cuda")
```
%% Cell type:markdown id: tags:
### 4.2 Prepare datasets
%% Cell type:code id: tags:
``` python
# LGE
data_loader_LGE = generate_dataloader(tokenizer, df_LGE.content.values)
```
%% Output
Token indices sequence length is longer than the specified maximum sequence length for this model (1204 > 512). Running this sequence through the model will result in indexing errors
%% Cell type:code id: tags:
``` python
# LGE parallel
data_loader_LGE_par = generate_dataloader(tokenizer, df_LGE_par.content.values)
```
%% Cell type:code id: tags:
``` python
# EDdA
data_loader_EDdA = generate_dataloader(tokenizer, df_EDdA.content.values)
```
%% Cell type:code id: tags:
``` python
# EDdA parallel
data_loader_EDdA_par = generate_dataloader(tokenizer, df_EDdA_par.content.values)
```
%% Cell type:markdown id: tags:
### 4.3 Predict
%% Cell type:code id: tags:
``` python
pred_LGE = predict(model, data_loader_LGE, device)
df_LGE['class_pred'] = list(encoder.inverse_transform(pred_LGE))
```
%% Cell type:code id: tags:
``` python
pred_LGE_par = predict(model, data_loader_LGE_par, device)
df_LGE_par['class_pred'] = list(encoder.inverse_transform(pred_LGE_par))
```
%% Cell type:code id: tags:
``` python
pred_EDdA = predict(model, data_loader_EDdA, device)
df_EDdA['class_pred'] = list(encoder.inverse_transform(pred_EDdA))
```
%% Cell type:code id: tags:
``` python
pred_EDdA_par = predict(model, data_loader_EDdA_par, device)
df_EDdA_par['class_pred'] = list(encoder.inverse_transform(pred_EDdA_par))
```
%% Cell type:code id: tags:
``` python
df_LGE.head()
```
%% Output
id tome filename \
0 T1article_1 T1 article_1
1 T1article_10 T1 article_10
2 T1article_100 T1 article_100
3 T1article_1000 T1 article_1000
4 T1article_1001 T1 article_1001
content nb_words
0 F.-Camille DREYFUS, député de la Seine.\n 6
1 quimarque un mouvement en avant de l’esprit hu... 212
2 ABACUS. L’abacus ou abaque était un instrument... 1345
3 H6SS6)\n1780-1793 Choiseul-Goufficr\n1780-1793... 218
4 1803Le Brun.\n 2
%% Cell type:markdown id: tags:
### 4.4 Save
%% Cell type:code id: tags:
``` python
# Write the LGE table, including the article text and the predicted class.
filepath = path + "results_LGE/LGE-metadata-withContent.csv"
# The delimiter must be a single character: "\," is a two-character string
# (backslash + comma) that to_csv rejects, so a plain comma is used.
df_LGE.to_csv(filepath, sep=",")
```
%% Cell type:code id: tags:
``` python
# Write a lighter metadata-only file without the article text.
# The drop is still done in place; errors='ignore' keeps the cell re-runnable
# once 'content' has already been removed.
df_LGE.drop(columns=['content'], inplace=True, errors='ignore')
filepath = path + "results_LGE/LGE-metadata.csv"
# The delimiter must be a single character: "\," is a two-character string
# (backslash + comma) that to_csv rejects, so a plain comma is used.
df_LGE.to_csv(filepath, sep=",")
```
%% Cell type:markdown id: tags:
## 5. BERT XAI
https://www.kaggle.com/code/rizwanhaidar/deep-learning-xai-models-loading-and-predictions
%% Cell type:code id: tags:
``` python
# Build the attribution explainer around the loaded model and tokenizer.
# custom_labels passes the label encoder's class names as a plain list.
cls_explainer = SequenceClassificationExplainer(
    model,
    tokenizer,
    custom_labels=encoder.classes_.tolist()
)
```
%% Cell type:code id: tags:
``` python
# Take the first EDdA article whose head is "LYON" and keep its first 512
# characters (a character count, not a token count).
content = df_EDdA.loc[df_EDdA['head']=="LYON"].reset_index().content[0][:512]
content
```
%% Output
"\nLYON, (Géogr.) grande, riche, belle, ancienne\n& celebre ville de France, la plus considérable du\nroyaume après Paris, & la capitale du Lyonnois.\nElle se nomme en latin Lugdunum, Lugudunum, Lugdumum Segusianorum, Lugdumum Celtarum, &c.\nVoyez Lugdunum.\n\nLyon fut fondée l'an de Rome 712, quarante-un\nans avant l'ere chrétienne, par Lucius Munatius\nPlancus, qui étoit consul avec AEmilius Lepidus. Il\nla bâtit sur la Sône, au lieu où cette riviere se jette\ndans le Rhône, & il la peupla des citoyens romains \nqui a"
%% Cell type:code id: tags:
``` python
# Explain at most the first 512 characters; slicing already returns the whole
# string when it is shorter than that.
word_attributions = cls_explainer(content[:512])
word_attributions
```
%% Output
[('[CLS]', 0.0),
('L', 0.007399733805079844),
('##Y', 0.1456759996705617),
('##ON', 0.14307146561933012),
(',', 0.19909154256915337),
('(', 0.09932002907423143),
('G', 0.24402357535335403),
('##éo', 0.23393328870446992),
('##gr', 0.1695800465119405),
('.', 0.14162802579543046),
(')', -0.13544847084394057),
('grande', 0.21832893139528123),
(',', 0.11257940886969105),
('riche', 0.07913704700022943),
(',', 0.05662853362544685),
('belle', -0.029909244412604778),
(',', 0.06873738399629244),
('ancienne', 0.12958979621300132),
('&', -0.08418116246612357),
('celebre', 0.12947489123965564),
('ville', 0.478071716663547),
('de', 0.03403811335226887),
('France', 0.13688799086603975),
(',', -0.0010714894154601323),
('la', -0.006879341345145134),
('plus', 0.05840061099213507),
('con', 0.026165582559808873),
('##sid', 0.03255043778254519),
('##érable', 0.05297839086419718),
('du', 0.018572791985543135),
('royaume', 0.24665610131446675),
('après', 0.01785470962170739),
('Paris', 0.03310146903416289),
(',', -0.006856821180122214),
('&', -0.006321268573570221),
('la', 0.08253583803987206),
('capitale', 0.2983988672217172),
('du', 0.07376998774114908),
('Lyon', 0.04007542253467923),
('##noi', 0.02909189419875202),
('##s', 0.02625525527522554),
('.', 0.0760972913677917),
('Elle', 0.0693630173969722),
('se', 0.04164162356829115),
('nomme', 0.10000471924693329),
('en', 0.03010674205624715),
('latin', 0.13303588704102381),
('Lu', 0.0309274199183622),
('##gd', 0.00518317960511743),
('##unum', 0.029477331874186236),
(',', -0.007011581545450849),
('Lu', 0.008645628419735481),
('##gu', 0.023084632572130535),
('##dun', 0.025699022336446258),
('##um', 0.03484266276127894),
(',', 0.0030423079199119554),
('Lu', 0.02366442497712222),
('##gd', 0.006578965732858923),
('##umu', 0.03456580806237662),
('##m', 0.026521004509341334),
('Se', 0.037503453809376),
('##gus', 0.03294045015997047),
('##iano', 0.017089445343453365),
('##rum', 0.03820084664850618),
(',', -0.011642202072501788),
('Lu', 0.02694302543021504),
('##gd', 0.005721331572683938),
('##umu', 0.03658546160187376),
('##m', 0.03770363967219936),
('Cel', 0.014027086848242715),
('##tar', 0.025188870477124894),
('##um', 0.039922520378568),
(',', 0.022664305461904344),
('&', -0.0494132018474461),
('c', 0.0403850871592572),
('.', 0.04544699824023643),
('Vo', 0.02935262786796574),
('##ye', 0.03940461731845493),
('##z', 0.029871874749211054),
('Lu', 0.026337930390794705),
('##gd', -0.00442376201350928),
('##unum', 0.025898349689579492),
('.', 0.06146632041097513),
('Lyon', 0.022692171217471906),
('fut', 0.0438798787047486),
('fondée', 0.04704211890403151),
('l', 0.0451974674122074),
("'", 0.06774875716439344),
('an', 0.023585319400848195),
('de', 0.021013220187771894),
('Rome', 0.03688032185991681),
('712', 0.054693452829347115),
(',', 0.007825484996566502),
('quarante', 0.028231791558966633),
('-', -0.0003846539976056082),
('un', 0.03117251985912735),
('ans', 0.014472180695321534),
('avant', 0.017626577836139475),
('l', 0.04620483463390136),
("'", 0.05723696778164145),
('ere', 0.007590037219544403),
('chrétienne', 0.1311835388990743),
(',', 0.013178253982938232),
('par', 0.007011176299182855),
('Lucius', 0.0044462351021057325),
('Mu', 0.006340399133187405),
('##nati', 0.01752347206998558),
('##us', 0.015200983089939281),
('Plan', 0.030820184404097863),
('##cus', 0.01828726599412002),
(',', -0.008296981653008715),
('qui', 0.03778469886529954),
('é', 0.024089543382319098),
('##toi', 0.01483008688193065),
('##t', 0.0036622619849812073),
('consul', 0.006312700914285012),
('avec', 0.019039309232488966),
('AE', -0.010990138859793724),
('##mil', 0.02963200210194755),
('##ius', -0.00048531039895657175),
('Le', 0.016166723086828174),
('##pid', 0.01787476167297771),
('##us', 0.020667475964218647),
('.', 0.029180628239546275),
('Il', 0.021736540370470812),
('la', 0.03714972247323993),
('b', 0.042870227388534604),
('##ât', 0.011332787999157318),
('##it', 0.009763016011555254),
('sur', 0.04260004363332922),
('la', 0.04410484491168233),
('S', 0.06575101714951456),
('##ôn', 0.011241165099203603),
('##e', 0.010537012868472688),
(',', -0.0062768408260973066),
('au', -0.0018304190363696647),
('lieu', 0.13020947076813982),
('où', 0.013157964330803138),
('cette', 0.03737564027887762),
('rivier', 0.11169096058453537),
('##e', 0.04378867745175019),
('se', 0.0058271154715995995),
('jet', 0.031575857152632385),
('##te', 0.020236291895152022),
('dans', 0.01852231748257226),
('le', 0.03105610182850656),
('Rhône', 0.07226400802922804),
(',', 0.02919256859997905),
('&', -0.038176803729996794),
('il', 0.00609352197030786),
('la', 0.04198219592000479),
('peu', 0.040981027718879084),
('##pla', 0.005560350755837545),
('des', 0.028996120278423045),
('citoyens', -0.03278504989463669),
('romain', -0.008083189911088765),
('##s', 0.01484737615013025),
('qui', 0.05804189959646576),
('a', -0.022083265525204197),
('[SEP]', 0.0)]
%% Cell type:code id: tags:
``` python
# Sort in place by attribution score, highest first; the original token order
# of the list is lost after this cell.
word_attributions.sort(key=lambda a: a[1], reverse = True)
word_attributions
```
%% Output
[('ville', 0.478071716663547),
('capitale', 0.2983988672217172),
('royaume', 0.24665610131446675),
('G', 0.24402357535335403),
('##éo', 0.23393328870446992),
('grande', 0.21832893139528123),
(',', 0.19909154256915337),
('##gr', 0.1695800465119405),
('##Y', 0.1456759996705617),
('##ON', 0.14307146561933012),
('.', 0.14162802579543046),
('France', 0.13688799086603975),
('latin', 0.13303588704102381),
('chrétienne', 0.1311835388990743),
('lieu', 0.13020947076813982),
('ancienne', 0.12958979621300132),
('celebre', 0.12947489123965564),
(',', 0.11257940886969105),
('rivier', 0.11169096058453537),
('nomme', 0.10000471924693329),
('(', 0.09932002907423143),
('la', 0.08253583803987206),
('riche', 0.07913704700022943),
('.', 0.0760972913677917),
('du', 0.07376998774114908),
('Rhône', 0.07226400802922804),
('Elle', 0.0693630173969722),
(',', 0.06873738399629244),
("'", 0.06774875716439344),
('S', 0.06575101714951456),
('.', 0.06146632041097513),
('plus', 0.05840061099213507),
('qui', 0.05804189959646576),
("'", 0.05723696778164145),
(',', 0.05662853362544685),
('712', 0.054693452829347115),
('##érable', 0.05297839086419718),
('fondée', 0.04704211890403151),
('l', 0.04620483463390136),
('.', 0.04544699824023643),
('l', 0.0451974674122074),
('la', 0.04410484491168233),
('fut', 0.0438798787047486),
('##e', 0.04378867745175019),
('b', 0.042870227388534604),
('sur', 0.04260004363332922),
('la', 0.04198219592000479),
('se', 0.04164162356829115),
('peu', 0.040981027718879084),
('c', 0.0403850871592572),
('Lyon', 0.04007542253467923),
('##um', 0.039922520378568),
('##ye', 0.03940461731845493),
('##rum', 0.03820084664850618),
('qui', 0.03778469886529954),
('##m', 0.03770363967219936),
('Se', 0.037503453809376),
('cette', 0.03737564027887762),
('la', 0.03714972247323993),
('Rome', 0.03688032185991681),
('##umu', 0.03658546160187376),
('##um', 0.03484266276127894),
('##umu', 0.03456580806237662),
('de', 0.03403811335226887),
('Paris', 0.03310146903416289),
('##gus', 0.03294045015997047),
('##sid', 0.03255043778254519),
('jet', 0.031575857152632385),
('un', 0.03117251985912735),
('le', 0.03105610182850656),
('Lu', 0.0309274199183622),
('Plan', 0.030820184404097863),
('en', 0.03010674205624715),
('##z', 0.029871874749211054),
('##mil', 0.02963200210194755),
('##unum', 0.029477331874186236),
('Vo', 0.02935262786796574),
(',', 0.02919256859997905),
('.', 0.029180628239546275),
('##noi', 0.02909189419875202),
('des', 0.028996120278423045),
('quarante', 0.028231791558966633),
('Lu', 0.02694302543021504),
('##m', 0.026521004509341334),
('Lu', 0.026337930390794705),
('##s', 0.02625525527522554),
('con', 0.026165582559808873),
('##unum', 0.025898349689579492),
('##dun', 0.025699022336446258),
('##tar', 0.025188870477124894),
('é', 0.024089543382319098),
('Lu', 0.02366442497712222),
('an', 0.023585319400848195),
('##gu', 0.023084632572130535),
('Lyon', 0.022692171217471906),
(',', 0.022664305461904344),
('Il', 0.021736540370470812),
('de', 0.021013220187771894),
('##us', 0.020667475964218647),
('##te', 0.020236291895152022),
('avec', 0.019039309232488966),
('du', 0.018572791985543135),
('dans', 0.01852231748257226),
('##cus', 0.01828726599412002),
('##pid', 0.01787476167297771),
('après', 0.01785470962170739),
('avant', 0.017626577836139475),
('##nati', 0.01752347206998558),
('##iano', 0.017089445343453365),
('Le', 0.016166723086828174),
('##us', 0.015200983089939281),
('##s', 0.01484737615013025),
('##toi', 0.01483008688193065),
('ans', 0.014472180695321534),
('Cel', 0.014027086848242715),
(',', 0.013178253982938232),
('où', 0.013157964330803138),
('##ât', 0.011332787999157318),
('##ôn', 0.011241165099203603),
('##e', 0.010537012868472688),
('##it', 0.009763016011555254),
('Lu', 0.008645628419735481),
(',', 0.007825484996566502),
('ere', 0.007590037219544403),
('L', 0.007399733805079844),
('par', 0.007011176299182855),
('##gd', 0.006578965732858923),
('Mu', 0.006340399133187405),
('consul', 0.006312700914285012),
('il', 0.00609352197030786),
('se', 0.0058271154715995995),
('##gd', 0.005721331572683938),
('##pla', 0.005560350755837545),
('##gd', 0.00518317960511743),
('Lucius', 0.0044462351021057325),
('##t', 0.0036622619849812073),
(',', 0.0030423079199119554),
('[CLS]', 0.0),
('[SEP]', 0.0),
('-', -0.0003846539976056082),
('##ius', -0.00048531039895657175),
(',', -0.0010714894154601323),
('au', -0.0018304190363696647),
('##gd', -0.00442376201350928),
(',', -0.0062768408260973066),
('&', -0.006321268573570221),
(',', -0.006856821180122214),
('la', -0.006879341345145134),
(',', -0.007011581545450849),
('romain', -0.008083189911088765),
(',', -0.008296981653008715),
('AE', -0.010990138859793724),
(',', -0.011642202072501788),
('a', -0.022083265525204197),
('belle', -0.029909244412604778),
('citoyens', -0.03278504989463669),
('&', -0.038176803729996794),
('&', -0.0494132018474461),
('&', -0.08418116246612357),
(')', -0.13544847084394057)]
%% Cell type:code id: tags:
``` python
cls_explainer.predicted_class_name
```
%% Output
'Géographie'
%% Cell type:code id: tags:
``` python
cls_explainer.visualize()
```
%% Output
<IPython.core.display.HTML object>
%% Cell type:code id: tags:
``` python
```
%% Cell type:markdown id: tags:
* récupérer les mots positifs par domaine (EDdA et LGE)
* faire des nuages de mots et comparer les plus fréquents entre EDdA et LGE (corpus parallèle)
%% Cell type:markdown id: tags:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment