Skip to content
Snippets Groups Projects
Commit 68628d62 authored by Ludovic Moncla's avatar Ludovic Moncla
Browse files

Create Classification_CNN.ipynb

parent 5ef7d2be
Branches
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
# Deep learning for EDdA classification
%% Cell type:markdown id: tags:
## Setup colab environment
%% Cell type:code id: tags:
```
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')
```
%% Output
Mounted at /content/drive
%% Cell type:markdown id: tags:
### Install packages
%% Cell type:code id: tags:
```
#!pip install zeugma
#!pip install plot_model
```
%% Cell type:markdown id: tags:
### Import libraries
%% Cell type:code id: tags:
```
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os
from tqdm import tqdm
import requests, zipfile, io
import codecs
from sklearn import preprocessing # LabelEncoder
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
from keras.layers import BatchNormalization, Input, Reshape, Conv1D, MaxPool1D, Conv2D, MaxPool2D, Concatenate
from keras.layers import Embedding, Dropout, Flatten, Dense
from keras.models import Model, load_model
from keras.callbacks import ModelCheckpoint
```
%% Cell type:markdown id: tags:
### Utility functions
%% Cell type:code id: tags:
```
def resample_classes(df, classColumnName, numberOfInstances, replace=False, randomState=None):
    """Randomly keep at most `numberOfInstances` rows per class.

    Args:
        df: DataFrame to sample from.
        classColumnName: name of the column used to group rows into classes.
        numberOfInstances: maximum number of rows drawn for each class; classes
            with fewer rows contribute as many rows as they have.
        replace: whether rows are drawn with replacement (default: False, the
            previously hard-coded behaviour).
        randomState: optional seed for a dedicated NumPy RandomState, to make
            the sampling reproducible. When None, NumPy's global random state
            is used, as before.

    Returns:
        The result of applying the per-class sampling through
        `df.groupby(classColumnName, as_index=False).apply(...)`.
    """
    if randomState is None:
        choose = np.random.choice
    else:
        choose = np.random.RandomState(randomState).choice

    def _sample_group(group):
        # Never ask for more rows than the class actually contains.
        size = min(len(group), numberOfInstances)
        picked = choose(group.index, size, replace)
        return group.loc[picked, :]

    return df.groupby(classColumnName, as_index=False).apply(_sample_group)
```
%% Cell type:markdown id: tags:
## Load Data
%% Cell type:code id: tags:
```
# Download the training and test TSV files into the current working directory.
!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv
!wget https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv
```
%% Output
--2022-02-17 19:08:55-- https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/training_set.tsv
Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28
Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 175634219 (167M) [text/tab-separated-values]
Saving to: ‘training_set.tsv’
training_set.tsv 100%[===================>] 167.50M 28.2MB/s in 6.5s
2022-02-17 19:09:02 (25.7 MB/s) - ‘training_set.tsv’ saved [175634219/175634219]
--2022-02-17 19:09:02-- https://projet.liris.cnrs.fr/geode/EDdA-Classification/datasets/test_set.tsv
Resolving projet.liris.cnrs.fr (projet.liris.cnrs.fr)... 134.214.142.28
Connecting to projet.liris.cnrs.fr (projet.liris.cnrs.fr)|134.214.142.28|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 42730598 (41M) [text/tab-separated-values]
Saving to: ‘test_set.tsv’
test_set.tsv 100%[===================>] 40.75M 19.7MB/s in 2.1s
2022-02-17 19:09:05 (19.7 MB/s) - ‘test_set.tsv’ saved [42730598/42730598]
%% Cell type:markdown id: tags:
### Loading dataset
%% Cell type:code id: tags:
```
# Local file names of the two TSV datasets (same names as the files fetched with wget).
train_path = 'training_set.tsv'
test_path = 'test_set.tsv'
```
%% Cell type:code id: tags:
```
# Load the tab-separated training set into a DataFrame.
df_train = pd.read_csv(train_path, sep="\t")
```
%% Cell type:code id: tags:
```
# Display five randomly chosen rows as a quick sanity check.
df_train.sample(5)
```
%% Output
volume ... nb_words
17253 1 ... 70
16170 9 ... 36
12041 16 ... 95
20046 9 ... 71
35783 9 ... 50
[5 rows x 13 columns]
%% Cell type:markdown id: tags:
## Configuration
%% Cell type:code id: tags:
```
# DataFrame columns holding the article text and its class label.
columnText = 'contentWithoutClass'
columnClass = 'ensemble_domaine_enccre'
# Per-class cap on training instances; the preprocessing cell only resamples
# when this value differs from 10000.
maxOfInstancePerClass = 10000
batch_size = 64
# Fraction of the (shuffled) training data held out for validation.
validation_split = 0.20
max_nb_words = 20000 # vocabulary size
max_sequence_length = 512 # maximum length of a 'document' (used as pad_sequences maxlen)
epochs = 10
# Alternative embedding configuration (FastText, 300 dimensions):
#embedding_name = "fasttext"
#embedding_dim = 300
embedding_name = "glove.6B.100d"
embedding_dim = 100
# Base folder used for the pickles, embeddings, saved models and reports.
path = "drive/MyDrive/Classification-EDdA/"
encoder_filename = "label_encoder.pkl"
tokenizer_filename = "tokenizer_keras.pkl"
```
%% Cell type:markdown id: tags:
## Preprocessing
%% Cell type:code id: tags:
```
# Down-sample the classes only when the cap differs from 10000; with the
# value 10000 the training set is used exactly as loaded.
cap_is_active = maxOfInstancePerClass != 10000
if cap_is_active:
    df_train = resample_classes(df_train, columnClass, maxOfInstancePerClass)
```
%% Cell type:code id: tags:
```
# Encode the string class labels as integers, reusing a pickled LabelEncoder
# when one already exists under `path`.
labels = df_train[columnClass]
numberOfClasses = labels.nunique()

encoder_path = path + encoder_filename
if not os.path.isfile(encoder_path):
    # No cached encoder yet: fit a new one and store it on disk.
    encoder = preprocessing.LabelEncoder()
    encoder.fit(labels)
    with open(encoder_path, 'wb') as file:
        pickle.dump(encoder, file)
else:
    # load existing encoder
    with open(encoder_path, 'rb') as file:
        encoder = pickle.load(file)

labels = encoder.transform(labels)
```
%% Cell type:code id: tags:
```
# Show the class names known to the encoder.
encoder.classes_
```
%% Output
array(['Agriculture - Economie rustique', 'Anatomie', 'Antiquité',
'Architecture', 'Arts et métiers', 'Beaux-arts',
'Belles-lettres - Poésie', 'Blason', 'Caractères', 'Chasse',
'Chimie', 'Commerce', 'Droit - Jurisprudence',
'Economie domestique', 'Grammaire', 'Géographie', 'Histoire',
'Histoire naturelle', 'Jeu', 'Marine', 'Maréchage - Manège',
'Mathématiques', 'Mesure', 'Militaire (Art) - Guerre - Arme',
'Minéralogie', 'Monnaie', 'Musique', 'Médailles',
'Médecine - Chirurgie', 'Métiers', 'Pharmacie', 'Philosophie',
'Physique - [Sciences physico-mathématiques]', 'Politique',
'Pêche', 'Religion', 'Spectacle', 'Superstition'], dtype=object)
%% Cell type:code id: tags:
```
# Map each class name to the integer id the encoder assigns to it.
labels_index = dict(zip(list(encoder.classes_), encoder.transform(list(encoder.classes_))))
```
%% Cell type:code id: tags:
```
# Display the class-name -> id mapping.
labels_index
```
%% Output
{'Agriculture - Economie rustique': 0,
'Anatomie': 1,
'Antiquité': 2,
'Architecture': 3,
'Arts et métiers': 4,
'Beaux-arts': 5,
'Belles-lettres - Poésie': 6,
'Blason': 7,
'Caractères': 8,
'Chasse': 9,
'Chimie': 10,
'Commerce': 11,
'Droit - Jurisprudence': 12,
'Economie domestique': 13,
'Grammaire': 14,
'Géographie': 15,
'Histoire': 16,
'Histoire naturelle': 17,
'Jeu': 18,
'Marine': 19,
'Maréchage - Manège': 20,
'Mathématiques': 21,
'Mesure': 22,
'Militaire (Art) - Guerre - Arme': 23,
'Minéralogie': 24,
'Monnaie': 25,
'Musique': 26,
'Médailles': 27,
'Médecine - Chirurgie': 28,
'Métiers': 29,
'Pharmacie': 30,
'Philosophie': 31,
'Physique - [Sciences physico-mathématiques]': 32,
'Politique': 33,
'Pêche': 34,
'Religion': 35,
'Spectacle': 36,
'Superstition': 37}
%% Cell type:markdown id: tags:
### Loading pre-trained embeddings
%% Cell type:markdown id: tags:
#### FastText
%% Cell type:code id: tags:
```
# Download FastText (the archive takes too much space to be kept on the Drive).
# The response is streamed to a local file rather than read through r.content,
# which would hold the whole archive in memory before extraction.
zip_file_url = "https://dl.fbaipublicfiles.com/fasttext/vectors-english/crawl-300d-2M.vec.zip"
zip_file_name = zip_file_url.rsplit("/", 1)[-1]
with requests.get(zip_file_url, stream=True) as r:
    # Fail early on an HTTP error instead of trying to unzip an error page.
    r.raise_for_status()
    with open(zip_file_name, "wb") as zip_out:
        for chunk in r.iter_content(chunk_size=1 << 20):
            zip_out.write(chunk)
with zipfile.ZipFile(zip_file_name) as z:
    z.extractall()
# The archive is no longer needed once its content has been extracted.
os.remove(zip_file_name)
```
%% Cell type:code id: tags:
```
# Read the FastText text file into a {word: vector} dictionary.
print('loading word embeddings FastText...')
embeddings_index = {}
# The context manager closes the file even if a line fails to parse.
with codecs.open('crawl-300d-2M.vec', encoding='utf-8') as f:
    for line in tqdm(f):
        values = line.rstrip().rsplit(' ')
        if len(values) == 2:
            # Header line of the .vec format ("<vocabulary size> <dimension>"):
            # it is not a word vector and must not end up in the index.
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
print('found %s word vectors' % len(embeddings_index))
```
%% Cell type:markdown id: tags:
#### GLOVE
%% Cell type:code id: tags:
```
# download Glove
#zip_file_url = "https://nlp.stanford.edu/data/glove.6B.zip"
#r = requests.get(zip_file_url)
#z = zipfile.ZipFile(io.BytesIO(r.content))
#z.extractall()
```
%% Cell type:code id: tags:
```
# Read the GloVe text file (one "<word> <v1> <v2> ..." entry per line) into a
# {word: vector} dictionary.
print('loading word embeddings GLOVE...')
embeddings_index = {}
# The context manager closes the file even if a line fails to parse.
with open(path+"embeddings/"+embedding_name+".txt", encoding='utf-8') as f:
    for line in tqdm(f):
        values = line.split()
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float32')
        embeddings_index[word] = coefs
print('Found %s word vectors.' % len(embeddings_index))
```
%% Output
loading word embeddings GLOVE...
400000it [00:12, 31570.08it/s]
Found 400000 word vectors.
%% Cell type:markdown id: tags:
## Training models
%% Cell type:code id: tags:
```
# Turn the training texts into padded integer sequences, reusing a pickled
# Keras tokenizer when one already exists under `path`.
raw_docs_train = df_train[columnText].tolist()

print("pre-processing train data...")
tokenizer_path = path + tokenizer_filename
if not os.path.isfile(tokenizer_path):
    # No cached tokenizer yet: fit one on the training texts and store it.
    tokenizer = Tokenizer(num_words=max_nb_words)
    tokenizer.fit_on_texts(raw_docs_train)
    with open(tokenizer_path, 'wb') as file:
        pickle.dump(tokenizer, file)
else:
    with open(tokenizer_path, 'rb') as file:
        tokenizer = pickle.load(file)

sequences = tokenizer.texts_to_sequences(raw_docs_train)
word_index = tokenizer.word_index
print("dictionary size: ", len(word_index))

# Pad every sequence to the same length.
data = sequence.pad_sequences(sequences, maxlen=max_sequence_length)

print('Shape of data tensor:', data.shape)
print('Shape of label tensor:', labels.shape)
#print(labels)
```
%% Output
pre-processing train data...
dictionary size: 190508
Shape of data tensor: (46807, 512)
Shape of label tensor: (46807,)
%% Cell type:code id: tags:
```
# split the data into a training set and a validation set
indices = np.arange(data.shape[0])
np.random.shuffle(indices)
data = data[indices]
labels = labels[indices]
nb_validation_samples = int(validation_split * data.shape[0])
x_train = data[:-nb_validation_samples]
y_train = labels[:-nb_validation_samples]
x_val = data[-nb_validation_samples:]
y_val = labels[-nb_validation_samples:]
```
%% Cell type:code id: tags:
```
#embedding matrix
print('preparing embedding matrix...')
embedding_matrix = np.zeros((len(word_index)+1, embedding_dim))
for word, i in word_index.items():
embedding_vector = embeddings_index.get(word)
if embedding_vector is not None :
embedding_matrix[i] = embedding_vector
```
%% Output
preparing embedding matrix...
%% Cell type:code id: tags:
```
# 1D CNN classifier on top of frozen pre-trained word embeddings:
# Embedding -> Conv1D -> max-pooling over the whole sequence -> softmax.
#filter_sizes = [2, 3, 5]
#drop = 0.5
# Width of the convolution window; also determines the pooling size below.
kernel_size = 5

embedding_layer = Embedding(len(word_index)+1, embedding_dim, input_length=max_sequence_length,
                            weights=[embedding_matrix], trainable=False)

# `shape` must be a tuple: (max_sequence_length) is only a parenthesised int.
inputs = Input(shape=(max_sequence_length,), dtype='int32')
embedding = embedding_layer(inputs)

print(embedding.shape)
#reshape = Reshape((max_sequence_length, embedding_dim, 1))(embedding)
#print(reshape.shape)

# architecture tested by Khaled
conv_0 = Conv1D(64, kernel_size, activation='relu')(embedding)
#conv_1 = Conv1D(128, 5, activation='relu')(embedding)
#conv_2 = Conv1D(128, 5, activation='relu')(embedding)

# The pool spans the full convolution output (length - kernel_size + 1),
# leaving a single value per filter.
maxpool_0 = MaxPool1D(pool_size=max_sequence_length - kernel_size + 1)(conv_0)
#maxpool_1 = MaxPool1D(5)(conv_1)
#maxpool_2 = MaxPool1D(35)(conv_2)

#concatenated_tensor = Concatenate(axis=1)([maxpool_0, maxpool_1, maxpool_2])
flatten = Flatten()(maxpool_0)
#dropout = Dropout(drop)(flatten)
output = Dense(len(labels_index), activation='softmax')(flatten)

# Create the model mapping the token-id input to the class probabilities.
model = Model(inputs=inputs, outputs=output)

# Keep on disk the weights of the epoch with the best validation accuracy
# ('val_acc' matches the 'acc' metric declared in compile()).
checkpoint = ModelCheckpoint('weights_cnn_sentece.hdf5', monitor='val_acc', verbose=1, save_best_only=True, mode='auto')
#adam = Adam(lr=1e-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

# Labels are integer class ids, hence the sparse categorical cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])

model.summary()
```
%% Output
(None, 512, 100)
Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 512)] 0
embedding (Embedding) (None, 512, 100) 19050900
conv1d (Conv1D) (None, 508, 64) 32064
max_pooling1d (MaxPooling1D (None, 1, 64) 0
)
flatten (Flatten) (None, 64) 0
dense (Dense) (None, 38) 2470
=================================================================
Total params: 19,085,434
Trainable params: 34,534
Non-trainable params: 19,050,900
_________________________________________________________________
%% Cell type:code id: tags:
```
# Train the model; the checkpoint callback saves the weights whenever the
# validation accuracy improves, and `history` keeps the per-epoch metrics.
history = model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=1,
callbacks=[checkpoint],
validation_data=(x_val, y_val))
```
%% Output
Epoch 1/10
585/586 [============================>.] - ETA: 0s - loss: 2.0486 - acc: 0.4831
Epoch 1: val_acc improved from -inf to 0.56832, saving model to weights_cnn_sentece.hdf5
586/586 [==============================] - 93s 157ms/step - loss: 2.0484 - acc: 0.4831 - val_loss: 1.6547 - val_acc: 0.5683
Epoch 2/10
585/586 [============================>.] - ETA: 0s - loss: 1.4558 - acc: 0.6155
Epoch 2: val_acc improved from 0.56832 to 0.61949, saving model to weights_cnn_sentece.hdf5
586/586 [==============================] - 85s 145ms/step - loss: 1.4557 - acc: 0.6156 - val_loss: 1.4356 - val_acc: 0.6195
Epoch 3/10
585/586 [============================>.] - ETA: 0s - loss: 1.2437 - acc: 0.6631
Epoch 3: val_acc improved from 0.61949 to 0.63829, saving model to weights_cnn_sentece.hdf5
586/586 [==============================] - 84s 143ms/step - loss: 1.2439 - acc: 0.6631 - val_loss: 1.3358 - val_acc: 0.6383
Epoch 4/10
585/586 [============================>.] - ETA: 0s - loss: 1.1175 - acc: 0.6942
Epoch 4: val_acc improved from 0.63829 to 0.65111, saving model to weights_cnn_sentece.hdf5
586/586 [==============================] - 84s 143ms/step - loss: 1.1176 - acc: 0.6941 - val_loss: 1.2895 - val_acc: 0.6511
Epoch 5/10
585/586 [============================>.] - ETA: 0s - loss: 1.0243 - acc: 0.7172
Epoch 5: val_acc improved from 0.65111 to 0.65356, saving model to weights_cnn_sentece.hdf5
586/586 [==============================] - 84s 143ms/step - loss: 1.0242 - acc: 0.7172 - val_loss: 1.2751 - val_acc: 0.6536
Epoch 6/10
585/586 [============================>.] - ETA: 0s - loss: 0.9492 - acc: 0.7371
Epoch 6: val_acc improved from 0.65356 to 0.65987, saving model to weights_cnn_sentece.hdf5
586/586 [==============================] - 92s 158ms/step - loss: 0.9491 - acc: 0.7371 - val_loss: 1.2598 - val_acc: 0.6599
Epoch 7/10
585/586 [============================>.] - ETA: 0s - loss: 0.8892 - acc: 0.7536
Epoch 7: val_acc did not improve from 0.65987
586/586 [==============================] - 86s 147ms/step - loss: 0.8892 - acc: 0.7536 - val_loss: 1.2598 - val_acc: 0.6557
Epoch 8/10
585/586 [============================>.] - ETA: 0s - loss: 0.8387 - acc: 0.7659
Epoch 8: val_acc improved from 0.65987 to 0.66179, saving model to weights_cnn_sentece.hdf5
586/586 [==============================] - 85s 145ms/step - loss: 0.8387 - acc: 0.7659 - val_loss: 1.2452 - val_acc: 0.6618
Epoch 9/10
585/586 [============================>.] - ETA: 0s - loss: 0.7950 - acc: 0.7780
Epoch 9: val_acc improved from 0.66179 to 0.66275, saving model to weights_cnn_sentece.hdf5
586/586 [==============================] - 83s 142ms/step - loss: 0.7950 - acc: 0.7780 - val_loss: 1.2593 - val_acc: 0.6627
Epoch 10/10
585/586 [============================>.] - ETA: 0s - loss: 0.7575 - acc: 0.7873
Epoch 10: val_acc improved from 0.66275 to 0.66286, saving model to weights_cnn_sentece.hdf5
586/586 [==============================] - 83s 141ms/step - loss: 0.7575 - acc: 0.7873 - val_loss: 1.2646 - val_acc: 0.6629
%% Cell type:code id: tags:
```
```
%% Cell type:code id: tags:
```
# summarize history for accuracy
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='lower right')
plt.show()
# summarize history for loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()
```
%% Output
%% Cell type:markdown id: tags:
## Saving models
%% Cell type:code id: tags:
```
# Base name of the saved model: architecture tag, embedding name and per-class cap.
name = "cnn_conv1D_egc_"+embedding_name+"_s"+str(maxOfInstancePerClass)
```
%% Cell type:code id: tags:
```
# Save the trained model as an .h5 file under `path`.
model.save(path+name+".h5")
```
%% Cell type:code id: tags:
```
# save embeddings
# saving embeddings index
```
%% Cell type:markdown id: tags:
## Loading models
%% Cell type:code id: tags:
```
# Reload the saved model together with the pickled tokenizer and label encoder.
model_file = path + name + ".h5"
model = load_model(model_file)

tokenizer_path = path + tokenizer_filename
encoder_path = path + encoder_filename
with open(tokenizer_path, 'rb') as tokenizer_file, open(encoder_path, 'rb') as encoder_file:
    tokenizer = pickle.load(tokenizer_file)
    encoder = pickle.load(encoder_file)
```
%% Cell type:markdown id: tags:
## Evaluation
%% Cell type:code id: tags:
```
# Load the tab-separated test set into a DataFrame.
df_test = pd.read_csv(test_path, sep="\t")
```
%% Cell type:code id: tags:
```
# Apply to the test texts the same tokenization and padding as for training.
test_texts = df_test[columnText].tolist()
test_labels = df_test[columnClass].tolist()
test_sequences = tokenizer.texts_to_sequences(test_texts)
test_input = sequence.pad_sequences(test_sequences, maxlen=max_sequence_length)
# Get predictions
test_predictions_probas = model.predict(test_input)
# Keep, for each document, the index of the most probable class.
test_predictions = test_predictions_probas.argmax(axis=-1)
```
%% Cell type:code id: tags:
```
# Convert the predicted class ids back to class names and print the metrics.
test_intent_predictions = encoder.inverse_transform(test_predictions)
#test_intent_original = encoder.inverse_transform(test_labels)
# Accuracy = share of documents whose predicted class equals the true class.
print('accuracy: ', np.mean(test_intent_predictions == np.asarray(test_labels)))
# zero_division=0 reports 0.0 for classes that are never predicted (the value
# already used by default) without emitting an UndefinedMetricWarning each time.
print("Precision, Recall and F1-Score:\n\n", classification_report(test_labels, test_intent_predictions, zero_division=0))
```
%% Output
accuracy: 0.6617672192787558
/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
Precision, Recall and F1-Score:
precision recall f1-score support
Agriculture - Economie rustique 0.40 0.28 0.33 233
Anatomie 0.79 0.53 0.63 215
Antiquité 0.47 0.50 0.48 272
Architecture 0.54 0.47 0.51 278
Arts et métiers 0.37 0.13 0.20 112
Beaux-arts 0.49 0.30 0.37 86
Belles-lettres - Poésie 0.31 0.22 0.26 206
Blason 0.59 0.48 0.53 108
Caractères 1.00 0.09 0.16 23
Chasse 0.66 0.53 0.58 116
Chimie 0.40 0.31 0.35 104
Commerce 0.48 0.54 0.51 376
Droit - Jurisprudence 0.79 0.76 0.77 1284
Economie domestique 0.20 0.04 0.06 27
Grammaire 0.43 0.38 0.40 452
Géographie 0.95 0.94 0.95 2621
Histoire 0.39 0.53 0.45 616
Histoire naturelle 0.77 0.82 0.79 963
Jeu 0.63 0.64 0.64 56
Marine 0.65 0.71 0.68 415
Maréchage - Manège 0.85 0.72 0.78 105
Mathématiques 0.56 0.61 0.58 140
Mesure 0.33 0.05 0.09 37
Militaire (Art) - Guerre - Arme 0.57 0.63 0.60 258
Minéralogie 0.10 0.05 0.06 22
Monnaie 0.27 0.13 0.17 63
Musique 0.73 0.53 0.61 137
Médailles 0.86 0.26 0.40 23
Médecine - Chirurgie 0.51 0.64 0.57 455
Métiers 0.53 0.67 0.59 1051
Pharmacie 0.39 0.14 0.20 65
Philosophie 0.41 0.27 0.32 94
Physique - [Sciences physico-mathématiques] 0.52 0.56 0.54 265
Politique 0.50 0.04 0.08 23
Pêche 0.75 0.43 0.55 42
Religion 0.54 0.55 0.55 328
Spectacle 0.00 0.00 0.00 9
Superstition 0.86 0.27 0.41 22
accuracy 0.66 11702
macro avg 0.54 0.41 0.44 11702
weighted avg 0.66 0.66 0.66 11702
/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
%% Cell type:code id: tags:
```
# Build the per-class evaluation report (precision, recall, F1, support and
# FP/FN/TP/TN counts) and write it, together with the predictions, under `path`.

# Use a dedicated variable instead of overwriting `name`: re-running this cell
# would otherwise keep prepending "test_", and the model file name built from
# `name` would no longer match the saved model.
report_name = "test_" + name

# Pin the label order to encoder.classes_ so the report entries and the
# confusion-matrix rows/columns line up with the class names, even when a
# class is absent from both the test labels and the predictions.
class_names = list(encoder.classes_)
#classes = [str(e) for e in encoder.transform(encoder.classes_)]

report = classification_report(test_labels, test_intent_predictions, labels=class_names,
                               output_dict=True, zero_division=0)
weighted_avg = report['weighted avg']
# Computed directly rather than read from the report, so it is always available.
accuracy = float(np.mean(test_intent_predictions == np.asarray(test_labels)))

# Rows of the confusion matrix are true classes, columns are predicted classes.
cnf_matrix = confusion_matrix(test_labels, test_intent_predictions, labels=class_names)
TP = np.diag(cnf_matrix)
FP = cnf_matrix.sum(axis=0) - TP
FN = cnf_matrix.sum(axis=1) - TP
TN = cnf_matrix.sum() - (FP + FN + TP)

dff = pd.DataFrame({
    'className': class_names,
    'precision': [report[c]['precision'] for c in class_names],
    'recall': [report[c]['recall'] for c in class_names],
    'f1-score': [report[c]['f1-score'] for c in class_names],
    'support': [report[c]['support'] for c in class_names],
    'FP': FP,
    'FN': FN,
    'TP': TP,
    'TN': TN,
})

content = report_name + "\n"
print(report_name)
content += str(weighted_avg) + "\n"
print(weighted_avg)
print(accuracy)
print(dff)

# Make sure the output folders exist before writing into them.
os.makedirs(path+"reports", exist_ok=True)
os.makedirs(path+"predictions", exist_ok=True)

dff.to_csv(path+"reports/report_"+report_name+".csv", index=False)

# save the predictions
pd.DataFrame({'labels': pd.Series(df_test[columnClass]), 'predictions': pd.Series(test_intent_predictions)}).to_csv(path+"predictions/predictions_"+report_name+".csv")

with open(path+"reports/report_"+report_name+".txt", 'w', encoding='utf-8') as f:
    f.write(content)
```
%% Output
/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1318: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.
_warn_prf(average, modifier, msg_start, len(result))
test_cnn_conv1D_egc_glove.6B.100d_s10000
{'precision': 0.6623969961145181, 'recall': 0.6617672192787558, 'f1-score': 0.6552681646811063, 'support': 11702}
0.6617672192787558
className precision ... TP TN
0 Agriculture - Economie rustique 0.395210 ... 66 11368
1 Anatomie 0.786207 ... 114 11456
2 Antiquité 0.470383 ... 135 11278
3 Architecture 0.543210 ... 132 11313
4 Arts et métiers 0.365854 ... 15 11564
5 Beaux-arts 0.490566 ... 26 11589
6 Belles-lettres - Poésie 0.312500 ... 45 11397
7 Blason 0.590909 ... 52 11558
8 Caractères 1.000000 ... 2 11679
9 Chasse 0.655914 ... 61 11554
10 Chimie 0.400000 ... 32 11550
11 Commerce 0.475410 ... 203 11102
12 Droit - Jurisprudence 0.785084 ... 979 10150
13 Economie domestique 0.200000 ... 1 11671
14 Grammaire 0.434010 ... 171 11027
15 Géographie 0.947469 ... 2471 8944
16 Histoire 0.389087 ... 328 10571
17 Histoire naturelle 0.774162 ... 785 10510
18 Jeu 0.631579 ... 36 11625
19 Marine 0.647702 ... 296 11126
20 Maréchage - Manège 0.853933 ... 76 11584
21 Mathématiques 0.562914 ... 85 11496
22 Mesure 0.333333 ... 2 11661
23 Militaire (Art) - Guerre - Arme 0.569930 ... 163 11321
24 Minéralogie 0.100000 ... 1 11671
25 Monnaie 0.266667 ... 8 11617
26 Musique 0.734694 ... 72 11539
27 Médailles 0.857143 ... 6 11678
28 Médecine - Chirurgie 0.513228 ... 291 10971
29 Métiers 0.527820 ... 702 10023
30 Pharmacie 0.391304 ... 9 11623
31 Philosophie 0.409836 ... 25 11572
32 Physique - [Sciences physico-mathématiques] 0.522807 ... 149 11301
33 Politique 0.500000 ... 1 11678
34 Pêche 0.750000 ... 18 11654
35 Religion 0.543807 ... 180 11223
36 Spectacle 0.000000 ... 0 11693
37 Superstition 0.857143 ... 6 11679
[38 rows x 9 columns]
%% Cell type:code id: tags:
```
```
%% Cell type:code id: tags:
```
```
%% Cell type:code id: tags:
```
```
%% Cell type:code id: tags:
```
```
%% Cell type:code id: tags:
```
```
%% Cell type:code id: tags:
```
```
%% Cell type:code id: tags:
```
```
%% Cell type:code id: tags:
```
```
%% Cell type:code id: tags:
```
```
%% Cell type:code id: tags:
```
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment