Commit 21df7dd3 authored by Khalleud

[ADD] Notebook Bert Fine Tuning

parent 72178236
%% Cell type:markdown id: tags:
## Setup GPU
%% Cell type:code id: tags:
```
import torch

# If there's a GPU available...
if torch.cuda.is_available():
    # Tell PyTorch to use the GPU.
    device = torch.device("cuda")
    print('There are %d GPU(s) available.' % torch.cuda.device_count())
    print('We will use the GPU:', torch.cuda.get_device_name(0))
# If not...
else:
    print('No GPU available, using the CPU instead.')
    device = torch.device("cpu")
```
%% Output
No GPU available, using the CPU instead.
%% Cell type:markdown id: tags:
## Install packages
%% Cell type:code id: tags:
```
pip install transformers
```
%% Output
Requirement already satisfied: transformers in /usr/local/lib/python3.7/dist-packages (4.10.0)
Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from transformers) (2.23.0)
Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from transformers) (21.0)
Requirement already satisfied: tokenizers<0.11,>=0.10.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.10.3)
Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.7/dist-packages (from transformers) (4.62.0)
Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from transformers) (4.6.4)
Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from transformers) (5.4.1)
Requirement already satisfied: sacremoses in /usr/local/lib/python3.7/dist-packages (from transformers) (0.0.45)
Requirement already satisfied: huggingface-hub>=0.0.12 in /usr/local/lib/python3.7/dist-packages (from transformers) (0.0.16)
Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (1.19.5)
Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)
Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from transformers) (3.0.12)
Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from huggingface-hub>=0.0.12->transformers) (3.7.4.3)
Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->transformers) (2.4.7)
Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->transformers) (3.5.0)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (1.24.3)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2021.5.30)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (3.0.4)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->transformers) (2.10)
Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)
Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.15.0)
Requirement already satisfied: joblib in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (1.0.1)
%% Cell type:code id: tags:
```
pip install sentencepiece
```
%% Output
Requirement already satisfied: sentencepiece in /usr/local/lib/python3.7/dist-packages (0.1.96)
%% Cell type:markdown id: tags:
## Utility functions
%% Cell type:code id: tags:
```
import pandas as pd
import numpy as np

def create_dict(df, classColumnName):
    # Map each class label to its number of instances.
    return dict(df[classColumnName].value_counts())

def remove_weak_classes(df, classColumnName, threshold):
    # Keep only the classes that have at least `threshold` instances.
    dictOfClassInstances = create_dict(df, classColumnName)
    dictionary = {k: v for k, v in dictOfClassInstances.items() if v >= threshold}
    keys = [*dictionary]
    df_tmp = df[~ df[classColumnName].isin(keys)]
    df = pd.concat([df, df_tmp]).drop_duplicates(keep=False)
    return df

def resample_classes(df, classColumnName, numberOfInstances):
    # Randomly keep at most `numberOfInstances` elements per class, without replacement.
    replace = False
    fn = lambda obj: obj.loc[np.random.choice(obj.index, numberOfInstances if len(obj) > numberOfInstances else len(obj), replace), :]
    return df.groupby(classColumnName, as_index=False).apply(fn)
```
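%% Cell type:markdown id: tags:
The helpers above operate on a pandas DataFrame: `remove_weak_classes` drops every class with fewer than `threshold` instances, and `resample_classes` caps each remaining class at `numberOfInstances` rows. Below is a minimal usage sketch on a toy DataFrame; the column names `label` and `text` are only for illustration and are not used elsewhere in the notebook.
%% Cell type:code id: tags:
```
import pandas as pd

# Toy DataFrame: class 'c' is too rare, class 'a' is over-represented.
toy = pd.DataFrame({'label': ['a'] * 5 + ['b'] * 3 + ['c'],
                    'text': ['doc %d' % i for i in range(9)]})

filtered = remove_weak_classes(toy, 'label', threshold=2)            # drops class 'c'
balanced = resample_classes(filtered, 'label', numberOfInstances=3)  # keeps at most 3 rows per class

print(balanced['label'].value_counts())
```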
%% Cell type:markdown id: tags:
## Load Data
%% Cell type:code id: tags:
```
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
```
%% Cell type:code id: tags:
```
dataPath = 'dataframe_with_ensemble_domaine_enccre.csv'
columnText = 'contentWithoutClass'
columnClass = 'ensemble_domaine_enccre'
minOfInstancePerClass = 200
maxOfInstancePerClass = 1500
```
%% Cell type:code id: tags:
```
df = pd.read_csv(dataPath)
df = remove_weak_classes(df, columnClass, minOfInstancePerClass)
df = resample_classes(df, columnClass, maxOfInstancePerClass)
df = df[df[columnClass] != 'unclassified']
```
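%% Cell type:markdown id: tags:
A quick sanity check of the resulting class distribution is useful at this point; this small sketch is not part of the original pipeline, it only inspects the filtered and resampled DataFrame.
%% Cell type:code id: tags:
```
# How many classes survive the filtering, and how many instances remain per class
# after capping at maxOfInstancePerClass.
print('%d classes kept' % df[columnClass].nunique())
print(df[columnClass].value_counts())
```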
%% Cell type:code id: tags:
```
y = df[columnClass]
numberOfClasses = y.nunique()
encoder = preprocessing.LabelEncoder()
y = encoder.fit_transform(y)
```
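%% Cell type:markdown id: tags:
The `LabelEncoder` keeps the original domain names in `encoder.classes_`. The sketch below builds an id-to-name mapping for later reference when reading the classification report; `label_mapping` is an illustrative name that is not used elsewhere in the notebook.
%% Cell type:code id: tags:
```
# Map each encoded label id back to its ensemble_domaine_enccre name.
label_mapping = dict(zip(encoder.transform(encoder.classes_), encoder.classes_))
print(label_mapping)
```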
%% Cell type:code id: tags:
```
train_x, test_x, train_y, test_y = train_test_split(df, y, test_size=0.33, random_state=42, stratify = y )
```
%% Cell type:code id: tags:
```
sentences = train_x[columnText].values
labels = train_y.tolist()
```
%% Cell type:markdown id: tags:
# Model
## Tokenisation & Input Formatting
%% Cell type:code id: tags:
```
tokeniser_bert = 'bert-base-multilingual-cased'
tokeniser_camembert = 'camembert-base'
model_bert = "bert-base-multilingual-cased"
model_camembert = 'camembert-base'
```
%% Cell type:code id: tags:
```
from transformers import BertTokenizer, CamembertTokenizer
# Load the BERT tokenizer.
print('Loading BERT tokenizer...')
tokenizer = BertTokenizer.from_pretrained(tokeniser_bert, do_lower_case=True)
```
%% Output
Loading BERT tokenizer...
%% Cell type:code id: tags:
```
# Tokenize all of the sentences and map the tokens to their word IDs.
input_ids = []

# For every sentence...
for sent in sentences:
    # `encode` will:
    #   (1) Tokenize the sentence.
    #   (2) Prepend the `[CLS]` token to the start.
    #   (3) Append the `[SEP]` token to the end.
    #   (4) Map tokens to their IDs.
    encoded_sent = tokenizer.encode(
        sent,                       # Sentence to encode.
        add_special_tokens = True,  # Add '[CLS]' and '[SEP]'
        # This function also supports truncation and conversion
        # to pytorch tensors, but I need to do padding, so I
        # can't use these features.
        #max_length = 128,          # Truncate all sentences.
        #return_tensors = 'pt',     # Return pytorch tensors.
    )
    # Add the encoded sentence to the list.
    input_ids.append(encoded_sent)
```
%% Output
Token indices sequence length is longer than the specified maximum sequence length for this model (866 > 512). Running this sequence through the model will result in indexing errors
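%% Cell type:markdown id: tags:
The warning above is expected: some articles are far longer than BERT's 512-token limit, and they are truncated to `max_len` in the padding cell below. If one wanted to avoid the warning entirely, `tokenizer.encode` can truncate while encoding; this is only a hedged alternative, not what the notebook actually runs.
%% Cell type:code id: tags:
```
# Alternative: let the tokenizer truncate during encoding so no sequence
# ever exceeds the limit (here 180, the same value as `max_len` below).
encoded_sent = tokenizer.encode(
    sentences[0],
    add_special_tokens=True,
    max_length=180,
    truncation=True,
)
print(len(encoded_sent))
```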
%% Cell type:code id: tags:
```
print('Max sentence length: ', max([len(sen) for sen in input_ids]))
```
%% Output
Max sentence length: 3462
%% Cell type:code id: tags:
```
max_len = 180

# Truncate long sequences and pad short ones with 0 up to max_len.
padded = []
for i in input_ids:
    if len(i) > max_len:
        padded.extend([i[:max_len]])
    else:
        padded.extend([i + [0] * (max_len - len(i))])
padded = input_ids = np.array(padded)
```
%% Cell type:code id: tags:
```
# Create attention masks
attention_masks = []

# For each sentence...
for sent in padded:
    # Create the attention mask.
    #   - If a token ID is 0, then it's padding, set the mask to 0.
    #   - If a token ID is > 0, then it's a real token, set the mask to 1.
    att_mask = [int(token_id > 0) for token_id in sent]
    # Store the attention mask for this sentence.
    attention_masks.append(att_mask)
```
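%% Cell type:markdown id: tags:
The manual truncation, padding and mask loops above can also be replaced by a single call to the tokenizer, which returns `input_ids` and `attention_mask` together. The notebook keeps the explicit loops; the sketch below is only an alternative for reference.
%% Cell type:code id: tags:
```
# One call produces padded token ids and attention masks for all sentences.
encoding = tokenizer(
    list(sentences),
    add_special_tokens=True,
    max_length=max_len,
    padding='max_length',
    truncation=True,
    return_tensors='np',
)
# encoding['input_ids'] and encoding['attention_mask'] are (n_sentences, max_len) arrays.
```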
%% Cell type:code id: tags:
```
# Use 90% for training and 10% for validation.
train_inputs, validation_inputs, train_labels, validation_labels = train_test_split(padded, labels,
                                                                                     random_state=2018, test_size=0.1, stratify=labels)
# Do the same for the masks.
train_masks, validation_masks, _, _ = train_test_split(attention_masks, labels,
                                                        random_state=2018, test_size=0.1, stratify=labels)
```
%% Cell type:code id: tags:
```
# Convert all inputs and labels into torch tensors, the required datatype
# for my model.
train_inputs = torch.tensor(train_inputs)
validation_inputs = torch.tensor(validation_inputs)
train_labels = torch.tensor(train_labels)
validation_labels = torch.tensor(validation_labels)
train_masks = torch.tensor(train_masks)
validation_masks = torch.tensor(validation_masks)
```
%% Cell type:code id: tags:
```
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
# The DataLoader needs to know the batch size for training, so I specify it here.
# For fine-tuning BERT on a specific task, the authors recommend a batch size of
# 16 or 32.
batch_size = 32
# Create the DataLoader for the training set.
train_data = TensorDataset(train_inputs, train_masks, train_labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)
# Create the DataLoader for the validation set.
validation_data = TensorDataset(validation_inputs, validation_masks, validation_labels)
validation_sampler = SequentialSampler(validation_data)
validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)
```
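%% Cell type:markdown id: tags:
Before training, a quick look at one batch confirms that the tensors line up as expected; this small check is not in the original notebook.
%% Cell type:code id: tags:
```
# Sanity check: shapes of one training batch.
batch = next(iter(train_dataloader))
print('input ids:      ', batch[0].shape)  # (batch_size, max_len)
print('attention masks:', batch[1].shape)  # (batch_size, max_len)
print('labels:         ', batch[2].shape)  # (batch_size,)
```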
%% Cell type:markdown id: tags:
## Training
%% Cell type:code id: tags:
```
from transformers import BertForSequenceClassification, AdamW, BertConfig, CamembertForSequenceClassification
# Load BertForSequenceClassification, the pretrained BERT model with a single
# linear classification layer on top.
model = BertForSequenceClassification.from_pretrained(
    model_bert,                    # The 12-layer multilingual BERT model, with a cased vocab.
    num_labels = numberOfClasses,  # The number of output labels: one per ENCCRE domain class.
    output_attentions = False,     # Whether the model returns attention weights.
    output_hidden_states = False,  # Whether the model returns all hidden-states.
)

# Tell pytorch to run this model on the GPU.
model.cuda()
```
%% Output
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-multilingual-cased and are newly initialized: ['classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-120-80c23ac5f353> in <module>()
13
14 # Tell pytorch to run this model on the GPU.
---> 15 model.cuda()
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in cuda(self, device)
635 Module: self
636 """
--> 637 return self._apply(lambda t: t.cuda(device))
638
639 def xpu(self: T, device: Optional[Union[int, device]] = None) -> T:
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
528 def _apply(self, fn):
529 for module in self.children():
--> 530 module._apply(fn)
531
532 def compute_should_use_set_data(tensor, tensor_applied):
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
528 def _apply(self, fn):
529 for module in self.children():
--> 530 module._apply(fn)
531
532 def compute_should_use_set_data(tensor, tensor_applied):
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
528 def _apply(self, fn):
529 for module in self.children():
--> 530 module._apply(fn)
531
532 def compute_should_use_set_data(tensor, tensor_applied):
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in _apply(self, fn)
550 # `with torch.no_grad():`
551 with torch.no_grad():
--> 552 param_applied = fn(param)
553 should_use_set_data = compute_should_use_set_data(param, param_applied)
554 if should_use_set_data:
/usr/local/lib/python3.7/dist-packages/torch/nn/modules/module.py in <lambda>(t)
635 Module: self
636 """
--> 637 return self._apply(lambda t: t.cuda(device))
638
639 def xpu(self: T, device: Optional[Union[int, device]] = None) -> T:
RuntimeError: CUDA out of memory. Tried to allocate 352.00 MiB (GPU 0; 11.17 GiB total capacity; 10.43 GiB already allocated; 91.81 MiB free; 10.63 GiB reserved in total by PyTorch)
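%% Cell type:markdown id: tags:
The `CUDA out of memory` error above comes from calling `model.cuda()` on a GPU whose memory is already almost fully allocated (for example by a previously loaded model in the same session). A more defensive option, consistent with the device selection at the top of the notebook, is to free cached memory and move the model to whichever `device` was detected; this is a hedged sketch, not a guaranteed fix.
%% Cell type:code id: tags:
```
import gc

# Release Python references and cached CUDA memory from earlier experiments,
# then move the model to the detected device (GPU if available, CPU otherwise).
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()
model.to(device)
```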
%% Cell type:code id: tags:
```
# Note: AdamW is a class from the huggingface library (as opposed to pytorch).
# I believe the 'W' stands for 'Weight Decay fix'.
optimizer = AdamW(model.parameters(),
                  lr = 2e-5,  # args.learning_rate - default is 5e-5, our notebook had 2e-5
                  eps = 1e-8  # args.adam_epsilon - default is 1e-8.
                  )
```
%% Cell type:code id: tags:
```
from transformers import get_linear_schedule_with_warmup
# Number of training epochs (authors recommend between 2 and 4)
epochs = 4
# Total number of training steps is number of batches * number of epochs.
total_steps = len(train_dataloader) * epochs
# Create the learning rate scheduler.
scheduler = get_linear_schedule_with_warmup(optimizer,
                                            num_warmup_steps = 0,  # Default value in run_glue.py
                                            num_training_steps = total_steps)
```
%% Cell type:code id: tags:
```
import numpy as np
# Function to calculate the accuracy of our predictions vs labels
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)
```
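%% Cell type:markdown id: tags:
A tiny worked example of `flat_accuracy`: with three samples over three classes, the argmax predictions are [2, 0, 1] against labels [2, 0, 2], so two out of three match and the accuracy is 2/3. The values are illustrative only.
%% Cell type:code id: tags:
```
# Logits for 3 samples over 3 classes; argmax gives predictions [2, 0, 1].
example_preds = np.array([[0.1, 0.2, 0.7],
                          [0.9, 0.05, 0.05],
                          [0.2, 0.5, 0.3]])
example_labels = np.array([2, 0, 2])
print(flat_accuracy(example_preds, example_labels))  # 0.666...
```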
%% Cell type:code id: tags:
```
import time
import datetime
def format_time(elapsed):
    '''
    Takes a time in seconds and returns a string hh:mm:ss
    '''
    # Round to the nearest second.
    elapsed_rounded = int(round((elapsed)))
    # Format as hh:mm:ss
    return str(datetime.timedelta(seconds=elapsed_rounded))
```
%% Cell type:code id: tags:
```
import random

# This training code is based on the `run_glue.py` script here:
# https://github.com/huggingface/transformers/blob/5bfcd0485ece086ebcbed2d008813037968a9e58/examples/run_glue.py#L128

# Set the seed value all over the place to make this reproducible.
seed_val = 42
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)

# Store the average loss after each epoch so I can plot them.
loss_values = []

# For each epoch...
for epoch_i in range(0, epochs):

    # ========================================
    #               Training
    # ========================================

    # Perform one full pass over the training set.
    print("")
    print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
    print('Training...')

    # Measure how long the training epoch takes.
    t0 = time.time()

    # Reset the total loss for this epoch.
    total_loss = 0

    # Put the model into training mode.
    model.train()

    # For each batch of training data...
    for step, batch in enumerate(train_dataloader):

        # Progress update every 40 batches.
        if step % 40 == 0 and not step == 0:
            # Calculate elapsed time in minutes.
            elapsed = format_time(time.time() - t0)
            # Report progress.
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))

        # Unpack this training batch from the dataloader.
        #
        # As I unpack the batch, I'll also copy each tensor to the GPU using the
        # `to` method.
        #
        # `batch` contains three pytorch tensors:
        #   [0]: input ids
        #   [1]: attention masks
        #   [2]: labels
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)

        # Always clear any previously calculated gradients before performing a
        # backward pass. PyTorch doesn't do this automatically because
        # accumulating the gradients is "convenient while training RNNs".
        # (source: https://stackoverflow.com/questions/48001598/why-do-we-need-to-call-zero-grad-in-pytorch)
        model.zero_grad()

        # Perform a forward pass (evaluate the model on this training batch).
        # This will return the loss (rather than the model output) because I
        # have provided the `labels`.
        # The documentation for this `model` function is here:
        # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
        outputs = model(b_input_ids,
                        token_type_ids=None,
                        attention_mask=b_input_mask,
                        labels=b_labels)

        # The call to `model` always returns a tuple, so I need to pull the
        # loss value out of the tuple.
        loss = outputs[0]

        # Accumulate the training loss over all of the batches so that I can
        # calculate the average loss at the end. `loss` is a Tensor containing a
        # single value; the `.item()` function just returns the Python value
        # from the tensor.
        total_loss += loss.item()

        # Perform a backward pass to calculate the gradients.
        loss.backward()

        # Clip the norm of the gradients to 1.0.
        # This is to help prevent the "exploding gradients" problem.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        # Update parameters and take a step using the computed gradient.
        # The optimizer dictates the "update rule"--how the parameters are
        # modified based on their gradients, the learning rate, etc.
        optimizer.step()

        # Update the learning rate.
        scheduler.step()

    # Calculate the average loss over the training data.
    avg_train_loss = total_loss / len(train_dataloader)

    # Store the loss value for plotting the learning curve.
    loss_values.append(avg_train_loss)

    print("")
    print("  Average training loss: {0:.2f}".format(avg_train_loss))
    print("  Training epoch took: {:}".format(format_time(time.time() - t0)))

    # ========================================
    #               Validation
    # ========================================
    # After the completion of each training epoch, measure the performance on
    # the validation set.

    print("")
    print("Running Validation...")

    t0 = time.time()

    # Put the model in evaluation mode--the dropout layers behave differently
    # during evaluation.
    model.eval()

    # Tracking variables
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps, nb_eval_examples = 0, 0

    # Evaluate data for one epoch
    for batch in validation_dataloader:

        # Add batch to GPU
        batch = tuple(t.to(device) for t in batch)

        # Unpack the inputs from the dataloader
        b_input_ids, b_input_mask, b_labels = batch

        # Telling the model not to compute or store gradients, saving memory and
        # speeding up validation
        with torch.no_grad():
            # Forward pass, calculate logit predictions.
            # This will return the logits rather than the loss because we have
            # not provided labels.
            # token_type_ids is the same as the "segment ids", which
            # differentiates sentence 1 and 2 in 2-sentence tasks.
            # The documentation for this `model` function is here:
            # https://huggingface.co/transformers/v2.2.0/model_doc/bert.html#transformers.BertForSequenceClassification
            outputs = model(b_input_ids,
                            token_type_ids=None,
                            attention_mask=b_input_mask)

        # Get the "logits" output by the model. The "logits" are the output
        # values prior to applying an activation function like the softmax.
        logits = outputs[0]

        # Move logits and labels to CPU
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        # Calculate the accuracy for this batch of test sentences.
        tmp_eval_accuracy = flat_accuracy(logits, label_ids)

        # Accumulate the total accuracy.
        eval_accuracy += tmp_eval_accuracy

        # Track the number of batches
        nb_eval_steps += 1

    # Report the final accuracy for this validation run.
    print("  Accuracy: {0:.2f}".format(eval_accuracy/nb_eval_steps))
    print("  Validation took: {:}".format(format_time(time.time() - t0)))

print("")
print("Training complete!")
```
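%% Cell type:markdown id: tags:
The training loop stores the average loss per epoch in `loss_values` "for plotting the learning curve", but the plot itself is missing from the notebook. A minimal sketch with matplotlib (which is imported later in the report section):
%% Cell type:code id: tags:
```
import matplotlib.pyplot as plt

# Plot the average training loss recorded after each epoch.
plt.plot(range(1, len(loss_values) + 1), loss_values, 'b-o')
plt.title('Training loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()
```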
%% Cell type:markdown id: tags:
## Test
%% Cell type:code id: tags:
```
sentences_test = test_x[columnText].values
labels_test = test_y.tolist()
```
%% Cell type:code id: tags:
```
# Tokenize all of the sentences and map the tokens to their word IDs.
input_ids_test = []

# For every sentence...
for sent in sentences_test:
    # `encode` will:
    #   (1) Tokenize the sentence.
    #   (2) Prepend the `[CLS]` token to the start.
    #   (3) Append the `[SEP]` token to the end.
    #   (4) Map tokens to their IDs.
    encoded_sent = tokenizer.encode(
        sent,                       # Sentence to encode.
        add_special_tokens = True,  # Add '[CLS]' and '[SEP]'
    )
    input_ids_test.append(encoded_sent)

# Truncate / pad our input tokens to max_len.
padded_test = []
for i in input_ids_test:
    if len(i) > max_len:
        padded_test.extend([i[:max_len]])
    else:
        padded_test.extend([i + [0] * (max_len - len(i))])
input_ids_test = np.array(padded_test)

# Create attention masks
attention_masks = []

# Create a mask of 1s for each token followed by 0s for padding
for seq in input_ids_test:
    seq_mask = [float(i > 0) for i in seq]
    attention_masks.append(seq_mask)

# Convert to tensors.
prediction_inputs = torch.tensor(input_ids_test)
prediction_masks = torch.tensor(attention_masks)
prediction_labels = torch.tensor(labels_test)

# Set the batch size.
batch_size = 32

# Create the DataLoader.
prediction_data = TensorDataset(prediction_inputs, prediction_masks, prediction_labels)
prediction_sampler = SequentialSampler(prediction_data)
prediction_dataloader = DataLoader(prediction_data, sampler=prediction_sampler, batch_size=batch_size)
```
%% Cell type:code id: tags:
```
print('Predicting labels for {:,} test sentences...'.format(len(prediction_inputs)))

# Put model in evaluation mode
model.eval()

# Tracking variables
predictions_test, true_labels = [], []

# Predict
for batch in prediction_dataloader:
    # Add batch to GPU
    batch = tuple(t.to(device) for t in batch)

    # Unpack the inputs from the dataloader
    b_input_ids, b_input_mask, b_labels = batch

    # Telling the model not to compute or store gradients, saving memory and
    # speeding up prediction
    with torch.no_grad():
        # Forward pass, calculate logit predictions
        outputs = model(b_input_ids, token_type_ids=None,
                        attention_mask=b_input_mask)

    logits = outputs[0]

    # Move logits and labels to CPU
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()

    # Store predictions and true labels
    predictions_test.append(logits)
    true_labels.append(label_ids)

print('    DONE.')
```
%% Cell type:code id: tags:
```
from sklearn.metrics import *

pred_labels = []

# Evaluate each test batch using several metrics
print('Calculating the metrics for each batch...')

for i in range(len(true_labels)):
    # The predictions for this batch are an ndarray with one column per class.
    # Pick the label with the highest logit to get the predicted class ids.
    pred_labels_i = np.argmax(predictions_test[i], axis=1).flatten()
    pred_labels.append(pred_labels_i)
```
%% Cell type:code id: tags:
```
pred_labels_ = [item for sublist in pred_labels for item in sublist]
true_labels_ = [item for sublist in true_labels for item in sublist]
```
%% Cell type:markdown id: tags:
### Report & Evaluation
%% Cell type:code id: tags:
```
import matplotlib.pyplot as plt
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import seaborn as sns
```
%% Cell type:code id: tags:
```
# classification_report expects (y_true, y_pred): ground truth first, then predictions.
report = classification_report(true_labels_, pred_labels_, output_dict=True)
accuracy = report['accuracy']
weighted_avg = report['weighted avg']
```
%% Cell type:code id: tags:
```
classes = [str(e) for e in encoder.transform(encoder.classes_)]
classesName = encoder.classes_
```
%% Cell type:code id: tags:
```
precision = []
recall = []
f1 = []
support = []
dff = pd.DataFrame(columns= ['className', 'precision', 'recall', 'f1-score', 'support', 'FP', 'FN', 'TP', 'TN'])
for c in classes:
    precision.append(report[c]['precision'])
    recall.append(report[c]['recall'])
    f1.append(report[c]['f1-score'])
    support.append(report[c]['support'])
accuracy = report['accuracy']
weighted_avg = report['weighted avg']
cnf_matrix = confusion_matrix(true_labels_, pred_labels_)
FP = cnf_matrix.sum(axis=0) - np.diag(cnf_matrix)
FN = cnf_matrix.sum(axis=1) - np.diag(cnf_matrix)
TP = np.diag(cnf_matrix)
TN = cnf_matrix.sum() - (FP + FN + TP)
dff['className'] = classesName
dff['precision'] = precision
dff['recall'] = recall
dff['f1-score'] = f1
dff['support'] = support
dff['FP'] = FP
dff['FN'] = FN
dff['TP'] = TP
dff['TN'] = TN
```
%% Cell type:code id: tags:
```
print(weighted_avg)
print(accuracy)
print(dff)
```
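%% Cell type:markdown id: tags:
`seaborn` is imported in the report section but never used, and the confusion matrix is only consumed numerically. A hedged sketch of a heatmap over `cnf_matrix`, labelled with the class names:
%% Cell type:code id: tags:
```
# Visualise the confusion matrix computed above as a heatmap.
plt.figure(figsize=(12, 10))
sns.heatmap(cnf_matrix, annot=False, cmap='Blues',
            xticklabels=classesName, yticklabels=classesName)
plt.xlabel('Predicted class')
plt.ylabel('True class')
plt.show()
```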