diff --git a/scripts/ML/BERT.py b/scripts/ML/BERT.py
deleted file mode 100644
index 1dc0ed9d486c171324cc02c67c27bfa37750e54d..0000000000000000000000000000000000000000
--- a/scripts/ML/BERT.py
+++ /dev/null
@@ -1,45 +0,0 @@
-from loaders import get_device
-from transformers import BertForSequenceClassification, BertTokenizer
-
-def loader(f):
-    def wrapped(*args, **kwargs):
-        name = f.__name__.replace('_init_', '')
-        print(f' - {name}', end='')
-        f(*args, **kwargs)
-        print(f'\r✔️ {name}')
-    return wrapped
-
-class BERT:
-    model_name = 'bert-base-multilingual-cased'
-
-    def __init__(self, root_path, training=False):
-        self.device = get_device()
-        print('Loading BERT tools')
-        self._init_tokenizer()
-        self.root_path = root_path
-        self._init_classifier(training)
-
-    @loader
-    def _init_tokenizer(self):
-        self.tokenizer = BertTokenizer.from_pretrained(BERT.model_name)
-
-    @loader
-    def _init_classifier(self, training):
-        if training:
-            bert = BertForSequenceClassification.from_pretrained(self.root_path)
-        else:
-            bert = BertForSequenceClassification.from_pretrained(
-                BERT.model_name, # The multilingual cased BERT base model.
-                num_labels = 2, # The number of output labels -- 2 for binary classification.
-                # You can increase this
-                # for multi-class tasks.
-                output_attentions = False, # Whether the model returns attention weights.
-                output_hidden_states = False, # Whether the model returns all hidden states.
-            )
-        self.model = bert.to(self.device.type)
-
-    def import_data(self, data):
-        return map(lambda d: d.to(self.device), data)
-
-    def save(self):
-        self.model.save_pretrained(self.root_path)