Skip to content
Snippets Groups Projects
Commit c9c23588 authored by Ludovic Moncla's avatar Ludovic Moncla
Browse files

update

parent b4999412
No related branches found
No related tags found
No related merge requests found
...@@ -2,6 +2,7 @@ import torch ...@@ -2,6 +2,7 @@ import torch
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from sklearn import preprocessing from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, CamembertTokenizer from transformers import BertTokenizer, CamembertTokenizer
from transformers import BertForSequenceClassification, AdamW, BertConfig, CamembertForSequenceClassification from transformers import BertForSequenceClassification, AdamW, BertConfig, CamembertForSequenceClassification
...@@ -11,6 +12,35 @@ import datetime ...@@ -11,6 +12,35 @@ import datetime
import random import random
import os import os
import argparse import argparse
import configparser
def create_dict(df, classColumnName):
    """Count how many rows of ``df`` carry each value of ``classColumnName``.

    Args:
        df: DataFrame holding the samples.
        classColumnName: Name of the column containing the class label.

    Returns:
        A dict mapping each distinct class value to its number of rows.
    """
    occurrences = df[classColumnName].value_counts()
    return dict(occurrences)
def remove_weak_classes(df, classColumnName, threshold):
    """Drop every row whose class has fewer than ``threshold`` instances.

    Args:
        df: DataFrame holding the samples.
        classColumnName: Name of the column containing the class label.
        threshold: Minimum number of rows a class needs in order to be kept
            (a class with exactly ``threshold`` rows is kept).

    Returns:
        A DataFrame containing only the rows of sufficiently populated
        classes, in their original order and with their original index
        labels. Rows whose class value is missing are dropped as well,
        since ``value_counts`` does not count missing values by default.
    """
    class_counts = df[classColumnName].value_counts()
    kept_classes = class_counts.index[class_counts >= threshold]
    # Select the surviving rows with a boolean mask. Subtracting the weak
    # rows via concat + drop_duplicates(keep=False) would also discard
    # legitimate rows that merely happen to be exact duplicates of each other.
    return df[df[classColumnName].isin(kept_classes)]
def resample_classes(df, classColumnName, numberOfInstances):
    """Randomly keep at most ``numberOfInstances`` rows of every class.

    Classes with more than ``numberOfInstances`` rows are down-sampled
    without replacement; smaller classes keep all of their rows (in
    shuffled order). Randomness comes from NumPy's global random state,
    so seed it with ``np.random.seed`` for reproducible results.

    Args:
        df: DataFrame holding the samples.
        classColumnName: Name of the column containing the class label.
        numberOfInstances: Maximum number of rows to keep per class.

    Returns:
        A DataFrame with the sampled rows of all classes, including the
        class column. Its index has two levels: the position of the class
        among the groups (0, 1, ...) and the original row label.
    """
    sampled_groups = []
    for _, group in df.groupby(classColumnName):
        sample_size = min(len(group), numberOfInstances)
        # Draw row positions rather than index labels: selecting labels with
        # .loc returns extra rows whenever the index is not unique.
        positions = np.random.choice(len(group), sample_size, replace=False)
        sampled_groups.append(group.iloc[positions])
    if not sampled_groups:
        # Nothing to group (e.g. empty input): return an empty frame with
        # the same columns instead of letting pd.concat raise.
        return df.iloc[0:0]
    return pd.concat(sampled_groups, keys=list(range(len(sampled_groups))))
def flat_accuracy(preds, labels): def flat_accuracy(preds, labels):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment