Skip to content
Snippets Groups Projects
Commit c9c23588 authored by Ludovic Moncla's avatar Ludovic Moncla
Browse files

update

parent b4999412
No related branches found
No related tags found
No related merge requests found
......@@ -2,6 +2,7 @@ import torch
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer, CamembertTokenizer
from transformers import BertForSequenceClassification, AdamW, BertConfig, CamembertForSequenceClassification
......@@ -11,6 +12,35 @@ import datetime
import random
import os
import argparse
import configparser
def create_dict(df, classColumnName):
    """Count the rows of ``df`` per distinct value of ``classColumnName``.

    Args:
        df: DataFrame holding the samples.
        classColumnName: Name of the column whose values are tallied.

    Returns:
        A dict mapping each distinct value found in the column to the
        number of rows carrying it, as reported by ``Series.value_counts``.
    """
    occurrences = df[classColumnName].value_counts()
    # Equivalent to dict(occurrences): one label-based lookup per key.
    return {label: occurrences[label] for label in occurrences.keys()}
def remove_weak_classes(df, classColumnName, threshold):
    """Drop every row whose class has fewer than ``threshold`` instances.

    Args:
        df: DataFrame holding the samples.
        classColumnName: Name of the column containing the class label.
        threshold: Minimum number of rows a class needs in order to be kept
            (classes with exactly ``threshold`` rows are kept).

    Returns:
        A new DataFrame containing only the rows of the sufficiently
        populated classes, with their original index labels and order.
    """
    class_counts = df[classColumnName].value_counts()
    kept_classes = class_counts[class_counts >= threshold].index
    # Filter with a boolean mask rather than concat + drop_duplicates(keep=False):
    # that anti-join also discarded rows of the kept classes whenever the
    # data contained exact duplicate rows, and required hashable cells.
    # .copy() hands the caller an independent frame, as the old code did.
    return df[df[classColumnName].isin(kept_classes)].copy()
def resample_classes(df, classColumnName, numberOfInstances):
    """Randomly cap every class at ``numberOfInstances`` rows.

    Each group of rows sharing a value in ``classColumnName`` is sampled
    without replacement. Groups that have ``numberOfInstances`` rows or
    fewer keep all of their rows (in shuffled order).

    Args:
        df: DataFrame holding the samples.
        classColumnName: Name of the column containing the class label.
        numberOfInstances: Maximum number of rows to keep per class.

    Returns:
        The result of ``groupby(classColumnName, as_index=False).apply``
        over the per-class samples.
    """

    def _sample_group(group):
        # Never ask for more rows than the group holds.
        sample_size = min(len(group), numberOfInstances)
        # Draw row *positions* instead of index labels: with repeated index
        # labels, a label-based .loc lookup returns every matching row and
        # the sample would exceed sample_size.
        positions = np.random.choice(len(group), sample_size, replace=False)
        return group.iloc[positions, :]

    return df.groupby(classColumnName, as_index=False).apply(_sample_group)
def flat_accuracy(preds, labels):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment