diff --git a/experiments/bert_experiments.py b/experiments/bert_experiments.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4e4bcd52219cb92d830f7d03654e631e113906f
--- /dev/null
+++ b/experiments/bert_experiments.py
@@ -0,0 +1,349 @@
+import pandas as pd
+import numpy as np
+import torch
+import transformers as ppb
+from sklearn.model_selection import train_test_split
+from sklearn import preprocessing
+import statistics
+import os
+import sys
+import argparse
+import configparser
+from transformers import CamembertModel, CamembertTokenizer
+from transformers import FlaubertModel, FlaubertTokenizer
+
+
+from sklearn.svm import SVC
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.linear_model import SGDClassifier
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.model_selection import GridSearchCV
+
+
+import matplotlib.pyplot as plt
+from sklearn.metrics import plot_confusion_matrix
+from sklearn.metrics import confusion_matrix
+from sklearn.metrics import classification_report
+import seaborn as sns
+
+
+
+
+
+
+def evaluate_model(clf, X_test, y_test, y_pred, valid_y, classes, classesName, pathSave):
+    """Build a per-class metrics table and save a confusion-matrix plot.
+
+    Parameters
+    ----------
+    clf : fitted estimator, forwarded to `plot_confusion_matrix`.
+    X_test, y_test : data forwarded to `plot_confusion_matrix`.
+    y_pred : predicted labels of the evaluated samples.
+    valid_y : true labels of the evaluated samples.
+    classes : keys of the `classification_report` dictionary to tabulate,
+        one per class, in row order.
+    classesName : display names for the 'className' column, aligned with
+        `classes`.
+    pathSave : path the confusion-matrix figure is saved to.
+
+    Returns
+    -------
+    (df, accuracy, weighted_avg) : the per-class table (precision, recall,
+        f1-score, support, FP, FN, TP, TN), the report's 'accuracy' entry and
+        its 'weighted avg' entry.
+    """
+    # classification_report expects (y_true, y_pred); passing them the other
+    # way round swaps precision with recall and counts support on predictions.
+    report = classification_report(valid_y, y_pred, output_dict=True)
+    per_class = [report[c] for c in classes]
+
+    # Rows of the confusion matrix are true labels, columns are predictions.
+    cnf_matrix = confusion_matrix(valid_y, y_pred)
+    TP = np.diag(cnf_matrix)
+    FP = cnf_matrix.sum(axis=0) - TP
+    FN = cnf_matrix.sum(axis=1) - TP
+    TN = cnf_matrix.sum() - (FP + FN + TP)
+
+    df = pd.DataFrame({
+        'className': classesName,
+        'precision': [metrics['precision'] for metrics in per_class],
+        'recall': [metrics['recall'] for metrics in per_class],
+        'f1-score': [metrics['f1-score'] for metrics in per_class],
+        'support': [metrics['support'] for metrics in per_class],
+        'FP': FP, 'FN': FN,
+        'TP': TP, 'TN': TN,
+    })
+
+    # NOTE: this changes matplotlib's global font size for the whole process.
+    plt.rcParams["font.size"] = 3
+    plot_confusion_matrix(clf, X_test, y_test)
+    plt.savefig(pathSave)
+    plt.close()  # release the figure so repeated calls do not pile up open figures
+    return df, report['accuracy'], report['weighted avg']
+
+
+
+def create_dict(df, classColumnName):
+    """Map each non-null value of `classColumnName` to its row count."""
+    return dict(df[classColumnName].value_counts())
+
+
+def remove_weak_classes(df, classColumnName, threshold):
+    """Keep only the rows whose class has at least `threshold` rows.
+
+    Filtering with a boolean mask keeps repeated rows intact; a
+    concat/drop_duplicates round trip would also delete every genuinely
+    duplicated row of the classes that are meant to be kept.
+    """
+    counts = create_dict(df, classColumnName)
+    kept = [label for label, size in counts.items() if size >= threshold]
+    return df[df[classColumnName].isin(kept)]
+
+
+def split_class(df, columnProcessed):
+    """Return one row per ';'-separated class found in `columnProcessed`.
+
+    A row labelled 'a;b' becomes two rows ('a' and 'b') sharing every other
+    column. Empty fragments (';;', leading or trailing ';') and rows with a
+    missing label yield no output row. The result gets a fresh 0..n-1 index.
+    """
+    # Vectorised split/explode instead of row-by-row `.loc` appends, which
+    # are quadratic in the output size and raise on missing labels.
+    fragments = df[columnProcessed].str.split(';')
+    exploded = df.assign(**{columnProcessed: fragments}).explode(columnProcessed)
+
+    # Drop the placeholders produced by empty fragments and missing labels.
+    labels = exploded[columnProcessed]
+    keep = labels.notna() & (labels != '')
+    return exploded[keep].reset_index(drop=True)
+
+
+def resample_classes(df, classColumnName, numberOfInstances, random_state=None):
+    """Randomly keep at most `numberOfInstances` rows per class, without replacement.
+
+    `random_state` seeds the draw; the default (None) uses NumPy's global RNG."""
+    rng = np.random if random_state is None else np.random.RandomState(random_state)
+    def _draw(group):  # positional draw, so repeated index labels cannot duplicate rows
+        return group.iloc[rng.choice(len(group), min(len(group), numberOfInstances), replace=False)]
+    return df.groupby(classColumnName, as_index=False).apply(_draw)
+
+
+def select_classifier(argument):
+    """Return the `(estimator, param_grid)` pair registered under `argument`.
+
+    Known names are 'lr', 'sgd', 'svm', 'decisionTree', 'rfc' and 'knn'; any
+    other value yields `(None, None)`.
+    """
+    catalogue = {
+        'lr': (LogisticRegression(), {"penalty": ['none', "l2"]}),
+        'sgd': (SGDClassifier(), {
+            "loss": ["hinge", "log", "squared_hinge", "modified_huber"],
+            "alpha": [0.0001, 0.001, 0.01, 0.1],
+            "penalty": ["l2", "l1", "none"],
+            "max_iter": [500]}),
+        'svm': (SVC(), {
+            'C': [1, 10, 100, 1000],
+            'gamma': [1, 0.1, 0.001, 0.0001], 'kernel': ['linear', 'rbf']}),
+        'decisionTree': (DecisionTreeClassifier(), {
+            'criterion': ['gini', 'entropy'],
+            'max_depth': range(5, 10),
+            'min_samples_split': range(5, 10),
+            'min_samples_leaf': range(1, 5)}),
+        'rfc': (RandomForestClassifier(), {
+            'n_estimators': [200, 500],
+            'max_features': ['auto', 'sqrt', 'log2'],
+            'max_depth': [4, 5, 6, 7, 8],
+            'criterion': ['gini', 'entropy']}),
+        'knn': (KNeighborsClassifier(), {
+            'n_neighbors': list(range(3, 20)),
+            'weights': ['uniform', 'distance'], 'metric': ['euclidean', 'manhattan']}),
+    }
+    return catalogue.get(argument, (None, None))
+
+
+# ---------------------------------------------------------------------------
+# Command-line entry point
+#
+# Usage: python bert_experiments.py <modelName> <classifier>
+#
+# The script reads 'parameters.conf' from the current working directory,
+# loads the CSV it points to, turns the text column into one feature vector
+# per row with the chosen pretrained transformer, grid-searches the chosen
+# scikit-learn classifier on those vectors and writes a .pdf / .csv / .txt
+# report under reports/<columnClass>/<min>_<max>/.
+#
+# Configuration keys read:
+#   [general] minOfInstancePerClass, maxOfInstancePerClass
+#   [data]    dataPath, columnText, columnClass
+# ---------------------------------------------------------------------------
+
+# Pretrained checkpoint behind each accepted modelName:
+# name -> (model class, tokenizer class, checkpoint id, extra tokenizer kwargs).
+_PRETRAINED = {
+    'bert': (ppb.BertModel, ppb.BertTokenizer, 'bert-base-uncased', {}),
+    'distilbert': (ppb.DistilBertModel, ppb.DistilBertTokenizer, 'distilbert-base-uncased', {}),
+    'camembert': (CamembertModel, CamembertTokenizer, "camembert/camembert-base", {}),
+    'flaubert': (FlaubertModel, FlaubertTokenizer, 'flaubert/flaubert_base_cased', {'do_lowercase': False}),
+}
+
+# Short names understood by select_classifier().
+_CLASSIFIER_NAMES = ['lr', 'knn', 'rfc', 'decisionTree', 'sgd', 'svm']
+
+# Number of texts encoded per forward pass of the transformer.
+_BATCH_SIZE = 32
+
+# Longest token sequence passed to the model; longer texts are truncated.
+_MAX_LENGTH = 512
+
+
+def _load_model(name):
+    """Return `(model, tokenizer)` for `name`, a key of `_PRETRAINED`.
+
+    Only the requested checkpoint is loaded, so a run does not fetch and
+    hold in memory the three models it never uses.
+
+    Raises KeyError when `name` is not a key of `_PRETRAINED`.
+    """
+    model_class, tokenizer_class, checkpoint, tokenizer_kwargs = _PRETRAINED[name]
+    tokenizer = tokenizer_class.from_pretrained(checkpoint, **tokenizer_kwargs)
+    model = model_class.from_pretrained(checkpoint)
+    return model, tokenizer
+
+
+def _extract_features(texts, model, tokenizer, batch_size=_BATCH_SIZE):
+    """Encode `texts` and return their first-token hidden states.
+
+    Texts go through the model `batch_size` at a time, so peak memory is
+    bounded by the batch rather than by the whole dataset.
+
+    Parameters
+    ----------
+    texts : list of str to encode.
+    model, tokenizer : a matching pair, as returned by `_load_model`.
+    batch_size : number of texts per forward pass.
+
+    Returns
+    -------
+    numpy array with one row per text, read from position 0 of the first
+    element of the model output.
+    """
+    chunks = []
+    with torch.no_grad():
+        for start in range(0, len(texts), batch_size):
+            # Let the tokenizer pad and build the attention mask. The padding
+            # id is tokenizer-specific, so hand-padding with 0 and masking
+            # `ids != 0` can also hide real tokens whose id happens to be 0.
+            encoded = tokenizer(
+                texts[start:start + batch_size],
+                add_special_tokens=True,
+                max_length=_MAX_LENGTH,
+                truncation=True,
+                padding=True,
+                return_tensors='pt',
+            )
+            outputs = model(encoded['input_ids'], attention_mask=encoded['attention_mask'])
+            chunks.append(outputs[0][:, 0, :])
+    # One row per input text, in input order.
+    return torch.cat(chunks).numpy()
+
+
+if __name__ == "__main__":
+
+    print('ok')
+
+    parser = argparse.ArgumentParser()
+    # `choices` rejects unknown names up front instead of failing much later
+    # with an undefined model or a `None` estimator; `type=str.lower` keeps
+    # the spelling 'distilBert' advertised by the help text working.
+    parser.add_argument("modelName", type=str.lower, choices=sorted(_PRETRAINED),
+                        help="bert or distilBert or camembert or flaubert")
+    parser.add_argument("classifier", choices=_CLASSIFIER_NAMES,
+                        help="lr or knn or rfc or decisionTree or sgd or svm")
+
+    args = parser.parse_args()
+    arg = args.modelName
+    classifier = args.classifier
+
+    # ConfigParser.read() silently skips missing files, so check its result.
+    config = configparser.ConfigParser()
+    if not config.read('parameters.conf'):
+        parser.error("cannot read 'parameters.conf' in the current directory")
+
+    minOfInstancePerClass = config.getint('general', 'minOfInstancePerClass')
+    maxOfInstancePerClass = config.getint('general', 'maxOfInstancePerClass')
+
+    dataPath = config.get('data','dataPath')
+    columnText = config.get('data','columnText')
+    columnClass = config.get('data','columnClass')
+
+    # reports/<columnClass>/<min>_<max>/<model>_<classifier>.{pdf,csv,txt}
+    dir_name_report = str(minOfInstancePerClass) + '_' + str(maxOfInstancePerClass)
+    report_dir = os.path.join('reports', columnClass, dir_name_report)
+    os.makedirs(report_dir, exist_ok=True)
+    report_stem = os.path.join(report_dir, arg + '_' + classifier)
+
+    # read data
+    print(dataPath)
+    df = pd.read_csv(dataPath)
+
+    # Drop classes with fewer than minOfInstancePerClass rows, then cap every
+    # remaining class at maxOfInstancePerClass randomly drawn rows.
+    df = remove_weak_classes(df, columnClass, minOfInstancePerClass)
+    df = resample_classes(df, columnClass, maxOfInstancePerClass)
+
+    print(df.head())
+    print(df.shape)
+
+    # encode labels
+    # Rows labelled 'unclassified' are left out before the labels are encoded.
+    df = df[df[columnClass] != 'unclassified']
+    encoder = preprocessing.LabelEncoder()
+    y = encoder.fit_transform(df[columnClass])
+
+    # The text column is the one named by `columnText` in the configuration,
+    # not a name hard-coded in the script.
+    sentences = df[columnText]
+
+    # Features Extraction
+    model, tokenizer = _load_model(arg)
+    features = _extract_features(sentences.tolist(), model, tokenizer)
+    print(features.shape)
+
+    # Stratified hold-out of 33 % of the rows, with a fixed random seed.
+    train_x, test_x, train_y, test_y = train_test_split(
+        features, y, test_size=0.33, random_state=42, stratify=y)
+
+    # classification
+    clf, grid_param = select_classifier(classifier)
+
+    print(features)
+
+    # Grid search over the classifier's parameter grid; refit=True retrains
+    # the best configuration on the whole training split.
+    clf = GridSearchCV(clf, grid_param, refit = True, verbose = 3)
+    clf.fit(train_x, train_y)
+
+    # evaluation
+    y_pred = clf.predict(test_x)
+
+    # evaluate_model looks classes up in the report by their encoded id as a string.
+    class_ids = encoder.transform(encoder.classes_)
+    report, accuracy, weighted_avg = evaluate_model(
+        clf, test_x, test_y, y_pred, test_y,
+        [str(e) for e in class_ids], encoder.classes_,
+        report_stem + '.pdf')
+
+    # Per-class table as CSV; aggregate scores and the label mapping as text.
+    report.to_csv(report_stem + '.csv')
+
+    # print(..., file=f) writes straight to the file; sys.stdout is never
+    # rebound, so console output survives an error raised in this block.
+    with open(report_stem + '.txt', 'w') as f:
+        print('accuracy : {}'.format(accuracy), file=f)
+        print('weighted_Precision : {}'.format(weighted_avg['precision']), file=f)
+        print('weighted_Recall    : {}'.format(weighted_avg['recall']), file=f)
+        print('weighted_F-score   : {}'.format(weighted_avg['f1-score']), file=f)
+        print('weighted_Support   : {}'.format(weighted_avg['support']), file=f)
+        print(dict(zip(encoder.classes_, class_ids)), file=f)
diff --git a/experiments/parameters.conf b/experiments/parameters.conf
new file mode 100644
index 0000000000000000000000000000000000000000..df584e4ab43603f86828c17c4c7cacaaaf6437ee
--- /dev/null
+++ b/experiments/parameters.conf
@@ -0,0 +1,10 @@
+[general]
+
+minOfInstancePerClass = 1200
+maxOfInstancePerClass = 7
+
+[data]
+
+dataPath = ../Data/dataframe_with_ensemble_domaine_enccre.csv
+columnText = contentWithoutClass
+columnClass = ensemble_domaine_enccre
diff --git a/experiments/requierements.txt b/experiments/requierements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..076fef87790fd62e4a4101a85d64ceff09083b9a
--- /dev/null
+++ b/experiments/requierements.txt
@@ -0,0 +1,7 @@
+transformers==4.3.2
+sentencepiece
+scikit-learn<1.2
+pandas
+numpy
+torch==1.8.1
+