diff --git a/experimentsClassicClassifiers.py b/experimentsClassicClassifiers.py
index e1c15c0284363725d61d819616a70d3f45ffece4..eb2aa0f76410b846a99c553ac6e3e65c021f72fb 100644
--- a/experimentsClassicClassifiers.py
+++ b/experimentsClassicClassifiers.py
@@ -17,8 +17,6 @@ import nltk
 nltk.download('stopwords')
 nltk.download('punkt')
 
-
-
 parser = argparse.ArgumentParser()
 parser.add_argument("dataPath", help="Path of the dataframe")
 parser.add_argument("columnText", help="the column name of the text that should preproceed", default = 'content')
@@ -26,9 +24,6 @@ parser.add_argument("columnClass", help="ColumnClass the column name of the clas
 parser.add_argument("minOfInstancePerClass", help="minOfInstancePerClass the minimum of instance required for each class", type=int)
 parser.add_argument("maxOfInstancePerClass", help="maxOfInstancePerClass the maximum of instance required resamling classes", type=int)
 
-
-
-
 args = parser.parse_args()
 dataPath = args.dataPath
 columnText = args.columnText
@@ -47,31 +42,20 @@ dir_name_report = str(minOfInstancePerClass) + '_' + str(maxOfInstancePerClass)
 if not os.path.exists(os.path.join('reports',  columnClass, dir_name_report)):
     os.makedirs(os.path.join('reports', columnClass, dir_name_report))
 
-
 # Reading data and preprocessing steps
-
 preprocessor = Preprocessor()
 
-
 df_original = pd.read_csv(dataPath)
 
-
 df = df_original[[columnClass,columnText]].copy()
-#preprocessor.remove_null_rows(df, columnText)
-#preprocessor.remove_null_rows(df, columnClass)
-#df = split_class(df, columnClass)
 df = remove_weak_classes(df, columnClass, minOfInstancePerClass)
 df = resample_classes(df, columnClass, maxOfInstancePerClass)
 
-#preprocessor.getFirstParagraph(df, columnText, 'paragraphe' ) # select first sentence of each text
-
 # Read the configuration file to retrieve feature extractor parameters
 
 config = configparser.ConfigParser()
 config.read('settings.conf')
 
-
-
 vectorization_max_df = int(config.get('vectorizers','vectorization_max_df')) if config.get('vectorizers','vectorization_max_df').isdigit() else  float(config.get('vectorizers','vectorization_max_df'))
 vectorization_min_df = int(config.get('vectorizers','vectorization_min_df')) if config.get('vectorizers','vectorization_min_df').isdigit() else  float(config.get('vectorizers','vectorization_min_df'))
 vectorization_numberOfFeatures = int(config.get('vectorizers','vectorization_numberOfFeatures')) if config.get('vectorizers','vectorization_numberOfFeatures').isdigit() else None
@@ -79,33 +63,22 @@ doc2vec_vec_size = int(config.get('vectorizers','doc2vec_vec_size'))
 doc2vec_epochs = int(config.get('vectorizers','doc2vec_epochs'))
 doc2vec_lr = float(config.get('vectorizers','doc2vec_lr'))
 
-
 for columnInput in [columnText, 'firstParagraph']:
 
     print('Process: ' + columnInput)
 
     extractor = feature_extractor(df, columnInput, columnClass)
-    #extractor_paragraphe = feature_extractor(df,'paragraphe', columnClass)
-
 
     features_techniques = [
     ('counter',  extractor.count_vect(max_df = vectorization_max_df, min_df = vectorization_min_df, numberOfFeatures = vectorization_numberOfFeatures )),
     ('tf_idf',  extractor.tf_idf(max_df = vectorization_max_df, min_df = vectorization_min_df, numberOfFeatures = vectorization_numberOfFeatures)),
     ('doc2vec',  extractor.doc2vec(doc2vec_epochs, doc2vec_vec_size, doc2vec_lr))]
 
-    '''
-    features_techniques_paragraphe = [
-    ('counter',  extractor_paragraphe.count_vect(max_df = vectorization_max_df, min_df = vectorization_min_df, numberOfFeatures = vectorization_numberOfFeatures )),
-    ('tf_idf',  extractor_paragraphe.tf_idf(max_df = vectorization_max_df, min_df = vectorization_min_df, numberOfFeatures = vectorization_numberOfFeatures)),
-    ('doc2vec',  extractor_paragraphe.doc2vec(doc2vec_epochs, doc2vec_vec_size, doc2vec_lr))]
-    '''
-
     # Prepare the data
     df = df[df[columnClass] != 'unclassified']
     y  = df[columnClass]
 
     # Evaluate each feature technique on the current input column
-
     for feature_technique_name, features in features_techniques:
         train_x, test_x, train_y, test_y = train_test_split(features, y, test_size=0.33, random_state=42, stratify = y )
         encoder = preprocessing.LabelEncoder()
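Aside: the three vectorization_* reads kept above repeat the same int-or-float fallback on config.get(). A small helper could express it once; this is a minimal sketch only, and the parse_numeric name and allow_none flag are hypothetical, not part of this repo:

def parse_numeric(config, section, option, allow_none=False):
    # Return the option as int when it is a digit string, otherwise as
    # float, or as None when allow_none is set and the value is not a
    # digit string (mirrors the three ternaries above).
    raw = config.get(section, option)
    if raw.isdigit():
        return int(raw)
    if allow_none:
        return None
    return float(raw)

# e.g. vectorization_max_df = parse_numeric(config, 'vectorizers', 'vectorization_max_df')
#      vectorization_numberOfFeatures = parse_numeric(config, 'vectorizers', 'vectorization_numberOfFeatures', allow_none=True)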
diff --git a/requirements.txt b/requirements.txt
index ab54835a61f9acc00afee644c0bfd94d19a4add5..b083ca106aab942e0d285e6dafa445ea29851550 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,4 +14,4 @@ torchvision==0.8.2
 tokenizers==0.10.1
 regex==2018.1.10
 tensorflow==2.2.0
-gensim==3.8.1
+gensim==3.8.1
\ No newline at end of file
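Aside: the doc2vec extractor configured in experimentsClassicClassifiers.py presumably wraps the Doc2Vec class of the pinned gensim==3.8.1. A minimal sketch of how the three config values (epochs, vec_size, lr) might map onto that API, assuming whitespace tokenisation and integer tags; the function name and mapping are assumptions, not the repo's actual feature_extractor code:

from gensim.models.doc2vec import Doc2Vec, TaggedDocument

def doc2vec_features(texts, epochs, vec_size, lr):
    # One TaggedDocument per row; naive whitespace tokenisation (assumption).
    corpus = [TaggedDocument(words=t.split(), tags=[i]) for i, t in enumerate(texts)]
    model = Doc2Vec(vector_size=vec_size, alpha=lr, min_count=1, epochs=epochs)
    model.build_vocab(corpus)
    model.train(corpus, total_examples=model.corpus_count, epochs=model.epochs)
    # One dense vector per document, in row order.
    return [model.infer_vector(doc.words) for doc in corpus]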
diff --git a/tmp_preprocess_data.py b/tmp_preprocess_data.py
index 73fa83436b42060fd9be952e0c7d48af6b524657..d353bc44e0dc37e124b1aa94be45af9540e31150 100644
--- a/tmp_preprocess_data.py
+++ b/tmp_preprocess_data.py
@@ -15,23 +15,16 @@ from sklearn.model_selection import GridSearchCV
 import configparser
 from re import search
 import math
-from unidecode import unidecode
 import re
 import nltk
 from ClassPreprocessor import create_dict
 
 
-
-
 print("Begin preprocess")
 
 # Reading data and preprocessing steps
 
 preprocessor = Preprocessor()
-#df = pd.read_csv('data/corpus_tei.csv')
-#listOfM = df['class'].unique()
-
-
 
 print("load dataset")
 
@@ -48,14 +41,6 @@ df_1 = df[['ensemble_domaine_enccre','content','contentWithoutClass','firstParag
 df_2 = df[['domaine_enccre','content','contentWithoutClass','firstParagraph']].copy()
 df_3 = df[['normClass','content','contentWithoutClass','firstParagraph']].copy()
 
-############ shall we remove articles with less n tokens ####### remove markers
-#preprocessor.remove_null_rows(df_1, 'contentWithoutClass')
-#preprocessor.remove_null_rows(df_1, 'ensemble_domaine_enccre')
-#preprocessor.remove_null_rows(df_2, 'contentWithoutClass')
-#preprocessor.remove_null_rows(df_2, 'domaine_enccre')
-#preprocessor.remove_null_rows(df_3, 'contentWithoutClass')
-#preprocessor.remove_null_rows(df_3, 'normClass')
-
 print("split ensemble domaine enccre")
 df_1 = split_class(df_1, 'ensemble_domaine_enccre')
 print("save dataframe")