Ludovic Moncla · 1a99bf70 · 60bfb622 · 1a99bf70
experimentsClassicClassifiers.py: 3 additions, 4 deletions
--- a/experimentsClassicClassifiers.py
+++ b/experimentsClassicClassifiers.py
@@ -51,9 +51,8 @@ if not os.path.exists('models'):
 # Reading data and preprocessings steps
 preprocessor = Preprocessor()
-df_original = pd.read_csv(dataPath)
+df = pd.read_csv(dataPath)
-df = df_original[[columnClass,columnText]].copy()
 df = remove_weak_classes(df, columnClass, minOfInstancePerClass)
 df = resample_classes(df, columnClass, maxOfInstancePerClass)
@@ -73,7 +72,7 @@ for columnInput in [columnText, 'firstParagraph']:
    print('Process: ' + columnInput)
-    extractor = feature_extractor(df,columnText, columnClass)
+    extractor = feature_extractor(df, columnInput, columnClass)
    features_techniques = [
    ('counter',  extractor.count_vect(max_df = vectorization_max_df, min_df = vectorization_min_df, numberOfFeatures = vectorization_numberOfFeatures )),
@@ -122,10 +121,10 @@ for columnInput in [columnText, 'firstParagraph']:
            file_name_report = columnInput + '_' +feature_technique_name + '_' + clf_name
            report, accuracy, weighted_avg = evaluate_model(clf, test_x, valid_y, y_pred, valid_y, [str(e) for e in encoder.transform(encoder.classes_)],  encoder.classes_, os.path.join('reports', columnClass, dir_name_report, file_name_report)+'.pdf')
+            report.to_csv(os.path.join('reports', columnClass, dir_name_report, file_name_report+'.csv'))
            with open(os.path.join('reports', columnClass, dir_name_report, file_name_report+'.txt'), 'w') as f:
                sys.stdout = f # Change the standard output to the file we created.
-                print(report)
                print('accuracy : {}'.format(accuracy))
                print('weighted_Precision : {}'.format(weighted_avg['precision']))
                print('weighted_Recall    : {}'.format(weighted_avg['recall']))