diff --git a/experimentsClassicClassifiers.py b/experimentsClassicClassifiers.py index fd2558cf8cc5436c7ca2906833ee9a2bb7c23efc..1cc2f91dac3edb0d237da6605ce743a112c0e01c 100644 --- a/experimentsClassicClassifiers.py +++ b/experimentsClassicClassifiers.py @@ -51,9 +51,8 @@ if not os.path.exists('models'): # Reading data and preprocessings steps preprocessor = Preprocessor() -df_original = pd.read_csv(dataPath) +df = pd.read_csv(dataPath) -df = df_original[[columnClass,columnText]].copy() df = remove_weak_classes(df, columnClass, minOfInstancePerClass) df = resample_classes(df, columnClass, maxOfInstancePerClass) @@ -73,7 +72,7 @@ for columnInput in [columnText, 'firstParagraph']: print('Process: ' + columnInput) - extractor = feature_extractor(df,columnText, columnClass) + extractor = feature_extractor(df, columnInput, columnClass) features_techniques = [ ('counter', extractor.count_vect(max_df = vectorization_max_df, min_df = vectorization_min_df, numberOfFeatures = vectorization_numberOfFeatures )), @@ -122,10 +121,10 @@ for columnInput in [columnText, 'firstParagraph']: file_name_report = columnInput + '_' +feature_technique_name + '_' + clf_name report, accuracy, weighted_avg = evaluate_model(clf, test_x, valid_y, y_pred, valid_y, [str(e) for e in encoder.transform(encoder.classes_)], encoder.classes_, os.path.join('reports', columnClass, dir_name_report, file_name_report)+'.pdf') + report.to_csv(os.path.join('reports', columnClass, dir_name_report, file_name_report+'.csv')) with open(os.path.join('reports', columnClass, dir_name_report, file_name_report+'.txt'), 'w') as f: sys.stdout = f # Change the standard output to the file we created. - print(report) print('accuracy : {}'.format(accuracy)) print('weighted_Precision : {}'.format(weighted_avg['precision'])) print('weighted_Recall : {}'.format(weighted_avg['recall']))