Skip to content
Snippets Groups Projects

Branch dev

Merged Ludovic Moncla requested to merge branch_dev into master
1 file
+ 3
4
Compare changes
  • Side-by-side
  • Inline
@@ -51,9 +51,8 @@ if not os.path.exists('models'):
# Reading data and preprocessing steps
preprocessor = Preprocessor()
df_original = pd.read_csv(dataPath)
df = pd.read_csv(dataPath)
df = df_original[[columnClass,columnText]].copy()
df = remove_weak_classes(df, columnClass, minOfInstancePerClass)
df = resample_classes(df, columnClass, maxOfInstancePerClass)
@@ -73,7 +72,7 @@ for columnInput in [columnText, 'firstParagraph']:
print('Process: ' + columnInput)
extractor = feature_extractor(df,columnText, columnClass)
extractor = feature_extractor(df, columnInput, columnClass)
features_techniques = [
('counter', extractor.count_vect(max_df = vectorization_max_df, min_df = vectorization_min_df, numberOfFeatures = vectorization_numberOfFeatures )),
@@ -122,10 +121,10 @@ for columnInput in [columnText, 'firstParagraph']:
file_name_report = columnInput + '_' +feature_technique_name + '_' + clf_name
report, accuracy, weighted_avg = evaluate_model(clf, test_x, valid_y, y_pred, valid_y, [str(e) for e in encoder.transform(encoder.classes_)], encoder.classes_, os.path.join('reports', columnClass, dir_name_report, file_name_report)+'.pdf')
report.to_csv(os.path.join('reports', columnClass, dir_name_report, file_name_report+'.csv'))
with open(os.path.join('reports', columnClass, dir_name_report, file_name_report+'.txt'), 'w') as f:
sys.stdout = f # Change the standard output to the file we created.
print(report)
print('accuracy : {}'.format(accuracy))
print('weighted_Precision : {}'.format(weighted_avg['precision']))
print('weighted_Recall : {}'.format(weighted_avg['recall']))
Loading