Skip to content
Snippets Groups Projects
Commit 60bfb622 authored by Ludovic Moncla
Browse files

Merge branch 'master' into 'branch_dev'

# Conflicts:
#   experimentsClassicClassifiers.py
parents 1a99bf70 7c34555b
No related branches found
No related tags found
1 merge request: !3 Branch dev
This commit is part of merge request !3. Comments created here will be created in the context of that merge request.
...@@ -10,3 +10,5 @@ data/dataframe_with_ensemble_domaine_enccre.csv ...@@ -10,3 +10,5 @@ data/dataframe_with_ensemble_domaine_enccre.csv
data/dataframe_with_normClass_artfl.csv data/dataframe_with_normClass_artfl.csv
dataframe_with_domaine_enccre.csv dataframe_with_domaine_enccre.csv
dataframe_with_normClass_artfl.csv dataframe_with_normClass_artfl.csv
*.pkl
.DS_Store
...@@ -12,11 +12,11 @@ import numpy as np ...@@ -12,11 +12,11 @@ import numpy as np
classifiers = [ classifiers = [
('bayes', MultinomialNB()), ('bayes', MultinomialNB()),
('lr', LogisticRegression()),
('sgd', SGDClassifier()),
('svm', SVC() ), ('svm', SVC() ),
('decisionTree',DecisionTreeClassifier()), ('decisionTree',DecisionTreeClassifier()),
('rfc', RandomForestClassifier()), ('rfc', RandomForestClassifier()),
('lr', LogisticRegression()),
('sgd', SGDClassifier()),
('knn', KNeighborsClassifier()) ('knn', KNeighborsClassifier())
] ]
...@@ -26,7 +26,7 @@ param_grid_decisionTree = { 'criterion' : ['gini', 'entropy'], 'max_depth':range ...@@ -26,7 +26,7 @@ param_grid_decisionTree = { 'criterion' : ['gini', 'entropy'], 'max_depth':range
param_grid_rfc = { 'n_estimators': [200, 500], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth' : [4,5,6,7,8], 'criterion' :['gini', 'entropy'] } param_grid_rfc = { 'n_estimators': [200, 500], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth' : [4,5,6,7,8], 'criterion' :['gini', 'entropy'] }
param_grid_lr = {"C":np.logspace(-3,3,7), "penalty":["l1","l2"]} param_grid_lr = {"C":np.logspace(-3,3,7), "penalty":["l1","l2"]}
param_grid_sgd = { "loss" : ["hinge", "log", "squared_hinge", "modified_huber"], "alpha" : [0.0001, 0.001, 0.01, 0.1], "penalty" : ["l2", "l1", "none"], "max_iter" : [500]} param_grid_sgd = { "loss" : ["hinge", "log", "squared_hinge", "modified_huber"], "alpha" : [0.0001, 0.001, 0.01, 0.1], "penalty" : ["l2", "l1", "none"], "max_iter" : [500]}
param_grid_knn = {'n_neighbors' : list(range(1,20)), 'weights' : ['uniform', 'distance'], 'metric' : ['euclidean', 'manhattan'] } param_grid_knn = {'n_neighbors' : list(range(3,20)), 'weights' : ['uniform', 'distance'], 'metric' : ['euclidean', 'manhattan'] }
grid_params = [ grid_params = [
('bayes', None), ('bayes', None),
......
...@@ -57,14 +57,3 @@ def evaluate_model(clf, X_test, y_test, y_pred, valid_y, classes, classesName, p ...@@ -57,14 +57,3 @@ def evaluate_model(clf, X_test, y_test, y_pred, valid_y, classes, classesName, p
plt.savefig(pathSave) plt.savefig(pathSave)
return df, accuracy, weighted_avg return df, accuracy, weighted_avg
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
#y_true = [2, 0, 2, 2, 0, 1]
#y_pred = [0, 0, 2, 2, 0, 2]
#cf_matrix = confusion_matrix(y_true, y_pred)
#sns.heatmap(cf_matrix, annot=True)
#import matplotlib.pyplot as plt
#plt.show()
...@@ -94,50 +94,28 @@ for columnInput in [columnText, 'firstParagraph']: ...@@ -94,50 +94,28 @@ for columnInput in [columnText, 'firstParagraph']:
clf_name, clf = tmp_clf clf_name, clf = tmp_clf
grid_param_name, grid_param = tmp_grid_params grid_param_name, grid_param = tmp_grid_params
print(clf_name, clf, grid_param_name, grid_param) print(clf_name, clf, grid_param_name, grid_param)
model_file_name = columnInput + '_' + feature_technique_name + '_' + clf_name + '_' + str(minOfInstancePerClass) + '_' + str(maxOfInstancePerClass) +".pkl" model_file_name = columnInput + '_' +feature_technique_name + '_' + clf_name+ str(minOfInstancePerClass) + '_' + str(maxOfInstancePerClass) +".pkl"
if clf_name == 'bayes' :
if feature_technique_name == 'doc2vec': if clf_name != 'bayes' :
continue
else:
t_begin = time.time()
# if model exist
if os.path.isfile(os.path.join('./models', model_file_name)):
print('trained model loaded')
with open(os.path.join('./models', model_file_name), 'rb') as file:
clf = pickle.load(file)
else:
print('model training')
#if model not exists we save
with open(os.path.join('./models', model_file_name), 'wb') as file:
clf.fit(train_x, train_y)
pickle.dump(clf, file)
t_end =time.time()
training_time = t_end - t_begin
y_pred = clf.predict(test_x)
else :
clf = GridSearchCV(clf, grid_param, refit = True, verbose = 3) clf = GridSearchCV(clf, grid_param, refit = True, verbose = 3)
t_begin = time.time() elif feature_technique_name == 'doc2vec':
continue
t_begin = time.time()
if os.path.isfile(os.path.join('./models', model_file_name)): if os.path.isfile(os.path.join('./models', model_file_name)):
print('trained model loaded') with open(os.path.join('./models', model_file_name), 'rb') as file:
with open(os.path.join('./models', model_file_name), 'rb') as file: clf = pickle.load(file)
clf = pickle.load(file) else:
else: with open(os.path.join('./models', model_file_name), 'wb') as file:
print('model training') clf.fit(train_x, train_y)
with open(os.path.join('./models', model_file_name), 'wb') as file: pickle.dump(clf, file)
clf.fit(train_x, train_y)
pickle.dump(clf, file)
t_end =time.time() t_end =time.time()
training_time = t_end - t_begin training_time = t_end - t_begin
y_pred = clf.predict(test_x) y_pred = clf.predict(test_x)
#evaluate model #evaluate model
file_name_report = columnInput + '_' +feature_technique_name + '_' + clf_name file_name_report = columnInput + '_' +feature_technique_name + '_' + clf_name
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment