Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • geode/EDdA-Classification
1 result
Show changes
Commits on Source (5)
......@@ -10,3 +10,5 @@ data/dataframe_with_ensemble_domaine_enccre.csv
data/dataframe_with_normClass_artfl.csv
dataframe_with_domaine_enccre.csv
dataframe_with_normClass_artfl.csv
*.pkl
.DS_Store
......@@ -12,11 +12,11 @@ import numpy as np
# Candidate estimators as (short name, unfitted estimator instance) pairs.
# Each short name is listed exactly once. The name is presumably what ends up
# in the per-model pickle and report file names (verify against the training
# loop), so a repeated entry would only redo work under an identical name.
classifiers = [
    ('bayes', MultinomialNB()),
    ('svm', SVC()),
    ('decisionTree', DecisionTreeClassifier()),
    ('rfc', RandomForestClassifier()),
    ('lr', LogisticRegression()),
    ('sgd', SGDClassifier()),
    ('knn', KNeighborsClassifier()),
]
......@@ -26,7 +26,7 @@ param_grid_decisionTree = { 'criterion' : ['gini', 'entropy'], 'max_depth':range
# Hyper-parameter search spaces: each dict maps an estimator constructor
# argument to the list of candidate values to try (presumably consumed by
# GridSearchCV through `grid_params` -- verify against the caller).
# NOTE(review): 'auto' (max_features), 'log' (loss) and the string 'none'
# (penalty) are only accepted by older scikit-learn releases -- confirm
# against the version this project pins.
param_grid_rfc = {
    'n_estimators': [200, 500],
    'max_features': ['auto', 'sqrt', 'log2'],
    'max_depth': [4, 5, 6, 7, 8],
    'criterion': ['gini', 'entropy'],
}
# C spans 1e-3 .. 1e3 on a log scale (7 points).
# NOTE(review): the 'l1' penalty needs a solver that supports it -- confirm
# the LogisticRegression instance being tuned is configured accordingly.
param_grid_lr = {
    "C": np.logspace(-3, 3, 7),
    "penalty": ["l1", "l2"],
}
param_grid_sgd = {
    "loss": ["hinge", "log", "squared_hinge", "modified_huber"],
    "alpha": [0.0001, 0.001, 0.01, 0.1],
    "penalty": ["l2", "l1", "none"],
    "max_iter": [500],
}
# k ranges over 3..19 inclusive.
param_grid_knn = {
    'n_neighbors': list(range(3, 20)),
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'],
}
grid_params = [
('bayes', None),
......
......@@ -57,14 +57,3 @@ def evaluate_model(clf, X_test, y_test, y_pred, valid_y, classes, classesName, p
plt.savefig(pathSave)
return df, accuracy, weighted_avg
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
#y_true = [2, 0, 2, 2, 0, 1]
#y_pred = [0, 0, 2, 2, 0, 2]
#cf_matrix = confusion_matrix(y_true, y_pred)
#sns.heatmap(cf_matrix, annot=True)
#import matplotlib.pyplot as plt
#plt.show()
......@@ -96,44 +96,27 @@ for columnInput in [columnText, 'firstParagraph']:
grid_param_name, grid_param = tmp_grid_params
print(clf_name, clf, grid_param_name, grid_param)
model_file_name = columnInput + '_' +feature_technique_name + '_' + clf_name+ str(minOfInstancePerClass) + '_' + str(maxOfInstancePerClass) +".pkl"
if clf_name == 'bayes' :
if feature_technique_name == 'doc2vec':
continue
else:
t_begin = time.time()
# if the model file already exists, load it instead of retraining
if os.path.isfile(os.path.join('./model', model_file_name)):
with open(model_file_name, 'rb') as file:
clf = pickle.load(file)
else:
# if the model file does not exist yet, train the model and save it
with open(Pkl_Filename, 'wb') as file:
clf.fit(train_x, train_y)
pickle.dump(clf, file)
t_end =time.time()
training_time = t_end - t_begin
y_pred = clf.predict(test_x)
else :
if clf_name != 'bayes' :
clf = GridSearchCV(clf, grid_param, refit = True, verbose = 3)
t_begin = time.time()
elif feature_technique_name == 'doc2vec':
continue
t_begin = time.time()
if os.path.isfile(os.path.join('./model', model_file_name)):
with open(model_file_name, 'rb') as file:
clf = pickle.load(file)
else:
with open(Pkl_Filename, 'wb') as file:
clf.fit(train_x, train_y)
pickle.dump(clf, file)
if os.path.isfile(os.path.join('./models', model_file_name)):
with open(os.path.join('./models', model_file_name), 'rb') as file:
clf = pickle.load(file)
else:
with open(os.path.join('./models', model_file_name), 'wb') as file:
clf.fit(train_x, train_y)
pickle.dump(clf, file)
t_end =time.time()
t_end =time.time()
training_time = t_end - t_begin
training_time = t_end - t_begin
y_pred = clf.predict(test_x)
y_pred = clf.predict(test_x)
#evaluate model
file_name_report = columnInput + '_' +feature_technique_name + '_' + clf_name
......