From 12f760dce6bf49eca30db8be3ee78f3675e694c0 Mon Sep 17 00:00:00 2001 From: Nelly Barret <nelly.barret@etu.univ-lyon1.fr> Date: Fri, 29 May 2020 17:29:25 +0200 Subject: [PATCH] [M] prediction with kmeans --- predihood/classes/MethodPrediction.py | 3 +- .../.~lock.data_density_filtered.csv# | 1 + predihood/main.py | 4 +- predihood/predict.py | 56 ++++++++++--------- 4 files changed, 35 insertions(+), 29 deletions(-) create mode 100644 predihood/generated_files/datasets/.~lock.data_density_filtered.csv# diff --git a/predihood/classes/MethodPrediction.py b/predihood/classes/MethodPrediction.py index b2f85172..ca7f6fda 100644 --- a/predihood/classes/MethodPrediction.py +++ b/predihood/classes/MethodPrediction.py @@ -36,7 +36,7 @@ class MethodPrediction(Method): def predict(self, iris_code=None): """ - Predict environment variables for the given iris. + Predict environment variables for the given iris. The environment variable to predict is stored in the dataset as "env" variable """ iris_object = model.get_iris_from_code(iris_code) iris_area = area(model.get_coords_from_code(iris_code)) / 1000000 @@ -69,6 +69,7 @@ class MethodPrediction(Method): df = pd.DataFrame([iris_indicators_values], columns=iris_indicators_names) self.prediction = self.classifier.predict(df)[0] + print(self.prediction) def plot(self): max_depths = np.linspace(1, 32, 32, endpoint=True) diff --git a/predihood/generated_files/datasets/.~lock.data_density_filtered.csv# b/predihood/generated_files/datasets/.~lock.data_density_filtered.csv# new file mode 100644 index 00000000..83abca58 --- /dev/null +++ b/predihood/generated_files/datasets/.~lock.data_density_filtered.csv# @@ -0,0 +1 @@ +,nelly,MacBook-Pro.local,29.05.2020 17:18,file:///Users/nelly/Library/Application%20Support/LibreOffice/4; \ No newline at end of file diff --git a/predihood/main.py b/predihood/main.py index 19e55c36..ef28479b 100644 --- a/predihood/main.py +++ b/predihood/main.py @@ -96,10 +96,12 @@ def run_algorithm(): @app.route('/predict_iris', methods=["GET"]) def predict_iris(): iris_code_to_predict = request.args['iris_code'] + # clf_name = request.args['clf'] + # clf = get_classifier(clf_name) data = Data(normalize="density", filter=True) data.init_all_in_one() - predictions = predict_one_iris(iris_code_to_predict, data, KNeighborsClassifier(n_neighbors=30), 0.8, 0.2, False) + predictions = predict_one_iris(iris_code_to_predict, data, KNeighborsClassifier(n_neighbors=30), 0.8, 0.2, False) # clf return {"predictions": predictions} diff --git a/predihood/predict.py b/predihood/predict.py index 38dcbd92..fb53068d 100644 --- a/predihood/predict.py +++ b/predihood/predict.py @@ -426,9 +426,9 @@ def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outlier for j, env in enumerate(ENVIRONMENT_VARIABLES): predictions_lst = [] for top_k, lst in lists.items(): - dataset = Dataset(data, env, selected_indicators=lst[env], train_size=train_size, test_size=test_size, outliers=remove_outliers) + dataset = Dataset(data, env, selected_indicators=lst[env], train_size=train_size, test_size=test_size, outliers=remove_outliers, _type='supervised') dataset.init_all_in_one() - algo = MethodPrediction(name='', dataset=dataset, classifier=clf, _type='supervised') + algo = MethodPrediction(name='', dataset=dataset, classifier=clf) # logging.debug("size of X_train: %d", len(algo2.dataset.X_train.columns)) # logging.debug(algo2.dataset.X_train.columns) algo.fit() @@ -438,7 +438,7 @@ def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outlier return predictions -def predict_k_means(data): +def predict_k_means(data, iris_code): nb_clusters = { "batiment": 5, "usage": 3, @@ -447,30 +447,32 @@ def predict_k_means(data): "geo": 9, "social": 5 } + lists = get_selected_indicators_lists() for j, env in enumerate(ENVIRONMENT_VARIABLES): - # for top_k, lst in lists.items(): - # if top_k == "10": - # dataset = Dataset(data, env, selected_indicators=lst[env], _type='unsupervised') - # dataset.init_all_in_one() - # cost = [] - # for i in range(1, 11): - # kmeans = MethodPrediction(name='', dataset=dataset, classifier=KMeans(n_clusters=i, random_state=RANDOM_STATE)) - # kmeans.fit() - # cost.append(kmeans.classifier.inertia_) - # plt.plot(range(1, 11), cost, color='g', linewidth='3') - # plt.xlabel("Value of K") - # plt.ylabel("Squared Error (Cost)") - # plt.show() - # print("top-k:", top_k, "--", env) - dataset = Dataset(data, env, _type='unsupervised') # selected_indicators=lst[env], - dataset.init_all_in_one() - # print("K-means with", nb_clusters[env], "clusters") - # kmeans = MethodPrediction(name='', dataset=dataset, classifier=KMeans(n_clusters=nb_clusters[env], random_state=RANDOM_STATE)) - # kmeans.fit() - # print(kmeans.classifier.labels_) - chart = Chart(dataset=dataset, name='') - chart.compute_trendline() + for top_k, lst in lists.items(): + if top_k == "10": + dataset = Dataset(data, env, selected_indicators=lst[env], _type='unsupervised') + dataset.init_all_in_one() + # cost = [] + # for i in range(1, 11): + # kmeans = MethodPrediction(name='', dataset=dataset, classifier=KMeans(n_clusters=i, random_state=RANDOM_STATE)) + # kmeans.fit() + # cost.append(kmeans.classifier.inertia_) + # plt.plot(range(1, 11), cost, color='g', linewidth='3') + # plt.xlabel("Value of K") + # plt.ylabel("Squared Error (Cost)") + # plt.show() + # print("top-k:", top_k, "--", env) + # dataset = Dataset(data, env, _type='unsupervised') # selected_indicators=lst[env], + # dataset.init_all_in_one() + # print("K-means with", nb_clusters[env], "clusters") + kmeans = MethodPrediction(name='', dataset=dataset, classifier=KMeans(n_clusters=nb_clusters[env], random_state=RANDOM_STATE)) + kmeans.fit() + kmeans.predict(iris_code) + print(kmeans.classifier.labels_) + # chart = Chart(dataset=dataset, name='') + # chart.compute_trendline() if __name__ == '__main__': @@ -494,7 +496,7 @@ if __name__ == '__main__': # data = Data(normalize="density", filter=True) data.init_all_in_one() - # predict_k_means(data) + predict_k_means(data, "692440102") # # data = Data(normalize="pop", filter=True) # data.init_all_in_one() @@ -504,4 +506,4 @@ if __name__ == '__main__': # expe2(data) # expe3(data) # expe4(data, RandomForestClassifier(), 0.8, 0.2) - expe5(data, RandomForestClassifier(), 0.8, 0.2) + # expe5(data, RandomForestClassifier(), 0.8, 0.2) -- GitLab