From 12f760dce6bf49eca30db8be3ee78f3675e694c0 Mon Sep 17 00:00:00 2001
From: Nelly Barret <nelly.barret@etu.univ-lyon1.fr>
Date: Fri, 29 May 2020 17:29:25 +0200
Subject: [PATCH] [M] prediction with kmeans

---
 predihood/classes/MethodPrediction.py         |  3 +-
 .../.~lock.data_density_filtered.csv#         |  1 +
 predihood/main.py                             |  4 +-
 predihood/predict.py                          | 56 ++++++++++---------
 4 files changed, 35 insertions(+), 29 deletions(-)
 create mode 100644 predihood/generated_files/datasets/.~lock.data_density_filtered.csv#

diff --git a/predihood/classes/MethodPrediction.py b/predihood/classes/MethodPrediction.py
index b2f85172..ca7f6fda 100644
--- a/predihood/classes/MethodPrediction.py
+++ b/predihood/classes/MethodPrediction.py
@@ -36,7 +36,7 @@ class MethodPrediction(Method):
 
     def predict(self, iris_code=None):
         """
-        Predict environment variables for the given iris.
+        Predict environment variables for the given iris. The environment variable to predict is stored in the dataset as the "env" variable.
         """
         iris_object = model.get_iris_from_code(iris_code)
         iris_area = area(model.get_coords_from_code(iris_code)) / 1000000
@@ -69,6 +69,7 @@ class MethodPrediction(Method):
 
         df = pd.DataFrame([iris_indicators_values], columns=iris_indicators_names)
         self.prediction = self.classifier.predict(df)[0]
+        print(self.prediction)
 
     def plot(self):
         max_depths = np.linspace(1, 32, 32, endpoint=True)
diff --git a/predihood/generated_files/datasets/.~lock.data_density_filtered.csv# b/predihood/generated_files/datasets/.~lock.data_density_filtered.csv#
new file mode 100644
index 00000000..83abca58
--- /dev/null
+++ b/predihood/generated_files/datasets/.~lock.data_density_filtered.csv#
@@ -0,0 +1 @@
+,nelly,MacBook-Pro.local,29.05.2020 17:18,file:///Users/nelly/Library/Application%20Support/LibreOffice/4;
\ No newline at end of file
diff --git a/predihood/main.py b/predihood/main.py
index 19e55c36..ef28479b 100644
--- a/predihood/main.py
+++ b/predihood/main.py
@@ -96,10 +96,12 @@ def run_algorithm():
 @app.route('/predict_iris', methods=["GET"])
 def predict_iris():
     iris_code_to_predict = request.args['iris_code']
+    # clf_name = request.args['clf']
+    # clf = get_classifier(clf_name)
 
     data = Data(normalize="density", filter=True)
     data.init_all_in_one()
-    predictions = predict_one_iris(iris_code_to_predict, data, KNeighborsClassifier(n_neighbors=30), 0.8, 0.2, False)
+    predictions = predict_one_iris(iris_code_to_predict, data, KNeighborsClassifier(n_neighbors=30), 0.8, 0.2, False)  # clf
     return {"predictions": predictions}
 
 
diff --git a/predihood/predict.py b/predihood/predict.py
index 38dcbd92..fb53068d 100644
--- a/predihood/predict.py
+++ b/predihood/predict.py
@@ -426,9 +426,9 @@ def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outlier
     for j, env in enumerate(ENVIRONMENT_VARIABLES):
         predictions_lst = []
         for top_k, lst in lists.items():
-            dataset = Dataset(data, env, selected_indicators=lst[env], train_size=train_size, test_size=test_size, outliers=remove_outliers)
+            dataset = Dataset(data, env, selected_indicators=lst[env], train_size=train_size, test_size=test_size, outliers=remove_outliers, _type='supervised')
             dataset.init_all_in_one()
-            algo = MethodPrediction(name='', dataset=dataset, classifier=clf, _type='supervised')
+            algo = MethodPrediction(name='', dataset=dataset, classifier=clf)
             # logging.debug("size of X_train: %d", len(algo2.dataset.X_train.columns))
             # logging.debug(algo2.dataset.X_train.columns)
             algo.fit()
@@ -438,7 +438,7 @@ def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outlier
     return predictions
 
 
-def predict_k_means(data):
+def predict_k_means(data, iris_code):
     nb_clusters = {
         "batiment": 5,
         "usage": 3,
@@ -447,30 +447,32 @@ def predict_k_means(data):
         "geo": 9,
         "social": 5
     }
+
     lists = get_selected_indicators_lists()
     for j, env in enumerate(ENVIRONMENT_VARIABLES):
-        # for top_k, lst in lists.items():
-        # if top_k == "10":
-        # dataset = Dataset(data, env, selected_indicators=lst[env], _type='unsupervised')
-        # dataset.init_all_in_one()
-        # cost = []
-        # for i in range(1, 11):
-        #     kmeans = MethodPrediction(name='', dataset=dataset, classifier=KMeans(n_clusters=i, random_state=RANDOM_STATE))
-        #     kmeans.fit()
-        #     cost.append(kmeans.classifier.inertia_)
-        # plt.plot(range(1, 11), cost, color='g', linewidth='3')
-        # plt.xlabel("Value of K")
-        # plt.ylabel("Squared Error (Cost)")
-        # plt.show()
-        # print("top-k:", top_k, "--", env)
-        dataset = Dataset(data, env, _type='unsupervised')  # selected_indicators=lst[env],
-        dataset.init_all_in_one()
-        # print("K-means with", nb_clusters[env], "clusters")
-        # kmeans = MethodPrediction(name='', dataset=dataset, classifier=KMeans(n_clusters=nb_clusters[env], random_state=RANDOM_STATE))
-        # kmeans.fit()
-        # print(kmeans.classifier.labels_)
-        chart = Chart(dataset=dataset, name='')
-        chart.compute_trendline()
+        for top_k, lst in lists.items():
+            if top_k == "10":
+                dataset = Dataset(data, env, selected_indicators=lst[env], _type='unsupervised')
+                dataset.init_all_in_one()
+                # cost = []
+                # for i in range(1, 11):
+                #     kmeans = MethodPrediction(name='', dataset=dataset, classifier=KMeans(n_clusters=i, random_state=RANDOM_STATE))
+                #     kmeans.fit()
+                #     cost.append(kmeans.classifier.inertia_)
+                # plt.plot(range(1, 11), cost, color='g', linewidth='3')
+                # plt.xlabel("Value of K")
+                # plt.ylabel("Squared Error (Cost)")
+                # plt.show()
+                # print("top-k:", top_k, "--", env)
+                # dataset = Dataset(data, env, _type='unsupervised')  # selected_indicators=lst[env],
+                # dataset.init_all_in_one()
+                # print("K-means with", nb_clusters[env], "clusters")
+                kmeans = MethodPrediction(name='', dataset=dataset, classifier=KMeans(n_clusters=nb_clusters[env], random_state=RANDOM_STATE))
+                kmeans.fit()
+                kmeans.predict(iris_code)
+                print(kmeans.classifier.labels_)
+                # chart = Chart(dataset=dataset, name='')
+                # chart.compute_trendline()
 
 
 if __name__ == '__main__':
@@ -494,7 +496,7 @@ if __name__ == '__main__':
     #
     data = Data(normalize="density", filter=True)
     data.init_all_in_one()
-    # predict_k_means(data)
+    predict_k_means(data, "692440102")
     #
     # data = Data(normalize="pop", filter=True)
     # data.init_all_in_one()
@@ -504,4 +506,4 @@ if __name__ == '__main__':
     # expe2(data)
     # expe3(data)
     # expe4(data, RandomForestClassifier(), 0.8, 0.2)
-    expe5(data, RandomForestClassifier(), 0.8, 0.2)
+    # expe5(data, RandomForestClassifier(), 0.8, 0.2)
-- 
GitLab