Commit 12f760dc authored by Nelly Barret

[M] prediction with kmeans

parent 1e315b84
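
This commit wires k-means into the prediction pipeline: predict_k_means(data, iris_code) builds an unsupervised Dataset per environment variable, fits a KMeans model through MethodPrediction, and predicts the cluster of the requested IRIS. As a rough illustration of the underlying fit/predict pattern, here is a minimal sketch with scikit-learn on synthetic indicator values (not the project's Dataset/MethodPrediction wrappers; all data below is made up):

from sklearn.cluster import KMeans
import numpy as np

# Illustrative stand-ins for the per-IRIS indicator matrix and the target IRIS.
indicators = np.random.rand(100, 6)            # 100 IRIS units x 6 indicators
iris_vector = np.random.rand(1, 6)             # the IRIS whose environment is predicted

kmeans = KMeans(n_clusters=5, random_state=0)  # e.g. 5 clusters, as for "batiment" in nb_clusters
kmeans.fit(indicators)                         # learn cluster centroids over all IRIS
print(kmeans.predict(iris_vector)[0])          # cluster assigned to the target IRIS
print(kmeans.labels_[:10])                     # labels of the first few training IRIS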
@@ -36,7 +36,7 @@ class MethodPrediction(Method):
     def predict(self, iris_code=None):
         """
-        Predict environment variables for the given iris.
+        Predict environment variables for the given iris. The environment variable to predict is stored in the dataset as the "env" variable.
         """
         iris_object = model.get_iris_from_code(iris_code)
         iris_area = area(model.get_coords_from_code(iris_code)) / 1000000
@@ -69,6 +69,7 @@ class MethodPrediction(Method):
         df = pd.DataFrame([iris_indicators_values], columns=iris_indicators_names)
         self.prediction = self.classifier.predict(df)[0]
+        print(self.prediction)

     def plot(self):
         max_depths = np.linspace(1, 32, 32, endpoint=True)
...
,nelly,MacBook-Pro.local,29.05.2020 17:18,file:///Users/nelly/Library/Application%20Support/LibreOffice/4;
\ No newline at end of file
@@ -96,10 +96,12 @@ def run_algorithm():
 @app.route('/predict_iris', methods=["GET"])
 def predict_iris():
     iris_code_to_predict = request.args['iris_code']
+    # clf_name = request.args['clf']
+    # clf = get_classifier(clf_name)
     data = Data(normalize="density", filter=True)
     data.init_all_in_one()
-    predictions = predict_one_iris(iris_code_to_predict, data, KNeighborsClassifier(n_neighbors=30), 0.8, 0.2, False)
+    predictions = predict_one_iris(iris_code_to_predict, data, KNeighborsClassifier(n_neighbors=30), 0.8, 0.2, False)  # clf
     return {"predictions": predictions}
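
Once this route is deployed, the prediction can be requested with a plain GET carrying the iris_code query parameter; a hypothetical client call (host and port are assumptions, the Flask development defaults):

import requests

# Assumes the Flask app runs locally on the default development port.
resp = requests.get("http://localhost:5000/predict_iris", params={"iris_code": "692440102"})
print(resp.json()["predictions"])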
...
@@ -426,9 +426,9 @@ def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outliers):
     for j, env in enumerate(ENVIRONMENT_VARIABLES):
         predictions_lst = []
         for top_k, lst in lists.items():
-            dataset = Dataset(data, env, selected_indicators=lst[env], train_size=train_size, test_size=test_size, outliers=remove_outliers)
+            dataset = Dataset(data, env, selected_indicators=lst[env], train_size=train_size, test_size=test_size, outliers=remove_outliers, _type='supervised')
             dataset.init_all_in_one()
-            algo = MethodPrediction(name='', dataset=dataset, classifier=clf, _type='supervised')
+            algo = MethodPrediction(name='', dataset=dataset, classifier=clf)
             # logging.debug("size of X_train: %d", len(algo2.dataset.X_train.columns))
             # logging.debug(algo2.dataset.X_train.columns)
             algo.fit()
@@ -438,7 +438,7 @@ def predict_one_iris(iris_code, data, clf, train_size, test_size, remove_outliers):
     return predictions

-def predict_k_means(data):
+def predict_k_means(data, iris_code):
     nb_clusters = {
         "batiment": 5,
         "usage": 3,
@@ -447,30 +447,32 @@ def predict_k_means(data):
         "geo": 9,
         "social": 5
     }
     lists = get_selected_indicators_lists()
     for j, env in enumerate(ENVIRONMENT_VARIABLES):
-        # for top_k, lst in lists.items():
-        #     if top_k == "10":
-        #         dataset = Dataset(data, env, selected_indicators=lst[env], _type='unsupervised')
-        #         dataset.init_all_in_one()
+        for top_k, lst in lists.items():
+            if top_k == "10":
+                dataset = Dataset(data, env, selected_indicators=lst[env], _type='unsupervised')
+                dataset.init_all_in_one()
                # cost = []
                # for i in range(1, 11):
                #     kmeans = MethodPrediction(name='', dataset=dataset, classifier=KMeans(n_clusters=i, random_state=RANDOM_STATE))
                #     kmeans.fit()
                #     cost.append(kmeans.classifier.inertia_)
                # plt.plot(range(1, 11), cost, color='g', linewidth='3')
                # plt.xlabel("Value of K")
                # plt.ylabel("Squared Error (Cost)")
                # plt.show()
                # print("top-k:", top_k, "--", env)
-        dataset = Dataset(data, env, _type='unsupervised')  # selected_indicators=lst[env],
-        dataset.init_all_in_one()
+                # dataset = Dataset(data, env, _type='unsupervised')  # selected_indicators=lst[env],
+                # dataset.init_all_in_one()
                # print("K-means with", nb_clusters[env], "clusters")
-        # kmeans = MethodPrediction(name='', dataset=dataset, classifier=KMeans(n_clusters=nb_clusters[env], random_state=RANDOM_STATE))
-        # kmeans.fit()
-        # print(kmeans.classifier.labels_)
-        chart = Chart(dataset=dataset, name='')
-        chart.compute_trendline()
+                kmeans = MethodPrediction(name='', dataset=dataset, classifier=KMeans(n_clusters=nb_clusters[env], random_state=RANDOM_STATE))
+                kmeans.fit()
+                kmeans.predict(iris_code)
+                print(kmeans.classifier.labels_)
+                # chart = Chart(dataset=dataset, name='')
+                # chart.compute_trendline()

 if __name__ == '__main__':
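
The commented-out inertia loop kept in the hunk above is the elbow-method check behind the hard-coded nb_clusters values; a self-contained version of that check (synthetic data, purely illustrative) would be:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

data_matrix = np.random.rand(200, 6)   # stand-in for the IRIS indicator matrix
cost = []
for k in range(1, 11):
    km = KMeans(n_clusters=k, random_state=0).fit(data_matrix)
    cost.append(km.inertia_)            # within-cluster sum of squares for k clusters
plt.plot(range(1, 11), cost, color='g', linewidth=3)
plt.xlabel("Value of K")
plt.ylabel("Squared Error (Cost)")
plt.show()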
@@ -494,7 +496,7 @@ if __name__ == '__main__':
     #
     data = Data(normalize="density", filter=True)
     data.init_all_in_one()
-    # predict_k_means(data)
+    predict_k_means(data, "692440102")
     #
     # data = Data(normalize="pop", filter=True)
     # data.init_all_in_one()
@@ -504,4 +506,4 @@ if __name__ == '__main__':
     # expe2(data)
     # expe3(data)
     # expe4(data, RandomForestClassifier(), 0.8, 0.2)
-    expe5(data, RandomForestClassifier(), 0.8, 0.2)
+    # expe5(data, RandomForestClassifier(), 0.8, 0.2)