From 9bde30f6612448866e882edc86cb6b11da5b0c5e Mon Sep 17 00:00:00 2001
From: Nelly Barret <nelly.barret@etu.univ-lyon1.fr>
Date: Fri, 15 May 2020 19:37:44 +0200
Subject: [PATCH] [M] working on similarity between curves

---
 predihood/charts.py        |  6 +--
 predihood/classes/Chart.py | 85 ++++++++++++++++++++++++++++++++++----
 2 files changed, 80 insertions(+), 11 deletions(-)

diff --git a/predihood/charts.py b/predihood/charts.py
index d99c85aa..7fecdec2 100644
--- a/predihood/charts.py
+++ b/predihood/charts.py
@@ -8,12 +8,10 @@ def generate_charts():
     data = Data(normalize="density", filter=True)
     data.init_all_in_one()
     lists = get_selected_indicators_lists(10)
-    print(lists)
-    for j, env in enumerate(["batiment", "usage"]):
-        print(env)
+    for j, env in enumerate(["batiment"]):
         dataset = Dataset(data, env, selected_indicators=lists["10"][env], train_size=0.8, test_size=0.2)
         dataset.init_all_in_one()
-        algo = Chart(name='chart', dataset=dataset)
+        algo = Chart(name='chart', dataset=dataset, number_of_iris=4)
         algo.compute_trendline()
 
 
diff --git a/predihood/classes/Chart.py b/predihood/classes/Chart.py
index 8b566ab8..25159e59 100644
--- a/predihood/classes/Chart.py
+++ b/predihood/classes/Chart.py
@@ -6,37 +6,135 @@ from scipy.interpolate import interp1d
 from predihood.classes.Method import Method
 
 
+def point_distance(y1, y2):
+    """Return the absolute difference between two ordinates, as a float."""
+    return abs(float(y2) - float(y1))
+
+
+def similarity_point(y1, y2, step):
+    """Score how close two ordinates are, from 0 (too different) to 1 (identical).
+
+    The distance is scaled by ``2 * step``: points at least that far apart score 0,
+    and the score grows linearly towards 1 as the points get closer.
+    """
+    distance = point_distance(y1, y2)
+    if distance == 0:
+        return 1  # points are the same
+    if step <= 0:
+        return 0  # no usable scale, so any gap counts as too different
+    # closer points must score higher, hence the complement of the scaled distance
+    return max(0, 1 - distance / (2 * step))
+
+
+def slope(x1, y1, x2, y2):
+    """Return the incline, in degrees, of the line going from (x1, y1) to (x2, y2).
+
+    Descending lines read from left to right are reported in [270, 360) rather than
+    as negative angles; vertical lines give 90 (upwards) or 270 (downwards).
+    """
+    if y1 == y2:
+        return 0  # flat line (also covers two identical points)
+    if x1 == x2:
+        return 90 if y2 > y1 else 270  # vertical line: the leading coefficient is undefined
+    leading_coeff = (y2 - y1) / (x2 - x1)
+    teta = np.degrees(np.arctan(leading_coeff))
+    if x1 < x2 and y1 > y2:
+        teta += 360
+    return teta
+
+
+def similarity(data1, data2, max_distance, nb_points):
+    """Compare two curves point by point and return the averaged score.
+
+    Each curve is a sequence of ``(x, y)`` points, the two being read in lockstep.
+    For every pair of points the score is the vertical distance scaled by
+    ``max_distance``, minus the absolute difference between the sines of the
+    inclines of the segments leaving the two points (0 for the last point).
+    The sum of the scores is divided by ``nb_points``.
+
+    Raises ValueError if ``max_distance`` or ``nb_points`` is zero.
+    """
+    if max_distance == 0 or nb_points == 0:
+        raise ValueError("max_distance and nb_points must be non-zero")
+    somme = 0
+    for position, (point1, point2) in enumerate(zip(data1, data2)):
+        x1, y1, x2, y2 = point1[0], point1[1], point2[0], point2[1]
+        score = point_distance(y1, y2) / max_distance
+        following = position + 1  # look the next points up by position, not by x value
+        if following < len(data1) and following < len(data2):
+            next_x1, next_y1 = data1[following][0], data1[following][1]
+            next_x2, next_y2 = data2[following][0], data2[following][1]
+            # slope() returns degrees whereas np.sin expects radians
+            sinus1 = np.sin(np.radians(slope(x1, y1, next_x1, next_y1)))
+            sinus2 = np.sin(np.radians(slope(x2, y2, next_x2, next_y2)))
+            slope_factor = abs(sinus1 - sinus2)
+        else:
+            slope_factor = 0
+        somme += score - slope_factor
+    return somme / nb_points
+
+
 class Chart(Method):
-    def __init__(self, name, dataset):
+    def __init__(self, name, dataset, number_of_iris=12):
         Method.__init__(self, name, dataset)
         self.chart = None
         self.dataset = dataset
         self.trendline = None
+        # charts are laid out two per row, so an odd (or non-positive) count falls back to 12
+        is_valid = number_of_iris > 0 and number_of_iris % 2 == 0
+        self.number_of_iris = number_of_iris if is_valid else 12
+        self.iris_per_line = 2
+        self.step = 0  # gap between two y ticks, set by compute_trendline
 
     def compute_trendline(self):  # TODO: check order of selected indicators
         print("compute trendline")
         # for indicator in self.dataset.selected_indicators:
-        fig, axs = plt.subplots(6, 2, figsize=(15, 15))  # rows, columns
+        nb_rows = self.number_of_iris // self.iris_per_line
+        # squeeze=False keeps axs two-dimensional even when there is a single row
+        fig, axs = plt.subplots(nb_rows, self.iris_per_line, figsize=(15, 15), squeeze=False)
         i, j, k = 0, 0, 1  # i and j are indices to plot sub-figures and k is the counter to place figures
-        for index, row in self.dataset.data.head(12).iterrows():
+        for index, row in self.dataset.data.head(self.number_of_iris).iterrows():
             data = []
             for indicator in self.dataset.selected_indicators:
                 data.append(row[indicator])
-            max_value = self.dataset.data.head(12)[self.dataset.selected_indicators].values.max()
+            max_value = self.dataset.data.head(self.number_of_iris)[self.dataset.selected_indicators].values.max()
             x = np.arange(0, len(data))
             y = data
             f = interp1d(x, y)
             axs[i, j].axis(ymin=0, ymax=max_value)
             axs[i, j].set_xticks(np.arange(0, len(data)))
-            axs[i, j].set_xticks(np.arange(0, max_value, step=max_value / 5))
+            self.step = max_value/5
+            # these ticks are spread over the value range, so they belong on the y axis
+            axs[i, j].set_yticks(np.arange(0, max_value, step=self.step))
             axs[i, j].plot(x, data, 'o', x, f(x), '-')
             title = str(row['CODE']) + " - " + str(self.dataset.env)
             axs[i, j].set_title(title)
-            if k < 2:
+            if k < self.iris_per_line:
                 k += 1
                 j += 1
             else:
                 k = 1
                 i += 1
                 j = 0
-        fig.show()
\ No newline at end of file
+        fig.show()
+        self.compute_similarity()
+
+    def compute_similarity(self):
+        """Print the similarity score of every pair of the first ``number_of_iris`` rows.
+
+        The score is scaled by ``self.step``, which compute_trendline sets.
+        """
+        indicators = self.dataset.selected_indicators
+        rows = self.dataset.data.head(self.number_of_iris)
+        for index1, row1 in rows.iterrows():
+            data1 = [(i, row1[indicator]) for i, indicator in enumerate(indicators)]
+            for index2, row2 in rows.iterrows():
+                if index1 > index2:  # compare charts only once (don't compare 2 and 1 and after 1 and 2)
+                    # build a fresh curve for each pair so that points of earlier pairs do not pile up
+                    data2 = [(i, row2[indicator]) for i, indicator in enumerate(indicators)]
+                    sim_percentage = similarity(data1, data2, self.step, len(indicators))
+                    print(row1['CODE'], "/", row2['CODE'], " ->", sim_percentage)
+
+
+if __name__ == '__main__':
+    print(slope(0, 1, 1, 0))
-- 
GitLab