[M] working on similarity between curves

9bde30f6 · Nelly Barret · 71599410 · 9bde30f6 · 9bde30f6
Commit 9bde30f6 authored 5 years ago by Nelly Barret
--- a/predihood/charts.py
+++ b/predihood/charts.py
@@ -8,12 +8,10 @@ def generate_charts():
    data = Data(normalize="density", filter=True)
    data.init_all_in_one()
    lists = get_selected_indicators_lists(10)
-    print(lists)
+    for j, env in enumerate(["batiment"]):
-    for j, env in enumerate(["batiment", "usage"]):
-        print(env)
        dataset = Dataset(data, env, selected_indicators=lists["10"][env], train_size=0.8, test_size=0.2)
        dataset.init_all_in_one()
-        algo = Chart(name='chart', dataset=dataset)
+        algo = Chart(name='chart', dataset=dataset, number_of_iris=4)
        algo.compute_trendline()

--- a/predihood/classes/Chart.py
+++ b/predihood/classes/Chart.py
@@ -6,37 +6,108 @@ from scipy.interpolate import interp1d
 from predihood.classes.Method import Method
def point_distance(y1, y2):
    """Return the absolute difference between two ordinates.

    Both arguments are converted with ``float`` first, so numeric strings
    are accepted as well as numbers.
    """
    upper = float(y2)
    lower = float(y1)
    gap = upper - lower
    return abs(gap)
def similarity_point(y1, y2, step):
    """Return a similarity score in [0, 1] for two ordinates.

    The score is 1 when the two values are equal and decreases linearly
    with their distance, reaching 0 once the distance is at least
    ``2 * step``.

    Args:
        y1: first ordinate (anything ``float`` accepts).
        y2: second ordinate (anything ``float`` accepts).
        step: half of the distance beyond which two points are considered
            completely different. A non-positive step means no tolerance:
            any two distinct values score 0.

    Returns:
        1 for identical values, 0 for values that are too far apart,
        otherwise a float strictly between 0 and 1.
    """
    distance = point_distance(y1, y2)
    if distance == 0:
        return 1  # points are the same
    if step <= 0:
        return 0  # no tolerance window: any difference is "too different"
    ratio = distance / (2 * step)
    if ratio >= 1:
        return 0  # points are too different
    # The closer the points, the higher the similarity (the raw ratio alone
    # would grow with the distance, i.e. it measures dissimilarity).
    return 1 - ratio
def slope(x1, y1, x2, y2):
    """Return the inclination, in degrees, of the segment (x1, y1)-(x2, y2).

    The angle is ``degrees(arctan(dy / dx))``. When the segment goes from
    left to right (``x1 < x2``) and descends (``y1 > y2``), 360 is added so
    the result is expressed as a positive angle.

    Special cases:
        * both ordinates equal to 0, or both points identical: returns 0;
        * vertical segment (``x1 == x2``): returns 90.0 when rising and
          -90.0 when falling, instead of dividing by zero.

    Returns:
        The angle in degrees (not radians).
    """
    if y1 == 0 and y2 == 0:
        return 0
    dx = x2 - x1
    dy = y2 - y1
    if dx == 0:
        # Vertical segment: the leading coefficient is infinite, so the
        # arctangent tends to +/-90 degrees. Coincident points have no
        # direction and are reported as flat.
        if dy == 0:
            return 0
        return 90.0 if dy > 0 else -90.0
    leading_coeff = dy / dx  # leading coefficient (gradient) of the line
    teta = np.degrees(np.arctan(leading_coeff))  # incline of the line between the two points
    if x1 < x2 and y1 > y2:
        teta += 360
    return teta
def similarity(data1, data2, max_distance, nb_points):
    """Compare two curves point by point and return an averaged score.

    The points of ``data1`` and ``data2`` are walked in lockstep. For each
    pair, the vertical gap is divided by ``max_distance``; when both curves
    have a following point, the absolute difference between the sines of
    the two outgoing segment angles is subtracted from that value. The
    per-point values are summed and the total is divided by ``nb_points``.

    Args:
        data1: sequence of ``(x, y)`` points of the first curve.
        data2: sequence of ``(x, y)`` points of the second curve.
        max_distance: divisor used to normalise the vertical gap; must be
            non-zero.
        nb_points: divisor applied to the accumulated total; must be
            non-zero.

    Returns:
        The accumulated per-point score divided by ``nb_points``.
    """
    somme = 0
    for position, (point1, point2) in enumerate(zip(data1, data2)):
        x1, y1, x2, y2 = point1[0], point1[1], point2[0], point2[1]
        score = point_distance(y1, y2) / max_distance
        # Look up the next point by position in the sequence, not by the x
        # coordinate, so the lookup stays valid whatever the x values are.
        following = position + 1
        if following < len(data1) and following < len(data2):
            next_x1 = data1[following][0]
            next_y1 = data1[following][1]
            next_x2 = data2[following][0]
            next_y2 = data2[following][1]
            print("next is (", next_x1, ";", next_y1, "), (", next_x2, ";", next_y2, ")")
            # slope() returns degrees while np.sin expects radians.
            sinus1 = np.sin(np.radians(slope(x1, y1, next_x1, next_y1)))
            sinus2 = np.sin(np.radians(slope(x2, y2, next_x2, next_y2)))
            slope_factor = abs(sinus1 - sinus2)
            print(sinus1, "-", sinus2, "=", slope_factor)
        else:
            # Last point of either curve: there is no outgoing segment.
            slope_factor = 0
        score -= slope_factor
        somme += score
    somme /= nb_points
    return somme
 class Chart(Method):
-    def __init__(self, name, dataset):
+    def __init__(self, name, dataset, number_of_iris=12):
        Method.__init__(self, name, dataset)
        self.chart = None
        self.dataset = dataset
        self.trendline = None
+        self.number_of_iris = number_of_iris if number_of_iris % 2 == 0 else 12
+        self.iris_per_line = 2
+        self.step = 0
    def compute_trendline(self):  # TODO: check order of selected indicators
        print("compute trendline")
        # for indicator in self.dataset.selected_indicators:
-        fig, axs = plt.subplots(6, 2, figsize=(15, 15))  # rows, columns
+        fig, axs = plt.subplots(int(self.number_of_iris/2), self.iris_per_line, figsize=(15, 15))  # rows, columns
        i, j, k = 0, 0, 1  # i and j are indices to plot sub-figures and k is the counter to place figures
-        for index, row in self.dataset.data.head(12).iterrows():
+        for index, row in self.dataset.data.head(self.number_of_iris).iterrows():
            data = []
            for indicator in self.dataset.selected_indicators:
                data.append(row[indicator])
-            max_value = self.dataset.data.head(12)[self.dataset.selected_indicators].values.max()
+            max_value = self.dataset.data.head(self.number_of_iris)[self.dataset.selected_indicators].values.max()
            x = np.arange(0, len(data))
            y = data
            f = interp1d(x, y)
            axs[i, j].axis(ymin=0, ymax=max_value)
            axs[i, j].set_xticks(np.arange(0, len(data)))
-            axs[i, j].set_xticks(np.arange(0, max_value, step=max_value / 5))
+            self.step = max_value/5
+            axs[i, j].set_xticks(np.arange(0, max_value, step=self.step))
            axs[i, j].plot(x, data, 'o', x, f(x), '-')
            title = str(row['CODE']) + " - " + str(self.dataset.env)
            axs[i, j].set_title(title)
-            if k < 2:
+            if k < self.iris_per_line:
                k += 1
                j += 1
            else:
                k = 1
                i += 1
                j = 0
        fig.show()
\ No newline at end of file
+        self.compute_similarity()
+    def compute_similarity(self):
+        for index1, row1 in self.dataset.data.head(self.number_of_iris).iterrows():
+            data1 = []
+            data2 = []
+            for index2, row2 in self.dataset.data.head(self.number_of_iris).iterrows():
+                if index1 > index2:  # compare charts only once (don't compare 2 and 1 and after 1 and 2)
+                    for i in range(len(self.dataset.selected_indicators)):
+                        indicator = self.dataset.selected_indicators[i]
+                        data1.append((i, row1[indicator]))
+                        data2.append((i, row2[indicator]))
+                    sim_percentage = similarity(data1, data2, self.step, len(self.dataset.selected_indicators))
+                    print(row1['CODE'], "/", row2['CODE'], " ->", sim_percentage)
+        print(data1)
+        print(data2)
if __name__ == '__main__':
    # Manual check: print the angle of the segment from (0, 1) to (1, 0).
    angle = slope(0, 1, 1, 0)
    print(angle)