From 9bde30f6612448866e882edc86cb6b11da5b0c5e Mon Sep 17 00:00:00 2001
From: Nelly Barret <nelly.barret@etu.univ-lyon1.fr>
Date: Fri, 15 May 2020 19:37:44 +0200
Subject: [PATCH] [M] working on similarity between curves

---
 predihood/charts.py        |   6 +--
 predihood/classes/Chart.py | 102 ++++++++++++++++++++++++++++++++++---
 2 files changed, 97 insertions(+), 11 deletions(-)

diff --git a/predihood/charts.py b/predihood/charts.py
index d99c85aa..7fecdec2 100644
--- a/predihood/charts.py
+++ b/predihood/charts.py
@@ -8,12 +8,10 @@ def generate_charts():
     data = Data(normalize="density", filter=True)
     data.init_all_in_one()
     lists = get_selected_indicators_lists(10)
-    print(lists)
-    for j, env in enumerate(["batiment", "usage"]):
-        print(env)
+    for j, env in enumerate(["batiment"]):
         dataset = Dataset(data, env, selected_indicators=lists["10"][env], train_size=0.8, test_size=0.2)
         dataset.init_all_in_one()
-        algo = Chart(name='chart', dataset=dataset)
+        algo = Chart(name='chart', dataset=dataset, number_of_iris=4)
         algo.compute_trendline()
 
 
diff --git a/predihood/classes/Chart.py b/predihood/classes/Chart.py
index 8b566ab8..25159e59 100644
--- a/predihood/classes/Chart.py
+++ b/predihood/classes/Chart.py
@@ -6,37 +6,125 @@ from scipy.interpolate import interp1d
 from predihood.classes.Method import Method
 
 
+def point_distance(y1, y2):
+    """Return the absolute difference between two ordinates, as a float."""
+    return abs(float(y2) - float(y1))
+
+
+def similarity_point(y1, y2, step):
+    """Return a similarity score between two ordinates.
+
+    The score is 1 when the values are equal, decreases linearly with their
+    distance and bottoms out at 0 once they are ``2 * step`` apart or more.
+    """
+    distance = point_distance(y1, y2)
+    if distance == 0:
+        return 1  # points are the same, whatever the step
+    # distance / (2 * step) grows with the gap, so invert it to get a similarity
+    return max(0, 1 - distance / (2 * step))
+
+
+def slope(x1, y1, x2, y2):
+    """Return the incline, in degrees, of the line through (x1, y1) and (x2, y2).
+
+    Descending lines (x1 < x2 and y1 > y2) are shifted by 360 degrees so that
+    the returned angle is positive.
+    """
+    if y1 == 0 and y2 == 0:
+        return 0
+    leading_coeff = (y2 - y1) / (x2 - x1)  # rise over run
+    theta = np.degrees(np.arctan(leading_coeff))
+    if x1 < x2 and y1 > y2:
+        theta += 360
+    return theta
+
+
+def similarity(data1, data2, max_distance, nb_points):
+    """Return the comparison score of two curves given as lists of (x, y) points.
+
+    For each pair of points the score adds the ordinate distance divided by
+    ``max_distance`` and subtracts the absolute difference between the sines of
+    the inclines of the segments leading to the next points. The sum is then
+    divided by ``nb_points``.
+
+    NOTE(review): the distance term grows as the curves diverge; confirm that
+    this is the intended direction for a "similarity".
+    """
+    total = 0
+    last = min(len(data1), len(data2)) - 1
+    for position, (point1, point2) in enumerate(zip(data1, data2)):
+        (x1, y1), (x2, y2) = point1, point2
+        score = point_distance(y1, y2) / max_distance
+        if position < last:
+            # look up the next points by position rather than by x value
+            next_x1, next_y1 = data1[position + 1]
+            next_x2, next_y2 = data2[position + 1]
+            # slope() returns degrees whereas np.sin() expects radians
+            sinus1 = np.sin(np.radians(slope(x1, y1, next_x1, next_y1)))
+            sinus2 = np.sin(np.radians(slope(x2, y2, next_x2, next_y2)))
+            score -= abs(sinus1 - sinus2)
+        total += score
+    return total / nb_points
+
+
 class Chart(Method):
-    def __init__(self, name, dataset):
+    def __init__(self, name, dataset, number_of_iris=12):
         Method.__init__(self, name, dataset)
         self.chart = None
         self.dataset = dataset
         self.trendline = None
+        # charts are laid out two per row, so an odd count falls back to 12
+        self.number_of_iris = number_of_iris if number_of_iris % 2 == 0 else 12
+        self.iris_per_line = 2
+        self.step = 0  # gap between two y ticks, set by compute_trendline
 
     def compute_trendline(self):  # TODO: check order of selected indicators
         print("compute trendline")
         # for indicator in self.dataset.selected_indicators:
-        fig, axs = plt.subplots(6, 2, figsize=(15, 15))  # rows, columns
+        # squeeze=False keeps axs two-dimensional even with a single row
+        rows = self.number_of_iris // self.iris_per_line
+        fig, axs = plt.subplots(rows, self.iris_per_line, figsize=(15, 15), squeeze=False)
         i, j, k = 0, 0, 1  # i and j are indices to plot sub-figures and k is the counter to place figures
-        for index, row in self.dataset.data.head(12).iterrows():
+        for index, row in self.dataset.data.head(self.number_of_iris).iterrows():
             data = []
             for indicator in self.dataset.selected_indicators:
                 data.append(row[indicator])
-            max_value = self.dataset.data.head(12)[self.dataset.selected_indicators].values.max()
+            max_value = self.dataset.data.head(self.number_of_iris)[self.dataset.selected_indicators].values.max()
             x = np.arange(0, len(data))
             y = data
             f = interp1d(x, y)
             axs[i, j].axis(ymin=0, ymax=max_value)
             axs[i, j].set_xticks(np.arange(0, len(data)))
-            axs[i, j].set_xticks(np.arange(0, max_value, step=max_value / 5))
+            self.step = max_value / 5
+            # y ticks: calling set_xticks again would overwrite the ticks set just above
+            axs[i, j].set_yticks(np.arange(0, max_value, step=self.step))
             axs[i, j].plot(x, data, 'o', x, f(x), '-')
             title = str(row['CODE']) + " - " + str(self.dataset.env)
             axs[i, j].set_title(title)
-            if k < 2:
+            if k < self.iris_per_line:
                 k += 1
                 j += 1
             else:
                 k = 1
                 i += 1
                 j = 0
-        fig.show()
\ No newline at end of file
+        fig.show()
+        self.compute_similarity()
+
+    def compute_similarity(self):
+        """Print the score of every pair of rows among the first ``number_of_iris``."""
+        indicators = self.dataset.selected_indicators
+        subset = self.dataset.data.head(self.number_of_iris)
+        for index1, row1 in subset.iterrows():
+            data1 = [(i, row1[name]) for i, name in enumerate(indicators)]
+            for index2, row2 in subset.iterrows():
+                if index1 <= index2:  # compare each pair once, never a row with itself
+                    continue
+                # data2 is rebuilt for each pair so points never pile up across pairs
+                data2 = [(i, row2[name]) for i, name in enumerate(indicators)]
+                sim_percentage = similarity(data1, data2, self.step, len(indicators))
+                print(row1['CODE'], "/", row2['CODE'], " ->", sim_percentage)
+
+
+if __name__ == '__main__':
+    print(slope(0, 1, 1, 0))
-- 
GitLab