From 6600a0e7d5d629b8ece211b3d8cff56cdab099b2 Mon Sep 17 00:00:00 2001 From: George Marchment <georgemarchment@yahoo.fr> Date: Mon, 14 Apr 2025 13:54:37 +0200 Subject: [PATCH] Added function to get the most uniform selection of clusters --- src/workflow.py | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/src/workflow.py b/src/workflow.py index 4948649..0e156e0 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -469,11 +469,52 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen self.generate_user_view(relevant_processes = random_relevant_processes, render_graphs=False) clusters = self.graph.get_clusters_from_user_view() #We want the number of clusters to be at least x% of the size of the workflows - if(len(clusters)>=reduction_alpha*len(self.get_processes_called()) and len(clusters)<min_nb_clusters): + if(len(clusters)>=reduction_alpha*len(self.get_processes_called()) and + len(clusters)<min_nb_clusters): min_relevant_processes = random_relevant_processes min_nb_clusters = len(clusters) return min_relevant_processes + #reduction_beta is the maximum number of clusters depending on the number of processes given in a percentage + #For example if there are 10 processes and reduction_beta = 0.8 -> we want a maximum of 8 clusters + #In the same idea if reduction_beta = 0.6 -> we want a maximum of 6 clusters + #reduction_alpha is the same as above + def get_relevant_processes_which_uniformizes_cluster_distribution(self, reduction_alpha = 0.2, reduction_beta = 0.8, number_of_tries = 50): + import numpy as np + min_uniform_score, min_relevant_processes = np.inf, [] + already_tried = [] + for i in range(number_of_tries): + random_relevant_processes = self.generate_random_relevant_processes() + escape = 0 + while(escape<100 and set(random_relevant_processes) in already_tried): + escape+=1 + random_relevant_processes = self.generate_random_relevant_processes() + #Cause it means we've 
already searched the majority of the possibilities + if(escape>=100): + return min_relevant_processes + already_tried.append(set(random_relevant_processes)) + #Get the clusters and the code + self.generate_user_view(relevant_processes = random_relevant_processes, render_graphs=False) + clusters = self.graph.get_clusters_from_user_view() + clusters_2_size = [] + for c in clusters: + nb_processes = 0 + for ele in c: + if(ele.get_type()=="Process"): + nb_processes+=1 + clusters_2_size.append(nb_processes) + score = 0 + average = np.mean(clusters_2_size) + for x in clusters_2_size: + score += ((average-x)/average)**2 + if(len(clusters)>=reduction_alpha*len(self.get_processes_called()) and + len(clusters)<=reduction_beta*len(self.get_processes_called()) and + score<min_uniform_score): + min_relevant_processes = random_relevant_processes + min_uniform_score = score + return min_relevant_processes + + #Method that returns the order of execution for each executor def get_order_execution_executors(self): dico = {} -- GitLab