Commit 6600a0e7 authored by George Marchment

Added function to get the most uniform selection of clusters

parent 5948cd65
Pipeline #14616 failed in 2 minutes and 1 second
@@ -469,11 +469,52 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
             self.generate_user_view(relevant_processes = random_relevant_processes, render_graphs=False)
             clusters = self.graph.get_clusters_from_user_view()
             #We want the number of clusters to be at least x% of the size of the workflows
-            if(len(clusters)>=reduction_alpha*len(self.get_processes_called()) and len(clusters)<min_nb_clusters):
+            if(len(clusters)>=reduction_alpha*len(self.get_processes_called()) and
+                len(clusters)<min_nb_clusters):
                 min_relevant_processes = random_relevant_processes
                 min_nb_clusters = len(clusters)
         return min_relevant_processes
+    #reduction_beta is the maximum number of clusters allowed, given as a percentage of the number of processes
+    #For example if there are 10 processes and reduction_beta = 0.8 -> we want a maximum of 8 clusters
+    #Likewise, if reduction_beta = 0.6 -> we want a maximum of 6 clusters
+    #reduction_alpha is the same idea, but for the minimum number of clusters
+    def get_relevant_processes_which_uniformizes_cluster_distribution(self, reduction_alpha = 0.2, reduction_beta = 0.8, number_of_tries = 50):
+        import numpy as np
+        min_uniform_score, min_relevant_processes = np.inf, []
+        already_tried = []
+        for i in range(number_of_tries):
+            random_relevant_processes = self.generate_random_relevant_processes()
+            escape = 0
+            while(escape<100 and set(random_relevant_processes) in already_tried):
+                escape+=1
+                random_relevant_processes = self.generate_random_relevant_processes()
+            #If we reach the escape limit, it means we've already searched the majority of the possibilities
+            if(escape>=100):
+                return min_relevant_processes
+            already_tried.append(set(random_relevant_processes))
+            #Get the clusters from the user view
+            self.generate_user_view(relevant_processes = random_relevant_processes, render_graphs=False)
+            clusters = self.graph.get_clusters_from_user_view()
+            clusters_2_size = []
+            for c in clusters:
+                nb_processes = 0
+                for ele in c:
+                    if(ele.get_type()=="Process"):
+                        nb_processes+=1
+                clusters_2_size.append(nb_processes)
+            #Uniformity score: sum of squared relative deviations of the cluster sizes from their mean (lower = more uniform)
+            score = 0
+            average = np.mean(clusters_2_size)
+            for x in clusters_2_size:
+                score += ((average-x)/average)**2
+            if(len(clusters)>=reduction_alpha*len(self.get_processes_called()) and
+                len(clusters)<=reduction_beta*len(self.get_processes_called()) and
+                score<min_uniform_score):
+                min_relevant_processes = random_relevant_processes
+                min_uniform_score = score
+        return min_relevant_processes
     #Method that returns the order of execution for each executor
     def get_order_execution_executors(self):
         dico = {}
......
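The comments above the new method describe reduction_alpha and reduction_beta as fractions of the number of processes that bound the accepted number of clusters. Below is a minimal standalone sketch of that acceptance window; the process count of 10 and the helper name cluster_count_is_acceptable are made up for illustration, while in the method itself the count comes from len(self.get_processes_called()).

# Illustrative sketch of the acceptance window, not part of the commit.
nb_processes = 10
reduction_alpha, reduction_beta = 0.2, 0.8

def cluster_count_is_acceptable(nb_clusters):
    # At least alpha*N and at most beta*N clusters -> here between 2 and 8.
    return (nb_clusters >= reduction_alpha * nb_processes
            and nb_clusters <= reduction_beta * nb_processes)

print(cluster_count_is_acceptable(1))   # False: fewer than 2 clusters
print(cluster_count_is_acceptable(5))   # True
print(cluster_count_is_acceptable(9))   # False: more than 8 clusters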
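The uniformity score computed in the loop is the sum of squared relative deviations of the cluster sizes from their mean, so smaller values mean the processes are spread more evenly across clusters. The standalone check below uses the same formula; the cluster-size lists and the helper name uniformity_score are made up for illustration.

import numpy as np

def uniformity_score(cluster_sizes):
    # Same formula as in the new method: sum of squared relative
    # deviations from the mean cluster size (lower = more uniform).
    average = np.mean(cluster_sizes)
    return sum(((average - x) / average) ** 2 for x in cluster_sizes)

print(uniformity_score([3, 3, 4]))   # ~0.06 -> nearly uniform
print(uniformity_score([1, 1, 8]))   # ~2.94 -> one dominant cluster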
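How the new selector is meant to be used can only be inferred from the diff: it calls generate_user_view with render_graphs=False internally, which suggests the caller re-renders the view once a selection is chosen. The sketch below assumes an already-built object of the class this diff modifies; only the two method calls shown above appear in the commit, and the wrapper function name is hypothetical.

# Hypothetical usage sketch; "workflow" is assumed to be an instance of the
# class modified in this commit.
def render_uniform_view(workflow):
    # Pick the selection of relevant processes that spreads processes most
    # evenly across clusters...
    relevant = workflow.get_relevant_processes_which_uniformizes_cluster_distribution(
        reduction_alpha=0.2, reduction_beta=0.8, number_of_tries=50)
    # ...then regenerate the user view, this time rendering the graphs.
    workflow.generate_user_view(relevant_processes=relevant, render_graphs=True)
    return relevant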