From 1aa9682787902cfbc1b7b8a5d88fe9ce7b7ac96e Mon Sep 17 00:00:00 2001
From: George Marchment <georgemarchment@yahoo.fr>
Date: Wed, 7 May 2025 16:54:30 +0200
Subject: [PATCH] Fix small bug

---
 src/workflow.py | 44 +++++++++++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/src/workflow.py b/src/workflow.py
index bb2575f..30433b1 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -567,7 +567,8 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                                                   concordance_factor = 1,
                                                   uniformity_factor = 1,
                                                   min_nb_clusters_factor = 1,
-                                                  min_nb_non_relevant_cluster_factor = 1):
+                                                  min_nb_non_relevant_cluster_factor = 1,
+                                                  relevant_processes = []):#This parameter lets the user force specific relevant processes if they absolutely want them
         
         import copy
         min_score, min_processes = np.inf, []
@@ -581,9 +582,9 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
             all_process_as_relevant.append(p.get_alias())
         all_process_as_relevant = list(set(all_process_as_relevant))
         #working_workflow.rewrite_workflow_remove_subworkflows(relevant_processes = all_process_as_relevant, render_graphs = False)
-        #w_save = copy.deepcopy(working_workflow)
-        w_save = copy.deepcopy(self)
+    
         w = copy.deepcopy(self)
+        #w = copy.deepcopy(working_workflow)
         scripts_2_tools = {}
         print("Extracting the tools from the processes")
         print('-'*len(processes_called)+">")
@@ -601,20 +602,29 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
             #print(i/number_of_tries*100)
             #w = copy.deepcopy(w_save)
             
-            if(process_pre_selection == "bioinfo"):
-                random_relevant_processes = w.get_random_relevant_processes_which_use_bioinformatics_tools(scripts_2_tools = scripts_2_tools)
-            elif(process_pre_selection == "bioinfo_freq"):
-                random_relevant_processes = w.get_random_relevant_processes_which_use_bioinformatics_tools_considering_their_frequency(scripts_2_tools = scripts_2_tools)
-            elif(process_pre_selection == "None"):
-                random_relevant_processes = w.generate_random_relevant_processes()
-            else:
-                raise Exception('process_pre_selection option not recognised')
-            escape = 0
-            while(escape<100 and set(random_relevant_processes) in already_tried):
+            def get_randomn_processes():
+                if(process_pre_selection == "bioinfo"):
+                    random_relevant_processes = w.get_random_relevant_processes_which_use_bioinformatics_tools(scripts_2_tools = scripts_2_tools)
+                elif(process_pre_selection == "bioinfo_freq"):
+                    random_relevant_processes = w.get_random_relevant_processes_which_use_bioinformatics_tools_considering_their_frequency(scripts_2_tools = scripts_2_tools)
+                elif(process_pre_selection == "None"):
+                    random_relevant_processes = w.generate_random_relevant_processes()
+                else:
+                    raise Exception('process_pre_selection option not recognised')
+                return list(set(random_relevant_processes+relevant_processes))
+            
+            max_number_clusters = 30
+
+            random_relevant_processes = get_randomn_processes()
+            escape, escape_upper_bound = 0, 1000
+            while(escape<escape_upper_bound and (set(random_relevant_processes) in already_tried
+                  or not(len(random_relevant_processes)<=np.min((reduction_beta*number_processes_called, max_number_clusters)))
+                  or not(len(random_relevant_processes)>=reduction_alpha*number_processes_called))):
+                #print("here", random_relevant_processes)
                 escape+=1
-                random_relevant_processes = w.generate_random_relevant_processes()
+                random_relevant_processes = get_randomn_processes()
             #Cause it means we've already searched the majority of the possibilities
-            if(escape>=100):
+            if(escape>=escape_upper_bound):
                 return min_processes
             already_tried.append(set(random_relevant_processes))
             #Here the nb of conditions returned is the number of conditions in the clusters after the rewrite
@@ -712,10 +722,10 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                 return score, cluster_with_processes, dico_results
 
             
-            score, cluster_organisation, dico_results = get_score_from_set_relevant_processes(w_save, random_relevant_processes)
+            score, cluster_organisation, dico_results = get_score_from_set_relevant_processes(w, random_relevant_processes)
             #print(dico_results)
             if(len(cluster_organisation)>=reduction_alpha*number_processes_called and 
-               len(cluster_organisation)<=reduction_beta*number_processes_called and 
+               len(cluster_organisation)<=np.min((reduction_beta*number_processes_called, max_number_clusters)) and 
                score<min_score):
                 #print()
                 #print("concordance",  np.max(np.array(tab_nb_conditions_per_cluster)/np.array(tab_nb_executors_per_cluster)) )
-- 
GitLab