From 1aa9682787902cfbc1b7b8a5d88fe9ce7b7ac96e Mon Sep 17 00:00:00 2001 From: George Marchment <georgemarchment@yahoo.fr> Date: Wed, 7 May 2025 16:54:30 +0200 Subject: [PATCH] Fix small bug --- src/workflow.py | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/src/workflow.py b/src/workflow.py index bb2575f..30433b1 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -567,7 +567,8 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen concordance_factor = 1, uniformity_factor = 1, min_nb_clusters_factor = 1, - min_nb_non_relevant_cluster_factor = 1): + min_nb_non_relevant_cluster_factor = 1, + relevant_processes = []):#This parameter is to force relevant processes if the user absolutely wants them import copy min_score, min_processes = np.inf, [] @@ -581,9 +582,9 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen all_process_as_relevant.append(p.get_alias()) all_process_as_relevant = list(set(all_process_as_relevant)) #working_workflow.rewrite_workflow_remove_subworkflows(relevant_processes = all_process_as_relevant, render_graphs = False) - #w_save = copy.deepcopy(working_workflow) - w_save = copy.deepcopy(self) + w = copy.deepcopy(self) + #w = copy.deepcopy(working_workflow) scripts_2_tools = {} print("Extracting the tools from the processes") print('-'*len(processes_called)+">") @@ -601,20 +602,29 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen #print(i/number_of_tries*100) #w = copy.deepcopy(w_save) - if(process_pre_selection == "bioinfo"): - random_relevant_processes = w.get_random_relevant_processes_which_use_bioinformatics_tools(scripts_2_tools = scripts_2_tools) - elif(process_pre_selection == "bioinfo_freq"): - random_relevant_processes = w.get_random_relevant_processes_which_use_bioinformatics_tools_considering_their_frequency(scripts_2_tools = scripts_2_tools) - elif(process_pre_selection == "None"): - 
random_relevant_processes = w.generate_random_relevant_processes() - else: - raise Exception('process_pre_selection option not recognised') - escape = 0 - while(escape<100 and set(random_relevant_processes) in already_tried): + def get_randomn_processes(): + if(process_pre_selection == "bioinfo"): + random_relevant_processes = w.get_random_relevant_processes_which_use_bioinformatics_tools(scripts_2_tools = scripts_2_tools) + elif(process_pre_selection == "bioinfo_freq"): + random_relevant_processes = w.get_random_relevant_processes_which_use_bioinformatics_tools_considering_their_frequency(scripts_2_tools = scripts_2_tools) + elif(process_pre_selection == "None"): + random_relevant_processes = w.generate_random_relevant_processes() + else: + raise Exception('process_pre_selection option not recognised') + return list(set(random_relevant_processes+relevant_processes)) + + max_number_clusters = 30 + + random_relevant_processes = get_randomn_processes() + escape, escape_upper_bound = 0, 1000 + while(escape<escape_upper_bound and (set(random_relevant_processes) in already_tried + or not(len(random_relevant_processes)<=np.min((reduction_beta*number_processes_called, max_number_clusters))) + or not(len(random_relevant_processes)>=reduction_alpha*number_processes_called))): + #print("here", random_relevant_processes) escape+=1 - random_relevant_processes = w.generate_random_relevant_processes() + random_relevant_processes = get_randomn_processes() #Cause it means we've already searched the majority of the possibilities - if(escape>=100): + if(escape>=escape_upper_bound): return min_processes already_tried.append(set(random_relevant_processes)) #Here the nb of conditions returned is the number of conditions in the clusters after the rewrite @@ -712,10 +722,10 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen return score, cluster_with_processes, dico_results - score, cluster_organisation, dico_results = 
get_score_from_set_relevant_processes(w_save, random_relevant_processes) + score, cluster_organisation, dico_results = get_score_from_set_relevant_processes(w, random_relevant_processes) #print(dico_results) if(len(cluster_organisation)>=reduction_alpha*number_processes_called and - len(cluster_organisation)<=reduction_beta*number_processes_called and + len(cluster_organisation)<=np.min((reduction_beta*number_processes_called, max_number_clusters)) and score<min_score): #print() #print("concordance", np.max(np.array(tab_nb_conditions_per_cluster)/np.array(tab_nb_executors_per_cluster)) ) -- GitLab