From 30ccf5a8e6d35079dd1b7695d6c936d72c7a0998 Mon Sep 17 00:00:00 2001
From: George Marchment <georgemarchment@yahoo.fr>
Date: Tue, 6 May 2025 16:21:53 +0200
Subject: [PATCH] Added small update

---
 src/code_.py        |  2 +-
 src/constant.py     |  2 +-
 src/main.py         |  2 +-
 src/outils_graph.py |  2 +-
 src/workflow.py     | 24 ++++++++++++++----------
 5 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/src/code_.py b/src/code_.py
index e9eb67b..f8f4bb7 100644
--- a/src/code_.py
+++ b/src/code_.py
@@ -157,7 +157,7 @@ class Code:
         while(searching and timeout<constant.WHILE_UPPER_BOUND):
             searching = False
             #TODO -> do the same with flatMap -> 668
-            for word in ["map", "flatMap", "view"]:
+            for word in ["map", "flatMap", "view", "ifEmpty"]:
                 for end_char in ['{', '\(']:
                     pattern = fr"(\.|\|)\s*"+word+r"\s*"+end_char
                     for match in re.finditer(pattern, code):
diff --git a/src/constant.py b/src/constant.py
index a5cd310..c88a4e3 100644
--- a/src/constant.py
+++ b/src/constant.py
@@ -21,7 +21,7 @@ LIST_OPERATORS = ["distinct", "filter", "first", "last", "randomSample", "take",
                   "branch","choice","multiMap","into","separate","tap",
                   "count","countBy","min","max","sum","toInteger",
                   "close","dump","ifEmpty","print","println","set","view",
-                  "map_modified","reduce_modified", "flatMap_modified", "view_modified", #The articifiel operations
+                  "map_modified","reduce_modified", "flatMap_modified", "view_modified", "ifEmpty_modified", #The articifiel operations
                   "empty", "of", "fromPath", "fromList", "subscribe", "value", "from"]#This last line is added by me:)
diff --git a/src/main.py b/src/main.py
index 5bfec99..b6514af 100644
--- a/src/main.py
+++ b/src/main.py
@@ -64,7 +64,7 @@ class Main(Nextflow_Building_Blocks):
             if(new!=old):
                 temp = code
                 code = code.replace(old, new, 1)
-                if(temp==code):
+                if(temp==code and old.split()!=new.split()):
                     print(exe)
                     print(code)
                     print("- old", f'"{old}"')
diff --git a/src/outils_graph.py b/src/outils_graph.py
index 303d088..4473d51 100644
--- a/src/outils_graph.py
+++ b/src/outils_graph.py
@@ -1002,7 +1002,7 @@ def relev_user_view_builder(dico_param, relevant_modules, alias_2_tools):
         new_nodes[i].sort()
         new_name = get_name_new_node(get_names_tab(dico, new_nodes[i]), relevant_modules, tag, alias_2_tools)
         name_printed = new_name.split(tag)[0]
-        shape = "ellipse"
+        shape = "rectangle"
         if(name_printed==""):
             shape = "point"
         node = {"id": '_$$_'.join(new_nodes[i]).replace('<', '').replace('>', ''),
diff --git a/src/workflow.py b/src/workflow.py
index 4ca2a7c..bce20cb 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -572,16 +572,18 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
         import copy
         min_score, min_processes = np.inf, []
         already_tried = []
-        working_workflow = copy.deepcopy(self)
+        #working_workflow = copy.deepcopy(self)
 
-        processes_called = working_workflow.get_processes_called()
+        processes_called = self.get_processes_called()
         number_processes_called = len(processes_called)
         all_process_as_relevant = []
         for p in processes_called:
             all_process_as_relevant.append(p.get_alias())
         all_process_as_relevant = list(set(all_process_as_relevant))
-        working_workflow.rewrite_workflow_remove_subworkflows(relevant_processes = all_process_as_relevant, render_graphs = False)
-        w_save = copy.deepcopy(working_workflow)
+        #working_workflow.rewrite_workflow_remove_subworkflows(relevant_processes = all_process_as_relevant, render_graphs = False)
+        #w_save = copy.deepcopy(working_workflow)
+        w_save = copy.deepcopy(self)
+        w = copy.deepcopy(self)
         scripts_2_tools = {}
         print("Extracting the tools from the processes")
         print('-'*len(processes_called)+">")
@@ -597,7 +599,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
         for i in range(number_of_tries):
             print('.', end='')
             #print(i/number_of_tries*100)
-            w = copy.deepcopy(w_save)
+            #w = copy.deepcopy(w_save)
 
             if(process_pre_selection == "bioinfo"):
                 random_relevant_processes = w.get_random_relevant_processes_which_use_bioinformatics_tools(scripts_2_tools = scripts_2_tools)
@@ -644,7 +646,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
 
             return nb_conditions_in_clusters
 
-        def get_score_from_set_relevant_processes(w_save, random_relevant_processes):
+        def get_score_from_set_relevant_processes(w, random_relevant_processes):
             #w = copy.deepcopy(w_save)
             #_, cluster_organisation = w.convert_workflow_2_user_view(relevant_processes=random_relevant_processes, render_graphs = False)
             #print(random_relevant_processes)
@@ -655,7 +657,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
             #     tab_nb_processes_per_cluster_1.append(cluster_organisation[c]["nb_processes"])
             #     tab_nb_conditions_per_cluster_1.append(cluster_organisation[c]["nb_conditions"])
 
-            w = copy.deepcopy(w_save)
+            #w = copy.deepcopy(w_save)
             w.generate_user_view(relevant_processes = random_relevant_processes, render_graphs=False, use_process_dependency_graph = False)
             clusters = w.graph.get_clusters_from_user_view()
             cluster_with_processes = []
@@ -671,7 +673,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
 
             for cluster in cluster_with_processes:
                 tab_nb_executors_per_cluster.append(len(cluster))
-            print(np.array(tab_nb_executors_per_cluster).sum())
+
             #Number condtions per cluster
             tab_nb_conditions_per_cluster = get_nb_conditions_in_clusters(cluster_with_processes)
             #Number of processes per cluster
@@ -700,10 +702,12 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
             average_number_of_process_per_cluster = np.mean(tab_nb_processes_per_cluster)
             for x in tab_nb_processes_per_cluster:
                 uniformity_variance += (average_number_of_process_per_cluster-x)**2/nb_clusters
-            dico_results = {"min_nb_clusters":(nb_clusters / number_processes_called), "min_nb_non_relevant_cluster":(nb_non_relevant_clusters / nb_clusters), "uniformity":(uniformity_variance / number_processes_called), "concordance":np.max(np.array(tab_nb_conditions_per_cluster)/np.array(tab_nb_executors_per_cluster)) }
+
+            min_nb_clusters_value = (nb_clusters / number_processes_called)**2
+            dico_results = {"min_nb_clusters":min_nb_clusters_value, "min_nb_non_relevant_cluster":(nb_non_relevant_clusters / nb_clusters), "uniformity":(uniformity_variance / number_processes_called), "concordance":np.max(np.array(tab_nb_conditions_per_cluster)/np.array(tab_nb_executors_per_cluster)) }
             score = concordance_factor * np.max(np.array(tab_nb_conditions_per_cluster)/np.array(tab_nb_executors_per_cluster)) + \
                     uniformity_factor * (uniformity_variance / number_processes_called) + \
-                    min_nb_clusters_factor * (nb_clusters / number_processes_called) + \
+                    min_nb_clusters_factor * min_nb_clusters_value + \
                     min_nb_non_relevant_cluster_factor * (nb_non_relevant_clusters / nb_clusters)
 
             return score, cluster_with_processes, dico_results
--
GitLab