From 106ffe4d2f43f7001252c980277e279d289191cf Mon Sep 17 00:00:00 2001
From: George Marchment <georgemarchment@yahoo.fr>
Date: Wed, 16 Apr 2025 10:55:38 +0200
Subject: [PATCH] Updated the selection of random processes + updated the
 extraction of paths and files (there was an issue with the capture groups)

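The FILE and PATH constants previously combined both input syntaxes in a
single alternation, so the captured channel name landed in group(1) or
group(2) depending on which branch matched, while the output extraction
only read match.group(1). Splitting them into FILE1/FILE2 and PATH1/PATH2
means group(1) always holds the name. A minimal, illustrative sketch of
the behaviour (plain Python re, not project code):

    import re

    # Old single-pattern approach: the name moves between groups.
    FILE = r'file +(\w+) *\n|file *\( *(\w+) *\) *\n'
    m = re.search(FILE, "file (ch)\n")
    print(m.group(1), m.group(2))   # -> None ch

    # New approach: one pattern per form, group(1) always holds the name.
    FILE1 = r'file +(\w+) *\n'
    FILE2 = r'file *\( *(\w+) *\) *\n'
    for pattern in [FILE1, FILE2]:
        m = re.search(pattern, "file (ch)\n")
        if m:
            print(m.group(1))       # -> ch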
---
 src/constant.py |  6 ++++--
 src/process.py  | 11 ++++++-----
 src/workflow.py | 39 +++++++++++++++++++++++----------------
 3 files changed, 33 insertions(+), 23 deletions(-)

diff --git a/src/constant.py b/src/constant.py
index b46c2b5..400e439 100644
--- a/src/constant.py
+++ b/src/constant.py
@@ -123,8 +123,10 @@ END_PIPE_OPERATOR = r"\s*(\s*\|\s*\w+)+"
 
 #         PROCESS
 #--------------------------
-FILE = r'file +(\w+) *\n|file *\( *(\w+) *\) *\n'
-PATH = r'path +(\w+) *\n|path *\( *(\w+) *\) *\n'
+FILE1 = r'file +(\w+) *\n'
+FILE2 = r'file *\( *(\w+) *\) *\n'
+PATH1 = r'path +(\w+) *\n'
+PATH2 = r'path *\( *(\w+) *\) *\n'
 FROM = r'[^\w]from ([^\n]+)\n'
 INPUT = r"\n\s*input *:"
 INTO = r'into +([\w, ]+)'
diff --git a/src/process.py b/src/process.py
index 8d4c181..6beb287 100644
--- a/src/process.py
+++ b/src/process.py
@@ -289,7 +289,7 @@ class Process(Nextflow_Building_Blocks):
             placed = False
 
             #Case there is a single channel as an input -> doesn't use from to import channel -> uses file (see https://github.com/nextflow-io/nextflow/blob/45ceadbdba90b0b7a42a542a9fc241fb04e3719d/docs/process.rst)
-            patterns = [constant.FILE, constant.PATH]
+            patterns = [constant.FILE1, constant.FILE2, constant.PATH1, constant.PATH2]
             for pattern in patterns:
                 for match in re.finditer(pattern, line+"\n"):
                     #In the first case it's "file ch" in the second "file (ch)" 
@@ -423,10 +423,11 @@ class Process(Nextflow_Building_Blocks):
                 tab.append(self.outputs[-1])
             self.outputs_per_line.append(tab)
         
-        pattern = constant.FILE
-        for match in re.finditer(pattern, code):
-            add_channel(match.group(1))
-            self.outputs_per_line.append([self.outputs[-1]])
+        patterns = [constant.FILE1, constant.FILE2]
+        for pattern in patterns:
+            for match in re.finditer(pattern, code):
+                add_channel(match.group(1))
+                self.outputs_per_line.append([self.outputs[-1]])
 
     #Function that extracts the inputs from a process (for DSLS workflows)
     def initialise_outputs_DSL2(self):
diff --git a/src/workflow.py b/src/workflow.py
index 144cf2f..0d0abe0 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -431,12 +431,13 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                     #So that means there are the 2 nodes which form the cycle edge in the relevant processes
                     #-> it means we need to regenerated relevant processes
                     searching = True
-                    break
-                
-            name_select = []
-            for p in sampled:
-                name_select.append(p.get_alias())
-            return name_select
+            if(not searching):
+                name_select = []
+                for p in sampled:
+                    name_select.append(p.get_alias())
+                return name_select
+            else:
+                pass #Both ends of a cycle edge were sampled -> the relevant processes need to be regenerated
         
 
     #This methods generates a random set of processes to consider as relavant 
@@ -1036,21 +1037,27 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
         def get_object(address):
             address = int(re.findall(r"\dx\w+", address)[0], base=16)
             return ctypes.cast(address, ctypes.py_object).value
-        #Check that there are no cycles which will break the creation of the user view:
-        edges_create_cycles = self.graph.get_edges_that_create_cycle()
-        edges_create_cycles_objects = []
-        for e in edges_create_cycles:
-            edges_create_cycles_objects.append((get_object(e[0]), get_object(e[1])))
-        for e in edges_create_cycles_objects:
-            n1 = e[0].get_alias()
-            n2 = e[1].get_alias()
-            if(n1 in relevant_processes and n2 in relevant_processes):
-                raise BioFlowInsightError(f"The processes '{n1}' and '{n2}' cannot both be relevant processes since there is a dependency apparant in the workflow between the 2")
+        
 
         ternary_operation_dico = self.ternary_operation_dico
         map_element_dico = self.map_element_dico
         
         if(self.duplicate): 
+
+            #Check that there are no cycles which will break the creation of the user view:
+            edges_create_cycles = self.graph.get_edges_that_create_cycle()
+            #if(len(edges_create_cycles)>0):
+            #    #TODO -> add link for further details for the user
+            #    raise BioFlowInsightError("An indirect cycle was detected in the workflow. This often happens when the same channel is used multiple times in the workflow. This is a pattern that we do not recommend. Try rewriting the workflow in a different way. See [link] for more details", type="Cycle detected")
+            edges_create_cycles_objects = []
+            for e in edges_create_cycles:
+                edges_create_cycles_objects.append((get_object(e[0]), get_object(e[1])))
+            for e in edges_create_cycles_objects:
+                n1 = e[0].get_alias()
+                n2 = e[1].get_alias()
+                if(n1 in relevant_processes and n2 in relevant_processes):
+                    raise BioFlowInsightError(f"The processes '{n1}' and '{n2}' cannot both be relevant processes since there is a dependency apparent in the workflow between the two")
+
             #First check if there are any duplicate operations
             #That method is in the "get_order_execution_executors" method -> so we just run that first
             self.check_multiple_subworkflow()
-- 
GitLab