From 91d83baa61a92eb4f53a4e1ac716ede41542c1b9 Mon Sep 17 00:00:00 2001
From: George Marchment <georgemarchment@yahoo.fr>
Date: Mon, 2 Dec 2024 11:42:58 +0100
Subject: [PATCH] Added v1 of condition extraction -> only works for calls for
 now

---
 src/call.py                     |   2 +
 src/condition.py                |  26 ++++++++
 src/executor.py                 |   1 +
 src/main_DSL2.py                |   3 +
 src/nextflow_building_blocks.py |   6 ++
 src/nextflow_file.py            |   2 +
 src/operation.py                |   3 +-
 src/outils.py                   | 107 +++++++++++++++++++++++++++++++-
 8 files changed, 147 insertions(+), 3 deletions(-)
 create mode 100644 src/condition.py

diff --git a/src/call.py b/src/call.py
index 4e53b82..99e9f17 100644
--- a/src/call.py
+++ b/src/call.py
@@ -3,6 +3,7 @@ import json
 
 
 from .code_ import Code
+from .condition import Condition
 from .outils import get_next_param
 from .executor import Executor
 from .bioflowinsighterror import BioFlowInsightError
@@ -13,6 +14,7 @@ class Call(Executor):
     def __init__(self, code, origin, OG_code = ''):
         self.code = Code(code = code, origin = self)
         self.origin = origin
+        self.condition = Condition(self)
         self.called = []
         self.first_element_called = None
         self.parameters = []#These are in the order
diff --git a/src/condition.py b/src/condition.py
new file mode 100644
index 0000000..22c4884
--- /dev/null
+++ b/src/condition.py
@@ -0,0 +1,26 @@
+
+from .outils import extract_conditions
+
+class Condition:
+    """Conditions (`if`/`else` blocs) of the workflow code enclosing the origin's code."""
+
+    def __init__(self, origin):
+        self.origin = origin
+        #Texts of the conditions enclosing the origin
+        self.conditions = []
+        self.initialise()
+
+    def initialise(self):
+        """Fill `self.conditions` by locating the origin's code in its workflow code."""
+        thing_defined = self.origin.get_code()
+        code = self.origin.get_workflow_code()
+        #Only the first occurrence of the origin's code is considered
+        pos = code.find(thing_defined)
+        if(pos==-1):
+            #The origin's code isn't in the workflow code -> no condition to attach
+            return
+        #A condition is kept when its bloc strictly contains the position
+        for c, (start, end) in extract_conditions(code).items():
+            if(start<pos<end):
+                self.conditions.append(c)
+
diff --git a/src/executor.py b/src/executor.py
index 8f099ab..082995b 100644
--- a/src/executor.py
+++ b/src/executor.py
@@ -21,6 +21,7 @@ class Executor(Nextflow_Building_Blocks):
         self.origin = origin
         self.code = Code(code = code, origin = self)
         
+        
 
     #---------------------------------
     #AUXILIARY METHODS FOR ALL CLASSES
diff --git a/src/main_DSL2.py b/src/main_DSL2.py
index dd1ed4b..ecde61b 100644
--- a/src/main_DSL2.py
+++ b/src/main_DSL2.py
@@ -15,6 +15,9 @@ class Main_DSL2(Nextflow_Building_Blocks):
 
     def get_channels(self):
         return self.channels
+    
+    def get_workflow_code(self): #Returns the code of the main itself (no delegation to the origin)
+        return self.get_code()
 
     def get_type(self):
         return "Main DSL2"
diff --git a/src/nextflow_building_blocks.py b/src/nextflow_building_blocks.py
index 0c44921..15e7d5d 100644
--- a/src/nextflow_building_blocks.py
+++ b/src/nextflow_building_blocks.py
@@ -32,6 +32,9 @@ class Nextflow_Building_Blocks:
     def get_code(self, get_OG = False):
         return self.code.get_code(get_OG = get_OG)
     
+    def get_origin(self): #Accessor for self.origin
+        return self.origin
+    
     def get_output_dir(self):
         return self.origin.get_output_dir()
     
@@ -124,6 +127,9 @@ class Nextflow_Building_Blocks:
 
     def get_processes(self):
         return self.processes
+    
+    def get_workflow_code(self): #Default: the request is delegated to the origin (Main_DSL2 and Nextflow_File return their own code)
+        return self.origin.get_workflow_code()
 
     #----------------------
     #CHANNELS
diff --git a/src/nextflow_file.py b/src/nextflow_file.py
index 9299da5..9a7e0b0 100644
--- a/src/nextflow_file.py
+++ b/src/nextflow_file.py
@@ -49,6 +49,8 @@ class Nextflow_File(Nextflow_Building_Blocks):
         name = self.get_file_address().split('/')[-1]
         return name[:-3]
 
+    def get_workflow_code(self): #Returns the code of the file itself (no delegation to the origin)
+        return self.get_code()
 
     def check_file_correctness(self):
         code = self.get_code()
diff --git a/src/operation.py b/src/operation.py
index b329a45..8fc7bf4 100644
--- a/src/operation.py
+++ b/src/operation.py
@@ -9,6 +9,7 @@ import warnings
 import re
 from .outils import get_end_operator, get_end_call, get_curly_count
 from .code_ import Code
+from .condition import Condition
 from .executor import Executor
 from .bioflowinsighterror import BioFlowInsightError
 from . import constant
@@ -744,8 +745,6 @@ class Operation(Executor):
             self.initialise_origins()
             self.initialise_gives()
 
-
-
         self.write_summary(self.get_output_dir() / "debug/operations.nf")
         
     def check_if_empty_call(self):
diff --git a/src/outils.py b/src/outils.py
index ad30ce7..a207b25 100644
--- a/src/outils.py
+++ b/src/outils.py
@@ -914,4 +914,109 @@ def check_file_exists(address, origin):
     
 
 def is_git_directory(path = '.'):
-    return subprocess.call(['git', '-C', path, 'status'], stderr=subprocess.STDOUT, stdout = open(os.devnull, 'w')) == 0
\ No newline at end of file
+    return subprocess.call(['git', '-C', path, 'status'], stderr=subprocess.STDOUT, stdout=subprocess.DEVNULL) == 0 #DEVNULL -> no file handle left open
+
+
+#Function that extracts the conditions defined in some code
+def extract_conditions(code):
+    """Extract the `if`/`else` blocs written with curlies from some code.
+
+    The code is read character by character while keeping track of the
+    strings (simple, double and triple quotes) so that an `if` written
+    inside a string is ignored.
+
+    Returns a dictionary mapping the text of each condition to a tuple
+    `(start, end)` of indexes in `code`: `start` is the index of the `if`
+    (or `else`) keyword and `end` is the value returned by `extract_curly`
+    for the bloc (presumably the index right after its closing curly).
+    An `else` bloc is stored under the key `neg(<condition of its if>)`.
+    The conditions nested inside a bloc are extracted recursively and their
+    positions are shifted so that they are also relative to `code`.
+
+    Known limitations:
+      - the dictionary is keyed by the text of the condition, so when the
+        same condition is written twice only the last bloc is kept,
+      - an `else if` is not chained to its `if`: it is extracted as an
+        independent condition,
+      - the condition itself cannot be written on multiple lines.
+    """
+
+    if_pattern = re.compile(r"if *\((.+)\)\s*\{")
+    #The 'else' has to directly follow the closing curly of the 'if' bloc
+    else_pattern = re.compile(r"\s*(else)\s*\{")
+
+    conditions_dico = {}
+    quote_single, quote_double = False, False
+    triple_single, triple_double = False, False
+
+    def is_escaped(pos):
+        #A quote is escaped by a backslash, unless that backslash is itself escaped
+        return pos>0 and code[pos-1]=="\\" and not (pos>1 and code[pos-2]=="\\")
+
+    def adding_inside(start_inside, end_inside):
+        #Extracting the conditions nested in code[start_inside:end_inside]
+        temp_dico = extract_conditions(code[start_inside:end_inside])
+        for c in temp_dico:
+            #The positions are relative to the sub-code -> shifting them back
+            temp = temp_dico[c]
+            conditions_dico[c] = (temp[0] + start_inside, temp[1] + start_inside)
+
+    start = 0
+    while(start<len(code)):
+        #Triple quotes are checked first since they start with a simple quote
+        triple = code[start:start+3]
+        if(triple=="'''" and not (quote_single or quote_double or triple_double)):
+            triple_single = not triple_single
+            start+=3
+            continue
+        if(triple=='"""' and not (quote_single or quote_double or triple_single)):
+            triple_double = not triple_double
+            start+=3
+            continue
+
+        #Simple quotes -> a quote only counts when no other kind of string is open
+        char = code[start]
+        if(char=="'" and not (quote_double or triple_single or triple_double)):
+            if(not is_escaped(start)):
+                quote_single = not quote_single
+        elif(char=='"' and not (quote_single or triple_single or triple_double)):
+            if(not is_escaped(start)):
+                quote_double = not quote_double
+
+        #TODO add "else if" compatibility (for now it is extracted as an independent 'if')
+        #TODO Right now -> support only for if/else written with curlies -> not on single line
+
+        #Just because there is an 'if' doesn't necessarily mean there is an if bloc
+        #It also has to be outside of a string and not be the end of a word (e.g. 'diff')
+        in_string = quote_single or quote_double or triple_single or triple_double
+        new_word = start==0 or not (code[start-1].isalnum() or code[start-1]=="_")
+        match = None
+        if(not in_string and new_word and code.startswith("if", start)):
+            #Matching at 'start' directly in 'code' -> the spans are indexes of 'code'
+            match = if_pattern.match(code, start)
+        if(match is None):
+            start+=1
+            continue
+
+        #An if bloc starts at 'start' -> saving its condition and its extent
+        condition = match.group(1)
+        end = extract_curly(code, match.end())
+        conditions_dico[condition] = (start, end)
+        adding_inside(match.end(), end-1)
+
+        #Try to find an else corresponding
+        match_else = else_pattern.match(code, end)
+        if(match_else is None):
+            #Case we need to jump to the end of the if
+            start = end
+            continue
+
+        end_else = extract_curly(code, match_else.end())
+        conditions_dico[f"neg({condition})"] = (match_else.start(1), end_else)
+        #The inside of the else starts right after its own opening curly
+        adding_inside(match_else.end(), end_else-1)
+        #Case we need to jump to the end of the else
+        start = end_else
+
+    return conditions_dico
+    
-- 
GitLab