from .outils import extract_conditions


class Condition:
    """Conditions (if/else blocks) that enclose a given element of a workflow.

    Attributes:
        origin: the element whose enclosing conditions are looked up. It must
            provide ``get_code()`` (its own code) and ``get_workflow_code()``
            (the code it is searched in).
        conditions: list of the condition texts (keys returned by
            ``extract_conditions``) whose span contains the element.
    """

    def __init__(self, origin):
        self.origin = origin
        self.conditions = []
        self.initialise()

    def initialise(self):
        """Fill ``self.conditions`` with every condition surrounding the origin.

        The origin's code is located inside the workflow code with ``str.find``,
        so when the exact same text appears several times only the first
        occurrence is considered.
        """
        thing_defined = self.origin.get_code()
        code = self.origin.get_workflow_code()

        pos = code.find(thing_defined)
        if pos == -1:
            #The element is not present verbatim in the workflow code -> no
            #span can contain it, so there is nothing to extract
            return

        conditions_dico = extract_conditions(code)
        #A condition applies when the element starts strictly inside its span
        self.conditions.extend(
            condition
            for condition, (span_start, span_end) in conditions_dico.items()
            if span_start < pos < span_end
        )
a/src/executor.py +++ b/src/executor.py @@ -21,6 +21,7 @@ class Executor(Nextflow_Building_Blocks): self.origin = origin self.code = Code(code = code, origin = self) + #--------------------------------- #AUXILIARY METHODS FOR ALL CLASSES diff --git a/src/main_DSL2.py b/src/main_DSL2.py index dd1ed4b..ecde61b 100644 --- a/src/main_DSL2.py +++ b/src/main_DSL2.py @@ -15,6 +15,9 @@ class Main_DSL2(Nextflow_Building_Blocks): def get_channels(self): return self.channels + + def get_workflow_code(self): + return self.get_code() def get_type(self): return "Main DSL2" diff --git a/src/nextflow_building_blocks.py b/src/nextflow_building_blocks.py index 0c44921..15e7d5d 100644 --- a/src/nextflow_building_blocks.py +++ b/src/nextflow_building_blocks.py @@ -32,6 +32,9 @@ class Nextflow_Building_Blocks: def get_code(self, get_OG = False): return self.code.get_code(get_OG = get_OG) + def get_origin(self): + return self.origin + def get_output_dir(self): return self.origin.get_output_dir() @@ -124,6 +127,9 @@ class Nextflow_Building_Blocks: def get_processes(self): return self.processes + + def get_workflow_code(self): + return self.origin.get_workflow_code() #---------------------- #CHANNELS diff --git a/src/nextflow_file.py b/src/nextflow_file.py index 9299da5..9a7e0b0 100644 --- a/src/nextflow_file.py +++ b/src/nextflow_file.py @@ -49,6 +49,8 @@ class Nextflow_File(Nextflow_Building_Blocks): name = self.get_file_address().split('/')[-1] return name[:-3] + def get_workflow_code(self): + return self.get_code() def check_file_correctness(self): code = self.get_code() diff --git a/src/operation.py b/src/operation.py index b329a45..8fc7bf4 100644 --- a/src/operation.py +++ b/src/operation.py @@ -9,6 +9,7 @@ import warnings import re from .outils import get_end_operator, get_end_call, get_curly_count from .code_ import Code +from .condition import Condition from .executor import Executor from .bioflowinsighterror import BioFlowInsightError from . 
#Header of an if bloc: the condition has to fit on a single line since '.'
#does not match a newline
_IF_HEADER = re.compile(r"if *\((.+)\)\s*\{")
#Header of the else bloc which directly follows the end of an if bloc
_ELSE_HEADER = re.compile(r"\s*(else)\s*\{")


def _is_escaped(code, index):
    """Return True when the character at `index` is preceded by an odd number of backslashes."""
    backslashes = 0
    cursor = index - 1
    #Stopping at 0 avoids the negative indexes which would wrap around to the
    #end of the string
    while cursor >= 0 and code[cursor] == "\\":
        backslashes += 1
        cursor -= 1
    return backslashes % 2 == 1


def _is_identifier_char(code, index):
    """Return True when `index` is a valid position holding a letter, a digit or an underscore."""
    return index >= 0 and (code[index].isalnum() or code[index] == "_")


def _merge_nested_conditions(conditions_dico, code, start_inside, end_inside):
    """Add to `conditions_dico` the conditions found in code[start_inside:end_inside].

    The spans found in the slice are shifted by `start_inside` so that they are
    expressed as positions in `code`.
    """
    nested = extract_conditions(code[start_inside:end_inside])
    for condition, (span_start, span_end) in nested.items():
        conditions_dico[condition] = (span_start + start_inside, span_end + start_inside)


#Function that extracts the conditions defined in some code
def extract_conditions(code):
    """Extract the if/else blocs written with curlies from `code`.

    Parameters:
        code: the text to analyse.

    Returns:
        A dictionary mapping the text of each condition to a tuple
        (start, end) of positions in `code`: `start` is the position of the
        `if` keyword and `end` is the value returned by `extract_curly` for
        the bloc (presumably the position right after the closing curly ->
        to confirm against `extract_curly`). The else bloc directly following
        an if bloc is stored under the key "neg(<condition>)" and starts at
        its `else` keyword. Conditions nested inside a bloc are included too.

    Limitations:
        - the dictionary is keyed by the condition text, so two blocs with
          the same condition share a single entry (the last one found wins)
        - only `if (...) {` headers whose condition fits on one line are
          recognised
        - an `else if (...) {` is handled as a new if bloc
    """
    conditions_dico = {}
    #Delimiter of the string currently open ("'", '"', "'''" or '"""'),
    #None when the position is outside of any string
    open_quote = None
    start = 0

    while start < len(code):
        #Triple quotes have to be checked before the single character quotes
        triple = code[start:start + 3]
        if triple in ("'''", '"""') and open_quote in (None, triple):
            open_quote = None if open_quote == triple else triple
            start += 3
            continue

        char = code[start]
        if char in ("'", '"') and not _is_escaped(code, start):
            if open_quote is None:
                open_quote = char
            elif open_quote == char:
                open_quote = None

        #Just because there is an 'if' doesn't necessarily mean there is an if bloc
        if (open_quote is None
                and code.startswith("if", start)
                and not _is_identifier_char(code, start - 1)):
            header = _IF_HEADER.match(code, start)
            if header:
                condition = header.group(1)
                end = extract_curly(code, header.end())
                conditions_dico[condition] = (start, end)
                _merge_nested_conditions(conditions_dico, code, header.end(), end - 1)

                #Try to find the corresponding else: it has to directly follow
                #the if bloc, otherwise it belongs to another if
                else_header = _ELSE_HEADER.match(code, end)
                if else_header:
                    #Positions given by the match are already positions in
                    #`code` since the pattern is applied on `code` itself
                    else_inside = else_header.end()
                    end_else = extract_curly(code, else_inside)
                    conditions_dico[f"neg({condition})"] = (else_header.start(1), end_else)
                    _merge_nested_conditions(conditions_dico, code, else_inside, end_else - 1)
                    end = end_else

                #Jump to the end of the bloc(s) which have just been analysed,
                #while making sure the cursor always moves forward
                start = max(end, start + 1)
                continue

        start += 1

    return conditions_dico