diff --git a/src/code_.py b/src/code_.py index 3c9fefd039f4cc5222be784c75f5cbe75cdfd7fe..349da56b56c09768b98c7a15ffea16a0285a9489 100644 --- a/src/code_.py +++ b/src/code_.py @@ -1,7 +1,8 @@ -from .outils import remove_comments, get_parenthese_count, get_curly_count, get_code_until_parenthese_count, extract_curly, get_next_element_caracter, get_code_until_character +from .outils import remove_comments, get_parenthese_count, get_curly_count, get_code_until_parenthese_count, extract_curly, get_next_element_caracter, get_code_until_character, get_end_call from .bioflowinsighterror import BioFlowInsightError import re from . import constant +import numpy as np class Code: def __init__(self, code, origin, initialise): @@ -20,7 +21,9 @@ class Code: self.code_wo_comments = remove_comments(self.code) self.code_wo_comments = re.sub(constant.DOUBLE_BACKSLAPSH_JUMP, ' ', self.code_wo_comments) self.code_wo_comments = re.sub(constant.BACKSLAPSH_JUMP, ' ', self.code_wo_comments) + self.code_wo_comments = re.sub(r"(\n( |\t)*)+\.", '.', self.code_wo_comments) self.code_wo_comments = self.code_wo_comments.replace("||", "$OR$") + self.code_wo_comments = self.turn_single_multiline_conditions_into_single(self.code_wo_comments) self.code_wo_comments = self.turn_single_condition_into_multiline(self.code_wo_comments) self.code_wo_comments = self.remove_things_inside_map(self.code_wo_comments ) self.code_wo_comments = self.rewrite_ternary_operation_to_normal_condition(self.code_wo_comments) @@ -119,6 +122,18 @@ class Code: if(char!="{" and temp_code[pos:pos+3] not in ['"""', "'''"]): raise BioFlowInsightError(f"The condition '({extracted_condition}' was not extracted correctly. 
Make sure the condition follows the correct syntaxe.", type="Unable to extract condition") + if(code[start:start+4]=="else" and [quote_single, quote_double, triple_single, triple_double]==[False, False, False, False]): + pattern = r"(else *)(.+)\n" + temp_code = code[start:] + for match in re.finditer(pattern, temp_code): + if(match.span(0)[0]==0): + _, end_line = match.span(0) + all = match.group(0) + extarcted = match.group(2).strip() + + if(extarcted!="" and extarcted[0] not in ["{"] and extarcted[-1] not in ["{", "}"]): + new = f"else {{\n{extarcted}\n}}\n" + to_replace.append((all, new)) @@ -247,8 +262,11 @@ class Code: #This methods rewrite ternary operation into "normal" conditions #variable = (condition) ? Expression2 : Expression3; def rewrite_ternary_operation_to_normal_condition(self, code): + #Turning multi line ternry operations into single ternary operations + code = re.sub(r"(\n( |\t)*)+:", ' :', code) pattern = r"\n *(def)? *(\w+) *\= *(([^?\n]+) *\? *([^:\n]+) *\: *([^\n]+))\n" to_replace = [] + already_searched = [] searching = True @@ -265,42 +283,67 @@ class Code: old = match.group(0) old = old[1:] dico_conditions = {} - + def rewrite_ternary(exp, dico_conditions): exp = exp.strip() + if(exp==""): + return "null" if(exp[0]=="(" and exp[-1]==")"): exp = exp[1:-1].strip() + try: expression, end_condition = get_code_until_character(exp, "?") except: return exp + condition = exp[:end_condition].strip() + try: + _, end_potential_condition = get_code_until_character(expression, "?") + + except: + end_potential_condition = np.inf exp2, end_exp1 = get_code_until_character(expression, ":") dico_true, dico_false = {}, {} - dico_conditions[condition] = {"True": rewrite_ternary(expression[:end_exp1], dico_true), "False": rewrite_ternary(exp2, dico_false)} + #if(expression[:end_exp1]==""): + # raise BioFlowInsightError(f"The 'True' case in the ternary condition '{exp}' has no value.", type="Incomplete ternary operation") + #if(exp2==""): + # raise 
BioFlowInsightError(f"The 'False' case in the ternary condition '{exp}' has no value.", type="Incomplete ternary operation") + + #Case there is a condition right after the first condition + if(end_potential_condition<end_exp1): + expression_false, end_exp1 = get_code_until_character(expression, ":", left_to_right=False) + dico_conditions[condition] = {"True": rewrite_ternary(expression[:end_exp1], dico_true), "False": rewrite_ternary(expression_false, dico_false)} + else: + dico_conditions[condition] = {"True": rewrite_ternary(expression[:end_exp1], dico_true), "False": rewrite_ternary(exp2, dico_false)} return dico_conditions - - rewrite_ternary(exp, dico_conditions) + if(old not in already_searched): + + try: + rewrite_ternary(exp, dico_conditions) + except: + already_searched.append(old) + searching = True + break - - def rewrite_dico_2_condition(var, dico_condition, num = 0): - code = '' - if(type(dico_condition)==str): - return f"{var} = {dico_condition}\n" - for condition in dico_condition: - code = f"if({condition}) {{\n\t{rewrite_dico_2_condition(var, dico_condition[condition]['True'], num = num+1)}}} else {{\n\t{rewrite_dico_2_condition(var, dico_condition[condition]['False'], num = num+1)}}}\n" - return code - - new = rewrite_dico_2_condition(f"{def_variable} {variable}", dico_conditions)+'\n' + + def rewrite_dico_2_condition(var, dico_condition, num = 0): + code = '' + if(type(dico_condition)==str): + return f"{var} = {dico_condition}\n" + for condition in dico_condition: + code = f"if({condition}) {{\n\t{rewrite_dico_2_condition(var, dico_condition[condition]['True'], num = num+1)}}} else {{\n\t{rewrite_dico_2_condition(var, dico_condition[condition]['False'], num = num+1)}}}\n" + return code + + new = rewrite_dico_2_condition(f"{def_variable} {variable}", dico_conditions)+'\n' - to_replace.append((old, new)) - tmp = code - code = code.replace(old, new, 1) - if(old!=new and tmp==code): - raise Exception("This shouldn't happen -> the code wasn't 
# ---------------------------------------------------------------------------
# src/code_.py -- method of class `Code`
# ---------------------------------------------------------------------------
def turn_single_multiline_conditions_into_single(self, code):
    """Rewrite every multi-line `if (...)` condition so it sits on one line.

    The code is scanned character by character while tracking whether the
    cursor is inside a single-, double- or triple-quoted string. Each time
    an `if` followed by optional spaces and `(` is met outside a string,
    `get_end_call` is asked for the text of that call; if the text contains
    a newline it is scheduled for replacement and the scan jumps past it.
    (`get_end_call` is defined elsewhere -- presumably it returns the text
    from `if` up to the matching closing parenthesis; only its length and
    content are relied upon here.)

    Parameters:
        code: the source text to process.

    Returns:
        `code` with the newlines of every collected condition replaced by
        a single space each.

    Raises:
        BioFlowInsightError: the scan needed `constant.WHILE_UPPER_BOUND`
            iterations or more.
        Exception: a collected condition could not be replaced in `code`.
    """

    def is_unescaped(index):
        # A quote delimits a string unless it is preceded by exactly one
        # backslash; two backslashes mean the backslash itself is escaped.
        # The bounds checks stop index 0/1 from wrapping to the end of `code`.
        if index == 0 or code[index - 1] != "\\":
            return True
        return index >= 2 and code[index - 2] == "\\"

    quote_single, quote_double = False, False
    triple_single, triple_double = False, False

    to_replace = []
    start = 0
    timeout = 0
    while start < len(code) and timeout < constant.WHILE_UPPER_BOUND:
        # One tick per iteration, whichever path the iteration takes.
        timeout += 1
        in_plain_quote = quote_single or quote_double

        # Triple-quote delimiters are consumed as a unit; `continue` makes
        # sure the character right after the delimiter is processed normally
        # instead of being skipped.
        window = code[start:start + 3]
        if window == "'''" and not in_plain_quote and not triple_double:
            triple_single = not triple_single
            start += 3
            continue
        if window == '"""' and not in_plain_quote and not triple_single:
            triple_double = not triple_double
            start += 3
            continue

        current = code[start]
        if current == "'" and not quote_double and not triple_single and not triple_double:
            if is_unescaped(start):
                quote_single = not quote_single
        elif current == '"' and not quote_single and not triple_single and not triple_double:
            if is_unescaped(start):
                quote_double = not quote_double

        outside_strings = not (quote_single or quote_double or triple_single or triple_double)
        if outside_strings and code[start:start + 2] == "if":
            remaining = code[start:]
            # Anchored match: only an `if (` starting exactly here counts.
            match = re.match(r"if *\(", remaining)
            if match:
                start_if, end_if = match.span(0)
                txt = get_end_call(remaining, start_if, end_if)
                if '\n' in txt:
                    # `str.replace` below rewrites every occurrence at once,
                    # so an identical condition must only be queued once --
                    # otherwise the second pass finds nothing to change and
                    # trips the "Not updated!" check.
                    if txt not in to_replace:
                        to_replace.append(txt)
                    # Jump to the last character of the condition; the
                    # `start += 1` below then lands just after it.
                    start += len(txt) - 1

        start += 1

    if timeout >= constant.WHILE_UPPER_BOUND:
        raise BioFlowInsightError("The WHILE_UPPER_BOUND was exceeded. BioFlow-Insight was unable to extarct the conditions. Make sure the workflow uses correct Nextflow syntaxe (https://www.nextflow.io/docs/latest/index.html).", type="Unable to extract conditions")

    for old in to_replace:
        updated = code.replace(old, old.replace('\n', " "))
        if updated == code:
            raise Exception("Not updated!")
        code = updated
    return code


# ---------------------------------------------------------------------------
# src/outils.py -- module-level helper
# ---------------------------------------------------------------------------
def get_code_until_character(code, char, left_to_right=True):
    """Find `char` at the top level of `code` and split the text there.

    "Top level" means: outside single-, double- and triple-quoted strings,
    and with the running counts of `(`/`)` and `{`/`}` both equal to zero.
    The bracket count is updated for the current character *before* the
    comparison with `char`, so a closing bracket that brings the count back
    to zero can itself be the match.

    Parameters:
        code: the text to scan.
        char: the single character to look for.
        left_to_right: scan from the first character forwards (default) or
            from the last character backwards.

    Returns:
        A tuple `(rest, index)`. `rest` is the text located after the
        matched character. `index` is the position of the matched character:
        a non-negative index when scanning left to right, and a negative
        index (relative to the end of `code`, -1 being the last character)
        when scanning right to left, so `code[:index]` is the text before
        the match in both modes.

    Raises:
        Exception: `char` does not occur at the top level of `code`.
    """
    size = len(code)
    step = 1 if left_to_right else -1
    pos = 0 if left_to_right else size - 1

    curly_count, parenthese_count = 0, 0
    quote_single, quote_double = False, False
    triple_single, triple_double = False, False

    def is_unescaped(index):
        # A quote delimits a string unless it is preceded by exactly one
        # backslash; two backslashes mean the backslash itself is escaped.
        # The bounds checks stop index 0/1 from wrapping to the end of `code`.
        if index == 0 or code[index - 1] != "\\":
            return True
        return index >= 2 and code[index - 2] == "\\"

    while 0 <= pos < size:
        # Three characters starting at `pos` in the direction of travel.
        if left_to_right:
            window = code[pos:pos + 3]
        else:
            window = code[max(pos - 2, 0):pos + 1]

        in_plain_quote = quote_single or quote_double
        # Consume a triple-quote delimiter as a unit and restart the loop so
        # the next character is fully processed (state tracking included).
        if window == "'''" and not in_plain_quote and not triple_double:
            triple_single = not triple_single
            pos += 3 * step
            continue
        if window == '"""' and not in_plain_quote and not triple_single:
            triple_double = not triple_double
            pos += 3 * step
            continue

        current = code[pos]
        in_string = quote_single or quote_double or triple_single or triple_double
        if not in_string:
            # When scanning backwards the counts go negative first and come
            # back to zero; only "== 0" matters below.
            if current == "{":
                curly_count += 1
            elif current == "}":
                curly_count -= 1
            elif current == "(":
                parenthese_count += 1
            elif current == ")":
                parenthese_count -= 1

        if current == "'" and not quote_double and not triple_single and not triple_double:
            if is_unescaped(pos):
                quote_single = not quote_single
        elif current == '"' and not quote_single and not triple_single and not triple_double:
            if is_unescaped(pos):
                quote_double = not quote_double

        in_string = quote_single or quote_double or triple_single or triple_double
        if current == char and parenthese_count == 0 and curly_count == 0 and not in_string:
            # `pos` is always a real (non-negative) index here, so the slice
            # is correct even when the match is the very last character.
            index = pos if left_to_right else pos - size
            return code[pos + 1:], index

        pos += step

    raise Exception("")