import re

from .outils import (
    remove_comments,
    get_parenthese_count,
    get_curly_count,
    get_code_until_parenthese_count,
    extract_curly,
)
from .bioflowinsighterror import BioFlowInsightError
from . import constant


class Code:
    """Hold a piece of source text together with a normalised, comment-free copy.

    ``self.code`` keeps the text as given (wrapped in one leading and one
    trailing newline), while ``self.code_wo_comments`` is the result of the
    rewriting steps applied by :meth:`initialise`. ``origin`` is the object
    that owns this code; it must provide ``add_to_ternary_operation_dico``,
    ``add_map_element`` and ``get_file_address``.
    """

    # A single-line `if (...) ...` statement, up to the end of its line.
    # Group 1 is the `if (` opener, group 2 is everything after it.
    _SINGLE_LINE_IF = re.compile(r"(if *\()(.+)\n")

    def __init__(self, code, origin):
        """Store ``code`` and ``origin`` and build the processed copy right away."""
        self.code = code
        self.code_wo_comments = ""
        self.origin = origin
        self.initialise()
        # check_its_nextflow() is currently not run at construction time.
        #self.check_its_nextflow()

    def initialise(self):
        """Build ``self.code_wo_comments`` from ``self.code``.

        The steps are order-dependent: comments are removed first, then the
        patterns ``constant.DOUBLE_BACKSLAPSH_JUMP`` and
        ``constant.BACKSLAPSH_JUMP`` are replaced by a single space, ``||``
        is replaced by ``$OR$``, and finally the single-line ``if``, ternary,
        map and leading-dot rewrites are applied.
        """
        # Surrounding newlines avoid out-of-file problems later on.
        self.code = '\n' + self.code + '\n'
        processed = remove_comments(self.code)
        processed = re.sub(constant.DOUBLE_BACKSLAPSH_JUMP, ' ', processed)
        processed = re.sub(constant.BACKSLAPSH_JUMP, ' ', processed)
        processed = processed.replace("||", "$OR$")
        processed = self.turn_single_condition_into_multiline(processed)
        processed = self.rewrite_ternary_operation_to_normal_condition(processed)
        processed = self.remove_things_inside_map(processed)
        processed = self.rewrite_jump_dot(processed)
        self.code_wo_comments = processed

    def check_its_nextflow(self):
        """Raise if the processed code contains one of the illegal imports.

        Raises:
            BioFlowInsightError: on the first match of
                ``constant.START_IMPORT + illegal`` for any ``illegal`` in
                ``constant.ILLEGAL_IMPORTS``.
        """
        for illegal in constant.ILLEGAL_IMPORTS:
            match = re.search(constant.START_IMPORT + illegal, self.get_code())
            if match is not None:
                bit_of_code = match.group(0)
                raise BioFlowInsightError(f"The presence of '{bit_of_code}' is detected{self.get_string_line(bit_of_code)}.", num = 1,origin=self)

    @staticmethod
    def _is_escaped(code, index):
        """Return True when the character at ``index`` follows a lone backslash.

        A character is escaped when it is preceded by a backslash that is not
        itself preceded by a backslash. Positions before the start of
        ``code`` are treated as "no character" instead of wrapping around to
        the end of the string.
        """
        previous = code[index - 1] if index >= 1 else ""
        before_previous = code[index - 2] if index >= 2 else ""
        return previous == "\\" and before_previous != "\\"

    def _single_line_if_replacement(self, code, match):
        """Return ``(old, new)`` for a single-line ``if`` match, or None.

        ``match`` is a ``_SINGLE_LINE_IF`` match inside ``code``. Nothing is
        returned when the line ends with a curly bracket, or when no body
        remains after the condition (or the body starts with ``{``).
        """
        whole = match.group(0)
        rest_of_line = match.group(2).strip()
        if rest_of_line == "" or rest_of_line[-1] in ["{", "}"]:
            return None
        # Everything after the `if (` opener, up to the point where the helper
        # reports the parenthesis count reaching -1; the last character is
        # dropped to obtain the bare condition (presumably the closing ")").
        extracted_condition = get_code_until_parenthese_count(code=code[match.end(1):], val=-1)
        condition = extracted_condition[:-1]
        body = re.sub(r"if *\(" + re.escape(extracted_condition.strip()), "", whole).strip()
        if body == "" or body[0] == "{":
            return None
        return whole, f"if ({condition}) {{\n{body}\n}}\n"

    #This method turns a single-line condition into a multi-line condition
    def turn_single_condition_into_multiline(self, code):
        """Rewrite ``if (cond) statement`` lines as a braced multi-line block.

        The text is scanned character by character while tracking whether the
        current position is inside a single-, double- or triple-quoted string;
        only an ``if`` found outside any string is considered.

        Args:
            code: the text to rewrite.

        Returns:
            The text with every collected single-line ``if`` replaced by
            ``if (cond) {`` / body / ``}``.
        """
        # NOTE(review): "if" is not checked for a word boundary, so an
        # identifier ending in "if" directly followed by "(" is also a candidate.
        to_replace = []
        start = 0
        quote_single, quote_double = False, False
        triple_single, triple_double = False, False

        while start < len(code):
            checked_triple = False
            if code.startswith("'''", start) and not (quote_single or quote_double or triple_double):
                triple_single = not triple_single
                start += 3
                checked_triple = True
            # Deliberately a separate test: it looks at the position reached
            # after a possible ''' delimiter was consumed just above.
            if code.startswith('"""', start) and not (quote_single or quote_double or triple_single):
                triple_double = not triple_double
                start += 3
                checked_triple = True
            if checked_triple:
                # The delimiter has been consumed; the index already moved past it.
                continue

            char = code[start]
            if char == "'" and not (quote_double or triple_single or triple_double):
                if not self._is_escaped(code, start):
                    quote_single = not quote_single
            elif char == '"' and not (quote_single or triple_single or triple_double):
                if not self._is_escaped(code, start):
                    quote_double = not quote_double

            in_string = quote_single or quote_double or triple_single or triple_double
            if not in_string and code.startswith("if", start):
                # Anchored match at the current position: no slice copy and no
                # scan over the rest of the text.
                match = self._SINGLE_LINE_IF.match(code, start)
                if match is not None:
                    replacement = self._single_line_if_replacement(code, match)
                    if replacement is not None:
                        to_replace.append(replacement)
            start += 1

        for old, new in to_replace:
            code = code.replace(old, new)
        return code

    #This function replaces the body of each map/flatMap operator by a placeholder
    def remove_things_inside_map(self, code):
        """Replace the content of every ``.map { ... }`` / ``.flatMap { ... }``.

        Each body is swapped for a placeholder ``¤<id(self)>_<index>¤`` and the
        operator is renamed ``map_modified`` / ``flatMap_modified``. The
        original body is handed to ``self.add_map_element`` together with its
        placeholder.

        Args:
            code: the text to rewrite.

        Returns:
            The rewritten text.
        """
        index = 0
        searching = True
        while searching:
            searching = False
            for word in ["map", "flatMap"]:
                # Only the first occurrence is handled per pass, because the
                # replacement shifts every later offset.
                match = re.search(r"\.\s*" + word + r"\s*{", code)
                if match is None:
                    continue
                start_map, end = match.span(0)
                # presumably the index just past the matching closing curly
                # bracket; verify against extract_curly
                end_map = extract_curly(code, end)
                old = code[end:end_map - 1]
                new = f"¤{id(self)}_{index}¤"
                self.add_map_element(old, new)
                old_code = code[start_map:end_map]
                new_code = f".{word}_modified {{ {new} }}"
                code = code.replace(old_code, new_code)
                searching = True
                index += 1
        return code

    def add_to_ternary_operation_dico(self, old, new):
        """Forward a rewritten ternary operation (old text, new text) to the origin."""
        self.origin.add_to_ternary_operation_dico(old, new)

    def add_map_element(self, old, new):
        """Forward a replaced map body (old text, placeholder) to the origin."""
        self.origin.add_map_element(old, new)

    #This method rewrites ternary operations into "normal" conditions
    #variable = (condition) ? Expression2 : Expression3;
    def rewrite_ternary_operation_to_normal_condition(self, code):
        """Rewrite ``[def] var = cond ? a : b`` lines as two ``if`` blocks.

        The second block tests the negated condition instead of using
        ``else``. A match is only rewritten when the condition and both
        expressions have a parenthesis count and a curly count of 0, which
        guards against a ternary that was split at the wrong place. Every
        rewrite is reported through ``self.add_to_ternary_operation_dico``.

        Args:
            code: the text to rewrite.

        Returns:
            The rewritten text.
        """
        pattern = r"(def)? *(\w+) *\= *([^?\n]+) *\? *([^:\n]+) *\: *([^\n]+)\n"
        to_replace = []
        for match in re.finditer(pattern, code):
            def_variable = match.group(1) if match.group(1) is not None else ""
            variable = match.group(2)
            condition = match.group(3).strip()
            exp1, exp2 = match.group(4).strip(), match.group(5).strip()
            old = match.group(0)
            new = f"if ({condition}) {{\n{def_variable} {variable} = {exp1}\n}}\n"
            new += f"if (!({condition})) {{\n{def_variable} {variable} = {exp2}\n}}\n\n"
            # Check that the parsing produced balanced pieces.
            pieces = (condition, exp1, exp2)
            balanced = (
                all(get_parenthese_count(piece) == 0 for piece in pieces)
                and all(get_curly_count(piece) == 0 for piece in pieces)
            )
            if balanced:
                to_replace.append((old, new))
        for old, new in to_replace:
            self.add_to_ternary_operation_dico(old, new)
            code = code.replace(old, new)
        return code

    def rewrite_jump_dot(self, code):
        """Join a ``.`` that starts a line onto the end of the preceding code.

        One or more newlines (each optionally followed by spaces) directly in
        front of a dot are removed, leaving only the dot.
        """
        pattern = r"(\n *)+\."
        return re.sub(pattern, '.', code)

    def get_line(self, bit_of_code):
        """Return the line at which ``bit_of_code`` first appears, or -1.

        The search runs on ``self.code`` with comments removed. The line is
        the number of newlines in front of the first occurrence, with a
        minimum of 1.
        """
        code = remove_comments(self.code)
        index = code.find(bit_of_code)
        if index == -1:
            return -1
        return max(code.count('\n', 0, index), 1)

    def get_string_line(self, bit_of_code):
        """Return ``", possibly at line N"`` for ``bit_of_code``, or ``""`` if not found."""
        line = self.get_line(bit_of_code)
        if line == -1:
            return ''
        return f", possibly at line {line}"

    #Returns the code without comments
    def get_code(self, get_OG =False):
        """Return the stripped code.

        Args:
            get_OG: when true, return the code as given (``self.code``);
                otherwise return the processed, comment-free copy.
        """
        if get_OG:
            return self.code.strip()
        return self.code_wo_comments.strip()

    def get_file_address(self):
        """Return the file address reported by the origin."""
        return self.origin.get_file_address()