From a0af3c1a17455409f50e10d7bebff9b47e7ab704 Mon Sep 17 00:00:00 2001 From: George Marchment <georgemarchment@yahoo.fr> Date: Tue, 29 Apr 2025 18:11:29 +0200 Subject: [PATCH] Added support for multi ternary operator -> this will need to be checked --- src/code_.py | 124 ++++++++++++++++++++++++++++++++++++++---------- src/outils.py | 60 +++++++++++++++++++++++ src/workflow.py | 52 ++++++++++---------- 3 files changed, 186 insertions(+), 50 deletions(-) diff --git a/src/code_.py b/src/code_.py index c59e14d..3c9fefd 100644 --- a/src/code_.py +++ b/src/code_.py @@ -1,4 +1,4 @@ -from .outils import remove_comments, get_parenthese_count, get_curly_count, get_code_until_parenthese_count, extract_curly, get_next_element_caracter +from .outils import remove_comments, get_parenthese_count, get_curly_count, get_code_until_parenthese_count, extract_curly, get_next_element_caracter, get_code_until_character from .bioflowinsighterror import BioFlowInsightError import re from . import constant @@ -201,42 +201,114 @@ class Code: def add_map_element(self, old, new): self.origin.add_map_element(old, new) + ##This methods rewrite ternary operation into "normal" conditions + ##variable = (condition) ? Expression2 : Expression3; + #def rewrite_ternary_operation_to_normal_condition(self, code): + # pattern = r"(def)? *(\w+) *\= *([^?\n]+) *\? 
#This method rewrites ternary operations into "normal" conditions
#variable = (condition) ? Expression2 : Expression3;
def rewrite_ternary_operation_to_normal_condition(self, code):
    """Rewrite single-line ternary assignments found in `code` into if/else blocks.

    Every line of the form `[def] variable = condition ? exp1 : exp2` is replaced by
    `if(condition) {...} else {...}`. Chained ternaries found in either branch
    (e.g. `a ? b : c ? d : e`) are rewritten recursively into nested if/else blocks.
    Lines matched by the regular expression which do not hold a ternary operator
    at the top level (the `?` is in a closure, a string, a `?.` or a `?:`) are left untouched.

    Each (old, new) pair is registered with `self.add_to_ternary_operation_dico`.

    Parameters:
        code (str): the code to rewrite.

    Returns:
        str: the code where the ternary assignments have been rewritten.

    Raises:
        BioFlowInsightError: if a branch of a ternary holds a '?' without its ':'
            (typically a ternary nested in the "true" branch without parentheses).
    """
    #'(?:(def) +)?' -> 'def' is only recognised as a keyword when followed by a space
    #(otherwise a variable like 'default_x' would be split into 'def' + 'ault_x')
    pattern = re.compile(r"\n *(?:(def) +)?(\w+) *\= *(([^?\n]+) *\? *([^:\n]+) *\: *([^\n]+))\n")

    def multi_ternary_error():
        return BioFlowInsightError(f"Detected a multi ternary operation (a ternary operation in a ternary operation) in the file '{self.origin.get_file_address()}'. BioFlow-Insight does not support this, try defining it in a different way.", type="Multi ternary operation")

    def strip_enclosing_parentheses(exp):
        #Removes the outer parentheses only when they wrap the whole expression
        #"(a ? b : c)" -> "a ? b : c" but "(a) ? (b) : (c)" is left as is
        exp = exp.strip()
        while(exp.startswith("(") and exp.endswith(")")):
            try:
                #Searching for ")" gives the parenthese which closes the first one
                _, closing = get_code_until_character(exp, ")")
            except Exception:
                break
            if(closing!=len(exp)-1):
                break
            exp = exp[1:-1].strip()
        return exp

    def find_ternary_question_mark(exp):
        #Returns the index of the first top level '?' which is a ternary operator
        #Returns None if there isn't one
        offset = 0
        while(True):
            try:
                rest, index = get_code_until_character(exp[offset:], "?")
            except Exception:
                return None
            #'?.' is the safe navigation operator and '?:' the Elvis operator -> skip them
            if(rest[:1] not in (".", ":")):
                return offset+index
            offset += index+1

    def parse_ternary(exp, nested = False):
        #Returns the expression (str) if it is not a ternary operation
        #Otherwise returns {condition: {"True": ..., "False": ...}}, the branches being parsed the same way
        exp = strip_enclosing_parentheses(exp)
        question_mark = find_ternary_question_mark(exp)
        if(question_mark is None):
            return exp
        condition = exp[:question_mark].strip()
        branches = exp[question_mark+1:]
        try:
            false_branch, end_true_branch = get_code_until_character(branches, ":")
        except Exception:
            #A '?' without its ':' in a branch means the split of the parent was wrong
            if(nested):
                raise multi_ternary_error() from None
            #At the top level it simply isn't a ternary operation
            return exp
        return {condition: {"True": parse_ternary(branches[:end_true_branch], nested = True),
                            "False": parse_ternary(false_branch, nested = True)}}

    def conditions_2_code(var, conditions):
        #Writes the (nested) if/else code corresponding to the parsed ternary
        if(isinstance(conditions, str)):
            return f"{var} = {conditions}\n"
        generated = ''
        for condition in conditions:
            generated = f"if({condition}) {{\n\t{conditions_2_code(var, conditions[condition]['True'])}}} else {{\n\t{conditions_2_code(var, conditions[condition]['False'])}}}\n"
        return generated

    to_replace = []
    position = 0
    while(True):
        match = pattern.search(code, position)
        if(match is None):
            break
        conditions = parse_ternary(match.group(3))
        if(isinstance(conditions, str)):
            #Not a ternary -> the line is kept as is
            #The search resumes on the closing '\n' so that the next line can still be matched
            position = match.end()-1
            continue

        def_variable = match.group(1) or ""
        variable = match.group(2)
        #The leading '\n' of the match is not part of the replaced text
        old = match.group(0)[1:]
        new = conditions_2_code(f"{def_variable} {variable}", conditions)+'\n'

        #The replacement is done on the matched span itself (and not on the first identical text found)
        start = match.start()+1
        code = code[:start]+new+code[match.end():]
        to_replace.append((old, new))
        position = start+len(new)-1

    for old, new in to_replace:
        self.add_to_ternary_operation_dico(old, new)

    return code
def get_code_until_character(code, char):
    """Find the first occurrence of `char` in `code` which is at the top level.

    "Top level" means outside of any string ('...', "...", '''...''', \"\"\"...\"\"\")
    and with balanced parentheses, curly brackets and square brackets.

    The nesting counters are updated before the comparison with `char`: searching
    for a closing delimiter returns the one which brings the nesting back to zero,
    and searching for an opening delimiter never matches.

    Parameters:
        code (str): the code to scan.
        char (str): the single character to search for.

    Returns:
        tuple: (the code located after the character, the index of the character in `code`).

    Raises:
        ValueError: if `char` is not found at the top level of `code`.
    """
    closing_2_opening = {")": "(", "}": "{", "]": "["}
    depth = {"(": 0, "{": 0, "[": 0}

    def is_escaped(index):
        #A quote is escaped when it is preceded by an odd number of backslashes
        #(the index never goes below 0 -> no wrap around to the end of the string)
        backslashes = 0
        while(index>0 and code[index-1]=="\\"):
            backslashes+=1
            index-=1
        return backslashes%2==1

    #The delimiter of the string currently opened, None when outside of a string
    open_quote = None
    position = 0
    while(position<len(code)):
        triple = code[position:position+3]
        #A triple quote either opens a string (when no string is opened) or closes its own kind
        if(triple in ("'''", '"""') and open_quote in (None, triple)):
            open_quote = triple if open_quote is None else None
            position+=3
            continue

        current = code[position]
        if(open_quote is None):
            if(current in depth):
                depth[current]+=1
            elif(current in closing_2_opening):
                depth[closing_2_opening[current]]-=1
            elif(current in ("'", '"') and not is_escaped(position)):
                open_quote = current
        elif(current==open_quote and not is_escaped(position)):
            open_quote = None

        if(current==char and open_quote is None and not any(depth.values())):
            return code[position+1:], position
        position+=1
    raise ValueError(f"The character '{char}' was not found at the top level of the code")
c): - nb_non_relevant_clusters+=1 - - uniformity_variance = 0 - average_number_of_process_per_cluster = np.mean(tab_nb_processes_per_cluster) - for x in tab_nb_processes_per_cluster: - uniformity_variance += (average_number_of_process_per_cluster-x)**2/nb_clusters - - score = concordance_factor * np.max(np.array(tab_nb_conditions_per_cluster)/np.array(tab_nb_executors_per_cluster)) + \ - uniformity_factor * (uniformity_variance / number_processes_called) + \ - min_nb_clusters_factor * (nb_clusters / number_processes_called) + \ - min_nb_non_relevant_cluster_factor * (nb_non_relevant_clusters / nb_clusters) - + def get_score_from_set_relevant_processes(random_relevant_processes): + _, cluster_organisation = w.convert_workflow_2_user_view(relevant_processes=random_relevant_processes, render_graphs = False) + + tab_nb_executors_per_cluster, tab_nb_processes_per_cluster, tab_nb_conditions_per_cluster = [], [], [] + for c in cluster_organisation: + tab_nb_executors_per_cluster.append(cluster_organisation[c]["nb_executors"]) + tab_nb_processes_per_cluster.append(cluster_organisation[c]["nb_processes"]) + tab_nb_conditions_per_cluster.append(cluster_organisation[c]["nb_conditions"]) + + nb_clusters = len(cluster_organisation) + nb_non_relevant_clusters = 0 + for c in cluster_organisation: + #This means it's a non relvant cluster + if("non_relevant_cluster_" in c): + nb_non_relevant_clusters+=1 + + uniformity_variance = 0 + average_number_of_process_per_cluster = np.mean(tab_nb_processes_per_cluster) + for x in tab_nb_processes_per_cluster: + uniformity_variance += (average_number_of_process_per_cluster-x)**2/nb_clusters + + score = concordance_factor * np.max(np.array(tab_nb_conditions_per_cluster)/np.array(tab_nb_executors_per_cluster)) + \ + uniformity_factor * (uniformity_variance / number_processes_called) + \ + min_nb_clusters_factor * (nb_clusters / number_processes_called) + \ + min_nb_non_relevant_cluster_factor * (nb_non_relevant_clusters / nb_clusters) + return 
score, cluster_organisation + + score, cluster_organisation = get_score_from_set_relevant_processes(random_relevant_processes) if(len(cluster_organisation)>=reduction_alpha*number_processes_called and len(cluster_organisation)<=reduction_beta*number_processes_called and score<min_score): -- GitLab