diff --git a/src/outils.py b/src/outils.py index 65a91ba32c62747ae090066fcb7803e36e4d60cc..5c3e7c6c3b8aff35f8fa9286fcb5ab06ae3507c6 100644 --- a/src/outils.py +++ b/src/outils.py @@ -1025,7 +1025,7 @@ def extract_conditions(code): start_else, end_else = match.span(0) start_else+=end end_else = extract_curly(code, end_else+end) - conditions_dico[' && '.join(["neg({})".format(v) for v in conditions])] = (start_else,end_else) + conditions_dico[' && '.join(["!({})".format(v) for v in conditions])] = (start_else,end_else) start_inside, end_inside = match.span(0)[1]+end, end_else-1 conditions_dico = adding_inside(conditions_dico, code, start_inside, end_inside) #print(code[start_else:end_else]) @@ -1133,6 +1133,12 @@ def operation_2_DSL2(code, origin): return code def format_with_tabs(code): + + def replace_jump(match): + return "\n" + #Removing the current "\t"s and extras " " + code = re.sub(r"\n[\t ]*", replace_jump, code) + start = 0 curly_count, parenthese_count = 0, 0 @@ -1251,4 +1257,54 @@ def group_together_ifs(code): code = code.replace(code[big_start:big_end], f"if({condition_1}{{\n{inside_1}\n{inside_2}\n}}\n") break - return code \ No newline at end of file + return code + +def remove_extra_jumps(code): + changed = True + while(changed): + changed = False + temp = code + def replacer(match): + return "\n\n" + code = re.sub(r"\n\s*\n\s*\n", replacer, code) + if(code!=temp): + changed = True + + return code + +#This functions analyses the body of the subworkflow and the emitted values +#If a channel is created in a certain condition (and not the negative) -> then we create it +def get_channels_to_add_in_false_conditions(body, emitted_channels): + conditions = extract_conditions(body) + channels_2_conditions = {} + #Creating the dictionnary channels 2 conditions + for channel in emitted_channels: + channels_2_conditions[channel] = [] + for match in re.finditer(fr"{re.escape(channel)}\s*=", body): + start, end = match.span(0) + for c in conditions: + 
start_condition, end_conditions = conditions[c] + if(start_condition<=start and start<=end_conditions): + channels_2_conditions[channel].append(c) + #Simplifying the list of conditions + #TODO -> here it's important that the input workflow doesn't have a too complexe condition systems (e.g. writting the same condition in multiple ways) + #Cause the converter doesn't analyse the conditions -> and basically it would create things which shloudn't + for channel in channels_2_conditions: + tab = channels_2_conditions[channel] + to_remove = [] + for condition in tab: + #If the condition and it's neagtion are in the tab -> then we remove the condition and it's negation form the list + negation = f"!({condition})" + if(negation in tab): + to_remove.append(condition) + to_remove.append(negation) + for r in to_remove: + tab.remove(r) + channels_2_conditions[channel] = tab + + #For the remaining condition in the list -> need to create an empty channel in the case of the negation + for condition in channels_2_conditions[channel]: + body += f"\nif(!({condition})) {{\n{channel} = Channel.empty()\n}}" + + return body + diff --git a/src/workflow.py b/src/workflow.py index 179595d2ccb217e6ba114edca4bc5a436e4d2ead..4aa19ba19e59e72e6eb400b4e6e37d5679e93c9f 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -3,7 +3,7 @@ from .nextflow_file import Nextflow_File from .ro_crate import RO_Crate from . 
import constant -from .outils import is_git_directory, format_with_tabs, replace_thing_by_call, replace_group1, group_together_ifs, extract_curly +from .outils import is_git_directory, format_with_tabs, replace_thing_by_call, replace_group1, group_together_ifs, extract_curly, remove_extra_jumps, get_channels_to_add_in_false_conditions from .outils_graph import flatten_dico, initia_link_dico_rec, get_number_cycles from .outils_annotate import get_tools_commands_from_user_for_process from .bioflowinsighterror import BioFlowInsightError @@ -759,9 +759,9 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen code = self.nextflow_file.get_code() #params_section = f"//PARAMS_SECTION_{tag}" - function_section = f"//FUNCTION_SECTION_{tag}" - process_section = f"//PROCESS_SECTION_{tag}" - subworkflow_section = f"//SUBWORKFLOW_SECTION_{tag}" + function_section = f"//FUNCTION_SECTION" + process_section = f"//PROCESS_SECTION" + subworkflow_section = f"//SUBWORKFLOW_SECTION" ankers = function_section+ "\n"*3 + process_section+ "\n"*3 + subworkflow_section @@ -823,14 +823,31 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen self.__init__(str(temp_file), display_info = False, duplicate=True) self.initialise(create_rocrate=False) - #for e in self.get_all_executors(): - # print(e.get_code(get_OG = True)) + #Get the clusters and the code self.nextflow_file.generate_user_view(relevant_processes = relevant_processes, processes_2_remove = []) clusters = self.nextflow_file.graph.get_clusters_from_user_view() + + #cluster_2_subworkflows = [] + #print(clusters) + ##Basically here i'm checking if a subworkflow is split + ##TODO Check this -> i think in the case something is at the root -> it won't work + #for sub in self.get_subworkflows_called(): + # clusters_in_which_elements_are_taken = [] + # #TODO do this + # print(sub.get_name(), sub.get_executors()) + # for exe in sub.get_executors(): + # print(exe, 
exe.get_subworkflow_origin()) + # #If there are multiple clusters and the number of clusters isn't equal to all the clusters -> it means that subworkflow is broken + # if(len(clusters_in_which_elements_are_taken)>1 and len(clusters_in_which_elements_are_taken)!=len(clusters)): + # print(sub.get_name()) #print(clusters) + #for c in clusters: + # for ele in c: + # print(ele.get_type()) + #TODO -> need to break clusters here #And redo analysis @@ -975,6 +992,10 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen temp = '\n'.join(old_output_names) emit = f"\nemit:\n{temp}\n" + #Adding empty channels if it doesn't exist in the case of a negative condition + body = get_channels_to_add_in_false_conditions(body, old_output_names) + + #Replace names inside subworkflow subworkflow_code = f"workflow {name} {{\n{take}\nmain:\n{body}\n{emit}\n}}" @@ -1010,10 +1031,6 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen subworkflow_cluster_calls_to_add.append(subworkfow_call) index_cluster+=1 - - print(format_with_tabs(subworkflow_code)) - print("//-----------") - #TODO -> rmoving the conditions which are problematic #This might not be the probleme -> when rerunnung the analysis isn't totally robust @@ -1048,7 +1065,24 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen code = replace_group1(code, pattern, new) #code = code.replace(old, new) - print(code) + + #Add the subworkflow defintions + #------------------------------------- + #Add anker + subworkflow_section = f"//ANKER 4 SUBWORKFLOW DEF" + to_replace = "" + for match in re.finditer(r"workflow\s+\w*\s*\{", code): + to_replace = match.group(0) + break + if(to_replace==""): + raise Exception("No call to a workflow") + + code = code.replace(to_replace, f"{subworkflow_section}\n\n{to_replace}") + + for sub in subworkflow_clusters_to_add: + code = code.replace(f'{subworkflow_section}', f"{sub}\n\n{subworkflow_section}") + + 
print(remove_extra_jumps(format_with_tabs(code))) #So basically when retrieving a thing (process or subworkflow)