diff --git a/src/call.py b/src/call.py index 53bb68015bbb2413a558c8abe88b92f31a58d329..bdc880c4eab8ece4c070e938433dc29e031c92ae 100644 --- a/src/call.py +++ b/src/call.py @@ -26,7 +26,6 @@ class Call(Executor): def __str__(self): return f"Call_{id(self)}" - def get_code(self, clean_pipe = False, get_OG=False): if(get_OG): @@ -47,15 +46,34 @@ class Call(Executor): code = f"{tag_to_add}\n{code}" index = 1 for param in self.parameters: + param_new_name = f"{self.get_first_element_called().get_name()}_param_{index}" + #Case the param is a call if(param.get_type()=="Call"): - param_new_name = f"{self.get_first_element_called().get_name()}_param_{index}" - code = code.replace(param.get_code(), param_new_name) + code = code.replace(param.get_code(get_OG=True), param_new_name) new_bit = param.simplify_code(new_name = param_new_name) code = code.replace(tag_to_add, f"{tag_to_add}\n{new_bit}") - + #Case the param is an operation - #TODO + elif(param.get_type()=="Operation"): + + code = code.replace(param.get_code(get_OG=True), param_new_name) + lines = param.simplify_code().split('\n') + if(len(lines)==1): + new_bit = f"{param_new_name} = {lines[0]}" + else: + head = '\n'.join(lines[:-1]) + new_bit = f"{head}\n{param_new_name} = {lines[-1]}" + code = code.replace(tag_to_add, f"{tag_to_add}\n{new_bit}") + + #Case Channel + elif(param.get_type()=="Channel"): + None + elif(param.get_type()=="Emmited"): + None + else: + print(param.get_code(), param.get_type()) + raise Exception("This shouldn't happen") index+=1 return code.replace(tag_to_add, "").strip() diff --git a/src/graph.py b/src/graph.py index bcc826f6a78e803675ef0cf6e3e0709d98824018..084b061185e3c163ba313360dc1d002d9a42aa6e 100644 --- a/src/graph.py +++ b/src/graph.py @@ -300,8 +300,8 @@ class Graph(): def get_user_view_graph(self, relevant_processes = []): #For now i'm only gonna work from the flattened dico - self.initialise_flattened_dico(self.dico_process_dependency_graph) - 
def get_all_executors(self, dico):
    """Collect the executors of this workflow and of every subworkflow it calls.

    The result is accumulated in ``dico`` (executor -> 1), which is used as an
    insertion-ordered set so that an executor reached through several paths
    is only recorded once.

    Args:
        dico: dictionary filled in place; nothing is returned.

    Raises:
        Exception: if an executor is neither a "Call" nor an "Operation".
    """
    executors = self.get_executors()

    # First register the executors of this level, so that they appear in
    # ``dico`` before the ones found in the called subworkflows.
    for exe in executors:
        dico[exe] = 1

    for exe in executors:
        exe_type = exe.get_type()
        if exe_type == "Call":
            calls = [exe]
        elif exe_type == "Operation":
            # An operation can have calls among its origins
            calls = [o for o in exe.get_origins() if o.get_type() == "Call"]
        else:
            raise Exception("This shouldn't happen")

        # Only subworkflows contain executors of their own
        for call in calls:
            called = call.get_first_element_called()
            if called.get_type() == "Subworkflow":
                called.get_all_executors(dico)
#Method that rewrites an operation to simplify it -> decompose it into multiple lines -> to be able to manipulate the calls in an easier way
def simplify_code(self):
    """Return the operation's code with every call origin moved onto its own lines.

    For each origin of type "Call", the simplified code of the call is
    emitted first, followed by an assignment of the called element's
    ``.out[0]`` to a fresh ``operation_<id>_<n>`` variable. Every occurrence
    of the call's original code inside the operation is then replaced by
    that variable. Origins of any other type are left untouched.

    Returns:
        The extracted calls (in origin order), one per block of lines,
        followed by the rewritten operation.
    """
    code = self.get_code(get_OG=True)
    # Short identifier taken from the string form of the operation, used to
    # keep the generated variable names distinct between operations
    operation_id = str(self)[-7:-2]

    extracted = []
    for origin in self.origins:
        if origin.get_type() != "Call":
            continue
        variable = f"operation_{operation_id}_{len(extracted) + 1}"
        simplified_call = origin.simplify_code()
        called_name = origin.get_first_element_called().get_name()
        # NOTE(review): only the first output of the called element is used
        extracted.append(f"{simplified_call}\n{variable} = {called_name}.out[0]")
        code = code.replace(origin.get_code(get_OG=True), variable)

    return "\n".join(extracted + [code])
def _is_escaped(code, pos):
    """Return True if the character at ``pos`` follows an odd run of backslashes."""
    backslashes = 0
    cursor = pos - 1
    while cursor >= 0 and code[cursor] == "\\":
        backslashes += 1
        cursor -= 1
    return backslashes % 2 == 1


def format_with_tabs(code):
    """Indent ``code`` with one tab per level of open curly brackets.

    Tabs are inserted right after a newline, only when that newline is
    outside any parentheses and outside any string (single, double or triple
    quoted). Curly brackets and parentheses found inside a string are
    ignored. The depth is only decreased once the ``}`` itself is read, so
    the line holding a closing bracket keeps the indentation of the block it
    closes.

    Args:
        code: the text to indent (not modified).

    Returns:
        A new string with the tabs inserted.
    """
    pieces = []
    curly_depth, paren_depth = 0, 0
    # Delimiter of the string currently open ("'", '"', "'''" or '"""')
    open_quote = None

    pos = 0
    length = len(code)
    while pos < length:
        char = code[pos]
        triple = code[pos:pos + 3]

        # Triple quotes are checked first: they can only open when no string
        # is open, and only close the matching triple-quoted string
        if triple in ("'''", '"""') and open_quote in (None, triple):
            open_quote = triple if open_quote is None else None
            pieces.append(triple)
            # Move exactly past the delimiter so the next character is
            # analysed normally
            pos += 3
            continue

        if open_quote is None:
            if char == "{":
                curly_depth += 1
            elif char == "}":
                curly_depth -= 1
            elif char == "(":
                paren_depth += 1
            elif char == ")":
                paren_depth -= 1
            elif char in ("'", '"') and not _is_escaped(code, pos):
                open_quote = char
        elif char == open_quote and not _is_escaped(code, pos):
            open_quote = None

        pieces.append(char)
        if char == "\n" and open_quote is None and paren_depth == 0 and curly_depth > 0:
            pieces.append("\t" * curly_depth)
        pos += 1

    # Joining once avoids rebuilding the whole string at every newline
    return "".join(pieces)
def get_all_executors(self):
    """Return the executors of the workflow, subworkflows included, without duplicates.

    The dictionary filled by the Nextflow file is used as an ordered set:
    the returned list follows the order in which executors were recorded.
    """
    found = {}
    self.nextflow_file.get_all_executors(found)
    return list(found)


def simplify_workflow_code(self):
    """Return the workflow, written as a single file, with every executor simplified.

    Each occurrence of an executor's original code is replaced by the
    simplified version produced by the executor itself.

    Raises:
        Exception: if an executor is neither a "Call" nor an "Operation";
            the offending type and code are given in the message.
    """
    # Only the merged code is needed here, the second returned value is not used
    code, _ = self.write_workflow_into_one_file()
    for exe in self.get_all_executors():
        exe_type = exe.get_type()
        if exe_type not in ("Call", "Operation"):
            raise Exception(
                f"This shouldn't happen: unexpected executor of type '{exe_type}' with code '{exe.get_code()}'"
            )
        code = code.replace(exe.get_code(get_OG=True), exe.simplify_code())
    return code
= True)) + + #Get the clusters and the code self.nextflow_file.generate_user_view(relevant_processes = relevant_processes, processes_2_remove = []) clusters = self.nextflow_file.graph.get_clusters_from_user_view() print(clusters) - code, ankers = self.write_workflow_into_one_file() - #print(code) - - for i in range(len(clusters)): - c = clusters[i] - if(len(c)>1): - clusters[i] = self.nextflow_file.graph.get_induced_subgraph(c) - print(clusters) + + #Get the clsuters with the corresponding operations inside + #for i in range(len(clusters)): + # c = clusters[i] + # if(len(c)>1): + # clusters[i] = self.nextflow_file.graph.get_induced_subgraph(c) + #print(clusters) + #Get the topological order clusters = self.nextflow_file.graph.get_topogical_order(clusters) print(clusters) + + + #Creating the subworkflows from clusters calls_in_operations = [] for elements in clusters: + name, body, take, emit = "", "", "", "" for ele in elements: if(ele.get_type()=="Process"): + #Determine the name of the created subworkflow cluster if(ele.get_name() in relevant_processes): name = f"cluster_{ele.get_name()}" #Get the call of thing (either process or subworkflow) #TODO -> check it works with subworkflows call = ele.get_call() + #This verification is really important if(len(call)!=1): + for c in call: + print(c.get_code(get_OG=True)) + raise Exception("This shoudn't happen since duplicate mode is activated") call = call[0] printed_condition = " && ".join(call.get_condition().get_conditions()) @@ -786,23 +815,26 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen else: body+=f"\n{call.get_code()}\n" elif(ele.get_type()=="Operation"): - origins = ele.get_origins() - for o in origins: - if(o.get_type()=="Call"): - calls_in_operations.append(o) - printed_condition = " && ".join(ele.get_condition().get_conditions()) - if(printed_condition!=""): - body+=f"if({printed_condition}) {{\n{ele.get_code()}\n}}\n" - else: - body+=f"\n{ele.get_code()}\n" + + #Ignore 
these cases + if(ele.get_code()[:4] not in ["emit", "take"]): + origins = ele.get_origins() + for o in origins: + if(o.get_type()=="Call"): + calls_in_operations.append(o) + printed_condition = " && ".join(ele.get_condition().get_conditions()) + if(printed_condition!=""): + body+=f"if({printed_condition}) {{\n{ele.get_code()}\n}}\n" + else: + body+=f"\n{ele.get_code()}\n" + #Here we removing the Call_12313 thing for call in calls_in_operations: body = body.replace(call.get_code(), "") body = body.replace(str(call), call.get_code()) - subworkflow_code = f"subworkflow {name} {{\n{take}\n{body}\n{emit}\n}}" - print(subworkflow_code) - print() + print(format_with_tabs(subworkflow_code)) + print("-----------") #So basically when retriving a thing (process or subworkflow)