diff --git a/src/main.py b/src/main.py
index 84af93f17fca5fe80f19c8030eda936ad4533e57..fc011ee6400a45b9ea6c0c623be34ad53fa1a73d 100644
--- a/src/main.py
+++ b/src/main.py
@@ -124,15 +124,16 @@ class Main(Nextflow_Building_Blocks):
                     most_influential_conditions[condition]+=num
             return most_influential_conditions
         elif(self.nextflow_file.get_DSL()=="DSL1"):
-            processes = self.get_modules_defined()
-            for p in processes:
-                if(p.is_initialised()):
-                    for condition in p.origin.get_all_conditions(conditions = {}):
-                        try:
-                            temp = most_influential_conditions[condition]
-                        except:
-                            most_influential_conditions[condition] = 0
-                        most_influential_conditions[condition]+=1
+            modules = self.get_modules_defined()
+            for p in modules:
+                if(p.get_type()=="Process"):
+                    if(p.is_initialised()):
+                        for condition in p.origin.get_all_conditions(conditions = {}):
+                            try:
+                                temp = most_influential_conditions[condition]
+                            except:
+                                most_influential_conditions[condition] = 0
+                            most_influential_conditions[condition]+=1
             return most_influential_conditions
         else:
             raise Exception("This shouldn't happen")
\ No newline at end of file
diff --git a/src/nextflow_file.py b/src/nextflow_file.py
index b78af3bb02c4bfb4ed7c3bc72855c4ad188bef7d..d9ad8fa78ed5ac74575f3df70d560fef8c3e5b70 100644
--- a/src/nextflow_file.py
+++ b/src/nextflow_file.py
@@ -326,9 +326,15 @@
             #Extarct Processes
             self.extract_processes()
             code = self.get_code()
-            #Replacing the processes defined with their identifiers -> this is to simplifly the analysis with the conditions
+            #Extract functions
+            self.extract_functions()
+
+
+            #Replacing the processes and functions defined with their identifiers -> this is to simplify the analysis with the conditions
             for process in self.processes:
                 code = code.replace(process.get_code(), f"process: {str(process)}")
+            for function in self.functions:
+                code = code.replace(function.get_code(), f"function: {str(function)}")
             self.main = Main(code= code, nextflow_file=self)
             self.main.initialise()
 
diff --git a/src/process.py b/src/process.py
index e717fe84b02b048c9109155602c7ce7d23371132..8da04e5bbab7045fa6da41fe8d3c6f22e6fb3120 100644
--- a/src/process.py
+++ b/src/process.py
@@ -423,4 +423,85 @@
         self.initialise_parts()
         self.initialise_inputs_outputs()
 
 
+    def convert_input_code_to_DSL2(self):
+        code = self.input_code
+        #code = process_2_DSL2(code)
+        lines = []
+        for line in code.split("\n"):
+            temp = process_2_DSL2(line.split(" from ")[0])
+            lines.append(temp)
+        code = "\n".join(lines)
+        return code
+
+    def convert_output_code_to_DSL2(self):
+        code = self.output_code
+        lines = []
+        for line in code.split("\n"):
+            line = line.replace(" into ", ", emit: ")
+            line = line.replace(" mode flatten", "")
+            #Remove optional true #TODO check if this breaks something
+            line = line.replace("optional true", "")
+            line = process_2_DSL2(line)
+            lines.append(line)
+        code = "\n".join(lines)
+        #Removing the extra emits
+        #so that each output line only has one
+        for line in self.outputs_per_line:
+            def replacer(match):
+                return match.group(1)
+            for o in line[1:]:
+                code = re.sub(fr"\,\s*{re.escape(o.get_code())}(\s|\,|\))", replacer, code+"\n")
+        return code
+
+    #This method detects the channels which need to be flattened
+    #See https://github.com/nextflow-io/nextflow/blob/be1694bfebeb2df509ec4b42ea5b878ebfbb6627/docs/dsl1.md
+    def get_channels_to_flatten(self):
+        code = self.output_code
+        channels = []
+        for match in re.finditer(r"(\w+) mode flatten", code):
+            channels.append(match.group(1))
+        return channels
+
+    #This method cleans the raw_input_names for use when rewriting DSL1 workflows
+    def clean_raw_input_names(self, raw_input_names):
+        for i in range(len(raw_input_names)):
+            if(bool(re.fullmatch(r"\w+\.val", raw_input_names[i]))):
+                raw_input_names[i] = raw_input_names[i].split('.')[0]
+        return raw_input_names
+
+    def get_parameters_call(self):
+        return ', '.join(self.clean_raw_input_names(self.raw_input_names))
+
+
+    def convert_to_DSL2(self):
+        if(self.get_DSL()=="DSL2"):
+            print("Workflow is already written in DSL2")
+        else:
+            code = self.get_code()
+            call = [f"{self.get_name()}({self.get_parameters_call()})"]
+            code = code.replace(self.input_code, self.convert_input_code_to_DSL2())
+            code = code.replace(self.output_code, self.convert_output_code_to_DSL2())
+            channels_to_flatten = self.get_channels_to_flatten()
+
+
+            #Rewriting the channel assignments so they match the new values emitted (single values)
+            index = 0
+            for line in self.outputs_per_line:
+                for emitted in line:
+                    o = self.outputs[index]
+                    if(o.get_code() in channels_to_flatten):
+                        call.append(f"{o.get_code()} = {self.get_name()}.out.{line[0].get_code()}.flatten()")
+                    else:
+                        call.append(f"{o.get_code()} = {self.get_name()}.out.{line[0].get_code()}")
+                    index+=1
+
+            #for o in self.outputs:
+            #    if(o.get_code() in channels_to_flatten):
+            #        call.append(f"{o.get_code()} = {self.get_name()}.out.{o.get_code()}.flatten()")
+            #    else:
+            #        call.append(f"{o.get_code()} = {self.get_name()}.out.{o.get_code()}")
+            call = "\n".join(call)
+            return code, call
+
+
diff --git a/src/root.py b/src/root.py
index 23edb5198a24679cb572da4f4dfea5d295f953c1..a7e3af10a5a3bd30bd7460bc600f1c9dc347aa39 100644
--- a/src/root.py
+++ b/src/root.py
@@ -164,6 +164,8 @@
                 b.get_inside_executors_rec(dico)
         return list(dico.keys())
 
+    def get_all_executors_from_workflow(self):
+        return self.get_executors_same_level()+self.get_inside_executors()
 
     #def get_calls(self):
     #    tab = []
diff --git a/src/workflow.py b/src/workflow.py
index 6aada985dc2907ef7376eab88eab6b108f6dc9de..d4f43d088df288b1b20b0bc7fff37a1a204e65ec 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -226,4 +226,81 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
         else:
             BioFlowInsightError("Need to activate 'duplicate' mode to use this method.")
 
 
+    #When there are multiple emits, turn them into one at the end of the call, e.g. instead of fastp_ch2 = fastp.out.fastp_ch2 -> have fastp_ch2 = fastp_ch
+    def convert_to_DSL2(self):
+        if(self.get_DSL()=="DSL2"):
+            print("Workflow is already written in DSL2")
+        else:
+            #This tag is used as an identifier to safely manipulate the string
+            tag = str(time.time())
+            nextflow_file = self.get_first_file()
+
+            code = nextflow_file.get_code()
+            start_code = r"#!/usr/bin/env nextflow"
+            start_code_pattern = r"\#\!\s*\/usr\/bin\/env\s+nextflow"
+            end_code = "workflow.onComplete"
+
+            pos_start, pos_end= 0, len(code)
+            if(code.find(end_code)!=-1):
+                pos_end = code.find(end_code)
+            code_to_replace = code[pos_start:pos_end]
+            for match in re.finditer(start_code_pattern, code):
+                pos_start = match.span(0)[1]+1
+            #if(code.find(start_code)!=-1):
+            #    pos_start = code.find(start_code)+len(start_code)
+            body = code[pos_start:pos_end]#.replace('\n', '\n\t')
+
+            include_section = f"//INCLUDE_SECTION_{tag}"
+            params_section = f"//PARAMS_SECTION_{tag}"
+            function_section = f"//FUNCTION_SECTION_{tag}"
+            process_section = f"//PROCESS_SECTION_{tag}"
+
+
+            code = code.replace(code_to_replace, f"""{start_code}\n\n\n{include_section}\n\n\n{params_section}\n\n\n{function_section}\n\n\n{process_section}\n\n\nworkflow{{\n\n{body}\n}}\n\n""")
+
+            ##I've put this in a comment since it's a DSL1 workflow
+            #params_list = []
+            #for match in re.finditer(r"params.\w+ *\= *[^\n=]([^\n])*", code):
+            #    params_list.append(match.group(0))
+            #for params in params_list:
+            #    code = code.replace(params, "")
+            #params_code = "\n".join(params_list)
+            #code = code.replace(params_section, params_code)
+
+            #Moving Functions
+            functions = []
+            for f in nextflow_file.functions:
+                function = f.get_code()
+                functions.append(function)
+            print(functions)
+            for r in functions:
+                code = code.replace(r, "")
+            code = code.replace(function_section, "\n\n".join(functions))
+
+            #Moving Processes
+            processes = []
+            to_replace = []
+            for p in nextflow_file.get_processes():
+                new_process, call = p.convert_to_DSL2()
+                processes.append(new_process)
+                to_replace.append((p.get_code(), call))
+
+            for r in to_replace:
+                code = code.replace(r[0], r[1])
+            code = code.replace(process_section, "\n\n".join(processes))
+
+            #TODO -> update the operations -> also consider the operations in the params of the calls which need to be updated
+
+            for o in self.get_workflow_main().root.get_all_executors_from_workflow():
+                if(o.get_type()=="Operation"):
+                    code = code.replace(o.get_code(get_OG=True), o.convert_to_DSL2())
+                else:
+                    raise Exception(f"Executor of type '{o.get_type()}' was extracted in a DSL1 workflow! This shouldn't happen! The code is '{o.get_code()}'")
+
+            #Putting || back
+            code = code.replace("$OR$", "||")
+
+            return code
+
+
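
For reference, a minimal standalone sketch of the DSL1 -> DSL2 output rewrite that convert_output_code_to_DSL2 and get_channels_to_flatten implement above. The sample output line and the process name fastp are hypothetical, and the process_2_DSL2() pass applied in the real method is omitted here:

    import re

    # Hypothetical DSL1 output line, as it could appear in self.output_code
    dsl1_output_line = 'file "*.json" into fastp_ch mode flatten'

    # DSL1 "into <channel>" becomes a DSL2 "emit:" declaration and
    # "mode flatten" is dropped from the output block (same string
    # replacements as convert_output_code_to_DSL2, minus process_2_DSL2)
    dsl2_output_line = (dsl1_output_line
                        .replace(" into ", ", emit: ")
                        .replace(" mode flatten", ""))
    print(dsl2_output_line)  # file "*.json", emit: fastp_ch

    # Channels declared with "mode flatten" are detected as in
    # get_channels_to_flatten and compensated for at the call site
    for ch in re.findall(r"(\w+) mode flatten", dsl1_output_line):
        print(f"{ch} = fastp.out.{ch}.flatten()")  # fastp_ch = fastp.out.fastp_ch.flatten()

This mirrors what Process.convert_to_DSL2 returns: the rewritten process body plus the call block (process call followed by the channel assignments, with .flatten() appended for channels that previously used "mode flatten") that replaces the DSL1 process definition in the workflow scope.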