diff --git a/src/condition.py b/src/condition.py index 34a1fcbe8cfddb9ccc9524f6af7d28aed790c45c..7b4a5e8ea4c1d118e6b4ff43363f93e1309a02d7 100644 --- a/src/condition.py +++ b/src/condition.py @@ -24,6 +24,6 @@ class Condition: if(condition_extend[0]<pos and pos<condition_extend[1]): self.conditions.append(c) #print(conditions_dico) - print(thing_defined, self.conditions) + #print(thing_defined, self.conditions) #print() diff --git a/src/operation.py b/src/operation.py index 6cdc9556e744c47e1080160cea373c7c27f86ec4..25fa23e2bbc7c4397051cb49309025a478217d6d 100644 --- a/src/operation.py +++ b/src/operation.py @@ -7,7 +7,7 @@ import warnings #- add a list of words illegal for channel eg. [true, process, workflow...] import re -from .outils import get_end_operator, get_end_call, get_curly_count +from .outils import get_end_operator, get_end_call, get_curly_count, operation_2_DSL2 from .code_ import Code from .condition import Condition from .executor import Executor @@ -872,4 +872,9 @@ class Operation(Executor): print(self.get_file_address()) raise Exception(f"This souldn't happen! The origin of an operation is of type '{o.get_type()}'. 
def process_2_DSL2(code):
    """Convert DSL1 process input/output qualifiers to their DSL2 names.

    Rewrites the DSL1 ``file`` qualifier to ``path`` and the DSL1 ``set``
    qualifier to ``tuple`` (see the Nextflow DSL1->DSL2 migration notes).

    Matching is word-bounded, so identifiers that merely *end* in the
    keyword — e.g. ``profile(`` or ``subset(`` — are left untouched
    (the unbounded patterns previously produced ``propath(`` / ``subtuple(``).
    """
    def _qualifier_replacer(new_name):
        # The captured group is either a single space ("file x") or
        # optional-spaces-plus-paren ("file(x)" / "file (x)").  Reproduce the
        # same separator shape with the new qualifier name.
        def _replace(match):
            if match.group(1) == " ":
                return new_name + " "
            return new_name + "("
        return _replace

    # \b prevents rewriting identifiers that merely contain the keyword.
    code = re.sub(r'\bfile( | *\()', _qualifier_replacer("path"), code)
    code = re.sub(r'\bset( | *\()', _qualifier_replacer("tuple"), code)
    return code


def operation_2_DSL2(code, origin):
    """Convert a single DSL1 operation to its DSL2 equivalent.

    Parameters:
        code   -- the operation's source text (DSL1).
        origin -- the Operation object owning this code; its ``get_gives()``
                  channels are used to expand ``.into{...}`` fan-outs.

    Returns the rewritten code ("" when the operation is simply dropped).
    Raises BioFlowInsightError for DSL1 operators with no DSL2 equivalent.
    """
    def _raise_unsupported(operator, num):
        # Imported lazily so the happy path has no dependency on the error class.
        from .bioflowinsighterror import BioFlowInsightError
        raise BioFlowInsightError(f"{operator} is not supported", num=num, origin=origin)

    # channel.close() has no DSL2 equivalent -> just remove it.
    if re.fullmatch(r'\w+\s*\.\s*close\s*\(\s*\)', code):
        return ""

    # Straight operator renames: DSL1 name -> DSL2 name.
    code = re.sub(r'\.\s*create\s*\(\s*\)', ".empty()", code)            # create -> empty
    code = re.sub(r'\.\s*groupBy\s*\(', ".groupTuple(", code)            # groupBy -> groupTuple
    code = re.sub(r'\.\s*(print|println)\s*\(', ".view(", code)          # print/println -> view
    code = re.sub(r'\.\s*spread\s*\(', ".combine(", code)                # spread -> combine

    # Operators with no DSL2 equivalent: fail loudly.
    # NOTE(review): the original also re-checked for ".spread(" here, but that
    # check was unreachable — spread is rewritten to combine above — so the
    # dead branch was removed.
    for operator, num in (("choice", -2), ("countBy", -3), ("fork", -4),
                          ("route", -5), ("separate", -6)):
        if re.search(r"\.\s*" + operator, code):
            _raise_unsupported(operator, num)

    # Important: this must stay last.  ".into{a; b}" fans a channel out into
    # several named channels; in DSL2 this becomes one ".set{...}" per target
    # channel declared by the origin operation.
    tab = re.split(r'\.\s*into\s*\{', code)
    if len(tab) > 1:
        body = tab[0]
        code = ""
        for gives in origin.get_gives():
            code += f"\n{body}.set{{{gives.get_code()}}}"

    return code
import constant @@ -16,6 +16,7 @@ class Process(Nextflow_Building_Blocks): self.name = "" self.alias = "" self.inputs = [] + self.raw_input_names = []#This is used to convert DSL1 workflows to DSL2 self.outputs = [] self.input_code = "" self.output_code = "" @@ -325,24 +326,30 @@ class Process(Nextflow_Building_Blocks): self.inputs.append(input) input.add_sink(self) - #Case there is a single channel as an input -> doesn't use from to import channel -> uses file (see https://github.com/nextflow-io/nextflow/blob/45ceadbdba90b0b7a42a542a9fc241fb04e3719d/docs/process.rst) - pattern = constant.FILE - for match in re.finditer(pattern, code): - add_channel(match.group(1)) - - - #Case there are multiple channels as input (e.g. channel1.mix(channel2)) - pattern = constant.FROM - for match in re.finditer(pattern, code): - extracted = match.group(1).strip() - if(bool(re.fullmatch(constant.WORD, extracted))): + for line in code.split("\n"): + + #Case there is a single channel as an input -> doesn't use from to import channel -> uses file (see https://github.com/nextflow-io/nextflow/blob/45ceadbdba90b0b7a42a542a9fc241fb04e3719d/docs/process.rst) + pattern = constant.FILE + for match in re.finditer(pattern, line+"\n"): + extracted = match.group(1).strip() add_channel(extracted) - else: - from .operation import Operation - operation = Operation(code=extracted, origin=self.origin) - operation.initialise() - operation.is_defined_in_process(self) - self.inputs+=operation.get_origins() + self.raw_input_names.append(extracted) + + + #Case there are multiple channels as input (e.g. 
channel1.mix(channel2)) + pattern = constant.FROM + for match in re.finditer(pattern, line+"\n"): + extracted = match.group(1).strip() + self.raw_input_names.append(extracted) + #print(extracted) + if(bool(re.fullmatch(constant.WORD, extracted))): + add_channel(extracted) + else: + from .operation import Operation + operation = Operation(code=extracted, origin=self.origin) + operation.initialise() + operation.is_defined_in_process(self) + self.inputs+=operation.get_origins() #self.inputs = list(set(self.inputs))#TODO Check this @@ -525,3 +532,45 @@ class Process(Nextflow_Building_Blocks): if(not check_if_element_in_tab_rocrate(dico_process["isPartOf"], parent_key)): dico_process["isPartOf"].append({"@id":parent_key}) + def convert_input_code_to_DSL2(self): + code = self.input_code + code = process_2_DSL2(code) + lines = [] + for line in code.split("\n"): + lines.append(line.split(" from ")[0]) + code = "\n".join(lines) + return code + + def convert_output_code_to_DSL2(self): + code = self.output_code + code = process_2_DSL2(code) + code = code.replace(" into ", ", emits: ") + code = code.replace(" mode flatten", "") + return code + + #This method is to detect which are the channels which need to be flattened + #See https://github.com/nextflow-io/nextflow/blob/be1694bfebeb2df509ec4b42ea5b878ebfbb6627/docs/dsl1.md + def get_channels_to_flatten(self): + code = self.output_code + channels = [] + for match in re.finditer(r"(\w+) mode flatten", code): + channels.append(match.group(1)) + return channels + + def convert_to_DSL2(self): + if(self.get_DSL()=="DSL2"): + print("Workflow is already written in DSL2") + else: + code = self.get_code() + call = [f"{self.get_name()}({', '.join(self.raw_input_names)})"] + code = code.replace(self.input_code, self.convert_input_code_to_DSL2()) + code = code.replace(self.output_code, self.convert_output_code_to_DSL2()) + channels_to_flatten = self.get_channels_to_flatten() + for o in self.outputs: + if(o.get_code() in 
channels_to_flatten): + call.append(f"{o.get_code()} = {self.get_name()}.out.{o.get_code()}.flatten()") + else: + call.append(f"{o.get_code()} = {self.get_name()}.out.{o.get_code()}") + call = "\n".join(call) + return code, call + diff --git a/src/workflow.py b/src/workflow.py index 9252627ab47b644798d193394d7f703dc8824530..b0c6fb0457f9b5c9d7b6c128a7b550edac2fc25c 100644 --- a/src/workflow.py +++ b/src/workflow.py @@ -15,6 +15,7 @@ import json from pathlib import Path import glob import ctypes +import time @@ -602,4 +603,63 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen return self.nextflow_file.check_fake_dependency_user_view() def generate_user_and_process_metadata(self): - self.nextflow_file.generate_user_and_process_metadata() \ No newline at end of file + self.nextflow_file.generate_user_and_process_metadata() + + def convert_to_DSL2(self): + if(self.get_DSL()=="DSL2"): + print("Workflow is already written in DSL2") + else: + #This tag is used as an identification to safely manipulate the string + tag = str(time.time()) + + code = self.nextflow_file.get_code() + start_code = "#!/usr/bin/env nextflow" + end_code = "workflow.onComplete" + + pos_start, pos_end= 0, len(code) + if(code.find(end_code)!=-1): + pos_end = code.find(end_code) + code_to_replace = code[pos_start:pos_end] + if(code.find(start_code)!=-1): + pos_start = code.find(start_code)+len(start_code) + body = code[pos_start:pos_end]#.replace('\n', '\n\t') + + include_section = f"//INCLUDE_SECTION_{tag}" + params_section = f"//PARAMS_SECTION_{tag}" + process_section = f"//PROCESS_SECTION_{tag}" + + + code = code.replace(code_to_replace, f"""{start_code}\n\n{include_section}\n\n{params_section}\n\n{process_section}\n\nworkflow{{\n\n{body}\n}}\n\n""") + + params_list = [] + for match in re.finditer(r"params.\w+ *\= *[^\n=]([^\n])*", code): + params_list.append(match.group(0)) + for params in params_list: + code = code.replace(params, "") + params_code = 
"\n".join(params_list) + code = code.replace(params_section, params_code) + + + processes = [] + to_replace = [] + for p in self.nextflow_file.get_processes(): + new_process, call = p.convert_to_DSL2() + processes.append(new_process) + to_replace.append((p.get_code(), call)) + + for r in to_replace: + code = code.replace(r[0], r[1]) + code = code.replace(process_section, "\n\n".join(processes)) + + #TODO -> update the operations -> also consider the operations in the params of the calls which need to be updated + + for o in self.nextflow_file.get_executors(): + if(o.get_type()=="Operation"): + code = code.replace(o.get_code(get_OG=True), o.convert_to_DSL2()) + else: + raise Exception(f"Executor of type '{o.get_type()}' was extracted in a DSL1 workflow! This shoudn't happen! The code is '{o.get_code()}'") + #print(code) + return code + #print(code) + # + #for c in self.get_channels(): \ No newline at end of file