diff --git a/src/process.py b/src/process.py index eb9a691c3b86eaa9ec2c2ac331324e858c4e988f..e5b25acceac12e3444e7c78cba6c891284d8f16f 100644 --- a/src/process.py +++ b/src/process.py @@ -19,6 +19,7 @@ class Process(Nextflow_Building_Blocks): self.input_code = "" self.output_code = "" self.when_code = "" + self.pusblishDir_code = "" self.script_code = "" self.tools = [] self.modules = [] @@ -103,7 +104,7 @@ class Process(Nextflow_Building_Blocks): packages = [] packages+= self.get_python_packages_imported_internal_script() packages+= self.get_python_packages_imported_external_scripts() - return packages + return list(set(packages)) def get_R_libraries_loaded_internal_script(self): @@ -122,7 +123,7 @@ class Process(Nextflow_Building_Blocks): libraries = [] libraries+= self.get_R_libraries_loaded_internal_script() libraries+= self.get_R_libraries_loaded_external_scripts() - return libraries + return list(set(libraries)) #TODO -> do the same with Ruby/PErl scripts @@ -155,6 +156,11 @@ class Process(Nextflow_Building_Blocks): + def get_input_code_lines(self): + tab = [] + for l in self.input_code.split('\n'): + tab.append(l.strip()) + return tab def get_inputs(self): return self.inputs @@ -165,6 +171,12 @@ class Process(Nextflow_Building_Blocks): def get_outputs(self): return self.outputs + def get_output_code_lines(self): + tab = [] + for l in self.output_code.split('\n'): + tab.append(l.strip()) + return tab + def get_nb_outputs(self): return len(self.outputs) @@ -184,6 +196,14 @@ class Process(Nextflow_Building_Blocks): temp_code = temp_code[:-1].strip() if(len(temp_code)==0): raise BioFlowInsightError(f"The process '{self.get_name()}' defined in the file '{self.get_file_address()}' is an empty process!", num = 22, origin=self) + + publishDir_multiple, publishDir_pos= False, (0, 0) + for match in re.finditer(r"publishDir", code): + #if(publishDir_multiple): + # raise BioFlowInsightError(f"Multiple 'publishDir' were found in the process '{self.get_name()}'.", num = 22, origin=self) + publishDir_pos = match.span(0) + publishDir_multiple = True + input_multiple, input_pos= False, (0, 0) for match in re.finditer(constant.INPUT, code): if(input_multiple): @@ -210,8 +230,8 @@ class Process(Nextflow_Building_Blocks): script_pos = match.span(0) break - positions = [input_pos, output_pos, when_pos, script_pos] - variables_index = ['input', 'output', 'when', 'script'] + positions = [publishDir_pos, input_pos, output_pos, when_pos, script_pos] + variables_index = ['pusblishDir', 'input', 'output', 'when', 'script'] positions, variables_index = sort_and_filter(positions, variables_index) @@ -226,6 +246,8 @@ class Process(Nextflow_Building_Blocks): self.input_code = temp_code elif(variables_index[i]=='output'): self.output_code = temp_code + elif(variables_index[i]=='pusblishDir'): + self.pusblishDir_code = temp_code elif(variables_index[i]=='when'): self.when_code = temp_code elif(variables_index[i]=='script'): @@ -305,11 +327,20 @@ class Process(Nextflow_Building_Blocks): def get_input_parameters(self): code = self.get_input_code() + + #This is to remove the from for the DSL1 processes + #But also remoce the 'stageAs' + lines = code.split('\n') + code = "" + for l in lines: + code+=l.split(" from ")[0].split("stageAs")[0] + code+'\n' + parameters = [] - for match in re.finditer(r"\w+", code): + for match in re.finditer(r"\w+(\.\w+)*", code): parameters.append(match.group(0)) parameters = list(set(parameters))#Here we can a unique cause a parameter can only be given once in any case - words_2_remove = ["path", "val", "tuple", "into", "stageAs", "emit"] + words_2_remove = ["path", "val", "tuple", "into", "stageAs", "emit", "file", "set"] for word in words_2_remove: try: parameters.remove(word)