# Matches a complete 'from <module> import ...' statement up to the end of its line.
# Group 1 is the module path; it may be relative ('.', '..', '.outils', '..pkg.mod').
_FROM_IMPORT_STATEMENT = re.compile(r"\bfrom\s+(\.+|\.*\w+(?:\.\w+)*)\s+import\b.+")

# One entry of a plain import statement: 'fibo', 'sound.effects.echo', 'numpy as np'.
# The alias part only accepts spaces/tabs so that it can never run over a line break.
_IMPORTED_MODULE = r"\w+(?:\.\w+)*(?:[ \t]+as[ \t]+\w+)?"

# Matches 'import a', 'import a, b.c, d as e'. Group 1 is the comma separated module list.
_PLAIN_IMPORT_STATEMENT = re.compile(
    r"\bimport\s+(" + _IMPORTED_MODULE + r"(?:\s*,\s*" + _IMPORTED_MODULE + r")*)"
)

# Matches library(pkg), library("pkg"), library('pkg') and library(pkg, <other args>).
# Group 2 is the package name; dots are allowed since R package names may contain them.
_R_LIBRARY_CALL = re.compile(r"""\blibrary\s*\(\s*(["']?)([\w.]+)\1\s*[,)]""")


#Function that parses python script and extracts the packages which are imported
def get_python_packages(script):
    """Return the top-level packages imported by a Python script.

    The script is scanned textually (it is not executed or parsed as an AST),
    so it may be a fragment. Handled forms:
        from fibo import *                          -> fibo
        from sound.effects.echo import echofilter   -> sound
        import fibo                                 -> fibo
        import fibo, sys                            -> fibo, sys
        import sound.effects.echo                   -> sound
        import numpy as np                          -> numpy
        import os.path, numpy as np                 -> os, numpy
    Relative imports ('from . import x', 'from .mod import x') refer to the
    script's own package and are therefore not reported.

    Parameters:
        script: source code of the script, as a string.

    Returns:
        A list of package names (duplicates are kept). Packages coming from
        'from ... import' statements are listed first (single-name modules,
        then dotted modules), followed by those from plain 'import' statements.
    """
    #STEP1
    #Extract the packages which are imported with 'from ... import ...'
    simple_roots = []
    dotted_roots = []
    for match in _FROM_IMPORT_STATEMENT.finditer(script):
        module = match.group(1)
        if module.startswith("."):
            #Relative import -> it is not an external package
            continue
        root, _, submodule = module.partition(".")
        if submodule:
            dotted_roots.append(root)
        else:
            simple_roots.append(root)
    #Single-name modules are listed before dotted ones, as in the previous implementation
    packages = simple_roots + dotted_roots

    #The 'from' statements (relative ones included) are removed so that the
    #'import' keyword they contain is not analysed a second time below
    remaining = _FROM_IMPORT_STATEMENT.sub("", script)

    #STEP2
    #Extract the packages from the rest of the 'simple' imports
    for match in _PLAIN_IMPORT_STATEMENT.finditer(remaining):
        for item in match.group(1).split(","):
            #Only the top-level package is kept: 'os.path' -> 'os', 'numpy as np' -> 'numpy'
            packages.append(re.match(r"\s*(\w+)", item).group(1))

    return packages


#Function that parses R script and extracts the libraries which are loaded
def get_R_libraries(script):
    """Return the names of the libraries loaded with library(...) in an R script.

    The package name may be bare or quoted, may contain dots (e.g. data.table)
    and may be followed by further arguments (e.g. quietly = TRUE).

    Parameters:
        script: source code of the script, as a string.

    Returns:
        A list of library names in order of appearance (duplicates are kept).
    """
    return [match.group(2) for match in _R_LIBRARY_CALL.finditer(script)]
import constant @@ -67,20 +67,65 @@ class Process(Nextflow_Building_Blocks): #print(workflow_directory) #import os #print(os.getcwd(), self.origin.get_address(), self.get_workflow_address()) + scripts = [] for call in calls: #Check first if the file is in the bin file = glob.glob(f'{self.get_workflow_address()}/bin/**/{call}', recursive=True) if(len(file)>1): - print(file) - print("More than one file found!") + raise BioFlowInsightError(f"More than one script named '{call}' in the workflow source code bin, don't know which one to use when using the process '{self.get_name()}'", num = 13, origin=self) #If not we search again - if(len(file)==0): + elif(len(file)==0): file = glob.glob(f'{self.get_workflow_address()}/**/{call}', recursive=True) if(len(file)>1): - print(file) - print("More than one file found!") + raise BioFlowInsightError(f"More than one script named '{call}' in the workflow source code, don't know which one to use when using the process '{self.get_name()}'", num = 13, origin=self) + for f in file: + with open(f, 'r') as s: + scripts.append(s.read()) + + return scripts + + + def get_python_packages_imported_internal_script(self): + packages = [] + packages+= get_python_packages(self.get_script_code()) + return packages + + def get_python_packages_imported_external_scripts(self): + packages = [] + for s in self.get_external_scripts_code(): + packages+= get_python_packages(s) + return packages + + #This methods checks the script and the external script calls for python packages imports + def get_python_packages_imported(self): + packages = [] + packages+= self.get_python_packages_imported_internal_script() + packages+= self.get_python_packages_imported_external_scripts() + return packages + + + def get_R_libraries_loaded_internal_script(self): + libraries = [] + libraries+= get_R_libraries(self.get_script_code()) + return libraries + + def get_R_libraries_loaded_external_scripts(self): + libraries = [] + for s in self.get_external_scripts_code(): + 
libraries+= get_R_libraries(s) + return libraries + + #This method checks the script and the external script calls for the R libraries which are loaded + def get_R_libraries_loaded(self): + libraries = [] + libraries+= self.get_R_libraries_loaded_internal_script() + libraries+= self.get_R_libraries_loaded_external_scripts() + return libraries + + #TODO -> do the same with Ruby/Perl scripts +