Skip to content
Snippets Groups Projects
Commit fde3727c authored by George Marchment's avatar George Marchment
Browse files

Added functionality to extract libraries and packages in R and Python in...

Added functionality to extract libraries and packages in R and Python in process scripts -> both internal and external
parent aa312abc
No related branches found
No related tags found
No related merge requests found
Pipeline #13865 passed with stage
in 1 minute and 3 seconds
......@@ -53,6 +53,7 @@ class BioFlowInsightError(Exception):
#* [7] -> Tuple with emit (ch1, ch2) = emit.out
#* [9] -> Tuple with call (ch1, ch2) = wf()
#* [11] -> Failed to extract the operation or call at the line x. Try rewriting it in a simplified version.
#* [13] -> Multiple scripts with the same name were defined in the source code -> don't know which one to extract then when calling 'get_external_scripts_code'
......
......@@ -846,4 +846,49 @@ def check_if_element_in_tab_rocrate(tab, id):
for ele in tab:
if(ele["@id"]==id):
return True
return False
\ No newline at end of file
return False
#Function that parses python script and extracts the packages which are imported
def get_python_packages(script):
    """Extract the top-level packages imported by a Python script.

    The script is scanned with regular expressions (it is not parsed), so the
    result is a best-effort list. Supported forms:

        from fibo import *
        from sound.effects.echo import echofilter   -> "sound"
        import fibo
        import fibo, sys
        import sound.effects.echo                   -> "sound"
        import numpy as np                          -> "numpy"
        import numpy as np, os.path                 -> "numpy", "os"

    Relative imports ("from . import x", "from .mod import y") refer to the
    current package, not to an installable one, so they are ignored.

    Args:
        script: source code of the script, as a string.

    Returns:
        List of top-level package names: first those found in "from" imports,
        then those found in plain "import" statements. Duplicates are kept.
    """
    packages = []

    # STEP1: handle 'from X import ...' statements, then strip them from the
    # text so that their trailing 'import ...' part is not picked up by STEP2.
    # group(1) = leading dots of a relative import, group(2) = dotted module.
    from_pattern = r"\bfrom\s+(\.*)(\w+(?:\.\w+)*)?\s+import\b.*"
    for match in re.finditer(from_pattern, script):
        dots, module = match.group(1), match.group(2)
        # Only absolute imports name an external package
        if module and not dots:
            packages.append(module.split(".")[0])
    script = re.sub(from_pattern, "", script)

    # STEP2: handle the remaining 'simple' imports. Each comma separated item
    # is a dotted name optionally followed by 'as alias'.
    item = r"\w+(?:\.\w+)*(?:[ \t]+as[ \t]+\w+)?"
    import_pattern = rf"\bimport\s+({item}(?:\s*,\s*{item})*)"
    for match in re.finditer(import_pattern, script):
        for name in match.group(1).split(","):
            # Drop the alias (if any), then keep only the top-level package
            packages.append(name.split()[0].split(".")[0])
    return packages
#Function that parses R script and extracts the libraries which are loaded
def get_R_libraries(script):
    """Extract the libraries loaded with 'library(...)' in an R script.

    The script is scanned with a regular expression (it is not parsed).
    The library name may be bare or quoted and may contain dots, e.g.
    library(ggplot2), library("tidyr"), library(data.table).

    Args:
        script: source code of the script, as a string.

    Returns:
        List of library names in order of appearance. Duplicates are kept.
    """
    # group(1) = optional opening quote (must be matched by the same closing
    # quote), group(2) = library name. '\b' avoids matching e.g. my_library(x).
    pattern = r"""\blibrary\s*\(\s*(["']?)([\w.]+)\1\s*\)"""
    return [match.group(2) for match in re.finditer(pattern, script)]
\ No newline at end of file
......@@ -3,7 +3,7 @@ import glob
from .code_ import Code
from .nextflow_building_blocks import Nextflow_Building_Blocks
from .outils import remove_jumps_inbetween_parentheses, remove_jumps_inbetween_curlies, sort_and_filter, get_dico_from_tab_from_id, check_if_element_in_tab_rocrate
from .outils import remove_jumps_inbetween_parentheses, remove_jumps_inbetween_curlies, sort_and_filter, get_dico_from_tab_from_id, check_if_element_in_tab_rocrate, get_python_packages, get_R_libraries
from .bioflowinsighterror import BioFlowInsightError
from . import constant
......@@ -67,20 +67,65 @@ class Process(Nextflow_Building_Blocks):
#print(workflow_directory)
#import os
#print(os.getcwd(), self.origin.get_address(), self.get_workflow_address())
scripts = []
for call in calls:
#Check first if the file is in the bin
file = glob.glob(f'{self.get_workflow_address()}/bin/**/{call}', recursive=True)
if(len(file)>1):
print(file)
print("More than one file found!")
raise BioFlowInsightError(f"More than one script named '{call}' in the workflow source code bin, don't know which one to use when using the process '{self.get_name()}'", num = 13, origin=self)
#If not we search again
if(len(file)==0):
elif(len(file)==0):
file = glob.glob(f'{self.get_workflow_address()}/**/{call}', recursive=True)
if(len(file)>1):
print(file)
print("More than one file found!")
raise BioFlowInsightError(f"More than one script named '{call}' in the workflow source code, don't know which one to use when using the process '{self.get_name()}'", num = 13, origin=self)
for f in file:
with open(f, 'r') as s:
scripts.append(s.read())
return scripts
def get_python_packages_imported_internal_script(self):
    """Return the Python packages found in this process's own script code.

    The script code is obtained from self.get_script_code() and scanned
    with get_python_packages.
    """
    internal_code = self.get_script_code()
    return list(get_python_packages(internal_code))
def get_python_packages_imported_external_scripts(self):
    """Return the Python packages found in the external scripts of the process.

    Every script returned by self.get_external_scripts_code() is scanned
    with get_python_packages; the results are concatenated in order.
    """
    return [
        package
        for external_code in self.get_external_scripts_code()
        for package in get_python_packages(external_code)
    ]
#This method checks the script and the external script calls for python packages imports
def get_python_packages_imported(self):
    """Return the Python packages imported by the process.

    The result is the packages of the internal script followed by the
    packages of the external scripts.
    """
    from_internal = self.get_python_packages_imported_internal_script()
    from_external = self.get_python_packages_imported_external_scripts()
    return [*from_internal, *from_external]
def get_R_libraries_loaded_internal_script(self):
    """Return the R libraries found in this process's own script code.

    The script code is obtained from self.get_script_code() and scanned
    with get_R_libraries.
    """
    internal_code = self.get_script_code()
    return list(get_R_libraries(internal_code))
def get_R_libraries_loaded_external_scripts(self):
    """Return the R libraries found in the external scripts of the process.

    Every script returned by self.get_external_scripts_code() is scanned
    with get_R_libraries; the results are concatenated in order.
    """
    return [
        library
        for external_code in self.get_external_scripts_code()
        for library in get_R_libraries(external_code)
    ]
#This method checks the script and the external script calls for R libraries loaded
def get_R_libraries_loaded(self):
    """Return the R libraries loaded by the process.

    The result is the libraries of the internal script followed by the
    libraries of the external scripts.
    """
    from_internal = self.get_R_libraries_loaded_internal_script()
    from_external = self.get_R_libraries_loaded_external_scripts()
    return [*from_internal, *from_external]
#TODO -> do the same with Ruby/Perl scripts
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment