Skip to content
Snippets Groups Projects
Commit 3a052510 authored by George Marchment's avatar George Marchment
Browse files

Added the extraction of the tools function to the process object

parent 4976c876
No related branches found
No related tags found
No related merge requests found
Pipeline #14612 failed with stage
in 2 minutes and 14 seconds
File added
......@@ -1391,5 +1391,250 @@ def remove_empty_conditions_place_anker(code, workflow):
code = code.replace(OG_anker, new_anker)
code = remove_empty_conditions(code)
return code
def extract_single_quote(text, start):
    """Return the index just past the single quote closing a quoted span.

    Scanning starts at ``start``, which is expected to be the index of the
    first character after the opening quote. A quote directly preceded by one
    backslash is considered escaped and skipped; a quote preceded by two
    backslashes is considered a real closing quote.

    Args:
        text: The string to scan.
        start: Index at which the search for the closing quote begins.

    Returns:
        The index immediately after the closing quote. This may be equal to
        ``len(text)`` when the closing quote is the last character.

    Raises:
        Exception: If no closing quote exists at or after ``start``.
    """
    for pos in range(start, len(text)):
        if text[pos] != "'":
            continue
        # Bounds-checked lookbehind: a negative index would silently wrap
        # around to the end of the string and misclassify the quote.
        prev_is_backslash = pos >= 1 and text[pos - 1] == "\\"
        prev_prev_is_backslash = pos >= 2 and text[pos - 2] == "\\"
        if not prev_is_backslash or prev_prev_is_backslash:
            return pos + 1
    raise Exception('Unable to extract')
def extract_double_quote(text, start):
    """Return the index just past the double quote closing a quoted span.

    Scanning starts at ``start``, which is expected to be the index of the
    first character after the opening quote. A quote directly preceded by one
    backslash is considered escaped and skipped; a quote preceded by two
    backslashes is considered a real closing quote.

    Args:
        text: The string to scan.
        start: Index at which the search for the closing quote begins.

    Returns:
        The index immediately after the closing quote. This may be equal to
        ``len(text)`` when the closing quote is the last character.

    Raises:
        Exception: If no closing quote exists at or after ``start``.
    """
    for pos in range(start, len(text)):
        if text[pos] != '"':
            continue
        # Bounds-checked lookbehind: a negative index would silently wrap
        # around to the end of the string and misclassify the quote.
        prev_is_backslash = pos >= 1 and text[pos - 1] == "\\"
        prev_prev_is_backslash = pos >= 2 and text[pos - 2] == "\\"
        if not prev_is_backslash or prev_prev_is_backslash:
            return pos + 1
    raise Exception('Unable to extract')
#This function extracts the tools used in a script by running each line of the bash script
#in an empty bash environment using a singularity image (by doing this we can parse the errors
#and extract the tools)
# Names that can be reported as missing executables but that are shell
# keywords, syntax fragments or other noise -> obviously they are not tools.
_EXTRACT_TOOLS_IGNORED_NAMES = ['if', 'elif', "else", "done", "fi", 'do', 'for', 'module','then',
                                "def", "{", "}", "end_versions", ":", "stub:", "stub :", "__pycache__",
                                "cut", "source", "export", "[", "]", "$", ",", "case", "esac", "exit",
                                "cli", "e0f", "gnu", "env", "!", "function", "readme.md", "false", "while"]


def _tools_from_reported_name(reported, command, OG_script, general_tools):
    """Translate one executable name reported in the error output into tool names.

    Args:
        reported: Last path component of the name found in a ``FATAL`` message.
        command: The script line that was executed (searched for ``.jar`` files).
        OG_script: The untouched script (searched for ``template`` files).
        general_tools: Names that must not be reported as tools.

    Returns:
        A (possibly empty) list of tool names.
    """
    # A leading "name =" means an assignment was mistaken for a command.
    if re.match(r"\w+\s*=", reported):
        return []
    name = reported.lower()
    # An empty name (e.g. the reported path ended with "/") is not a tool;
    # checking it here also protects the indexing below.
    if not name or name in _EXTRACT_TOOLS_IGNORED_NAMES:
        return []
    # If it's a parameter
    if name[0] == "-":
        return []
    # If it's a script -> we report the language it is written in
    if name.endswith(".py") or name in ("python3", "python2"):
        return ["python"]
    if name.endswith(".r"):
        return ["r"]
    if name.endswith(".pl"):
        return ["perl"]
    if name.endswith(".jl"):
        return ["julia"]
    if name.endswith(".sh"):
        # For now bash scripts are not considered
        return []
    name = name.strip()
    if name == "rscript":
        return ["r"]
    if name == "bash":
        return []
    # If the tool extracted is "template" -> we search for the script used
    if name == "template":
        extension_to_tool = {".py": "python", ".R": "r", ".r": "r", ".pl": "perl", ".jl": "julia"}
        found = []
        for extension_search in re.finditer(r'template *[^\/\s]+(\.\w+)', OG_script):
            extension = extension_search.group(1)
            if extension in extension_to_tool:
                found.append(extension_to_tool[extension])
        return found
    if len(name) > 1 and name not in general_tools and name[-1] != ":" and re.fullmatch(r"\w", name[0]):
        # If the tool is java -> the jar files named in the command are
        # reported instead of "java" itself.
        # NOTE(review): the original indentation was lost; this assumes "java"
        # is dropped once after all jar names were collected -- confirm.
        if name == "java":
            return [jar.group(1).lower() for jar in re.finditer(r'([^\/\s]+)\.jar', command)]
        return [name]
    return []


def extract_tools(script, extract_general_tools = False):
    """Extract the tools used in a bash script.

    The script is first "cleaned" (curly blocks, quoted strings, redirections,
    pipes... are removed or split), then every remaining line is executed with
    ``apptainer exec ../ressources/empty.sif`` from the directory of this file.
    The ``FATAL`` messages written by that call are parsed to recover the names
    of the executables which were requested.

    Args:
        script: Code of the script to analyse.
        extract_general_tools: When False (default) a fixed list of common
            shell utilities ('cd', 'cat', 'sed', ...) is left out of the result.

    Returns:
        A list of unique tool names (in no particular order). An empty list is
        returned straight away for scripts with a python, Rscript or perl
        ``#!/usr/bin/env`` shebang.

    Raises:
        Exception: If a quote opened in the script is never closed.
    """
    #If we want to extract the general tools, nothing has to be removed from the tools extracted
    if extract_general_tools:
        general_tools = []
    else:
        general_tools = ['cd', 'cat', 'sed', 'echo', 'mv', 'mkdir', 'cp', 'awk', 'touch', 'tabix',
                         'gzip', 'rm', 'bgzip', 'set', 'grep', 'egrep', 'pigz', 'head', 'tar', 'tail',
                         'gunzip', 'wc', 'ls', 'find', "sort", "uniq", "printf", "ln", "zcat", "which",
                         "eval", "paste", "tr", "gawk", "date", "tee", "trap","base64", 'parallel', 'time',
                         "pwd", "sleep", "ssh", "cpu", "fgrep", "bc", "chmod", "whereis", "conda", "wait",
                         "split", "git", "join", "unzip", "wget", "print", "rev", 'rmdir']
    OG_script = script
    script = " "+script+" "

    #Detecting cases of non bash environments
    for shebang in ["#!/usr/bin/env python", "#!/usr/bin/env Rscript", '#!/usr/bin/env perl']:
        if shebang in script:
            return []

    #----------------------------
    #"CLEANING" the script
    #----------------------------
    #Removing the curlies and the elements inside them -> to avoid the errors not recognising the variables
    searching = True
    while searching:
        searching = False
        for match in re.finditer(r'\{.+\}', script):
            start, _ = match.span(0)
            end = extract_curly(script+"\n\n\n\n", start+1)
            if end != -1:
                script = script.replace(script[start:end], "")
                #The script changed -> the matches have to be recomputed
                searching = True
                break

    #Removing the triple quotes from the script
    script = re.sub(r"\"\"\"", "\n", script)
    script = re.sub(r"\'\'\'", "\n", script)

    #Removing elements inside the single quotes
    while True:
        match = re.search(r'\'', script)
        if match is None:
            break
        start = match.start()
        end = extract_single_quote(script+"\n\n\n\n", start+1)
        script = script.replace(script[start:end], "")

    #Removing elements inside the double quotes
    while True:
        match = re.search(r'\"', script)
        if match is None:
            break
        start = match.start()
        end = extract_double_quote(script+"\n\n\n\n", start+1)
        script = script.replace(script[start:end], "")

    #Removing the shell syntax which would confuse the extraction
    script = re.sub(r"\\\$", "", script)
    script = re.sub(r"\$", "", script)
    script = re.sub(r"\(", "", script)
    script = re.sub(r"\)", "", script)
    script = re.sub(r"\n *\<[^\>.]+\>", " ", script)
    script = re.sub(r"\<", " ", script)
    script = re.sub(r"\>", " ", script)
    script = re.sub(r"\&", " ", script)
    script = re.sub(r"\n\s*\\", " ", script)
    script = re.sub(r"\s*\\", " ", script)
    script = re.sub(r" then ", " ", script)

    #Replacing xargs by nothing
    #"xargs" -> is not really a tool in a traditional sense
    def replacer(match):
        return match.group(0).replace(match.group(1), '')
    for tool in ["xargs"]:
        script = re.sub(fr"[^\w]({tool})\s", replacer, script)

    #Removing the pipe operators -> each side of a pipe becomes its own line
    searching = True
    while searching:
        searching = False
        to_replace = []
        for command in script.split('\n'):
            if '|' in command:
                left, right = command.split('|')[0], '|'.join(command.split('|')[1:])
                if left.count('(')==left.count(')') and right.count('(')==right.count(')'):
                    searching = True
                    to_replace.append([command, f"{left}\n{right}"])
        for r in to_replace:
            script = script.replace(r[0], r[1], 1)

    #----------------------------
    #RUNNING the commands
    #----------------------------
    tools = set()
    OG_path = os.getcwd()
    #Change working directory to the one of the file (abspath so it also
    #works when __file__ has no directory component)
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    try:
        #Get list of files which already exist in folder
        OG_files = os.listdir()
        try:
            for command in script.split('\n'):
                command = command.strip()
                #Create (or empty) the output.txt file
                with open("output.txt", "w"):
                    pass
                if command.endswith(";"):
                    command = command[:-1]
                if command.startswith("&"):
                    command = command[1:]
                #Nothing left to run (also covers a line which was only ";")
                if command == "":
                    continue
                #In the case the command is "var = ..." we don't run it
                if re.match(r"\w+\s*=", command):
                    continue
                #Running the command in the empty environment
                #NOTE(review): the command is interpolated as-is into a shell script;
                #any shell metacharacter which survived the cleaning (e.g. ';' or
                #backticks) is interpreted by the host shell -> to be hardened if
                #untrusted workflows are analysed.
                apptainer_command = f"apptainer exec ../ressources/empty.sif {command} >> output.txt 2>&1"
                with open("apptainer_script.sh", "w") as script_file:
                    script_file.write(apptainer_command)
                os.system("chmod +x apptainer_script.sh")
                #apptainer pull empty.sif docker://cfgarden/empty
                os.system("./apptainer_script.sh >> .out 2>&1 && rm -rf .out")
                #Parsing the error to extract the tool
                with open("output.txt") as output_file:
                    results = output_file.read()
                for pattern in [r'FATAL: +\"([^"]+)"', r'FATAL: +stat +([^:]+):']:
                    for match in re.finditer(pattern, results):
                        reported = match.group(1).split("/")[-1].strip()
                        tools.update(_tools_from_reported_name(reported, command, OG_script, general_tools))
        finally:
            #We remove the remaining files which have been created in the meantime
            for file in os.listdir():
                if file not in OG_files and os.path.isfile(file):
                    os.remove(file)
    finally:
        #Change working directory back to the OG one
        os.chdir(OG_path)
    #Return the tools extracted
    return list(tools)
......@@ -5,7 +5,7 @@ import copy
from .code_ import Code
from .condition import Condition
from .nextflow_building_blocks import Nextflow_Building_Blocks
from .outils import remove_jumps_inbetween_parentheses, remove_jumps_inbetween_curlies, sort_and_filter, get_dico_from_tab_from_id, check_if_element_in_tab_rocrate, get_python_packages, get_R_libraries, get_perl_modules, process_2_DSL2
from .outils import remove_jumps_inbetween_parentheses, remove_jumps_inbetween_curlies, sort_and_filter, get_dico_from_tab_from_id, check_if_element_in_tab_rocrate, get_python_packages, get_R_libraries, get_perl_modules, process_2_DSL2, extract_tools
from .bioflowinsighterror import BioFlowInsightError
from . import constant
......@@ -578,6 +578,9 @@ class Process(Nextflow_Building_Blocks):
# call.append(f"{o.get_code()} = {self.get_name()}.out.{o.get_code()}")
call = "\n".join(call)
return code, call
def get_tools(self, extract_general_tools = False):
    """Return the tools found by ``extract_tools`` in this process' script code.

    ``extract_general_tools`` is forwarded unchanged to ``extract_tools``.
    """
    script_code = self.get_script_code()
    tools = extract_tools(script_code, extract_general_tools = extract_general_tools)
    return tools
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment