# Commands that are part of virtually every shell environment; they are not
# reported as tools unless the caller explicitly asks for them.
_GENERAL_TOOLS = frozenset([
    'cd', 'cat', 'sed', 'echo', 'mv', 'mkdir', 'cp', 'awk', 'touch', 'tabix',
    'gzip', 'rm', 'bgzip', 'set', 'grep', 'egrep', 'pigz', 'head', 'tar', 'tail',
    'gunzip', 'wc', 'ls', 'find', "sort", "uniq", "printf", "ln", "zcat", "which",
    "eval", "paste", "tr", "gawk", "date", "tee", "trap", "base64", 'parallel', 'time',
    "pwd", "sleep", "ssh", "cpu", "fgrep", "bc", "chmod", "whereis", "conda", "wait",
    "split", "git", "join", "unzip", "wget", "print", "rev", 'rmdir',
])

# Words that can show up as the "missing executable" but are obviously not tools
# (shell keywords, punctuation, Nextflow section names, ...).
_NOT_TOOLS = frozenset([
    'if', 'elif', "else", "done", "fi", 'do', 'for', 'module', 'then',
    "def", "{", "}", "end_versions", ":", "stub:", "stub :", "__pycache__",
    "cut", "source", "export", "[", "]", "$", ",", "case", "esac", "exit",
    "cli", "e0f", "gnu", "env", "!", "function", "readme.md", "false", "while",
])

# Shebangs announcing that the script is not a bash script at all.
_NON_BASH_SHEBANGS = (
    "#!/usr/bin/env python",
    "#!/usr/bin/env Rscript",
    '#!/usr/bin/env perl',
)

# File extension of a `template` script -> language reported as the tool.
_TEMPLATE_LANGUAGES = {
    ".py": "python",
    ".R": "r",
    ".r": "r",
    ".pl": "perl",
    ".jl": "julia",
}

# Patterns applied to the container output; group 1 is the executable that
# could not be found or stat'ed.
_FATAL_PATTERNS = (r'FATAL: +\"([^"]+)"', r'FATAL: +stat +([^:]+):')


def _find_closing_quote(text, start, quote):
    """Return the index just after the first unescaped `quote` at or after `start`.

    A quote is escaped when it is preceded by an odd number of backslashes.
    Raises Exception('Unable to extract') when no closing quote exists.
    """
    position = start
    length = len(text)
    while position < length:
        if text[position] == quote:
            # Count the run of backslashes right before the quote; the lower
            # bound keeps index 0 from wrapping around to the end of the text.
            backslashes = 0
            probe = position - 1
            while probe >= 0 and text[probe] == "\\":
                backslashes += 1
                probe -= 1
            if backslashes % 2 == 0:
                return position + 1
        position += 1
    raise Exception('Unable to extract')


def extract_single_quote(text, start):
    """Find the end of a single-quoted string.

    Parameters:
        text: the text being scanned.
        start: index of the first character after the opening quote.

    Returns:
        The index just after the closing single quote (may equal len(text)).

    Raises:
        Exception: 'Unable to extract' when the quote is never closed.
    """
    return _find_closing_quote(text, start, "'")


def extract_double_quote(text, start):
    """Find the end of a double-quoted string.

    Parameters:
        text: the text being scanned.
        start: index of the first character after the opening quote.

    Returns:
        The index just after the closing double quote (may equal len(text)).

    Raises:
        Exception: 'Unable to extract' when the quote is never closed.
    """
    return _find_closing_quote(text, start, '"')


def _strip_quoted(script, quote, find_end):
    """Remove every `quote`-delimited span (quotes included) from `script`."""
    opening = script.find(quote)
    while opening != -1:
        closing = find_end(script, opening + 1)
        # Same as the original: every occurrence of the quoted text is dropped
        script = script.replace(script[opening:closing], "")
        opening = script.find(quote)
    return script


def _clean_script(script):
    """Strip everything from `script` that would confuse the tool detection."""
    # Removing the curlies and the elements inside them -> otherwise the
    # variables would be reported as unknown executables
    searching = True
    while searching:
        searching = False
        for match in re.finditer(r'\{.+\}', script):
            start = match.start()
            end = extract_curly(script + "\n\n\n\n", start + 1)
            # `end > start` guards against an empty slice, which would make
            # the replace a no-op and the loop endless
            if end != -1 and end > start:
                script = script.replace(script[start:end], "")
                searching = True
                break

    # Removing the triple quotes from the script
    script = script.replace('"""', "\n")
    script = script.replace("'''", "\n")

    # Removing the elements inside the single, then the double quotes
    script = _strip_quoted(script, "'", extract_single_quote)
    script = _strip_quoted(script, '"', extract_double_quote)

    script = re.sub(r"\\\$", "", script)
    script = script.replace("$", "")
    script = script.replace("(", "")
    script = script.replace(")", "")
    script = re.sub(r"\n *\<[^\>.]+\>", " ", script)
    script = script.replace("<", " ")
    script = script.replace(">", " ")
    script = script.replace("&", " ")
    script = re.sub(r"\n\s*\\", " ", script)
    script = re.sub(r"\s*\\", " ", script)
    script = script.replace(" then ", " ")

    # "xargs" is not really a tool in the traditional sense -> drop the word
    # but keep the characters around it
    script = re.sub(r"([^\w])xargs(\s)", r"\1\2", script)

    # Every side of a pipe is a command of its own. All parentheses were
    # removed above, so each '|' simply becomes a line break.
    return script.replace("|", "\n")


def _command_to_argv(command):
    """Split `command` on whitespace, dropping a trailing `# comment`."""
    argv = []
    for token in command.split():
        if token.startswith("#"):
            break
        argv.append(token)
    return argv


def _probe_in_empty_container(argv, image):
    """Run `argv` inside the empty image and return the combined output.

    The command is passed as an argument list without any shell, so nothing
    taken from the analysed script is ever interpreted on the host.
    Returns "" when apptainer itself cannot be started.
    """
    import subprocess

    try:
        completed = subprocess.run(
            ["apptainer", "exec", image] + argv,
            stdin=subprocess.DEVNULL,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            encoding="utf-8",
            errors="replace",
        )
    except (OSError, ValueError):
        # apptainer missing / not executable, or an argument the OS rejects:
        # best effort, this command simply yields no tool
        return ""
    return completed.stdout or ""


def _tools_from_failure(reported, command, original_script, general_tools):
    """Translate one executable reported as missing into tool names.

    Parameters:
        reported: executable (possibly a path) taken from the FATAL message.
        command: the command that was probed (searched for `.jar` files).
        original_script: the untouched script (searched for template files).
        general_tools: names that must not be reported.

    Returns:
        A (possibly empty) list of tool names.
    """
    name = reported.split("/")[-1].strip()
    # "name = ..." is a variable assignment, not an executable
    if re.match(r"\w+\s*=", name):
        return []
    name = name.lower()
    if name in _NOT_TOOLS:
        return []
    # If it's a parameter
    if name.startswith("-"):
        return []
    # If it's a script -> we report the language running it
    if name.endswith(".py") or name in ("python3", "python2"):
        return ["python"]
    if name.endswith(".r"):
        return ["r"]
    if name.endswith(".pl"):
        return ["perl"]
    if name.endswith(".jl"):
        return ["julia"]
    # For now bash scripts (and bash itself) are not considered
    if name.endswith(".sh") or name == "bash":
        return []
    if name == "rscript":
        return ["r"]
    # If the tool extracted is "template" -> we search for the script used
    if name == "template":
        languages = []
        for found in re.finditer(r'template *[^\/\s]+(\.\w+)', original_script):
            language = _TEMPLATE_LANGUAGES.get(found.group(1))
            if language is not None:
                languages.append(language)
        return languages
    if (len(name) > 1 and name not in general_tools
            and not name.endswith(":") and re.fullmatch(r"\w", name[0])):
        if name == "java":
            # If the tool is java -> the jar files of the command are the tools
            jars = [found.group(1).lower()
                    for found in re.finditer(r'([^\/\s]+)\.jar', command)]
            if jars:
                return jars
            # NOTE(review): the flattened source does not show whether plain
            # "java" was meant to be dropped when no .jar is found; kept here.
        return [name]
    return []


#This function extracts the tools used in a script by running each command of the bash script
#in an empty bash environment using an apptainer image (the resulting errors are parsed
#to extract the tools)
def extract_tools(script, extract_general_tools = False):
    """Extract the names of the tools called by a bash `script`.

    Each command of the cleaned script is executed inside the empty image
    `../ressources/empty.sif` (relative to this file); the executable named
    in the resulting FATAL message is taken as the tool.

    Parameters:
        script: source code of the script to analyse.
        extract_general_tools: when True, common shell utilities (cd, cat,
            sed, ...) are reported as well; by default they are filtered out.

    Returns:
        A list of unique tool names, in no particular order. Scripts with a
        python, Rscript or perl `env` shebang return an empty list.

    Raises:
        Exception: 'Unable to extract' when the script contains a quote that
            is never closed.
    """
    general_tools = frozenset() if extract_general_tools else _GENERAL_TOOLS

    OG_script = script
    script = " " + script + " "

    # Detecting cases of non-bash environments
    for shebang in _NON_BASH_SHEBANGS:
        if shebang in script:
            return []

    script = _clean_script(script)

    # Absolute path of the image: no need to change the working directory of
    # the whole process while probing
    image = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                         "..", "ressources", "empty.sif")

    tools = []
    for line in script.split('\n'):
        # ';' separates commands; each one is probed on its own so none of
        # them can end up being executed outside of the container
        for command in line.split(';'):
            command = command.strip()
            if not command:
                continue
            # In the case the command is "var = ..." we don't run it
            if re.match(r"\w+\s*=", command):
                continue
            argv = _command_to_argv(command)
            if not argv:
                continue
            # Running the command in the empty environment
            report = _probe_in_empty_container(argv, image)
            # Parsing the error to extract the tool
            for pattern in _FATAL_PATTERNS:
                for failure in re.finditer(pattern, report):
                    tools.extend(_tools_from_failure(
                        failure.group(1), command, OG_script, general_tools))

    # Return the tools extracted
    return list(set(tools))
def get_tools(self, extract_general_tools = False):
    """Return the tools called in the script of this process.

    The script code of the process is handed to `extract_tools`;
    `extract_general_tools` is forwarded unchanged.
    """
    script_code = self.get_script_code()
    tools = extract_tools(script_code, extract_general_tools = extract_general_tools)
    return tools