Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
BioFlow-Insight
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
shareFAIR
BioFlow-Insight
Commits
3a052510
Commit
3a052510
authored
4 months ago
by
George Marchment
Browse files
Options
Downloads
Patches
Plain Diff
Added the extraction of the tools function to the process object
parent
4976c876
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Pipeline
#14612
failed with stage
in 2 minutes and 14 seconds
Changes
3
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
ressources/empty.sif
+0
-0
0 additions, 0 deletions
ressources/empty.sif
src/outils.py
+245
-0
245 additions, 0 deletions
src/outils.py
src/process.py
+4
-1
4 additions, 1 deletion
src/process.py
with
249 additions
and
1 deletion
ressources/empty.sif
0 → 100755
+
0
−
0
View file @
3a052510
File added
This diff is collapsed.
Click to expand it.
src/outils.py
+
245
−
0
View file @
3a052510
...
...
@@ -1391,5 +1391,250 @@ def remove_empty_conditions_place_anker(code, workflow):
code
=
code
.
replace
(
OG_anker
,
new_anker
)
code
=
remove_empty_conditions
(
code
)
return
code
def extract_single_quote(text, start):
    """Return the index just past the single quote that closes a quoted section.

    The scan begins at ``start`` (expected to be the position right after the
    opening quote) and stops at the first single quote that is not escaped.

    Parameters:
        text: the string to scan.
        start: index at which the scan begins.

    Returns:
        The index immediately after the closing single quote, so that
        ``text[opening_index:returned_index]`` covers the whole quoted section.

    Raises:
        Exception: with the message 'Unable to extract' when no unescaped
            single quote is found between ``start`` and the end of ``text``.
    """
    end = start
    length = len(text)
    while end < length:
        if text[end] == "'":
            # A quote is escaped only when it is preceded by an odd number of
            # backslashes (an even run is made of escaped backslashes only).
            # Stopping at index 0 avoids wrapping around to the end of the text.
            backslashes = 0
            position = end - 1
            while position >= 0 and text[position] == "\\":
                backslashes += 1
                position -= 1
            if backslashes % 2 == 0:
                return end + 1
        end += 1
    raise Exception('Unable to extract')
def extract_double_quote(text, start):
    """Return the index just past the double quote that closes a quoted section.

    The scan begins at ``start`` (expected to be the position right after the
    opening quote) and stops at the first double quote that is not escaped.

    Parameters:
        text: the string to scan.
        start: index at which the scan begins.

    Returns:
        The index immediately after the closing double quote, so that
        ``text[opening_index:returned_index]`` covers the whole quoted section.

    Raises:
        Exception: with the message 'Unable to extract' when no unescaped
            double quote is found between ``start`` and the end of ``text``.
    """
    end = start
    length = len(text)
    while end < length:
        if text[end] == '"':
            # A quote is escaped only when it is preceded by an odd number of
            # backslashes (an even run is made of escaped backslashes only).
            # Stopping at index 0 avoids wrapping around to the end of the text.
            backslashes = 0
            position = end - 1
            while position >= 0 and text[position] == "\\":
                backslashes += 1
                position -= 1
            if backslashes % 2 == 0:
                return end + 1
        end += 1
    raise Exception('Unable to extract')
#This function extracts the tools used in a script by running each line of the bash script
#in an empty bash environment using a singularity image (by doing this we can parse the errors
#and extract the tools)
def extract_tools(script, extract_general_tools = False):
    """Extract the names of the tools called by a bash script.

    The script is first simplified (curly-brace sections, quoted strings,
    shell operators and pipes are removed or split). Every remaining line is
    then executed with ``apptainer exec`` inside the empty image
    ``../ressources/empty.sif``; the resulting "FATAL" error messages name the
    executable that could not be found, and that name is recorded as a tool.

    Side effects: the working directory is temporarily changed to the
    directory of this source file, where ``output.txt``,
    ``apptainer_script.sh`` and ``.out`` are written. Every file that did not
    exist there beforehand is deleted and the working directory is restored
    before returning, even when an error occurs.

    Parameters:
        script: the source text of the script to analyse.
        extract_general_tools: when False (default), common shell utilities
            (``cd``, ``cat``, ``grep``, ...) are left out of the result.

    Returns:
        A sorted list of unique lower-case tool names. An empty list is
        returned straight away when the script contains a python, Rscript or
        perl ``#!/usr/bin/env`` shebang.

    Raises:
        Exception: 'Unable to extract' is propagated from the quote helpers
            when the script contains an unterminated quoted section.
    """
    #If we want to extract the general tools we define a list of the general tools 'to remove' from the tools extracted
    if extract_general_tools:
        general_tools = []
    else:
        general_tools = [
            'cd', 'cat', 'sed', 'echo', 'mv', 'mkdir', 'cp', 'awk', 'touch', 'tabix',
            'gzip', 'rm', 'bgzip', 'set', 'grep', 'egrep', 'pigz', 'head', 'tar', 'tail',
            'gunzip', 'wc', 'ls', 'find', "sort", "uniq", "printf", "ln", "zcat", "which",
            "eval", "paste", "tr", "gawk", "date", "tee", "trap", "base64", 'parallel',
            'time', "pwd", "sleep", "ssh", "cpu", "fgrep", "bc", "chmod", "whereis",
            "conda", "wait", "split", "git", "join", "unzip", "wget", "print", "rev",
            'rmdir',
        ]

    OG_script = script
    script = " " + script + " "

    #Detecting cases of non-bash environments -> nothing can be run line by line
    for shebang in ("#!/usr/bin/env python", "#!/usr/bin/env Rscript", "#!/usr/bin/env perl"):
        if shebang in script:
            return []

    script = _clean_script_for_tool_extraction(script)

    tools = []
    OG_path = os.getcwd()
    #Change working directory to the one of this file (the image path below is relative to it)
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    try:
        #Files which already exist in the folder -> they must survive the cleanup
        OG_files = set(os.listdir())
        try:
            for command in script.split('\n'):
                command = command.strip()
                if command.endswith(";"):
                    command = command[:-1]
                if command.startswith("&"):
                    command = command[1:]
                #Nothing to run, or the command is "var = ..." -> we don't run it
                if command == "" or re.match(r"\w+\s*=", command):
                    continue
                #Running the command in the empty environment
                results = _run_in_empty_container(command)
                #Parsing the error to extract the tool
                for pattern in [r'FATAL: +\"([^"]+)"', r'FATAL: +stat +([^:]+):']:
                    for match in re.finditer(pattern, results):
                        extracted = match.group(1).split("/")[-1].strip()
                        tools.extend(_tools_from_extracted_name(extracted, command, OG_script, general_tools))
        finally:
            #We remove the remaining files which have been created in the meantime
            for file_name in os.listdir():
                if file_name not in OG_files and (os.path.isfile(file_name) or os.path.islink(file_name)):
                    os.remove(file_name)
    finally:
        #Change working directory back to the OG one
        os.chdir(OG_path)
    #Return the tools extracted
    return sorted(set(tools))


def _remove_enclosed_sections(script, opening_pattern, find_end):
    """Repeatedly delete the sections opened by ``opening_pattern`` and closed where ``find_end`` says."""
    searching = True
    while searching:
        searching = False
        for match in re.finditer(opening_pattern, script):
            start = match.start()
            #The padding lets the end finder look past a closing character located at the very end
            end = find_end(script + "\n\n\n\n", start + 1)
            if end != -1:
                #Every occurrence of the same section is dropped, then the search restarts
                script = script.replace(script[start:end], "")
                searching = True
                break
    return script


def _clean_script_for_tool_extraction(script):
    """Strip everything that would prevent a script line from being run on its own."""
    #Removing the curlies and the elements inside them -> to avoid the errors not recognising the variables
    script = _remove_enclosed_sections(script, r'\{.+\}', extract_curly)
    #Removing the triple quotes from the script
    script = re.sub(r"\"\"\"", "\n", script)
    script = re.sub(r"\'\'\'", "\n", script)
    #Removing elements inside the single quotes, then inside the double quotes
    script = _remove_enclosed_sections(script, r'\'', extract_single_quote)
    script = _remove_enclosed_sections(script, r'\"', extract_double_quote)
    #Removing the shell operators (variables, sub-shells, redirections, background, line continuations)
    script = re.sub(r"\\\$", "", script)
    script = re.sub(r"\$", "", script)
    script = re.sub(r"\(", "", script)
    script = re.sub(r"\)", "", script)
    script = re.sub(r"\n *\<[^\>.]+\>", " ", script)
    script = re.sub(r"\<", " ", script)
    script = re.sub(r"\>", " ", script)
    script = re.sub(r"\&", " ", script)
    script = re.sub(r"\n\s*\\", " ", script)
    script = re.sub(r"\s*\\", " ", script)
    #Only the standalone keyword is removed, so words that merely contain "then" are kept intact
    script = re.sub(r"\bthen\b", " ", script)
    #Replacing xargs by nothing
    #"xargs" -> is not really a tool in a traditional sense
    script = re.sub(r"([^\w])xargs(\s)", r"\1\2", script)
    #Removing the pipe operators: every stage of a pipeline becomes its own line
    #(all parentheses have been removed above, so every pipe can be split safely)
    script = script.replace('|', '\n')
    return script


def _run_in_empty_container(command):
    """Run ``command`` with apptainer in the empty image and return the captured output."""
    #Start from an empty output.txt file
    with open("output.txt", "w"):
        pass
    #NOTE(review): the command text is interpolated into a shell script that is run on the
    #host, so a command containing e.g. ';' would run its remainder outside the container.
    #Consider passing an argument list to subprocess.run instead - to be confirmed.
    with open("apptainer_script.sh", "w") as script_file:
        script_file.write(f"apptainer exec ../ressources/empty.sif {command} >> output.txt 2>&1")
    os.system("chmod +x apptainer_script.sh")
    #apptainer pull empty.sif docker://cfgarden/empty
    os.system("./apptainer_script.sh >> .out 2>&1 && rm -rf .out")
    with open("output.txt", errors="replace") as output_file:
        return output_file.read()


def _tools_from_extracted_name(extracted, command, og_script, general_tools):
    """Translate one name found in an apptainer error into the list of tools it stands for."""
    #List of things to ignore -> these can be detected for tools -> obviously they are not tools
    random_things = [
        'if', 'elif', "else", "done", "fi", 'do', 'for', 'module', 'then', "def", "{", "}",
        "end_versions", ":", "stub:", "stub :", "__pycache__", "cut", "source", "export",
        "[", "]", "$", ",", "case", "esac", "exit", "cli", "e0f", "gnu", "env", "!",
        "function", "readme.md", "false", "while",
    ]
    #An empty name (path ending with "/") or an assignment "var = ..." is not a tool
    if extracted == "" or re.match(r"\w+\s*=", extracted):
        return []
    name = extracted.lower().strip()
    if name == "" or name in random_things:
        return []
    #If it's a parameter
    if name[0] == "-":
        return []
    #If it's a script -> we get of which kind
    if name.endswith(".py") or name in ("python3", "python2"):
        return ["python"]
    if name.endswith(".r"):
        return ["r"]
    if name.endswith(".pl"):
        return ["perl"]
    if name.endswith(".jl"):
        return ["julia"]
    #For now the bash script is not considered
    if name.endswith(".sh") or name == "bash":
        return []
    if name == "rscript":
        return ["r"]
    #If the tool extracted is "template" -> we search for the script used
    if name == "template":
        languages = {".py": "python", ".R": "r", ".r": "r", ".pl": "perl", ".jl": "julia"}
        found = []
        for extension_search in re.finditer(r'template *[^\/\s]+(\.\w+)', og_script):
            language = languages.get(extension_search.group(1))
            if language is not None:
                found.append(language)
        return found
    if len(name) > 1 and name not in general_tools and name[-1] != ":" and re.fullmatch(r"\w", name[0]):
        #If the tool is java -> the jar files found in the command replace "java" itself
        if name == "java":
            jars = [java_search.group(1).lower() for java_search in re.finditer(r'([^\/\s]+)\.jar', command)]
            if jars:
                return jars
        return [name]
    return []
This diff is collapsed.
Click to expand it.
src/process.py
+
4
−
1
View file @
3a052510
...
...
@@ -5,7 +5,7 @@ import copy
from
.code_
import
Code
from
.condition
import
Condition
from
.nextflow_building_blocks
import
Nextflow_Building_Blocks
from
.outils
import
remove_jumps_inbetween_parentheses
,
remove_jumps_inbetween_curlies
,
sort_and_filter
,
get_dico_from_tab_from_id
,
check_if_element_in_tab_rocrate
,
get_python_packages
,
get_R_libraries
,
get_perl_modules
,
process_2_DSL2
from
.outils
import
remove_jumps_inbetween_parentheses
,
remove_jumps_inbetween_curlies
,
sort_and_filter
,
get_dico_from_tab_from_id
,
check_if_element_in_tab_rocrate
,
get_python_packages
,
get_R_libraries
,
get_perl_modules
,
process_2_DSL2
,
extract_tools
from
.bioflowinsighterror
import
BioFlowInsightError
from
.
import
constant
...
...
@@ -578,6 +578,9 @@ class Process(Nextflow_Building_Blocks):
# call.append(f"{o.get_code()} = {self.get_name()}.out.{o.get_code()}")
call
=
"
\n
"
.
join
(
call
)
return
code
,
call
def get_tools(self, extract_general_tools = False):
    """Return what extract_tools finds in this process's script code.

    extract_general_tools is forwarded unchanged to extract_tools.
    """
    script_code = self.get_script_code()
    return extract_tools(script_code, extract_general_tools = extract_general_tools)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment