BioFlow-Insight · Commit 5951b6b2
Authored 4 months ago by George Marchment
Parent: dd319afc

    Continue reintagrating rewritting

Pipeline #14377 failed in 2 minutes and 11 seconds.

Showing 3 changed files with 89 additions and 36 deletions:

  src/main.py            +20  −12
  src/nextflow_file.py    +2   −4
  src/workflow.py        +67  −20
src/main.py  (+20 −12)

@@ -75,21 +75,29 @@ class Main(Nextflow_Building_Blocks):
     def get_all_executors_in_workflow(self):
         all_executors = self.get_all_executors_in_subworkflow()
         dico = {}
-        for e in all_executors:
-            if(e.get_type()=="Call"):
-                for c in e.get_all_calls():
-                    sub = c.get_first_element_called()
-                    if(sub.get_type()=="Subworkflow"):
-                        if(c not in dico):
-                            sub_calls = sub.get_all_executors_in_workflow()
-                            for sub_c in sub_calls:
-                                dico[sub_c] = ""
-            #Case it's an operation
-            else:
-                dico[e] = ""
+        for e in all_executors:
+            dico[e] = ""
+        calls = self.get_all_calls_in_workflow()
+        for call in calls:
+            sub = call.get_first_element_called()
+            if(sub.get_type()=="Subworkflow"):
+                sub_calls = sub.get_all_executors_in_workflow()
+                for sub_c in sub_calls:
+                    dico[sub_c] = ""
+        #for e in all_executors:
+        #    if(e.get_type()=="Call"):
+        #        for c in e.get_all_calls():
+        #            sub = c.get_first_element_called()
+        #            if(sub.get_type()=="Subworkflow"):
+        #                if(c not in dico):
+        #                    sub_calls = sub.get_all_executors_in_workflow()
+        #                    for sub_c in sub_calls:
+        #                        dico[sub_c] = ""
         return list(dico.keys())
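The rewritten `get_all_executors_in_workflow` above relies on two ideas: a plain dict (`dico`) used as an insertion-ordered set, and a descent into called subworkflows so their executors are collected as well. A minimal sketch of the same pattern, with hypothetical stand-in types rather than the BioFlow-Insight classes:

```python
# Illustrative sketch only, not code from the commit. `executors` is any iterable
# of hashable items, and `get_called_subworkflow(e)` is a hypothetical helper
# returning a subworkflow-like object (with an `.executors` list) or None.
def collect_executors(executors, get_called_subworkflow):
    seen = {}  # dict keys keep insertion order (Python 3.7+); values are unused
    for e in executors:
        seen[e] = ""
        sub = get_called_subworkflow(e)
        if sub is not None:
            # Same idea as the nested get_all_executors_in_workflow() calls:
            # pull in the executors of the called subworkflow as well.
            for sub_e in collect_executors(sub.executors, get_called_subworkflow):
                seen[sub_e] = ""
    return list(seen.keys())
```

Returning `list(dico.keys())` rather than a `set` keeps the executors in the order they were first encountered while still de-duplicating them.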
src/nextflow_file.py  (+2 −4)

@@ -277,10 +277,10 @@ class Nextflow_File(Nextflow_Building_Blocks):
         #If the file is not alreday initialised then we self.initialise it
         if(not self.initialised):
             self.initialised = True
+            if(self.workflow.get_display_info_bool()):
+                print(f"Analysing -> '{self.get_file_address()}'")
             if(self.get_DSL()=="DSL2"):
-                if(self.workflow.get_display_info_bool()):
-                    print(f"Analysing -> '{self.get_file_address()}'")
                 #Extarct Processes
                 self.extract_processes()

@@ -320,8 +320,6 @@ class Nextflow_File(Nextflow_Building_Blocks):
             #        sub.initialise()
             #        indice+=1
             elif(self.get_DSL()=="DSL1"):
-                if(self.workflow.get_display_info_bool()):
-                    print(f"Analysing -> '{self.get_file_address()}'")
                 from .main import Main
                 #Extarct Processes
                 self.extract_processes()
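The two hunks above hoist the `Analysing -> '...'` message out of the DSL2 and DSL1 branches so it is printed once, right after the file is marked as initialised. A small sketch of that hoisting pattern (the names here are hypothetical, not the BioFlow-Insight API):

```python
# Sketch only: print the progress message once, before branching on the DSL
# version, instead of duplicating it in every branch.
def initialise_file(path, dsl, display_info=True):
    if display_info:
        print(f"Analysing -> '{path}'")  # common to DSL1 and DSL2
    if dsl == "DSL2":
        pass  # DSL2-specific extraction would go here
    elif dsl == "DSL1":
        pass  # DSL1-specific extraction would go here
```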
src/workflow.py  (+67 −20)
@@ -336,10 +336,10 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
                     processes_called.append(p)
             nb_2_select = int(alpha*len(processes_called))
             sampled = random.sample(set(processes_called), nb_2_select)
-            #name_select = []
-            #for p in sampled:
-            #    name_select.append(p.get_alias())
-            return sampled
+            name_select = []
+            for p in sampled:
+                name_select.append(p.get_alias())
+            return name_select
         else:
             raise BioFlowInsightError("Trying to generate random relevant processes however option 'duplicate' is not activated.")
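The hunk above changes the random sampling helper to return process aliases (names) rather than the sampled process objects. A minimal, self-contained sketch of the same behaviour, assuming plain strings as aliases (the process objects and `BioFlowInsightError` of the real code are left out):

```python
import random

def sample_relevant_process_names(process_aliases, alpha, seed=None):
    # Pick roughly a fraction `alpha` of the called processes at random and
    # return their names, mirroring `return name_select` in the diff.
    rng = random.Random(seed)
    unique = sorted(set(process_aliases))
    nb_2_select = int(alpha * len(unique))
    return rng.sample(unique, nb_2_select)

# Example: sample_relevant_process_names(["fastqc", "trim", "align", "sort"], 0.5)
```

Note that passing a `set` directly to `random.sample`, as the diff does, is deprecated since Python 3.9 and no longer accepted in 3.11, hence the `sorted(set(...))` in the sketch.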
@@ -365,18 +365,32 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
             code = code.replace(r, ankers)
             ankers = ""
+        processes, subworkflows, functions = [], [], []
+        for c in self.get_workflow_main().get_all_calls_in_workflow():
+            ele = c.get_first_element_called()
+            if(ele.get_type()=="Process"):
+                processes.append(ele)
+            elif(ele.get_type()=="Subworkflow"):
+                subworkflows.append(ele)
+            elif(ele.get_type()=="Function"):
+                functions.append(ele)
+            else:
+                raise Exception("This shoudn't happen")
         #Adding processes into code
-        for p in self.get_processes_called():
+        for p in processes:
             if(p.get_code() not in code):
                 code = code.replace(process_section, '\n'+p.get_code_with_alias()+'\n'+process_section)
         #Adding subworkflows into code
-        for sub in self.get_subworkflows_called():
+        for sub in subworkflows:
             if(sub.get_code() not in code):
                 code = code.replace(subworkflow_section, subworkflow_section+'\n'+sub.get_code_with_alias()+'\n')
         #Adding functions into code
-        for fun in self.get_functions_called():
+        for fun in functions:
             if(fun.get_code() not in code):
                 code = code.replace(function_section, function_section+'\n'+fun.get_code()+'\n')
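The hunk above first classifies every called element as a process, subworkflow or function, then splices each definition into the generated single-file code next to a section anchor, skipping definitions whose code is already present. A sketch of that anchor-based insertion, with a hypothetical anchor string (the real anchors come from `write_workflow_into_one_file`):

```python
PROCESS_ANCHOR = "//PROCESSES"  # hypothetical placeholder left in the generated code

def insert_definitions(code, definitions, anchor=PROCESS_ANCHOR):
    # Insert each definition next to the anchor, keeping the anchor in place so
    # the next definition can be inserted at the same spot; skip duplicates.
    for body in definitions:
        if body not in code:
            code = code.replace(anchor, '\n' + body + '\n' + anchor)
    return code
```

Whether the anchor is kept after the inserted text (as for processes in the diff) or before it (as for subworkflows and functions) only changes whether later insertions end up above or below earlier ones.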
@@ -397,30 +411,63 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
     #Rewriting everything in one file + simplifying the operations and calls to simplify the analysis
     def simplify_workflow_code(self):
-        code = self.get_first_file().get_code()
-        #code, ankers = self.write_workflow_into_one_file()
-        #TODO -> update method get_all_executors_from_workflow -> right now it's not searching through the subworkflows
-        for exe in self.get_workflow_main().get_all_executors_in_workflow():
+        code, ankers = self.write_workflow_into_one_file()
+        all_executors = self.get_workflow_main().get_all_executors_in_workflow()
+        #We do this so that the longest operation and calls are rewritten first in the code -> to avoid problems
+        executor_2_length = {}
+        for e in all_executors:
+            executor_2_length[e] = len(e.get_code(get_OG = True))
+        sorted_executor_2_length = {k: v for k, v in sorted(executor_2_length.items(), key=lambda item: item[1], reverse=True)}
+        for exe in sorted_executor_2_length:
             if(exe.get_type()=="Call" or exe.get_type()=="Operation"):
-                code = code.replace(exe.get_code(get_OG = True), exe.simplify_code())
+                code = code.replace(exe.get_code(get_OG = True), exe.simplify_code(), 1)
             else:
                 print(exe.get_code(), exe.get_type())
                 raise Exception("This shouldn't happen")
         return code
 
+    def rewrite_and_initialise(self, code):
+        #Write new code in temporary file
+        temp_file = self.get_output_dir() / f"temp_{str(self)[-7:-2]}.nf"
+        with open(temp_file, "w") as file:
+            file.write(code)
+        #Replace old analysis with new analysis (simplified code)
+        self.__init__(str(temp_file), display_info = False, duplicate = True)
+        self.initialise()
+
+    def check_relevant_processes_in_workflow(self, relevant_processes):
+        #Check all relevat processes are in wf
+        workflow_processes = []
+        for c in self.get_workflow_main().get_all_calls_in_workflow():
+            ele = c.get_first_element_called()
+            if(ele.get_type()=="Process"):
+                workflow_processes.append(ele.get_alias())
+        for p in relevant_processes:
+            if(p not in workflow_processes):
+                raise BioFlowInsightError(f"The element '{p}' given as a relevant processes is not present in the workflow's processes", 24)
+
+    def generate_user_view(self, relevant_processes = [], render_graphs = True, processes_2_remove = []):
+        self.graph.initialise(processes_2_remove = processes_2_remove)
+        self.graph.generate_user_view(relevant_processes = relevant_processes, render_graphs = render_graphs)
+
     #Method which rewrites the workflow follwong the user view
     #Conert workflow to user_view only makes sense when the option duplicate is activated -> otherwise is doesn't make sense + it makes the analysis way more complicated
     def convert_workflow_2_user_view(self, relevant_processes = []):
         if(self.duplicate):
             None
             code = self.simplify_workflow_code()
-            print(code)
-            #self.rewrite_and_initialise(code)
-            #
-            ##Get the clusters and the code
-            #self.check_relevant_processes_in_workflow(relevant_processes)
-            #self.nextflow_file.generate_user_view(relevant_processes = relevant_processes, processes_2_remove = [])
-            #clusters = self.nextflow_file.graph.get_clusters_from_user_view()
+            self.rewrite_and_initialise(code)
+            #Get the clusters and the code
+            self.check_relevant_processes_in_workflow(relevant_processes)
+            self.generate_user_view(relevant_processes = relevant_processes, processes_2_remove = [])
+            clusters = self.graph.get_clusters_from_user_view()
 
             #
             ##DETERMING WHICH SUBWORKFLOWS ARE BROKEN WITH THE CLUSTER
             ##Creating the clusters with calls instead of processes or subworkflows
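The new `simplify_workflow_code` above sorts executors by decreasing length of their original code before doing the string replacements, and replaces only the first occurrence (`count=1`). As the comment in the diff explains, this avoids a shorter snippet being rewritten inside a longer one that has not been processed yet. A generic sketch of that ordering trick (plain strings here, not BioFlow-Insight executor objects):

```python
def apply_rewrites(code, rewrites):
    # `rewrites` maps an original snippet to its simplified form. Rewriting the
    # longest originals first prevents a short snippet from matching inside a
    # longer, not-yet-rewritten one; count=1 touches only the first occurrence.
    for original in sorted(rewrites, key=len, reverse=True):
        code = code.replace(original, rewrites[original], 1)
    return code

# Example: the outer call "foo(bar(y))" is rewritten before the nested "bar(y)",
# so the result is "x = f1" rather than "x = foo(b1)".
# apply_rewrites("x = foo(bar(y))", {"bar(y)": "b1", "foo(bar(y))": "f1"})
```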
@@ -724,7 +771,7 @@ George Marchment, Bryan Brancotte, Marie Schmit, Frédéric Lemoine, Sarah Cohen
         #code = code.replace("$OR$", "||")
         #
         #return remove_extra_jumps(format_with_tabs(code))
         #
         return code
         #
         ##So basically when retriving a thing (process or subworkflow)
         ##There is necessarily one call associated with the thing -> since we have the option duplicate activated