Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
BioFlow-Insight
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Wiki
Code
Merge requests
1
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
shareFAIR
BioFlow-Insight
Commits
48d0b921
Commit
48d0b921
authored
1 year ago
by
George Marchment
Browse files
Options
Downloads
Patches
Plain Diff
Update RO-Crate -> complete version
parent
8a5e87ad
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
src/nextflow_file.py
+1
-1
1 addition, 1 deletion
src/nextflow_file.py
src/ro_crate.py
+48
-15
48 additions, 15 deletions
src/ro_crate.py
src/workflow.py
+92
-22
92 additions, 22 deletions
src/workflow.py
with
141 additions
and
38 deletions
src/nextflow_file.py
+
1
−
1
View file @
48d0b921
...
...
@@ -679,7 +679,7 @@ class Nextflow_File(Nextflow_Building_Blocks):
def
add_subworkflows_2_rocrate
(
self
,
dico
,
file_dico
,
file_name
):
for
sub
in
self
.
subworkflows
:
sub_key
=
sub
.
get_rocrate_key
(
dico
)
file_dico
[
"
hasPart
"
].
append
(
sub_key
)
file_dico
[
"
hasPart
"
].
append
(
{
"
@id
"
:
sub_key
}
)
sub
.
add_2_rocrate
(
dico
,
file_name
)
def
add_2_rocrate
(
self
,
dico
):
...
...
This diff is collapsed.
Click to expand it.
src/ro_crate.py
+
48
−
15
View file @
48d0b921
import
json
import
glob
import
os
import
re
from
.
import
constant
class
RO_Crate
:
def
__init__
(
self
,
workflow
):
...
...
@@ -42,7 +45,7 @@ class RO_Crate:
authors
=
self
.
workflow
.
get_authors
()
tab_authors
=
[]
for
author
in
authors
:
tab_authors
.
append
({
"
@id
"
:
author
[
"
@id
"
],
"
name
"
:
author
[
"
name
"
]})
tab_authors
.
append
({
"
@id
"
:
author
[
"
@id
"
],
"
email
"
:
author
[
"
email
"
]})
root
[
"
author
"
]
=
tab_authors
root
[
"
maintainer
"
]
=
tab_authors
#Right now I'm assuming that all the authors are maintainers
files
=
self
.
get_files
()
...
...
@@ -69,23 +72,53 @@ class RO_Crate:
file_stats
=
os
.
stat
(
file
)
return
file_stats
.
st_size
/
1e3
#TODO
def fill_log_file(self, file, reverse = True):
    """Return the raw ``git log`` output for ``file``.

    The command is run from the directory that contains the workflow's main
    Nextflow file (the directory part of
    ``self.workflow.nextflow_file.get_file_address()``).

    Parameters:
        file: path of the file whose history is requested, as understood by
            git when run from that directory.
        reverse: when true, ``--reverse`` is passed to ``git log`` so the
            oldest commit comes first.

    Returns:
        The text written by ``git log`` on stdout, or ``""`` when git could
        not be started (missing executable, missing directory, invalid
        argument). A failing git command (e.g. not a repository) simply
        yields whatever it printed on stdout, normally nothing.
    """
    # Local import so this method does not rely on a file-level import.
    import subprocess

    main_file = self.workflow.nextflow_file.get_file_address()
    # An address without any directory part means "current directory".
    repo_dir = os.path.dirname(main_file) or "."

    command = ["git", "log"]
    if reverse:
        command.append("--reverse")
    # "--" stops git from reading a path that starts with "-" as an option.
    command += ["--", str(file)]

    try:
        # Argument list + cwd=: no shell quoting issues, no temporary file in
        # the repository and no change of the process-wide working directory.
        result = subprocess.run(
            command,
            cwd=repo_dir,
            stdout=subprocess.PIPE,
            encoding="utf-8",
            errors="replace",
        )
    except (OSError, ValueError):
        # Best effort, as before: a missing log is reported as an empty one.
        return ""
    return result.stdout
def
get_dateCreated
(
self
,
file
):
return
"
TODO
"
info
=
self
.
fill_log_file
(
file
,
reverse
=
True
)
for
match
in
re
.
finditer
(
r
"
Date: +\w+ +(\w+) +(\d+) +\d+:\d+:\d+ +(\d+)
"
,
info
):
month
=
constant
.
month_mapping
[
match
.
group
(
1
)]
day
=
match
.
group
(
2
)
year
=
match
.
group
(
3
)
return
f
"
{
year
}
-
{
month
}
-
{
day
}
"
return
None
#TODO
def
get_dateModified
(
self
,
file
):
return
"
TODO
"
info
=
self
.
fill_log_file
(
file
,
reverse
=
False
)
for
match
in
re
.
finditer
(
r
"
Date: +\w+ +(\w+) +(\d+) +\d+:\d+:\d+ +(\d+)
"
,
info
):
month
=
constant
.
month_mapping
[
match
.
group
(
1
)]
day
=
match
.
group
(
2
)
year
=
match
.
group
(
3
)
return
f
"
{
year
}
-
{
month
}
-
{
day
}
"
return
None
#TODO
def
get_url
(
self
,
file
):
return
"
TODO
"
if
(
self
.
workflow
.
dico
!=
{}):
return
f
"
https://github.com/
{
self
.
workflow
.
get_address
()
}
/blob/main/
{
file
}
"
return
None
#TODO
def
get_creators
(
self
,
file
):
return
[{
"
@id
"
:
"
George
"
}]
#TODO
info
=
self
.
fill_log_file
(
file
,
reverse
=
True
)
for
match
in
re
.
finditer
(
r
"
Author: ([ \w-]+) <([^>]+)>
"
,
info
):
return
[{
"
@id
"
:
match
.
group
(
1
)}]
return
None
def
get_types
(
self
,
file
):
types
=
[
"
File
"
]
if
(
file
[
-
3
:]
==
"
.nf
"
):
...
...
@@ -101,10 +134,10 @@ class RO_Crate:
dico
[
"
@type
"
]
=
self
.
get_types
(
file
)
dico
[
"
programmingLanguage
"
]
=
{
"
@id
"
:
self
.
get_programming_language
(
file
)}
dico
[
"
contentSize
"
]
=
self
.
get_contentSize
(
file
)
dico
[
"
dateCreated
"
]
=
self
.
get_dateCreated
(
file
)
dico
[
"
dateModified
"
]
=
self
.
get_dateModified
(
file
)
dico
[
"
url
"
]
=
self
.
get_url
(
file
)
creators
=
self
.
get_creators
(
file
)
dico
[
"
dateCreated
"
]
=
self
.
get_dateCreated
(
key
)
dico
[
"
dateModified
"
]
=
self
.
get_dateModified
(
key
)
dico
[
"
url
"
]
=
self
.
get_url
(
key
)
creators
=
self
.
get_creators
(
key
)
dico
[
"
creator
"
]
=
[]
for
creator
in
creators
:
dico
[
"
creator
"
].
append
({
"
@id
"
:
creator
[
"
@id
"
]})
...
...
This diff is collapsed.
Click to expand it.
src/workflow.py
+
92
−
22
View file @
48d0b921
...
...
@@ -5,6 +5,7 @@ from . import constant
import
os
import
re
import
json
class
Workflow
:
...
...
@@ -25,17 +26,52 @@ class Workflow:
self
.
keywords
=
keywords
self
.
producer
=
producer
self
.
publisher
=
publisher
self
.
log
=
None
self
.
log
=
""
self
.
fill_log
()
self
.
address
=
""
self
.
set_address
()
self
.
dico
=
{}
self
.
get_dico
()
def fill_log(self):
    """Store the repository's ``git log --reverse`` output in ``self.log``.

    The command is run once, from the directory that contains the main
    Nextflow file (the directory part of
    ``self.nextflow_file.get_file_address()``). When git cannot be started
    at all, ``self.log`` is left untouched; when git runs but fails,
    ``self.log`` receives whatever it printed on stdout (normally nothing).
    """
    # Local import so this method does not rely on a file-level import.
    import subprocess

    # An address without any directory part means "current directory".
    repo_dir = os.path.dirname(self.nextflow_file.get_file_address()) or "."
    try:
        # cwd= replaces the chdir/restore dance, and capturing stdout replaces
        # the temporary file that used to be written into the repository.
        result = subprocess.run(
            ["git", "log", "--reverse"],
            cwd=repo_dir,
            stdout=subprocess.PIPE,
            encoding="utf-8",
            errors="replace",
        )
    except (OSError, ValueError):
        # Best effort: without git (or without the directory) keep the
        # current value of self.log.
        return
    self.log = result.stdout
def get_address(self):
    """Return the value currently stored in ``self.address``."""
    address = self.address
    return address
def set_address(self):
    """Set ``self.address`` from the ``origin`` remote of the workflow's repository.

    ``git ls-remote --get-url origin`` is run from the directory that contains
    the main Nextflow file. Its output, stripped of surrounding whitespace,
    is stored in ``self.address``. If that value is a GitHub URL it is then
    reduced to the ``owner/repository`` part. Accepted forms are HTTPS and SSH
    remotes, with or without the ``.git`` suffix, including repository names
    that contain dots.

    When git cannot be started, ``self.address`` keeps its current value
    (which is still reduced if it already is a GitHub URL). A value that is
    not a GitHub URL is left as is.
    """
    # Local import so this method does not rely on a file-level import.
    import subprocess

    # An address without any directory part means "current directory".
    repo_dir = os.path.dirname(self.nextflow_file.get_file_address()) or "."
    try:
        # cwd= avoids changing the process-wide working directory and
        # capturing stdout avoids a temporary file inside the repository.
        result = subprocess.run(
            ["git", "ls-remote", "--get-url", "origin"],
            cwd=repo_dir,
            stdout=subprocess.PIPE,
            encoding="utf-8",
            errors="replace",
        )
    except (OSError, ValueError):
        # Best effort: git or the directory is unavailable.
        pass
    else:
        # strip() drops the trailing newline printed by git.
        self.address = result.stdout.strip()

    # owner/repo is taken from the end of the URL; the lazy repository match
    # lets an optional ".git" suffix be dropped while keeping inner dots.
    github_slug = re.search(
        r"(?:^|[/@])(?:www\.)?github\.com[:/]([^/\s]+/[^/\s]+?)(?:\.git)?/?$",
        self.address,
    )
    if github_slug:
        self.address = github_slug.group(1)
def get_dico(self):
    """Fill ``self.dico`` with the GitHub API description of the repository.

    Despite its name this method returns nothing: it requests
    ``https://api.github.com/repos/<self.address>`` and stores the decoded
    JSON document in ``self.dico``.

    This is best effort. When ``self.address`` is empty, the request fails
    (no network, unknown repository, rate limit, ...) or the answer is not
    valid JSON, ``self.dico`` keeps its current value.
    """
    # Local imports so this method does not rely on file-level imports.
    import http.client
    import urllib.parse
    import urllib.request

    address = (self.address or "").strip()
    if not address:
        # Nothing to look up; avoid a pointless network round trip.
        return

    # Quote the address so stray characters cannot alter the request
    # (it used to be pasted verbatim into a shell command line).
    url = "https://api.github.com/repos/" + urllib.parse.quote(address, safe="/")
    try:
        # The timeout keeps construction of a workflow from hanging forever.
        with urllib.request.urlopen(url, timeout=10) as response:
            self.dico = json.load(response)
    except (OSError, ValueError, http.client.HTTPException):
        # URLError/HTTPError are OSError subclasses and JSON or decoding
        # errors are ValueError subclasses; all of them mean "no metadata".
        pass
...
...
@@ -47,6 +83,7 @@ class Workflow:
return
self
.
name
#Format yyyy-mm-dd
#Here I return the first commit date
def
get_datePublished
(
self
):
if
(
self
.
datePublished
==
None
):
for
match
in
re
.
finditer
(
r
"
Date: +\w+ +(\w+) +(\d+) +\d+:\d+:\d+ +(\d+)
"
,
self
.
log
):
...
...
@@ -58,18 +95,32 @@ class Workflow:
return
self
.
datePublished
#TODO
def
get_description
(
self
):
return
"
TODO
"
if
(
self
.
description
==
None
):
try
:
res
=
self
.
dico
[
"
description
"
]
except
:
res
=
None
return
res
else
:
return
self
.
description
def
get_main_file
(
self
):
return
self
.
nextflow_file
.
get_file_address
().
split
(
"
/
"
)[
-
1
]
#TODO
def
get_license
(
self
):
return
"
TODO
"
if
(
self
.
license
==
None
):
try
:
res
=
self
.
dico
[
"
license
"
][
"
key
"
]
except
:
res
=
None
return
res
else
:
return
self
.
license
#TODO
def
get_creativeWorkStatus
(
self
):
...
...
@@ -78,32 +129,51 @@ class Workflow:
#TODO
def
get_version
(
self
):
return
"
TODO
"
#TODO -> this doesn't work perfectly
def
get_authors
(
self
):
if
(
self
.
authors
==
None
):
authors
=
{}
for
match
in
re
.
finditer
(
r
"
Author: (
\w+ +\w
+) <([^>]+)>
"
,
self
.
log
):
for
match
in
re
.
finditer
(
r
"
Author: (
[ \w-]
+) <([^>]+)>
"
,
self
.
log
):
authors
[
match
.
group
(
2
)]
=
match
.
group
(
1
)
tab
=
[]
for
author
in
authors
:
tab
.
append
({
"
@id
"
:
author
,
"
name
"
:
authors
[
author
]})
#tab.append({"@id":author, "name":authors[author]})
tab
.
append
({
"
@id
"
:
authors
[
author
],
"
email
"
:
author
})
return
tab
else
:
return
self
.
authors
#TODO
#Need to follow this format : "rna-seq, nextflow, bioinformatics, reproducibility, workflow, reproducible-research, bioinformatics-pipeline"
def
get_keywords
(
self
):
return
"
TODO
"
if
(
self
.
keywords
==
None
):
try
:
res
=
"
,
"
.
join
(
self
.
dico
[
"
topics
"
])
except
:
res
=
None
return
res
else
:
return
self
.
keywords
#TODO
def
get_producer
(
self
):
return
"
TODO
"
if
(
self
.
producer
==
None
):
try
:
res
=
{
"
@id
"
:
self
.
dico
[
"
owner
"
][
"
login
"
]}
except
:
res
=
None
return
res
else
:
return
self
.
producer
#TODO
def
get_publisher
(
self
):
return
"
TODO
"
if
(
self
.
dico
!=
{}):
return
"
https://github.com/
"
else
:
return
None
def
get_output_dir
(
self
):
return
self
.
nextflow_file
.
get_output_dir
()
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment