"print(f\"The crawler found {len(dict)} Nextflow workflows with at least Nextflow file at the root.\")"
"print(f\"The crawler found {len(dict)} Nextflow workflows with at least Nextflow file at the root.\")"
]
]
},
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"At the time of writing there are 52 Nextflow workflows integrated on WorkflowHub."
]
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 4,
"execution_count": 4,
"metadata": {},
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hence, at least 93.1% of Nextflow workflows found on Github are not integrated into WorkflowHub\n"
]
}
],
"source": [
"nb_wfhub = 52\n",
"print(f\"Hence, at least {(len(dict)-nb_wfhub)/len(dict)*100:.1f}% of Nextflow workflows found on Github are not integrated into WorkflowHub\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"outputs": [],
"source": [
"source": [
"tab = []\n",
"tab = []\n",
...
@@ -75,7 +100,7 @@
...
@@ -75,7 +100,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 5,
"execution_count": 6,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
...
@@ -124,7 +149,7 @@
...
@@ -124,7 +149,7 @@
},
},
{
{
"cell_type": "code",
"cell_type": "code",
"execution_count": 6,
"execution_count": 7,
"metadata": {},
"metadata": {},
"outputs": [
"outputs": [
{
{
...
...
%% Cell type:markdown id: tags:
%% Cell type:markdown id: tags:
# Analysis of results of crawler
# Analysis of results of crawler
%% Cell type:code id: tags:
%% Cell type:code id: tags:
``` python
``` python
# Plotting and numerical libraries used by this notebook.
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

# Apply the seaborn "darkgrid" style with the "Accent" colour palette.
sns.set(style='darkgrid', palette="Accent")

# "taille" is French for "size"; presumably the (width, height) handed to
# figure-creating calls elsewhere in the notebook — TODO confirm.
taille = (9, 5)
```
```
%% Output
%% Output
/usr/lib/python3/dist-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.17.3 and <1.25.0 is required for this version of SciPy (detected version 1.26.1
/usr/lib/python3/dist-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.17.3 and <1.25.0 is required for this version of SciPy (detected version 1.26.1
warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
%% Cell type:code id: tags:
%% Cell type:code id: tags:
``` python
``` python
import json
import pandas as pd

# Load the crawler results. The encoding is passed explicitly so the read does
# not depend on the platform's default text encoding.
with open('wf_crawl_nextflow.json', encoding='utf-8') as json_file:
    # NOTE(review): `dict` shadows the builtin type; the name is kept because
    # other cells read the crawl results through it.
    dict = json.load(json_file)

# Remove the "last_date" entry so that it is not counted by `len(dict)`
# (presumably crawl metadata rather than a workflow — TODO confirm).
# Binding the popped value to `_` keeps it out of the cell output.
_ = dict.pop("last_date")
```
```
%% Cell type:code id: tags:
%% Cell type:code id: tags:
``` python
``` python
# Report the number of entries in the crawl results as the workflow count.
print(f"The crawler found {len(dict)} Nextflow workflows with at least one Nextflow file at the root.")
```
```
%% Output
%% Output
The crawler found 752 Nextflow workflows with at least Nextflow file at the root.
The crawler found 752 Nextflow workflows with at least Nextflow file at the root.
%% Cell type:markdown id: tags:
At the time of writing there are 52 Nextflow workflows integrated on WorkflowHub.
%% Cell type:code id: tags:
``` python
# Nextflow workflows integrated on WorkflowHub at the time of writing.
nb_wfhub = 52

# Share (in percent) of crawled workflows that are not accounted for by the
# WorkflowHub count.
nb_crawled = len(dict)
share_missing = (nb_crawled - nb_wfhub) / nb_crawled * 100
print(f"Hence, at least {share_missing:.1f}% of Nextflow workflows found on Github are not integrated into WorkflowHub")
```
%% Output
Hence, at least 93.1% of Nextflow workflows found on Github are not integrated into WorkflowHub