From bbc8f88dca7d84fba5b26b31c6c9ede4712f2fec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7oise=20Conil?= <francoise.conil@insa-lyon.fr>
Date: Mon, 29 Jan 2024 18:09:18 +0100
Subject: [PATCH] Trying to understand the type of PyPI packages

This is an additional resource to the pyproject.toml analysis.
---
 README.md                                  | 663 +++++++++++++++++++--
 create-table-project-stats.sql             |  73 +++
 extract-project-releases-2018-and-later.py |   9 +
 extract-pyproject-releases.sql             |  21 +
 4 files changed, 710 insertions(+), 56 deletions(-)
 create mode 100644 create-table-project-stats.sql
 create mode 100644 extract-project-releases-2018-and-later.py
 create mode 100644 extract-pyproject-releases.sql

diff --git a/README.md b/README.md
index e698083..c7ff4b1 100644
--- a/README.md
+++ b/README.md
@@ -1,93 +1,644 @@
 # Get PyPI packages general
 
+This code is based on `fconil-small-programs/packaging/get-pypi-packages-backends`.
 
+This code extracts some project metadata from the same parquet files (see
+`extract-pyproject-releases.sql`), without filtering on projects that contain a
+`pyproject.toml` file.
 
-## Getting started
+The aim is to compute additional statistics.
 
-To make it easy for you to get started with GitLab, here's a list of recommended next steps.
+## Building extract-project-releases-2018-and-later.db
 
-Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)!
+### Extract PyPI metadata
 
-## Add your files
+See :
 
-- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files
-- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command:
+- `extract-project-releases-2018-and-later.py`
 
+This script executes the following [duckdb query](https://duckdb.org/docs/sql/introduction)
+on [PyPI metadata parquet files](https://py-code.org/datasets#metadata) to
+generate `extract-project-releases-2018-and-later.csv` file.
+
+```duckdb
+SELECT
+  project_name,
+  project_version,
+  project_release,
+  suffix(project_release, '.whl') AS wheel,
+  suffix(project_release, '.tar.gz') AS source,
+  max(uploaded_on) AS max_uploaded_on,
+  date_part('year', max(uploaded_on)) AS max_year,
+  list(DISTINCT uploaded_on)
+FROM '*.parquet'
+WHERE (date_part('year', uploaded_on) >= '2018') AND skip_reason = ''
+GROUP BY project_name, project_version, project_release
+```
+
+### CSV to sqlite3
+
+An [SQLite](https://sqlite.org/index.html) database is created with this
+`extract-project-releases-2018-and-later.csv` file.
+
+```bash
+$ < extract-project-releases-2018-and-later.csv sed '1d' > extract-project-releases-2018-and-later_no-header.csv
+```
+
+```sqlite
+$ sqlite3 extract-project-releases-2018-and-later.db
+SQLite version 3.37.2 2022-01-06 13:25:41
+Enter ".help" for usage hints.
+
+sqlite> .read create-pyproject-table-for-releases.sql
+
+sqlite> .schema
+CREATE TABLE pyprojects (
+  project_name TEXT,
+  project_version TEXT,
+  project_release TEXT,
+  wheel TEXT,
+  source TEXT,
+  max_uploaded_on TEXT,
+  max_year INTEGER,
+  list_uploaded_on TEXT
+);
+
+sqlite> .mode csv
+sqlite> .import /home/fconil/Progs/python/duckdb/extract-project-releases-2018-and-later_no-header.csv pyprojects
+
+sqlite> .mode table
+sqlite> select * from pyprojects limit 10;
++--------------------------------+-----------------+----------------------------------------------------+-------+--------+-------------------------+----------+---------------------------+
+|          project_name          | project_version |                  project_release                   | wheel | source |     max_uploaded_on     | max_year |     list_uploaded_on      |
++--------------------------------+-----------------+----------------------------------------------------+-------+--------+-------------------------+----------+---------------------------+
+| cmake                          | 3.15.3.post1    | cmake-3.15.3.post1-py2-none-win32.whl              | true  | false  | 2020-04-08 05:34:50.424 | 2020     | [2020-04-08 05:34:50.424] |
+| cmake                          | 3.14.4.post1    | cmake-3.14.4.post1-py3-none-win32.whl              | true  | false  | 2020-04-08 05:30:20.525 | 2020     | [2020-04-08 05:30:20.525] |
+| cmake                          | 3.14.4.post1    | cmake-3.14.4.post1-py2-none-macosx_10_6_x86_64.whl | true  | false  | 2020-04-08 05:24:05.195 | 2020     | [2020-04-08 05:24:05.195] |
+| cmake                          | 3.14.3.post1    | cmake-3.14.3.post1-py3-none-manylinux1_x86_64.whl  | true  | false  | 2020-04-08 05:26:23.496 | 2020     | [2020-04-08 05:26:23.496] |
+| cmake                          | 3.14.3.post1    | cmake-3.14.3.post1-py2-none-manylinux1_x86_64.whl  | true  | false  | 2020-04-08 05:26:18.667 | 2020     | [2020-04-08 05:26:18.667] |
+| cmake                          | 3.14.3.post1    | cmake-3.14.3.post1-py2-none-manylinux1_i686.whl    | true  | false  | 2020-04-08 05:26:16.195 | 2020     | [2020-04-08 05:26:16.195] |
+| cluster-over-sampling          | 0.2.0           | cluster-over-sampling-0.2.0.tar.gz                 | false | true   | 2020-04-07 23:46:21.648 | 2020     | [2020-04-07 23:46:21.648] |
+| cloverwallpaper                | 0.1             | cloverwallpaper-0.1.tar.gz                         | false | true   | 2020-04-14 16:31:42.203 | 2020     | [2020-04-14 16:31:42.203] |
+| cloudwright-todoist            | 0.0.0           | cloudwright_todoist-0.0.0-py3-none-any.whl         | true  | false  | 2020-04-14 20:36:27.243 | 2020     | [2020-04-14 20:36:27.243] |
+| cloudutils                     | 1.2.4           | cloudutils-1.2.4-py3-none-any.whl                  | true  | false  | 2020-04-03 11:14:18.819 | 2020     | [2020-04-03 11:14:18.819] |
+| cloudutils                     | 1.2.4           | cloudutils-1.2.4-py3-none-any.whl                  | true  | false  | 2020-04-03 11:14:18.819 | 2020     | [2020-04-03 11:14:18.819] |
++--------------------------------+-----------------+----------------------------------------------------+-------+--------+-------------------------+----------+---------------------------+
 ```
-cd existing_repo
-git remote add origin https://gitlab.liris.cnrs.fr/fconil-small-programs/packaging/get-pypi-packages-general.git
-git branch -M main
-git push -uf origin main
+
+## Create new tables to analyze the data
+
+### releases_types
+
+Find, for each project:
+
+- how many wheels (there are a few `.egg` packages)
+- how many source packages with `.tar.gz` format (there are a few `.zip` and `.bz2`)
+
+have been uploaded to [PyPI](https://pypi.org/)
+
+```sqlite
+sqlite> CREATE TABLE releases_types (
+  project_name TEXT,
+  wheel_true_count INTEGER,
+  source_true_count INTEGER
+);
+
+sqlite> INSERT INTO releases_types
+SELECT
+  project_name,
+  SUM(CASE WHEN wheel = 'true' THEN 1 ELSE 0 END) AS wheel_true_count,
+  SUM(CASE WHEN source = 'true' THEN 1 ELSE 0 END) AS source_true_count
+FROM pyprojects
+GROUP BY project_name;
+
+sqlite> select * from releases_types order by wheel_true_count desc limit 10;
++-----------------------+------------------+-------------------+
+|     project_name      | wheel_true_count | source_true_count |
++-----------------------+------------------+-------------------+
+| pyagrum-nightly       | 19792            | 0                 |
+| ddtrace               | 19464            | 392               |
+| lalsuite              | 14047            | 0                 |
+| zenroom               | 10617            | 121               |
+| tfa-nightly           | 8625             | 0                 |
+| tensorflow-io-nightly | 7855             | 0                 |
+| dependency-injector   | 7778             | 192               |
+| rapidfuzz             | 7750             | 147               |
+| pydantic-core         | 7360             | 89                |
+| grpcio-tools          | 6350             | 167               |
++-----------------------+------------------+-------------------+
+
+sqlite> select * from releases_types order by source_true_count desc limit 10;
++-------------------+------------------+-------------------+
+|   project_name    | wheel_true_count | source_true_count |
++-------------------+------------------+-------------------+
+| spanishconjugator | 4596             | 4594              |
+| teamhack-nmap     | 4141             | 4140              |
+| checkov           | 2751             | 2751              |
+| moto              | 2748             | 2750              |
+| jina              | 1364             | 2394              |
+| bridgecrew        | 2325             | 2324              |
+| lusid-sdk         | 2308             | 2305              |
+| python-must       | 2298             | 2298              |
+| lusid-sdk-preview | 2272             | 2270              |
+| lbt-dragonfly     | 2212             | 2206              |
++-------------------+------------------+-------------------+
+
+sqlite> select count(project_name) FROM releases_types;
++---------------------+
+| count(project_name) |
++---------------------+
+| 410944              |
++---------------------+
 ```
 
-## Integrate with your tools
+How many projects have no source package (`.tar.gz` format) at all?
+
+```sqlite
+sqlite> select count(project_name) from releases_types where source_true_count=0;
++---------------------+
+| count(project_name) |
++---------------------+
+| 43975               |
++---------------------+
 
-- [ ] [Set up project integrations](https://gitlab.liris.cnrs.fr/fconil-small-programs/packaging/get-pypi-packages-general/-/settings/integrations)
+sqlite> select 43975 * 100.0 / 410944;
+10.7009714218969
+```
 
-## Collaborate with your team
+**So ~ 11 % of the projects have no source package on PyPI (since 2018)**.
 
-- [ ] [Invite team members and collaborators](https://docs.gitlab.com/ee/user/project/members/)
-- [ ] [Create a new merge request](https://docs.gitlab.com/ee/user/project/merge_requests/creating_merge_requests.html)
-- [ ] [Automatically close issues from merge requests](https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#closing-issues-automatically)
-- [ ] [Enable merge request approvals](https://docs.gitlab.com/ee/user/project/merge_requests/approvals/)
-- [ ] [Set auto-merge](https://docs.gitlab.com/ee/user/project/merge_requests/merge_when_pipeline_succeeds.html)
+How many projects have no wheel package (`.whl` format) at all?
 
-## Test and Deploy
+```sqlite
+sqlite> select count(project_name) from releases_types where wheel_true_count=0;
++---------------------+
+| count(project_name) |
++---------------------+
+| 88407               |
++---------------------+
 
-Use the built-in continuous integration in GitLab.
+sqlite> select 88407 * 100.0 / 410944;
+21.5131502102476
+```
 
-- [ ] [Get started with GitLab CI/CD](https://docs.gitlab.com/ee/ci/quick_start/index.html)
-- [ ] [Analyze your code for known vulnerabilities with Static Application Security Testing (SAST)](https://docs.gitlab.com/ee/user/application_security/sast/)
-- [ ] [Deploy to Kubernetes, Amazon EC2, or Amazon ECS using Auto Deploy](https://docs.gitlab.com/ee/topics/autodevops/requirements.html)
-- [ ] [Use pull-based deployments for improved Kubernetes management](https://docs.gitlab.com/ee/user/clusters/agent/)
-- [ ] [Set up protected environments](https://docs.gitlab.com/ee/ci/environments/protected_environments.html)
+**So ~ 21.5 % of the projects have no wheel package on PyPI (since 2018)**.
+
+### version_releases_types
+
+A global number for each project is quite vague, let's try to get some data
+with version information.
+
+```sqlite
+sqlite> CREATE TABLE version_releases_types (
+  project_name TEXT,
+  project_version TEXT,
+  wheel_true_count INTEGER,
+  source_true_count INTEGER
+);
+
+sqlite> INSERT INTO version_releases_types
+SELECT
+  project_name,
+  project_version,
+  SUM(CASE WHEN wheel = 'true' THEN 1 ELSE 0 END) AS wheel_true_count,
+  SUM(CASE WHEN source = 'true' THEN 1 ELSE 0 END) AS source_true_count
+FROM pyprojects
+GROUP BY project_name, project_version;
+
+sqlite> SELECT project_name, COUNT(project_version) AS nb_versions, SUM(wheel_true_count) AS nb_wheels, SUM(source_true_count) AS nb_source FROM version_releases_types GROUP BY project_name ORDER BY nb_versions DESC LIMIT 100;
++---------------------------------------+-------------+-----------+-----------+
+|             project_name              | nb_versions | nb_wheels | nb_source |
++---------------------------------------+-------------+-----------+-----------+
+| spanishconjugator                     | 4595        | 4596      | 4594      |
+| teamhack-nmap                         | 4141        | 4141      | 4140      |
+| kcli                                  | 3164        | 3164      | 1         |
+| pulumi                                | 3158        | 3158      | 0         |
+| moto                                  | 2753        | 2748      | 2750      |
+| ...                                   | ...         | ...       | ...       |
++---------------------------------------+-------------+-----------+-----------+
+```
+
+### version_releases_stats
+
+For each project, how many versions, how many wheels and how many source
+packages have been uploaded to PyPI? These data are kept in the
+`version_releases_stats` table.
+
+```sqlite
+sqlite> CREATE TABLE version_releases_stats (
+  project_name TEXT,
+  nb_versions INTEGER,
+  nb_wheels INTEGER,
+  nb_sources INTEGER
+);
+
+sqlite> INSERT INTO version_releases_stats
+SELECT
+  project_name,
+  COUNT(project_version) AS nb_versions,
+  SUM(wheel_true_count) AS nb_wheels,
+  SUM(source_true_count) AS nb_source
+FROM version_releases_types
+GROUP BY project_name;
+```
 
-***
+## New analysis
 
-# Editing this README
+The first created table, `pyprojects`, holds 8 742 125 `project_release`
+(source and binary packages) from 410 944 `project_name`.
 
-When you're ready to make this README your own, just edit this file and use the handy template below (or feel free to structure it however you want - this is just a starting point!). Thanks to [makeareadme.com](https://www.makeareadme.com/) for this template.
+There are :
 
-## Suggestions for a good README
+- 4 900 410 releases `.whl`, being 56.06 % of the uploaded releases
+- 3 760 245 releases `.tar.gz`, being 43.01 % of the uploaded releases
 
-Every project is different, so consider which of these sections apply to yours. The sections used in the template are suggestions for most open source projects. Also keep in mind that while a README can be too long and detailed, too long is better than too short. If you think your README is too long, consider utilizing another form of documentation rather than cutting out information.
+It is logical to have more binary packages than source packages as multiple
+binary packages must be created for one project version, depending on
+platforms, ...
 
-## Name
-Choose a self-explaining name for your project.
+```sqlite
+sqlite> select count(project_release) from pyprojects;
+8742125
 
-## Description
-Let people know what your project can do specifically. Provide context and add a link to any reference visitors might be unfamiliar with. A list of Features or a Background subsection can also be added here. If there are alternatives to your project, this is a good place to list differentiating factors.
+sqlite> select count(distinct project_name) from pyprojects;
+410944
 
-## Badges
-On some READMEs, you may see small images that convey metadata, such as whether or not all the tests are passing for the project. You can use Shields to add some to your README. Many services also have instructions for adding a badge.
+sqlite> select count(project_release) from pyprojects where project_release REGEXP '.whl$';
+4900410
 
-## Visuals
-Depending on what you are making, it can be a good idea to include screenshots or even a video (you'll frequently see GIFs rather than actual videos). Tools like ttygif can help, but check out Asciinema for a more sophisticated method.
+sqlite> select count(project_release) from pyprojects where project_release REGEXP '.tar.gz$';
+3760245
 
-## Installation
-Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people to using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection.
+sqlite> select 4900410 + 3760245;
+8660655
 
-## Usage
-Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README.
+sqlite> select 4900410 + 3760245 + 81470;
+8742125
 
-## Support
-Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc.
+sqlite> select cast(4900410 as real) * 100 / 8742125;
+56.0551353360882
+
+sqlite> select cast(3760245 as real) * 100 / 8742125;
+43.0129402176244
+```
+
+### Releases != `.whl`, `.tar.gz`
+
+There are 81 470 releases that do not end with `.whl` or `.tar.gz`; this is
+0.93 % of the total uploaded releases.
+
+```sqlite
+sqlite> select count(project_release) from pyprojects;
+8742125
+
+sqlite> select count(project_release) from pyprojects where wheel = 'false' and source = 'false';
+81470
+
+sqlite> select 81470 * 100.0 / 8742125;
+0.931924446287373
+```
+
+A simple query shows 3 other extension types.
+
+```sqlite
+sqlite> select distinct(substr(project_release, length(project_release) - 3, length(project_release))) from pyprojects where wheel = 'false' and source = 'false';
+.egg
+.zip
+.bz2
+```
+
+These seem to be the only extensions other than `.whl` and `.tar.gz`.
+
+```sqlite
+sqlite> select count(project_release) from pyprojects where project_release REGEXP '.egg$';
+61205
+sqlite> select count(project_release) from pyprojects where project_release REGEXP '.zip$';
+20120
+sqlite> select count(project_release) from pyprojects where project_release REGEXP '.bz2$';
+145
+sqlite> select 61205 + 20120 + 145;
+81470
+```
 
-## Roadmap
-If you have ideas for releases in the future, it is a good idea to list them in the README.
+### Table releases_types
 
-## Contributing
-State if you are open to contributions and what your requirements are for accepting them.
+La table `releases_types` recense, pour chaque package (voir la construction et
+le remplissage ci-dessus) :
 
-For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self.
+The `releases_types` table indicates, for each project  :
 
-You can also document commands to lint the code or run tests. These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser.
+- the total number of wheel packages (there are a few `.egg`)
+- the total number of source packages with `.tar.gz` format (there are a few
+  `.zip` and `.bz2` packages)
 
-## Authors and acknowledgment
-Show your appreciation to those who have contributed to the project.
+see table creation above.
 
-## License
-For open source projects, say how it is licensed.
+For the 410 944 projects :
+
+- 88 407, being 21.51 %, have no wheel package (there are 3.04 % `.egg`)
+- 43 975, being 10.70 %, have no source package with `.tar.gz` format (there
+  are 0.77 % `.zip` / `.bz2`)
+
+So ~ 90 % of the projects have at least one source package, but that may be one
+source package for many versions.
+
+```sqlite
+sqlite> select count(project_name) FROM releases_types;
+410944
+
+sqlite> select count(project_name) from releases_types where wheel_true_count=0;
+88407
+sqlite> select count(project_name) from releases_types where source_true_count=0;
+43975
+
+sqlite> select count(distinct project_name) from pyprojects where project_release REGEXP '.egg$';
+12501
+sqlite> select cast(12501 as real) * 100 / 410944;
+3.04202032393708
+
+sqlite> select count(distinct project_name) from pyprojects where project_release REGEXP '.zip$';
+3143
+sqlite> select count(distinct project_name) from pyprojects where project_release REGEXP '.bz2$';
+39
+sqlite> select (3143 + 39) * 100.0 / 410944;
+0.774314748481545
+```
+
+### Tables version_releases_types, version_releases_stats
+
+Those tables were created to refine the analysis of the number of binary /
+source packages for each project version.
+
+The total number of projects in those tables is the same as in `pyprojects`
+table.
+
+```sqlite
+sqlite> select count(distinct project_name) from version_releases_types;
+410944
+
+sqlite> select count(project_name) from version_releases_stats;
+410944
+```
+
+For some projects, the number of versions uploaded since 2018 can be surprising:
+
+- 4595 => more than 2 versions per day, for 6 years
+- 2325 => more than one version per day, for 6 years
+
+```sqlite
+sqlite> select project_name, count(distinct project_version) as nb_versions from pyprojects group by project_name order by nb_versions desc limit 10;
++-------------------------+-------------+
+|      project_name       | nb_versions |
++-------------------------+-------------+
+| spanishconjugator       | 4595        |
+| teamhack-nmap           | 4141        |
+| kcli                    | 3164        |
+| pulumi                  | 3158        |
+| moto                    | 2753        |
+| checkov                 | 2751        |
+| assisted-service-client | 2486        |
+| jina                    | 2445        |
+| tfa-nightly             | 2343        |
+| bridgecrew              | 2325        |
++-------------------------+-------------+
+
+sqlite> select project_name, count(distinct project_release) as nb_releases from pyprojects group by project_name order by nb_releases desc limit 10;
++-----------------------+-------------+
+|     project_name      | nb_releases |
++-----------------------+-------------+
+| ddtrace               | 19856       |
+| pyagrum-nightly       | 19792       |
+| lalsuite              | 14047       |
+| zenroom               | 10738       |
+| spanishconjugator     | 9190        |
+| tfa-nightly           | 8625        |
+| teamhack-nmap         | 8281        |
+| dependency-injector   | 7970        |
+| rapidfuzz             | 7897        |
+| tensorflow-io-nightly | 7855        |
++-----------------------+-------------+
+
+sqlite> select project_name, count(distinct project_version) as nb_versions, count(distinct project_release) as nb_releases from pyprojects group by project_name order by nb_releases desc limit 10;
++-----------------------+-------------+-------------+
+|     project_name      | nb_versions | nb_releases |
++-----------------------+-------------+-------------+
+| ddtrace               | 393         | 19856       |
+| pyagrum-nightly       | 965         | 19792       |
+| lalsuite              | 1461        | 14047       |
+| zenroom               | 969         | 10738       |
+| spanishconjugator     | 4595        | 9190        |
+| tfa-nightly           | 2343        | 8625        |
+| teamhack-nmap         | 4141        | 8281        |
+| dependency-injector   | 192         | 7970        |
+| rapidfuzz             | 148         | 7897        |
+| tensorflow-io-nightly | 902         | 7855        |
++-----------------------+-------------+-------------+
+
+sqlite> select project_name, count(distinct project_version) as nb_versions, count(distinct project_release) as nb_releases from pyprojects group by project_name order by nb_versions desc limit 10;
++-------------------------+-------------+-------------+
+|      project_name       | nb_versions | nb_releases |
++-------------------------+-------------+-------------+
+| spanishconjugator       | 4595        | 9190        |
+| teamhack-nmap           | 4141        | 8281        |
+| kcli                    | 3164        | 3165        |
+| pulumi                  | 3158        | 3158        |
+| moto                    | 2753        | 5498        |
+| checkov                 | 2751        | 5502        |
+| assisted-service-client | 2486        | 3719        |
+| jina                    | 2445        | 3758        |
+| tfa-nightly             | 2343        | 8625        |
+| bridgecrew              | 2325        | 4649        |
++-------------------------+-------------+-------------+
+```
+
+I want to find the number of projects that have fewer than 10 versions on PyPI,
+those that have between 10 and 15 versions, ... until I reach the maximum
+version count, 4595.
+
+For these values or intervals, I compute some statistics on the number of wheel
+packages and the number of source packages.
+
+```sqlite
+sqlite> SELECT value, avg(nb_versions) AS avg_versions, COUNT(project_name) AS nb_projects, avg(nb_wheels) AS avg_wheels, min(nb_wheels) AS min_wheels, max(nb_wheels) as max_wheels, AVG(nb_sources) as avg_source, min(nb_sources) AS min_sources, max(nb_sources) AS max_sources FROM version_releases_stats, generate_series(0, 10, 1) WHERE nb_versions >= value AND nb_versions < (value + 1) GROUP BY value;
++-------+--------------+-------------+-------------------+------------+------------+-------------------+-------------+-------------+
+| value | avg_versions | nb_projects |    avg_wheels     | min_wheels | max_wheels |    avg_source     | min_sources | max_sources |
++-------+--------------+-------------+-------------------+------------+------------+-------------------+-------------+-------------+
+| 1     | 1.0          | 121283      | 0.828632207316772 | 0          | 125        | 0.889918620086904 | 0           | 2           |
+| 2     | 2.0          | 56766       | 1.87032730860022  | 0          | 198        | 1.75827079589895  | 0           | 3           |
+| 3     | 3.0          | 38727       | 3.06804038526093  | 0          | 316        | 2.61037519043561  | 0           | 4           |
+| 4     | 4.0          | 29008       | 4.09487038058467  | 0          | 334        | 3.45035852178709  | 0           | 5           |
+| 5     | 5.0          | 22571       | 5.05254530149307  | 0          | 406        | 4.28310664126534  | 0           | 6           |
+| 6     | 6.0          | 18074       | 6.3152594887684   | 0          | 492        | 5.10894102025008  | 0           | 11          |
+| 7     | 7.0          | 14438       | 7.26374844161241  | 0          | 456        | 5.98794846931708  | 0           | 7           |
+| 8     | 8.0          | 11868       | 8.45163464779238  | 0          | 540        | 6.87605325244355  | 0           | 8           |
+| 9     | 9.0          | 10005       | 9.77881059470265  | 0          | 676        | 7.67306346826587  | 0           | 9           |
+| 10    | 10.0         | 8287        | 10.9997586581393  | 0          | 658        | 8.59599372511162  | 0           | 16          |
++-------+--------------+-------------+-------------------+------------+------------+-------------------+-------------+-------------+
+
+sqlite> SELECT value, avg(nb_versions) AS avg_versions, COUNT(project_name) AS nb_projects, avg(nb_wheels) AS avg_wheels, min(nb_wheels) AS min_wheels, max(nb_wheels) as max_wheels, AVG(nb_sources) as avg_source, min(nb_sources) AS min_sources, max(nb_sources) AS max_sources FROM version_releases_stats, generate_series(10, 100, 5) WHERE nb_versions >= value AND nb_versions < (value + 5) GROUP BY value;
++-------+------------------+-------------+------------------+------------+------------+------------------+-------------+-------------+
+| value |   avg_versions   | nb_projects |    avg_wheels    | min_wheels | max_wheels |    avg_source    | min_sources | max_sources |
++-------+------------------+-------------+------------------+------------+------------+------------------+-------------+-------------+
+| 10    | 11.7271776340814 | 31548       | 13.2031824521364 | 0          | 1112       | 10.0823507036896 | 0           | 19          |
+| 15    | 16.7961260267255 | 16314       | 19.9346573495158 | 0          | 1323       | 14.5425401495648 | 0           | 21          |
+| 20    | 21.8250381679389 | 9825        | 25.9341475826972 | 0          | 1650       | 19.0416284987277 | 0           | 24          |
+| 25    | 26.8137270299984 | 6367        | 33.2283650070677 | 0          | 1772       | 23.5434270457044 | 0           | 32          |
+| 30    | 31.9035087719298 | 4446        | 39.1306792622582 | 0          | 2100       | 27.8533513270355 | 0           | 34          |
+| 35    | 36.9039975772259 | 3302        | 49.7792247122956 | 0          | 1939       | 32.3846153846154 | 0           | 39          |
+| 40    | 41.9338235294118 | 2448        | 52.4763071895425 | 0          | 2055       | 36.8525326797386 | 0           | 44          |
+| 45    | 46.9399235390497 | 1831        | 55.206990715456  | 0          | 2169       | 40.9475696340797 | 0           | 49          |
+| 50    | 52.0068587105624 | 1458        | 67.2366255144033 | 0          | 1981       | 45.537037037037  | 0           | 80          |
+| 55    | 56.862812769629  | 1159        | 83.2484900776531 | 0          | 2864       | 49.1760138050043 | 0           | 59          |
+| 60    | 61.9033297529538 | 931         | 69.5488721804511 | 0          | 2072       | 53.6305048335124 | 0           | 64          |
+| 65    | 66.9153846153846 | 780         | 103.988461538462 | 0          | 4465       | 56.5576923076923 | 0           | 69          |
+| 70    | 71.9671875       | 640         | 88.19375         | 0          | 1976       | 61.3921875       | 0           | 118         |
+| 75    | 76.9870848708487 | 542         | 89.6162361623616 | 0          | 2077       | 67.0885608856089 | 0           | 79          |
+| 80    | 81.9262472885032 | 461         | 96.826464208243  | 0          | 2402       | 71.5357917570499 | 0           | 84          |
+| 85    | 87.0             | 433         | 117.750577367206 | 0          | 3113       | 74.8175519630485 | 0           | 89          |
+| 90    | 91.906432748538  | 342         | 122.081871345029 | 0          | 7360       | 78.359649122807  | 0           | 94          |
+| 95    | 97.0440677966102 | 295         | 118.325423728814 | 0          | 2118       | 81.9864406779661 | 0           | 99          |
+| 100   | 101.977707006369 | 314         | 102.579617834395 | 0          | 3263       | 88.9363057324841 | 0           | 104         |
++-------+------------------+-------------+------------------+------------+------------+------------------+-------------+-------------+
+
+sqlite> SELECT value, avg(nb_versions) AS avg_versions, COUNT(project_name) AS nb_projects, avg(nb_wheels) AS avg_wheels, min(nb_wheels) AS min_wheels, max(nb_wheels) as max_wheels, AVG(nb_sources) as avg_source, min(nb_sources) AS min_sources, max(nb_sources) AS max_sources FROM version_releases_stats, generate_series(100,4700,200) WHERE nb_versions >= value AND nb_versions < (value + 200) GROUP BY value;
++-------+------------------+-------------+------------------+------------+------------+------------------+-------------+-------------+
+| value |   avg_versions   | nb_projects |    avg_wheels    | min_wheels | max_wheels |    avg_source    | min_sources | max_sources |
++-------+------------------+-------------+------------------+------------+------------+------------------+-------------+-------------+
+| 100   | 163.212247016087 | 3854        | 186.20835495589  | 0          | 7778       | 141.867929423975 | 0           | 299         |
+| 300   | 382.267857142857 | 448         | 477.587053571429 | 0          | 19464      | 319.899553571429 | 0           | 499         |
+| 500   | 585.822966507177 | 418         | 485.598086124402 | 0          | 4331       | 559.188995215311 | 0           | 698         |
+| 700   | 761.475113122172 | 221         | 739.923076923077 | 0          | 1192       | 708.461538461538 | 0           | 898         |
+| 900   | 984.316666666667 | 60          | 1485.78333333333 | 0          | 19792      | 889.183333333333 | 0           | 1091        |
+| 1100  | 1150.41176470588 | 17          | 1087.29411764706 | 64         | 1224       | 881.294117647059 | 0           | 1224        |
+| 1300  | 1385.54545454545 | 33          | 1677.57575757576 | 0          | 14047      | 706.212121212121 | 0           | 1463        |
+| 1500  | 1605.33333333333 | 9           | 1469.88888888889 | 0          | 2393       | 1049.0           | 0           | 1665        |
+| 1700  | 1764.71428571429 | 7           | 1539.28571428571 | 183        | 1826       | 1000.85714285714 | 0           | 1824        |
+| 1900  | 2033.0           | 1           | 1708.0           | 1708       | 1708       | 2031.0           | 2031        | 2031        |
+| 2100  | 2260.66666666667 | 3           | 2260.66666666667 | 2212       | 2298       | 2258.0           | 2206        | 2298        |
+| 2300  | 2381.2           | 5           | 3421.6           | 1364       | 8625       | 1651.2           | 0           | 2394        |
+| 2700  | 2752.0           | 2           | 2749.5           | 2748       | 2751       | 2750.5           | 2750        | 2751        |
+| 3100  | 3161.0           | 2           | 3161.0           | 3158       | 3164       | 0.5              | 0           | 1           |
+| 4100  | 4141.0           | 1           | 4141.0           | 4141       | 4141       | 4140.0           | 4140        | 4140        |
+| 4500  | 4595.0           | 1           | 4596.0           | 4596       | 4596       | 4594.0           | 4594        | 4594        |
++-------+------------------+-------------+------------------+------------+------------+------------------+-------------+-------------+
+```
+
+I created a table to store these statistics, see `create-table-project-stats.sql`.
+
+```sqlite
+sqlite> select * from project_stats;
++-------+-------------+------------------+--------------+--------------+-------------------+------------+------------+-------------------+-------------+-------------+
+| value | nb_projects |   avg_versions   | min_versions | max_versions |    avg_wheels     | min_wheels | max_wheels |    avg_source     | min_sources | max_sources |
++-------+-------------+------------------+--------------+--------------+-------------------+------------+------------+-------------------+-------------+-------------+
+| 1     | 121283      | 1.0              | 1            | 1            | 0.828632207316772 | 0          | 125        | 0.889918620086904 | 0           | 2           |
+| 2     | 56766       | 2.0              | 2            | 2            | 1.87032730860022  | 0          | 198        | 1.75827079589895  | 0           | 3           |
+| 3     | 38727       | 3.0              | 3            | 3            | 3.06804038526093  | 0          | 316        | 2.61037519043561  | 0           | 4           |
+| 4     | 29008       | 4.0              | 4            | 4            | 4.09487038058467  | 0          | 334        | 3.45035852178709  | 0           | 5           |
+| 5     | 22571       | 5.0              | 5            | 5            | 5.05254530149307  | 0          | 406        | 4.28310664126534  | 0           | 6           |
+| 6     | 18074       | 6.0              | 6            | 6            | 6.3152594887684   | 0          | 492        | 5.10894102025008  | 0           | 11          |
+| 7     | 14438       | 7.0              | 7            | 7            | 7.26374844161241  | 0          | 456        | 5.98794846931708  | 0           | 7           |
+| 8     | 11868       | 8.0              | 8            | 8            | 8.45163464779238  | 0          | 540        | 6.87605325244355  | 0           | 8           |
+| 9     | 10005       | 9.0              | 9            | 9            | 9.77881059470265  | 0          | 676        | 7.67306346826587  | 0           | 9           |
+| 10    | 8287        | 10.0             | 10           | 10           | 10.9997586581393  | 0          | 658        | 8.59599372511162  | 0           | 16          |
+| 10    | 31548       | 11.7271776340814 | 10           | 14           | 13.2031824521364  | 0          | 1112       | 10.0823507036896  | 0           | 19          |
+| 15    | 16314       | 16.7961260267255 | 15           | 19           | 19.9346573495158  | 0          | 1323       | 14.5425401495648  | 0           | 21          |
+| 20    | 9825        | 21.8250381679389 | 20           | 24           | 25.9341475826972  | 0          | 1650       | 19.0416284987277  | 0           | 24          |
+| 25    | 6367        | 26.8137270299984 | 25           | 29           | 33.2283650070677  | 0          | 1772       | 23.5434270457044  | 0           | 32          |
+| 30    | 4446        | 31.9035087719298 | 30           | 34           | 39.1306792622582  | 0          | 2100       | 27.8533513270355  | 0           | 34          |
+| 35    | 3302        | 36.9039975772259 | 35           | 39           | 49.7792247122956  | 0          | 1939       | 32.3846153846154  | 0           | 39          |
+| 40    | 2448        | 41.9338235294118 | 40           | 44           | 52.4763071895425  | 0          | 2055       | 36.8525326797386  | 0           | 44          |
+| 45    | 1831        | 46.9399235390497 | 45           | 49           | 55.206990715456   | 0          | 2169       | 40.9475696340797  | 0           | 49          |
+| 50    | 1458        | 52.0068587105624 | 50           | 54           | 67.2366255144033  | 0          | 1981       | 45.537037037037   | 0           | 80          |
+| 55    | 1159        | 56.862812769629  | 55           | 59           | 83.2484900776531  | 0          | 2864       | 49.1760138050043  | 0           | 59          |
+| 60    | 931         | 61.9033297529538 | 60           | 64           | 69.5488721804511  | 0          | 2072       | 53.6305048335124  | 0           | 64          |
+| 65    | 780         | 66.9153846153846 | 65           | 69           | 103.988461538462  | 0          | 4465       | 56.5576923076923  | 0           | 69          |
+| 70    | 640         | 71.9671875       | 70           | 74           | 88.19375          | 0          | 1976       | 61.3921875        | 0           | 118         |
+| 75    | 542         | 76.9870848708487 | 75           | 79           | 89.6162361623616  | 0          | 2077       | 67.0885608856089  | 0           | 79          |
+| 80    | 461         | 81.9262472885032 | 80           | 84           | 96.826464208243   | 0          | 2402       | 71.5357917570499  | 0           | 84          |
+| 85    | 433         | 87.0             | 85           | 89           | 117.750577367206  | 0          | 3113       | 74.8175519630485  | 0           | 89          |
+| 90    | 342         | 91.906432748538  | 90           | 94           | 122.081871345029  | 0          | 7360       | 78.359649122807   | 0           | 94          |
+| 95    | 295         | 97.0440677966102 | 95           | 99           | 118.325423728814  | 0          | 2118       | 81.9864406779661  | 0           | 99          |
+| 100   | 314         | 101.977707006369 | 100          | 104          | 102.579617834395  | 0          | 3263       | 88.9363057324841  | 0           | 104         |
+| 100   | 3854        | 163.212247016087 | 100          | 299          | 186.20835495589   | 0          | 7778       | 141.867929423975  | 0           | 299         |
+| 300   | 448         | 382.267857142857 | 300          | 499          | 477.587053571429  | 0          | 19464      | 319.899553571429  | 0           | 499         |
+| 500   | 418         | 585.822966507177 | 500          | 699          | 485.598086124402  | 0          | 4331       | 559.188995215311  | 0           | 698         |
+| 700   | 221         | 761.475113122172 | 701          | 898          | 739.923076923077  | 0          | 1192       | 708.461538461538  | 0           | 898         |
+| 900   | 60          | 984.316666666667 | 902          | 1094         | 1485.78333333333  | 0          | 19792      | 889.183333333333  | 0           | 1091        |
+| 1100  | 17          | 1150.41176470588 | 1106         | 1224         | 1087.29411764706  | 64         | 1224       | 881.294117647059  | 0           | 1224        |
+| 1300  | 33          | 1385.54545454545 | 1300         | 1489         | 1677.57575757576  | 0          | 14047      | 706.212121212121  | 0           | 1463        |
+| 1500  | 9           | 1605.33333333333 | 1522         | 1686         | 1469.88888888889  | 0          | 2393       | 1049.0            | 0           | 1665        |
+| 1700  | 7           | 1764.71428571429 | 1729         | 1826         | 1539.28571428571  | 183        | 1826       | 1000.85714285714  | 0           | 1824        |
+| 1900  | 1           | 2033.0           | 2033         | 2033         | 1708.0            | 1708       | 1708       | 2031.0            | 2031        | 2031        |
+| 2100  | 3           | 2260.66666666667 | 2212         | 2298         | 2260.66666666667  | 2212       | 2298       | 2258.0            | 2206        | 2298        |
+| 2300  | 5           | 2381.2           | 2307         | 2486         | 3421.6            | 1364       | 8625       | 1651.2            | 0           | 2394        |
+| 2700  | 2           | 2752.0           | 2751         | 2753         | 2749.5            | 2748       | 2751       | 2750.5            | 2750        | 2751        |
+| 3100  | 2           | 3161.0           | 3158         | 3164         | 3161.0            | 3158       | 3164       | 0.5               | 0           | 1           |
+| 4100  | 1           | 4141.0           | 4141         | 4141         | 4141.0            | 4141       | 4141       | 4140.0            | 4140        | 4140        |
+| 4500  | 1           | 4595.0           | 4595         | 4595         | 4596.0            | 4596       | 4596       | 4594.0            | 4594        | 4594        |
++-------+-------------+------------------+--------------+--------------+-------------------+------------+------------+-------------------+-------------+-------------+
+```
+
+### Several source packages for one version
+
+It seems that there are projects for which there is more than one source
+package for some versions.
+
+Few projects are concerned:
+
+```sqlite
+sqlite> SELECT count(DISTINCT project_name) FROM pyprojects WHERE project_name IN 
+(SELECT project_name FROM pyprojects WHERE source = 'true' GROUP BY project_name, project_version HAVING count(project_release) > 1);
+27
+```
+
+Sometimes the source packages seem to have different usages:
+
+- CairoSVG-2.1.3.tar.gz, CairoSVG-2.1.3.linux-x86_64.tar.gz
+- coal_mine-0.4.14.tar.gz, coal_mine-0.4.14.linux-x86_64.tar.gz
+- gmaps-0.7.1.tar.gz, gmaps-0.7.1.macosx-10.13-intel.tar.gz
+- htrc-0.1.52b0.tar.gz, htrc-0.1.52b0.macosx-10.7-x86_64.tar.gz
+ 
+and sometimes the file names differ only in how the package name is written
+(hyphen / underscore, ...):
+
+- persistentMemory-0.1.0.tar.gz, PersistentMemory-0.1.0.tar.gz
+- OZI-0.0.26.tar.gz, OZI-0.0.27.tar.gz: odd, both files are recorded under version 0.0.26
+- camper_dramkit-1.0.2.tar.gz, camper_dramkit-1.0.1.tar.gz: same oddity, both recorded under version 1.0.1
+- intlmch-0.1.0.tar.gz, intLMCH-0.1.0.tar.gz
+- mkdocs-strapi-plugin-0.1.0.tar.gz, mkdocs_strapi_plugin-0.1.0.tar.gz
+- vectorflow_client-0.0.5.tar.gz, vectorflow-client-0.0.5.tar.gz: odd, this happens for every version except the latest one, 0.0.6
+
+```sqlite
+sqlite> SELECT
+  project_name,
+  project_version,
+  count(project_release) AS nb_releases,
+  group_concat(project_release, ', '),
+  avg(max_year) AS avg_year,
+  min(max_year) AS min_year,
+  max(max_year) AS max_year
+FROM pyprojects
+WHERE source = 'true'
+GROUP BY project_name, project_version
+HAVING nb_releases > 1;
+
++-----------------------+-----------------+-------------+-----------------------------------------------------------------------+----------+----------+----------+
+|     project_name      | project_version | nb_releases |                  group_concat(project_release, ', ')                  | avg_year | min_year | max_year |
++-----------------------+-----------------+-------------+-----------------------------------------------------------------------+----------+----------+----------+
+| CairoSVG              | 2.1.3           | 2           | CairoSVG-2.1.3.tar.gz, CairoSVG-2.1.3.linux-x86_64.tar.gz             | 2018.0   | 2018     | 2018     |
+| OZI                   | 0.0.26          | 2           | OZI-0.0.26.tar.gz, OZI-0.0.27.tar.gz                                  | 2023.0   | 2023     | 2023     |
+| PersistentMemory      | 0.1.0           | 2           | persistentMemory-0.1.0.tar.gz, PersistentMemory-0.1.0.tar.gz          | 2023.0   | 2023     | 2023     |
+| aiacc-nccl            | 2.0.0           | 2           | aiacc-nccl-2.0.0.tar.gz, aiacc_nccl-2.0.0.tar.gz                      | 2023.0   | 2023     | 2023     |
+| algorithmia           | 1.1.2           | 2           | algorithmia-1.1.2.linux-x86_64.tar.gz, algorithmia-1.1.2.tar.gz       | 2018.0   | 2018     | 2018     |
+| algorithmia           | 1.1.3           | 2           | algorithmia-1.1.3.tar.gz, algorithmia-1.1.3.linux-x86_64.tar.gz       | 2018.0   | 2018     | 2018     |
+| algorithmia           | 1.2.0           | 2           | algorithmia-1.2.0.linux-x86_64.tar.gz, algorithmia-1.2.0.tar.gz       | 2019.0   | 2019     | 2019     |
+| barySSH               | 0.3.3           | 2           | barySSH-0.3.3.tar.gz, barySSH-0.3.3.linux-x86_64.tar.gz               | 2018.0   | 2018     | 2018     |
+| buttersink            | 0.6.9           | 2           | buttersink-0.6.9.linux-x86_64.tar.gz, buttersink-0.6.9.tar.gz         | 2018.0   | 2018     | 2018     |
+| camper-dramkit        | 1.0.1           | 2           | camper_dramkit-1.0.2.tar.gz, camper_dramkit-1.0.1.tar.gz              | 2023.0   | 2023     | 2023     |
+| coal-mine             | 0.4.12          | 2           | coal_mine-0.4.12.tar.gz, coal_mine-0.4.12.linux-x86_64.tar.gz         | 2018.0   | 2018     | 2018     |
+| ...                   | ...             | ...         | ...                                                                   | ...      | ...      | ...      |
+| spotify-share-project | 0.1             | 2           | spotify-share-project-0.1.tar.gz, spotify_share_project-0.1.tar.gz    | 2023.0   | 2023     | 2023     |
+| ...                   | ...             | ...         | ...                                                                   | ...      | ...      | ...      |
++-----------------------+-----------------+-------------+-----------------------------------------------------------------------+----------+----------+----------+
+```
 
-## Project status
-If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers.
diff --git a/create-table-project-stats.sql b/create-table-project-stats.sql
new file mode 100644
index 0000000..c2f5ff5
--- /dev/null
+++ b/create-table-project-stats.sql
@@ -0,0 +1,73 @@
+-- Define the dialect
+-- sqlfluff:dialect:sqlite
+
+-- Set a smaller indent for this file
+-- sqlfluff:indentation:tab_space_size:2
+
+-- Set keywords to be capitalised
+-- sqlfluff:rules:capitalisation.keywords:capitalisation_policy:upper
+
+CREATE TABLE IF NOT EXISTS project_stats_t ( -- one row per nb_versions bucket
+  value INTEGER, -- inclusive lower bound of the bucket (generate_series value)
+  nb_projects INTEGER, -- count(project_name) inside the bucket
+  avg_versions REAL, -- avg / min / max of nb_versions inside the bucket
+  min_versions INTEGER,
+  max_versions INTEGER,
+  avg_wheels REAL, -- avg / min / max of nb_wheels inside the bucket
+  min_wheels INTEGER,
+  max_wheels INTEGER,
+  avg_source REAL, -- avg / min / max of nb_sources (only this name is singular)
+  min_sources INTEGER,
+  max_sources INTEGER
+); -- value is not a key: the inserts below reuse lower bounds 10 and 100
+
+INSERT INTO project_stats_t -- width-1 buckets: one per series value 0..10
+SELECT -- aliases only document the positional mapping onto the table columns
+  value, -- lower bound; 10 is emitted again by the width-5 statement below
+  count(project_name) AS nb_projects,
+  avg(nb_versions) AS avg_versions,
+  min(nb_versions) AS min_versions,
+  max(nb_versions) AS max_versions,
+  avg(nb_wheels) AS avg_wheels,
+  min(nb_wheels) AS min_wheels,
+  max(nb_wheels) AS max_wheels,
+  avg(nb_sources) AS avg_source,
+  min(nb_sources) AS min_sources,
+  max(nb_sources) AS max_sources
+FROM version_releases_stats CROSS JOIN generate_series(0, 10, 1)
+WHERE value <= nb_versions AND nb_versions < value + 1
+GROUP BY value; -- series values matching no project yield no row
+
+INSERT INTO project_stats_t -- width-5 buckets: 10-14, 15-19, ..., 100-104
+SELECT -- aliases only document the positional mapping onto the table columns
+  value, -- lower bound; 10 is also emitted by the width-1 statement above
+  count(project_name) AS nb_projects,
+  avg(nb_versions) AS avg_versions,
+  min(nb_versions) AS min_versions,
+  max(nb_versions) AS max_versions,
+  avg(nb_wheels) AS avg_wheels,
+  min(nb_wheels) AS min_wheels,
+  max(nb_wheels) AS max_wheels,
+  avg(nb_sources) AS avg_source,
+  min(nb_sources) AS min_sources,
+  max(nb_sources) AS max_sources
+FROM version_releases_stats CROSS JOIN generate_series(10, 100, 5)
+WHERE value <= nb_versions AND nb_versions < value + 5
+GROUP BY value; -- series values matching no project yield no row
+
+INSERT INTO project_stats_t -- width-200 buckets: 100-299, 300-499, ..., 4700-4899
+SELECT -- aliases only document the positional mapping onto the table columns
+  value, -- lower bound; 100 is also emitted by the width-5 statement above
+  count(project_name) AS nb_projects,
+  avg(nb_versions) AS avg_versions,
+  min(nb_versions) AS min_versions,
+  max(nb_versions) AS max_versions,
+  avg(nb_wheels) AS avg_wheels,
+  min(nb_wheels) AS min_wheels,
+  max(nb_wheels) AS max_wheels,
+  avg(nb_sources) AS avg_source,
+  min(nb_sources) AS min_sources,
+  max(nb_sources) AS max_sources
+FROM version_releases_stats CROSS JOIN generate_series(100, 4700, 200)
+WHERE value <= nb_versions AND nb_versions < value + 200
+GROUP BY value; -- series values matching no project yield no row
diff --git a/extract-project-releases-2018-and-later.py b/extract-project-releases-2018-and-later.py
new file mode 100644
index 0000000..7a465dd
--- /dev/null
+++ b/extract-project-releases-2018-and-later.py
@@ -0,0 +1,9 @@
+# coding: utf-8
+
+import duckdb
+
+with open('extract-pyproject-releases.sql', 'r') as f:
+    QUERY = f.read()
+
+res = duckdb.sql(QUERY)
+res.to_csv("extract-project-releases-2018-and-later.csv", header=True)
diff --git a/extract-pyproject-releases.sql b/extract-pyproject-releases.sql
new file mode 100644
index 0000000..d9f1df5
--- /dev/null
+++ b/extract-pyproject-releases.sql
@@ -0,0 +1,21 @@
+-- Define the dialect
+-- sqlfluff:dialect:duckdb
+
+-- Set a smaller indent for this file
+-- sqlfluff:indentation:tab_space_size:2
+
+-- Set keywords to be capitalised
+-- sqlfluff:rules:capitalisation.keywords:capitalisation_policy:upper
+
+SELECT -- one row per (project_name, project_version, project_release)
+  project_name,
+  project_version,
+  project_release,
+  suffix(project_release, '.whl') AS wheel, -- file name ends with .whl
+  suffix(project_release, '.tar.gz') AS source, -- file name ends with .tar.gz
+  max(uploaded_on) AS max_uploaded_on,
+  date_part('year', max(uploaded_on)) AS max_year,
+  list(DISTINCT uploaded_on) -- kept unaliased so the exported column name stays the same
+FROM '*.parquet'
+WHERE date_part('year', uploaded_on) >= 2018 AND skip_reason = '' -- numeric year
+GROUP BY project_name, project_version, project_release
-- 
GitLab