Skip to content
Snippets Groups Projects
Commit bbc8f88d authored by Françoise Conil
Browse files

Trying to understand the type of PyPI packages

This is an additional resource to the pyproject.toml analysis.
parent 18084eac
No related branches found
No related tags found
No related merge requests found
This diff is collapsed.
-- Define the dialect
-- sqlfluff:dialect:sqlite
-- Set a smaller indent for this file
-- sqlfluff:indentation:tab_space_size:2
-- Set keywords to be capitalised
-- sqlfluff:rules:capitalisation.keywords:capitalisation_policy:upper
-- Summary table filled by the INSERT statements below: one row per
-- nb_versions bucket of version_releases_stats.
CREATE TABLE IF NOT EXISTS project_stats_t (
  -- Lower bound of the nb_versions bucket (taken from generate_series).
  value INTEGER,
  -- count(project_name) for the rows falling in the bucket.
  nb_projects INTEGER,
  -- avg / min / max of nb_versions within the bucket.
  avg_versions REAL,
  min_versions INTEGER,
  max_versions INTEGER,
  -- avg / min / max of nb_wheels within the bucket.
  avg_wheels REAL,
  min_wheels INTEGER,
  max_wheels INTEGER,
  -- avg / min / max of nb_sources within the bucket.
  -- NOTE(review): avg_source is singular, unlike min_sources / max_sources;
  -- kept as-is because renaming would change the table's schema.
  avg_source REAL,
  min_sources INTEGER,
  max_sources INTEGER
);
-- Width-1 buckets: one row per exact version count from 0 to 9.
-- The series stops at 9 on purpose: the bucket whose lower bound is 10 is
-- produced by the width-5 statement (generate_series starting at 10), so
-- ending this series at 10 would count projects with exactly 10 versions
-- twice and store two rows with value = 10.
-- Buckets that match no project produce no row (inner join + GROUP BY).
INSERT INTO project_stats_t (
  value,
  nb_projects,
  avg_versions,
  min_versions,
  max_versions,
  avg_wheels,
  min_wheels,
  max_wheels,
  avg_source,
  min_sources,
  max_sources
)
SELECT
  bucket.value,
  count(stats.project_name),
  avg(stats.nb_versions),
  min(stats.nb_versions),
  max(stats.nb_versions),
  avg(stats.nb_wheels),
  min(stats.nb_wheels),
  max(stats.nb_wheels),
  avg(stats.nb_sources),
  min(stats.nb_sources),
  max(stats.nb_sources)
FROM version_releases_stats AS stats
INNER JOIN generate_series(0, 9, 1) AS bucket
  -- Half-open range [value, value + 1).
  ON
    stats.nb_versions >= bucket.value
    AND stats.nb_versions < (bucket.value + 1)
GROUP BY bucket.value;
-- Width-5 buckets: lower bounds 10, 15, ..., 95, i.e. version counts 10-99.
-- The series stops at 95 on purpose: the bucket whose lower bound is 100 is
-- produced by the width-200 statement (generate_series starting at 100), so
-- ending this series at 100 would count projects with 100-104 versions
-- twice and store two rows with value = 100.
-- Buckets that match no project produce no row (inner join + GROUP BY).
INSERT INTO project_stats_t (
  value,
  nb_projects,
  avg_versions,
  min_versions,
  max_versions,
  avg_wheels,
  min_wheels,
  max_wheels,
  avg_source,
  min_sources,
  max_sources
)
SELECT
  bucket.value,
  count(stats.project_name),
  avg(stats.nb_versions),
  min(stats.nb_versions),
  max(stats.nb_versions),
  avg(stats.nb_wheels),
  min(stats.nb_wheels),
  max(stats.nb_wheels),
  avg(stats.nb_sources),
  min(stats.nb_sources),
  max(stats.nb_sources)
FROM version_releases_stats AS stats
INNER JOIN generate_series(10, 95, 5) AS bucket
  -- Half-open range [value, value + 5).
  ON
    stats.nb_versions >= bucket.value
    AND stats.nb_versions < (bucket.value + 5)
GROUP BY bucket.value;
-- Width-200 buckets: lower bounds 100, 300, ..., 4700.
-- The last bucket covers [4700, 4900); rows with nb_versions >= 4900 match
-- no bucket and are left out.
-- NOTE(review): confirm 4900 is above the largest nb_versions in the data.
-- Buckets that match no project produce no row (inner join + GROUP BY).
INSERT INTO project_stats_t (
  value,
  nb_projects,
  avg_versions,
  min_versions,
  max_versions,
  avg_wheels,
  min_wheels,
  max_wheels,
  avg_source,
  min_sources,
  max_sources
)
SELECT
  bucket.value,
  count(stats.project_name),
  avg(stats.nb_versions),
  min(stats.nb_versions),
  max(stats.nb_versions),
  avg(stats.nb_wheels),
  min(stats.nb_wheels),
  max(stats.nb_wheels),
  avg(stats.nb_sources),
  min(stats.nb_sources),
  max(stats.nb_sources)
FROM version_releases_stats AS stats
INNER JOIN generate_series(100, 4700, 200) AS bucket
  -- Half-open range [value, value + 200).
  ON
    stats.nb_versions >= bucket.value
    AND stats.nb_versions < (bucket.value + 200)
GROUP BY bucket.value;
# coding: utf-8
import duckdb
# Read the SQL query from disk, run it with DuckDB and export the result as CSV.
# The encoding is explicit so the read does not depend on the platform default.
with open('extract-pyproject-releases.sql', 'r', encoding='utf-8') as f:
    QUERY = f.read()

# duckdb.sql() returns a relation; to_csv() writes it out with a header row.
res = duckdb.sql(QUERY)
res.to_csv("extract-project-releases-2018-and-later.csv", header=True)
-- Define the dialect
-- sqlfluff:dialect:duckdb
-- Set a smaller indent for this file
-- sqlfluff:indentation:tab_space_size:2
-- Set keywords to be capitalised
-- sqlfluff:rules:capitalisation.keywords:capitalisation_policy:upper
-- One row per (project_name, project_version, project_release) among the
-- rows uploaded in 2018 or later whose skip_reason is empty.
SELECT
  project_name,
  project_version,
  project_release,
  -- Flags derived from the release file name's ending.
  suffix(project_release, '.whl') AS wheel,
  suffix(project_release, '.tar.gz') AS source,
  max(uploaded_on) AS max_uploaded_on,
  date_part('year', max(uploaded_on)) AS max_year,
  -- Left unaliased so the exported column name stays unchanged.
  list(DISTINCT uploaded_on)
FROM '*.parquet'
WHERE
  -- date_part returns an integer: compare with a number rather than the
  -- string '2018' to avoid relying on an implicit cast.
  date_part('year', uploaded_on) >= 2018
  AND skip_reason = ''
GROUP BY project_name, project_version, project_release
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment