def generate_date_tick(row):
    """Build the ``YYYY-MM`` label used for one tick of the horizontal axis.

    The function is meant to be passed to ``DataFrame.apply(..., axis=1)``.
    In that mode pandas hands over each row as a Series with one uniform
    data type:

    - if a column contains float64 and the other columns are int64,
      all row elements will be float64;
    - if a column contains strings (object), all row elements will be
      object.

    See "Why data types was changed while calling the apply function in
    Pandas?": https://stackoverflow.com/a/40315866

    Both values are therefore converted with ``int()`` before formatting, so
    that an up-cast row (``2024.0`` / ``3.0``) still yields ``"2024-03"``
    instead of failing on the ``d`` format code.

    :param row: row-like object (pandas Series, dict...) giving access to
        the ``cl_year`` and ``cl_month`` values by key
    :return: the year and the month zero-padded to two digits, joined by
        a dash
    :rtype: str
    :raises ValueError: if one of the two values is NaN or a string that
        does not represent an integer
    """
    return f"{int(row['cl_year'])}-{int(row['cl_month']):02d}"


if __name__ == "__main__":
    # Load the settings as a mapping with python-dotenv
    # https://github.com/theskumar/python-dotenv
    env_vars = dotenv.dotenv_values()

    # Fail early with an explicit message: a missing key would otherwise
    # surface later as an opaque TypeError raised by Path() or open()
    required = ("PYPI_DB_PATH", "PYPI_DB_NAME", "QUERY_ISSUES_STATS_BY_MONTH")
    missing = [name for name in required if not env_vars.get(name)]
    if missing:
        raise SystemExit(f"Missing setting(s): {', '.join(missing)}")

    # Build the path to the issues DuckDB database file
    issues_db = Path(env_vars["PYPI_DB_PATH"]) / env_vars["PYPI_DB_NAME"]

    # Load query from file
    QUERY = Path(env_vars["QUERY_ISSUES_STATS_BY_MONTH"]).read_text(
        encoding="utf-8"
    )

    # Connect to the file (expanduser() is harmless if there is nothing to
    # expand), execute the query and get a Pandas DataFrame back. The
    # context manager closes the connection once the DataFrame is built.
    # https://duckdb.org/docs/api/python/overview#result-conversion
    with duckdb.connect(str(issues_db.expanduser()), read_only=True) as cnx:
        df = cnx.sql(QUERY).df()

    # Nothing to label nor to draw without at least one month of data
    if df.empty:
        raise SystemExit("The query returned no row: nothing to plot.")

    # Create a value for x tick labelling
    df["x_tick"] = df.apply(generate_date_tick, axis=1)

    fig = plt.figure(num="PyPI Issues", figsize=(12, 8), layout="constrained")
    ax = fig.add_subplot(1, 1, 1)

    # https://matplotlib.org/stable/users/explain/colors/colors.html#colors-def
    # https://matplotlib.org/stable/users/explain/colors/colormaps.html
    ax.plot(
        df["x_tick"],
        df["nb_days_avg"],
        color="b",
        marker="o",
        label="Number of days between opening and close",
    )

    ax.plot(
        df["x_tick"],
        df["closed_nb"],
        color="g",
        marker="o",
        label="Issues closed by someone else than the opener",
    )

    ax.set_title(
        "Statistics on PyPI issues, by month",
        pad=10,
    )

    ax.legend()

    # Tilt the month labels, anchored on their right end
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor")

    plt.show()
-- Count the "PEP 541" issues closed by someone other than the person who
-- opened them. Grouped by month of closing.
--
-- NOTE(review): pypi_issues_labels is joined but no predicate restricts the
-- label to "PEP 541"; as written, any issue having at least one label row
-- qualifies. Confirm whether a label filter is missing.
-- NOTE(review): pypi_issues_events is joined without a predicate on the kind
-- of event, so every event whose actor differs from the issue author matches,
-- not only the closing one. Confirm against the events table schema.
-- NOTE(review): count(*) and avg() run over the joined rows; an issue with
-- several matching events or labels is counted once per combination.
-- TODO confirm that this is the intended figure.

SELECT
    year(i.closed_at) AS cl_year,
    month(i.closed_at) AS cl_month,
    -- Average number of days between creation and closing, rounded and cast
    -- to BIGINT (presumably so the column stays an integer once converted to
    -- a DataFrame by the plotting script; verify)
    cast(round(avg(datediff('day', i.created_at, i.closed_at)), 0) AS BIGINT) AS nb_days_avg,
    count(*) AS closed_nb
FROM
    pypi_issues AS i
INNER JOIN pypi_issues_events AS e
    ON i.number = e.issue_number
INNER JOIN pypi_issues_labels AS l
    ON l.issue_id = i.id
WHERE
    i.state = 'closed'
    -- Keep only the rows where the event actor is not the issue author
    AND i.user_login != e.actor_login
GROUP BY
    cl_year, cl_month
ORDER BY
    cl_year, cl_month;