Skip to content
Snippets Groups Projects
Commit 345b0958 authored by Françoise Conil
Browse files

Fixed wrong backend extraction

pyproject-sqlite-get-files-and-extract-backend.py has also been modified
to analyze only the main pyproject.toml at the root of the package.
parent aada1b1b
No related branches found
No related tags found
No related merge requests found
......@@ -55,7 +55,7 @@ if __name__ == "__main__":
ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5, str(nb), ha='center', va='bottom')
# Set logarithmic scale on y-axis
# ax.set_yscale('log')
ax.set_yscale('log')
# Color x-axis labels based on the condition
for label, n in zip(ax.get_xticklabels(), backend_nb):
......@@ -63,7 +63,7 @@ if __name__ == "__main__":
label.set_color('blue')
# Adjust layout to prevent clipping of rotated labels
plt.tight_layout()
# plt.tight_layout()
#plt.show()
plt.savefig("python-backends-2018-2023.png", dpi=600)
......
......@@ -36,6 +36,8 @@ sqlite> select DISTINCT project_name, project_version, nb_uploads, uploaded_on,
import logging
import sqlite3
import re
import time
import tomli
import pycodeorg
LOG = logging.getLogger(__name__)
......@@ -55,7 +57,6 @@ CREATE_BACKEND = """CREATE TABLE IF NOT EXISTS backends
QUERY = """SELECT repository, project_name, project_version,
nb_uploads, uploaded_on, year, path
FROM pyprojects
WHERE year=2018
"""
INSERT_BACKEND = """INSERT INTO backends
......@@ -64,7 +65,9 @@ VALUES (:repository, :project_name, :project_version,
"""
if __name__ == "__main__":
logging.basicConfig(filename='pyproject-backends.log', level=logging.ERROR)
start_time = time.time()
logging.basicConfig(filename='pyproject-backends.log', level=logging.INFO)
# Create backend table
# --------------------
......@@ -76,40 +79,78 @@ if __name__ == "__main__":
# Get project data
# ----------------
cnx_proj = sqlite3.connect('extract-pyproject-latest.db')
cnx_proj.row_factory = sqlite3.Row
cur_proj = cnx_proj.cursor()
cur_proj.execute("SELECT COUNT(*) AS nb FROM pyprojects;")
r = cur_proj.fetchone()
total = r['nb']
cpt = 0
for row in cur_proj.execute(QUERY):
# print(row)
t_values = {
"repository": row[0],
"project_name": row[1],
"project_version": row[2],
"nb_uploads": row[3],
"uploaded_on": row[4],
"year": row[5],
"path": row[6]
values = {
"repository": row["repository"],
"project_name": row["project_name"],
"project_version": row["project_version"],
"nb_uploads": row["nb_uploads"],
"uploaded_on": row["uploaded_on"],
"year": row["year"],
"path": row["path"]
}
# Fetch the file data from the dataset
# ------------------------------------
try:
data = pycodeorg.get_data(t_values['repository'], t_values['project_name'], t_values['path'])
except ValueError as e:
LOG.error("pycodeorg.get_data failed to retrieve %s: '%s'" % (t_values['project_name'], e))
# Only fetch the pyproject.toml at the root of the project
# --------------------------------------------------------
parts = values['path'].split("/")
if len(parts) == 5 and parts[-1] == "pyproject.toml":
# Fetch the file data from the dataset
# ------------------------------------
try:
data = pycodeorg.get_data(values['repository'], values['project_name'], values['path'])
except ValueError as e:
LOG.error("pycodeorg.get_data failed to retrieve %s: '%s'" % (values['project_name'], e))
continue
# Then get the 'build-backend' value with a toml library
# ------------------------------------------------------
try:
toml_dict = tomli.loads(data.decode())
except (tomli.TOMLDecodeError, UnicodeDecodeError) as e:
LOG.error("Error reading TOML file for %s: '%s'" % (values['project_name'], e))
continue
# print(f"{toml_dict=}")
if toml_dict.get('build-system') and toml_dict['build-system'].get('build-backend'):
backend = toml_dict['build-system'].get('build-backend')
values['backend'] = backend
print(f"{values['project_name']} : {values['backend']}")
else:
values['backend'] = None
print(f"{values['project_name']} : .......... no backend found")
try:
cur_backend.execute(INSERT_BACKEND, values)
cnx_backend.commit()
except sqlite3.InterfaceError as e:
LOG.error("Error writing to sqlite3 for %s: '%s'" % (values['project_name'], e))
continue
# Then parse the 'build-backend' field and aggregate
# --------------------------------------------------
if match := re.search(rb'\nbuild-backend\s*=\s*"([A-Za-z0-9-\.]+)"', data):
backend = match.group(1).decode()
t_values['backend'] = backend
print(f"{t_values['project_name']} : {t_values['backend']}")
else:
t_values['backend'] = None
print(f"{t_values['project_name']} : .......... no backend found")
# Record skipped entries: only the root-level pyproject.toml is analyzed.
# Plain string (no f-prefix, it has no {} fields) with lazy logging arguments.
LOG.info("%s is not a root path for %s", values['path'], values['project_name'])
cur_backend.execute(INSERT_BACKEND, t_values)
cnx_backend.commit()
cpt = cpt + 1
if cpt % 2500 == 0:
LOG.info("PROGRESS: %d / %d, %.2f %%" % (cpt, total, cpt * 100 / total))
cnx_proj.close()
cnx_backend.close()
# Report the total wall-clock duration, both in the log file and on stdout.
end_time = time.time()
# Use fixed-point with 3 decimals. A bare "0.3" spec means 3 significant
# digits in general format, which truncates short durations and switches to
# scientific notation for long ones (e.g. 1234.5 -> "1.23e+03").
duration_msg = f"Getting backends took : {end_time - start_time:0.3f} seconds."
LOG.info(duration_msg)
print(duration_msg)
python-backends-2018-2023-log-scale.png

1.75 MiB | W: | H:

python-backends-2018-2023-log-scale.png

412 KiB | W: | H:

python-backends-2018-2023-log-scale.png
python-backends-2018-2023-log-scale.png
python-backends-2018-2023-log-scale.png
python-backends-2018-2023-log-scale.png
  • 2-up
  • Swipe
  • Onion skin
python-backends-2018-2023.png

1.74 MiB | W: | H:

python-backends-2018-2023.png

410 KiB | W: | H:

python-backends-2018-2023.png
python-backends-2018-2023.png
python-backends-2018-2023.png
python-backends-2018-2023.png
  • 2-up
  • Swipe
  • Onion skin
duckdb
tomli
urllib3
matplotlib
pandas
......@@ -37,6 +37,8 @@ pytz==2023.3.post1
# via pandas
six==1.16.0
# via python-dateutil
tomli==2.0.1
# via -r requirements.in
tzdata==2023.3
# via pandas
urllib3==2.1.0
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment