Skip to content
Snippets Groups Projects
Commit 345b0958 authored by Françoise Conil
Browse files

Fixed wrong backend extraction

pyproject-sqlite-get-files-and-extract-backend.py has also been modified
to analyze only the main pyproject.toml at the root of the package.
parent aada1b1b
No related branches found
No related tags found
No related merge requests found
...@@ -55,7 +55,7 @@ if __name__ == "__main__": ...@@ -55,7 +55,7 @@ if __name__ == "__main__":
ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5, str(nb), ha='center', va='bottom') ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.5, str(nb), ha='center', va='bottom')
# Set logarithmic scale on y-axis # Set logarithmic scale on y-axis
# ax.set_yscale('log') ax.set_yscale('log')
# Color x-axis labels based on the condition # Color x-axis labels based on the condition
for label, n in zip(ax.get_xticklabels(), backend_nb): for label, n in zip(ax.get_xticklabels(), backend_nb):
...@@ -63,7 +63,7 @@ if __name__ == "__main__": ...@@ -63,7 +63,7 @@ if __name__ == "__main__":
label.set_color('blue') label.set_color('blue')
# Adjust layout to prevent clipping of rotated labels # Adjust layout to prevent clipping of rotated labels
plt.tight_layout() # plt.tight_layout()
#plt.show() #plt.show()
plt.savefig("python-backends-2018-2023.png", dpi=600) plt.savefig("python-backends-2018-2023.png", dpi=600)
......
...@@ -36,6 +36,8 @@ sqlite> select DISTINCT project_name, project_version, nb_uploads, uploaded_on, ...@@ -36,6 +36,8 @@ sqlite> select DISTINCT project_name, project_version, nb_uploads, uploaded_on,
import logging import logging
import sqlite3 import sqlite3
import re import re
import time
import tomli
import pycodeorg import pycodeorg
LOG = logging.getLogger(__name__) LOG = logging.getLogger(__name__)
...@@ -55,7 +57,6 @@ CREATE_BACKEND = """CREATE TABLE IF NOT EXISTS backends ...@@ -55,7 +57,6 @@ CREATE_BACKEND = """CREATE TABLE IF NOT EXISTS backends
QUERY = """SELECT repository, project_name, project_version, QUERY = """SELECT repository, project_name, project_version,
nb_uploads, uploaded_on, year, path nb_uploads, uploaded_on, year, path
FROM pyprojects FROM pyprojects
WHERE year=2018
""" """
INSERT_BACKEND = """INSERT INTO backends INSERT_BACKEND = """INSERT INTO backends
...@@ -64,7 +65,9 @@ VALUES (:repository, :project_name, :project_version, ...@@ -64,7 +65,9 @@ VALUES (:repository, :project_name, :project_version,
""" """
if __name__ == "__main__": if __name__ == "__main__":
logging.basicConfig(filename='pyproject-backends.log', level=logging.ERROR) start_time = time.time()
logging.basicConfig(filename='pyproject-backends.log', level=logging.INFO)
# Create backend table # Create backend table
# -------------------- # --------------------
...@@ -76,40 +79,78 @@ if __name__ == "__main__": ...@@ -76,40 +79,78 @@ if __name__ == "__main__":
# Get project data # Get project data
# ---------------- # ----------------
cnx_proj = sqlite3.connect('extract-pyproject-latest.db') cnx_proj = sqlite3.connect('extract-pyproject-latest.db')
cnx_proj.row_factory = sqlite3.Row
cur_proj = cnx_proj.cursor() cur_proj = cnx_proj.cursor()
cur_proj.execute("SELECT COUNT(*) AS nb FROM pyprojects;")
r = cur_proj.fetchone()
total = r['nb']
cpt = 0
for row in cur_proj.execute(QUERY): for row in cur_proj.execute(QUERY):
# print(row) values = {
t_values = { "repository": row["repository"],
"repository": row[0], "project_name": row["project_name"],
"project_name": row[1], "project_version": row["project_version"],
"project_version": row[2], "nb_uploads": row["nb_uploads"],
"nb_uploads": row[3], "uploaded_on": row["uploaded_on"],
"uploaded_on": row[4], "year": row["year"],
"year": row[5], "path": row["path"]
"path": row[6]
} }
# Fetch the file data from the dataset # Only fetch the pyproject.toml at the root of the project
# ------------------------------------ # --------------------------------------------------------
try: parts = values['path'].split("/")
data = pycodeorg.get_data(t_values['repository'], t_values['project_name'], t_values['path']) if len(parts) == 5 and parts[-1] == "pyproject.toml":
except ValueError as e:
LOG.error("pycodeorg.get_data failed to retrieve %s: '%s'" % (t_values['project_name'], e)) # Fetch the file data from the dataset
# ------------------------------------
try:
data = pycodeorg.get_data(values['repository'], values['project_name'], values['path'])
except ValueError as e:
LOG.error("pycodeorg.get_data failed to retrieve %s: '%s'" % (values['project_name'], e))
continue
# Then get the 'build-backend' value with a toml library
# ------------------------------------------------------
try:
toml_dict = tomli.loads(data.decode())
except (tomli.TOMLDecodeError, UnicodeDecodeError) as e:
LOG.error("Error reading TOML file for %s: '%s'" % (values['project_name'], e))
continue
# print(f"{toml_dict=}")
if toml_dict.get('build-system') and toml_dict['build-system'].get('build-backend'):
backend = toml_dict['build-system'].get('build-backend')
values['backend'] = backend
print(f"{values['project_name']} : {values['backend']}")
else:
values['backend'] = None
print(f"{values['project_name']} : .......... no backend found")
try:
cur_backend.execute(INSERT_BACKEND, values)
cnx_backend.commit()
except sqlite3.InterfaceError as e:
LOG.error("Error writing to sqlite3 for %s: '%s'" % (values['project_name'], e))
continue
# Then parse the 'build-backend' field and aggregate
# --------------------------------------------------
if match := re.search(rb'\nbuild-backend\s*=\s*"([A-Za-z0-9-\.]+)"', data):
backend = match.group(1).decode()
t_values['backend'] = backend
print(f"{t_values['project_name']} : {t_values['backend']}")
else: else:
t_values['backend'] = None LOG.info(f"%s is not a root path for %s" % (values['path'], values['project_name']))
print(f"{t_values['project_name']} : .......... no backend found")
cur_backend.execute(INSERT_BACKEND, t_values) cpt = cpt + 1
cnx_backend.commit() if cpt % 2500 == 0:
LOG.info("PROGRESS: %d / %d, %.2f %%" % (cpt, total, cpt * 100 / total))
cnx_proj.close() cnx_proj.close()
cnx_backend.close() cnx_backend.close()
end_time = time.time()
duration_msg = f"Getting backends took : {end_time - start_time:0.3} seconds."
LOG.info(duration_msg)
print(duration_msg)
python-backends-2018-2023-log-scale.png

1.75 MiB | W: | H:

python-backends-2018-2023-log-scale.png

412 KiB | W: | H:

python-backends-2018-2023-log-scale.png
python-backends-2018-2023-log-scale.png
python-backends-2018-2023-log-scale.png
python-backends-2018-2023-log-scale.png
  • 2-up
  • Swipe
  • Onion skin
python-backends-2018-2023.png

1.74 MiB | W: | H:

python-backends-2018-2023.png

410 KiB | W: | H:

python-backends-2018-2023.png
python-backends-2018-2023.png
python-backends-2018-2023.png
python-backends-2018-2023.png
  • 2-up
  • Swipe
  • Onion skin
duckdb duckdb
tomli
urllib3 urllib3
matplotlib matplotlib
pandas pandas
...@@ -37,6 +37,8 @@ pytz==2023.3.post1 ...@@ -37,6 +37,8 @@ pytz==2023.3.post1
# via pandas # via pandas
six==1.16.0 six==1.16.0
# via python-dateutil # via python-dateutil
tomli==2.0.1
# via -r requirements.in
tzdata==2023.3 tzdata==2023.3
# via pandas # via pandas
urllib3==2.1.0 urllib3==2.1.0
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment