diff --git a/create-table-backend.sql b/create-table-backend.sql
new file mode 100644
index 0000000000000000000000000000000000000000..583cafd55ca39a20cd14a01b1a5997ed0765a077
--- /dev/null
+++ b/create-table-backend.sql
@@ -0,0 +1,20 @@
+-- Define the dialect
+-- sqlfluff:dialect:sqlite
+
+-- Set a smaller indent for this file
+-- sqlfluff:indentation:tab_space_size:2
+
+-- Set keywords to be capitalised
+-- sqlfluff:rules:capitalisation.keywords:capitalisation_policy:upper
+
+CREATE TABLE IF NOT EXISTS backends
+(
+  repository TEXT,
+  project_name TEXT,
+  project_version TEXT,
+  backend TEXT,
+  nb_uploads INTEGER,
+  uploaded_on TEXT,
+  year INTEGER,
+  path TEXT
+);
diff --git a/extract-all-projects-versions.sql b/extract-all-projects-versions.sql
new file mode 100644
index 0000000000000000000000000000000000000000..f5803ea0ea51881e38d856cd45756cd6ada11d27
--- /dev/null
+++ b/extract-all-projects-versions.sql
@@ -0,0 +1,24 @@
+-- Define the dialect
+-- sqlfluff:dialect:duckdb
+
+-- Set a smaller indent for this file
+-- sqlfluff:indentation:tab_space_size:2
+
+-- Set keywords to be capitalised
+-- sqlfluff:rules:capitalisation.keywords:capitalisation_policy:upper
+
+SELECT
+  project_name,
+  COUNT(project_name) AS nb_uploads,
+  MAX(project_version) AS max_version,
+  LIST(DISTINCT project_version) AS all_versions,
+  MAX(uploaded_on) AS max_uploaded_on,
+  LIST(DISTINCT uploaded_on) AS all_uploaded_on,
+  LIST(DISTINCT repository) AS all_repository,
+  LIST(DISTINCT path) AS all_path
+FROM '*.parquet'
+WHERE
+  (DATE_PART('year', uploaded_on) >= 2018)
+  AND REGEXP_MATCHES(path, 'pyproject\.toml$')
+  AND skip_reason = ''
+GROUP BY project_name;
diff --git a/extract-latest-project-version.sql b/extract-latest-project-version.sql
new file mode 100644
index 0000000000000000000000000000000000000000..16945e853c62dd4cf77121084607f2f6c2e6d016
--- /dev/null
+++ b/extract-latest-project-version.sql
@@ -0,0 +1,36 @@
+-- Define the dialect
+-- sqlfluff:dialect:duckdb
+
+-- Set a smaller indent for this file
+-- sqlfluff:indentation:tab_space_size:2
+
+-- Set keywords to be capitalised
+-- sqlfluff:rules:capitalisation.keywords:capitalisation_policy:upper
+
+WITH lpv AS (
+  SELECT
+    project_name,
+    COUNT(project_name) AS nb_uploads,
+    MAX(uploaded_on) AS max_uploaded_on,
+    LIST(DISTINCT uploaded_on) AS all_uploaded_on
+  FROM '*.parquet'
+  WHERE
+    (DATE_PART('year', uploaded_on) >= 2018)
+    AND REGEXP_MATCHES(path, 'pyproject\.toml$')
+    AND skip_reason = ''
+  GROUP BY project_name
+)
+
+SELECT
+  ip.repository,
+  ip.project_name,
+  ip.project_version,
+  lpv.nb_uploads,
+  ip.uploaded_on,
+  DATE_PART('year', ip.uploaded_on) AS year,
+  ip.path
+FROM '*.parquet' AS ip
+JOIN
+  lpv
+  ON ip.project_name = lpv.project_name AND ip.uploaded_on = lpv.max_uploaded_on
+WHERE REGEXP_MATCHES(path, 'pyproject\.toml$') AND skip_reason = '';
diff --git a/pyproject-latest-to-csv.py b/pyproject-latest-to-csv.py
index 7ebca3709da73f26b21ede2357d971b7ba2812f4..428eb336782bce345a0116dcfd3377534348e736 100644
--- a/pyproject-latest-to-csv.py
+++ b/pyproject-latest-to-csv.py
@@ -9,33 +9,15 @@ https://duckdb.org/docs/guides/python/execute_sql
 
 import duckdb
 
-ALL_VERSIONS_QUERY = """SELECT project_name, COUNT(project_name) AS nb_uploads,
-  MAX(project_version) AS max_version, 
-  LIST(DISTINCT project_version) AS all_versions,
-  MAX(uploaded_on) AS max_uploaded_on, 
-  LIST(DISTINCT uploaded_on) AS all_uploaded_on,
-  LIST(DISTINCT repository) AS all_repository,
-  LIST(DISTINCT path) AS all_path
-  FROM '*.parquet'
-  WHERE (date_part('year', uploaded_on) >= '2018') AND regexp_matches(path, 'pyproject.toml$') AND skip_reason == ''
-  GROUP BY project_name;
-"""
+with open("extract-all-projects-versions.sql", "r") as f:
+    ALL_VERSIONS_QUERY = f.read()
 
 res = duckdb.sql(ALL_VERSIONS_QUERY)
+
 res.to_csv("extract-pyproject-all-versions.csv", header=True)
 
-LATEST_QUERY = """WITH lpv AS (SELECT project_name, COUNT(project_name) AS nb_uploads,
-  MAX(uploaded_on) AS max_uploaded_on, 
-  LIST(DISTINCT uploaded_on) AS all_uploaded_on
-  FROM '*.parquet'
-  WHERE (date_part('year', uploaded_on) >= '2018') AND regexp_matches(path, 'pyproject.toml$') AND skip_reason == ''
-  GROUP BY project_name)
-SELECT ip.repository, ip.project_name, ip.project_version, lpv.nb_uploads, 
-  ip.uploaded_on, date_part('year', ip.uploaded_on) AS year, ip.path
-  FROM '*.parquet' as ip
-    JOIN lpv ON ip.project_name=lpv.project_name AND ip.uploaded_on=lpv.max_uploaded_on
-  WHERE regexp_matches(path, 'pyproject.toml$') AND skip_reason == '';
-"""
+with open("extract-latest-project-version.sql", "r") as f:
+    LATEST_QUERY = f.read()
 
 # res = duckdb.sql(LATEST_QUERY).show()
 
diff --git a/pyproject-sqlite-get-files-and-extract-backend.py b/pyproject-sqlite-get-files-and-extract-backend.py
index 6d31a1510bed679c1921abcc7bbf3b0a6575be0b..48c12b825967af5d802763c8a2daf676fcdb7c12 100644
--- a/pyproject-sqlite-get-files-and-extract-backend.py
+++ b/pyproject-sqlite-get-files-and-extract-backend.py
@@ -42,35 +42,17 @@ import pycodeorg
 
 LOG = logging.getLogger(__name__)
 
-CREATE_BACKEND = """CREATE TABLE IF NOT EXISTS backends
-(repository TEXT,
- project_name TEXT,
- project_version TEXT,
- backend TEXT,
- nb_uploads INTEGER,
- uploaded_on TEXT,
- year INTEGER,
- path TEXT
-);
-"""
-
-QUERY = """SELECT repository, project_name, project_version, 
-nb_uploads, uploaded_on, year, path 
-FROM pyprojects
-"""
-
-INSERT_BACKEND = """INSERT INTO backends 
-VALUES (:repository, :project_name, :project_version,
-        :backend, :nb_uploads, :uploaded_on, :year, :path)
-"""
 
 if __name__ == "__main__":
     start_time = time.time()
 
-    logging.basicConfig(filename='pyproject-backends.log', level=logging.INFO)
+    logging.basicConfig(filename="pyproject-backends.log", level=logging.INFO)
 
     # Create backend table
     # --------------------
+    with open("create-table-backend.sql", "r") as f:
+        CREATE_BACKEND = f.read()
+
     cnx_backend = sqlite3.connect("pyproject_backends.db")
     cur_backend = cnx_backend.cursor()
 
@@ -78,37 +60,44 @@ if __name__ == "__main__":
 
     # Get project data
     # ----------------
-    cnx_proj = sqlite3.connect('extract-pyproject-latest.db')
+    with open("query-projects.sql", "r") as f:
+        QUERY_PROJECTS = f.read()
+
+    cnx_proj = sqlite3.connect("extract-pyproject-latest.db")
     cnx_proj.row_factory = sqlite3.Row
     cur_proj = cnx_proj.cursor()
 
     cur_proj.execute("SELECT COUNT(*) AS nb FROM pyprojects;")
     r = cur_proj.fetchone()
-    total = r['nb']
+    total = r["nb"]
     cpt = 0
 
-    for row in cur_proj.execute(QUERY):
+    for row in cur_proj.execute(QUERY_PROJECTS):
         values = {
-                "repository": row["repository"],
-                "project_name": row["project_name"],
-                "project_version": row["project_version"],
-                "nb_uploads": row["nb_uploads"],
-                "uploaded_on": row["uploaded_on"],
-                "year": row["year"],
-                "path": row["path"]
-                }
+            "repository": row["repository"],
+            "project_name": row["project_name"],
+            "project_version": row["project_version"],
+            "nb_uploads": row["nb_uploads"],
+            "uploaded_on": row["uploaded_on"],
+            "year": row["year"],
+            "path": row["path"],
+        }
 
         # Only fetch the pyproject.toml at the root of the project
         # --------------------------------------------------------
-        parts = values['path'].split("/")
+        parts = values["path"].split("/")
         if len(parts) == 5 and parts[-1] == "pyproject.toml":
-
             # Fetch the file data from the dataset
             # ------------------------------------
             try:
-                data = pycodeorg.get_data(values['repository'], values['project_name'], values['path'])
+                data = pycodeorg.get_data(
+                    values["repository"], values["project_name"], values["path"]
+                )
             except ValueError as e:
-                LOG.error("pycodeorg.get_data failed to retrieve %s: '%s'" % (values['project_name'], e))
+                LOG.error(
+                    "pycodeorg.get_data failed to retrieve %s: '%s'"
+                    % (values["project_name"], e)
+                )
                 continue
 
             # Then get the 'build-backend' value with a toml library
@@ -116,29 +105,45 @@ if __name__ == "__main__":
             try:
                 toml_dict = tomli.loads(data.decode())
             except (tomli.TOMLDecodeError, UnicodeDecodeError) as e:
-                LOG.error("Error reading TOML file for %s: '%s'" % (values['project_name'], e))
+                LOG.error(
+                    "Error reading TOML file for %s: '%s'" % (values["project_name"], e)
+                )
                 continue
 
             # print(f"{toml_dict=}")
 
-            if toml_dict.get('build-system') and toml_dict['build-system'].get('build-backend'):
-                backend = toml_dict['build-system'].get('build-backend')
+            if toml_dict.get("build-system") and toml_dict["build-system"].get(
+                "build-backend"
+            ):
+                backend = toml_dict["build-system"].get("build-backend")
 
-                values['backend'] = backend
+                values["backend"] = backend
                 print(f"{values['project_name']} : {values['backend']}")
             else:
-                values['backend'] = None
+                values["backend"] = None
                 print(f"{values['project_name']} : .......... no backend found")
 
             try:
-                cur_backend.execute(INSERT_BACKEND, values)
+                cur_backend.execute(
+                    """INSERT INTO backends
+                       VALUES (:repository, :project_name, :project_version,
+                               :backend, :nb_uploads, :uploaded_on, :year, :path)
+                    """,
+                    values,
+                )
                 cnx_backend.commit()
             except sqlite3.InterfaceError as e:
-                LOG.error("Error writing to sqlite3 for %s: '%s'" % (values['project_name'], e))
+                LOG.error(
+                    "Error writing to sqlite3 for %s: '%s'"
+                    % (values["project_name"], e)
+                )
                 continue
 
         else:
-            LOG.info(f"%s is not a root path for %s" % (values['path'], values['project_name']))
+            LOG.info(
+                "%s is not a root path for %s"
+                % (values["path"], values["project_name"])
+            )
 
         cpt = cpt + 1
         if cpt % 2500 == 0:
@@ -153,4 +158,3 @@ if __name__ == "__main__":
 
     LOG.info(duration_msg)
     print(duration_msg)
-
diff --git a/query-projects.sql b/query-projects.sql
new file mode 100644
index 0000000000000000000000000000000000000000..39a726fe8f493df5c672742dd8b0f77499c1b6e4
--- /dev/null
+++ b/query-projects.sql
@@ -0,0 +1,18 @@
+-- Define the dialect
+-- sqlfluff:dialect:sqlite
+
+-- Set a smaller indent for this file
+-- sqlfluff:indentation:tab_space_size:2
+
+-- Set keywords to be capitalised
+-- sqlfluff:rules:capitalisation.keywords:capitalisation_policy:upper
+
+SELECT
+  repository,
+  project_name,
+  project_version,
+  nb_uploads,
+  uploaded_on,
+  year,
+  path
+FROM pyprojects;