From a6d161abb2e98463c02b0e6728a7ea300ea8df3d Mon Sep 17 00:00:00 2001
From: Empiriker <till.ueberfries@gmail.com>
Date: Wed, 18 Oct 2023 14:24:38 +0300
Subject: [PATCH] Load whole dumps instead of only templates

---
 src/{load_templates.py => load_dumps.py} | 29 ++++++++++++++++--------
 1 file changed, 20 insertions(+), 9 deletions(-)
 rename src/{load_templates.py => load_dumps.py} (85%)

diff --git a/src/load_templates.py b/src/load_dumps.py
similarity index 85%
rename from src/load_templates.py
rename to src/load_dumps.py
index 17e132b..a9c9ba9 100644
--- a/src/load_templates.py
+++ b/src/load_dumps.py
@@ -1,16 +1,26 @@
-from wiktextract.wiktionary import parse_wiktionary
-from wiktextract_context import get_wiktextract_context
-
 import os
 import re
-
 import threading
 import time
 from typing import List
 
+from wiktextract.wiktionary import parse_wiktionary
+
+from wiktextract_context import get_wiktextract_context
 
 DUMPS_DIR = "dumps"
 
+RECOGNIZED_NAMESPACE_NAMES = [  # names mapped to ids via NAMESPACE_DATA
+    "Main",
+    "Category",
+    "Appendix",
+    "Project",
+    "Thesaurus",
+    "Module",
+    "Template",
+    "Reconstruction",
+]  # NOTE(review): a name missing from NAMESPACE_DATA yields a None id
+
 
 def start_progress_indicator(is_done: List[bool], msg: str = ""):
     is_done[0] = False
@@ -64,20 +74,21 @@ def load_templates(wiktlang: str):
         is_done, msg=f"Loading templates for {wiktlang}..."
     )
 
-    wxr = get_wiktextract_context(wiktlang, mock_get_page=False)
+    wxr = get_wiktextract_context(wiktlang)
 
     wxr.wtp.db_conn.execute("DELETE FROM pages")
     wxr.wtp.db_conn.commit()
 
+    namespace_ids = {
+        wxr.wtp.NAMESPACE_DATA.get(name, {}).get("id")
+        for name in RECOGNIZED_NAMESPACE_NAMES
+    }
     parse_wiktionary(
         wxr,
         dump_file,
         num_processes=1,
         phase1_only=True,
-        namespace_ids={
-            10,
-            828,
-        },  # Template and Module namespaces; ToDo: Get the namespace IDs from the dump file
+        namespace_ids=namespace_ids,
         out_f=None,  # type: ignore
     )
     wxr.wtp.db_conn.commit()
-- 
GitLab