From 0b130bd57bff03a8ba0d392e35f0e4e90e1e325a Mon Sep 17 00:00:00 2001
From: Empiriker <till.ueberfries@gmail.com>
Date: Mon, 16 Oct 2023 20:16:19 +0300
Subject: [PATCH] Override Wtp's get_page() function to live query missing
 /translations pages

---
 src/wiktextract_context.py | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/wiktextract_context.py b/src/wiktextract_context.py
index fcf7687..e1abb42 100644
--- a/src/wiktextract_context.py
+++ b/src/wiktextract_context.py
@@ -2,10 +2,34 @@ from wiktextract import (
     WiktextractContext,
     WiktionaryConfig,
 )
-from wikitextprocessor import Wtp
+from wikitextprocessor import Wtp, Page
 
 from typing import Optional
 
+from get_wikicode import get_wikicode
+
+
+class CustomWtp(Wtp):
+    """Wtp that fetches missing "/translations" pages from live Wiktionary."""
+
+    def get_page(
+        self,
+        title: str,
+        namespace_id: Optional[int] = None,
+        no_redirect: bool = False,
+    ) -> Optional[Page]:
+        """Return the page from the db, else a live "/translations" page."""
+        page = super().get_page(title, namespace_id, no_redirect)
+        # Titles like "tracking/parameters/empty parameter" are queried often,
+        # seem to return None by design and are not in Wiktionary: skip them.
+        if page is not None or "/translations" not in title or "tracking" in title:
+            return page
+
+        print(f"Page '{title}' not found in db. Fetching from live wiktionary.")
+        # Wrap the fetched wikicode in a Page, matching this method's return type.
+        body = get_wikicode(title, self.lang_code)
+        return Page(title, namespace_id, body=body)
+
 
 def get_wiktextract_context(wiktlang: str, wordlang: Optional[str] = None):
     db_path = f"./sqlite-{wiktlang}.db"
@@ -22,6 +46,6 @@ def get_wiktextract_context(wiktlang: str, wordlang: Optional[str] = None):
         capture_descendants=True,
         capture_inflections=True,
     )
-    wxr = WiktextractContext(Wtp(db_path=db_path), config)
+    wxr = WiktextractContext(CustomWtp(db_path=db_path), config)
 
     return wxr
-- 
GitLab