Skip to content
Snippets Groups Projects
Commit 0b130bd5 authored by Empiriker's avatar Empiriker
Browse files

Override Wtp's get_page() function to live query missing /translations pages

parent e7b0fbaf
No related branches found
No related tags found
No related merge requests found
...@@ -2,10 +2,34 @@ from wiktextract import ( ...@@ -2,10 +2,34 @@ from wiktextract import (
WiktextractContext, WiktextractContext,
WiktionaryConfig, WiktionaryConfig,
) )
from wikitextprocessor import Wtp from wikitextprocessor import Wtp, Page
from typing import Optional from typing import Optional
from get_wikicode import get_wikicode
class CustomWtp(Wtp):
    """Wtp subclass whose page lookup can fall back to a live fetch.

    Only ``get_page`` is overridden; everything else is inherited from
    ``Wtp`` unchanged.
    """

    def get_page(
        self,
        title: str,
        namespace_id: Optional[int] = None,
        no_redirect: bool = False,
    ) -> Optional[Page]:
        """Return the page for *title*, fetching missing translation subpages.

        The lookup is first delegated to ``Wtp.get_page``. If that returns
        ``None`` and the title contains ``"/translations"`` (and does not
        contain ``"tracking"``), the wikitext is obtained via
        ``get_wikicode(title, self.lang_code)`` and wrapped in a new ``Page``.

        Args:
            title: Title of the page to look up.
            namespace_id: Namespace id forwarded to the parent lookup and
                stored on the synthesized ``Page``.
            no_redirect: Forwarded unchanged to the parent lookup.

        Returns:
            The page from the parent lookup when it exists, a synthesized
            ``Page`` for a missing "/translations" title, otherwise ``None``.
        """
        page = super().get_page(title, namespace_id, no_redirect)
        # Identity check: only a genuinely missing page (None) triggers the
        # fallback, regardless of how Page implements equality.
        if page is not None:
            return page

        # The db is often queried with titles such as
        # "tracking/parameters/empty parameter". These seem to return None by
        # design and are not present in Wiktionary, so they are skipped.
        if "/translations" in title and "tracking" not in title:
            print(f"Page '{title}' not found in db. Fetching from live wiktionary.")
            # NOTE(review): what get_wikicode returns when the live page does
            # not exist either is not visible here -- confirm that a Page with
            # such a body is acceptable to callers.
            body = get_wikicode(title, self.lang_code)
            return Page(title, namespace_id, body=body)

        return None
def get_wiktextract_context(wiktlang: str, wordlang: Optional[str] = None): def get_wiktextract_context(wiktlang: str, wordlang: Optional[str] = None):
db_path = f"./sqlite-{wiktlang}.db" db_path = f"./sqlite-{wiktlang}.db"
...@@ -22,6 +46,6 @@ def get_wiktextract_context(wiktlang: str, wordlang: Optional[str] = None): ...@@ -22,6 +46,6 @@ def get_wiktextract_context(wiktlang: str, wordlang: Optional[str] = None):
capture_descendants=True, capture_descendants=True,
capture_inflections=True, capture_inflections=True,
) )
wxr = WiktextractContext(Wtp(db_path=db_path), config) wxr = WiktextractContext(CustomWtp(db_path=db_path), config)
return wxr return wxr
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment