Skip to content
Snippets Groups Projects
Commit 0b130bd5 authored by Empiriker's avatar Empiriker
Browse files

Override Wtp's get_page() method to live-query missing /translations pages

parent e7b0fbaf
No related branches found
No related tags found
No related merge requests found
......@@ -2,10 +2,34 @@ from wiktextract import (
WiktextractContext,
WiktionaryConfig,
)
from wikitextprocessor import Wtp
from wikitextprocessor import Wtp, Page
from typing import Optional
from get_wikicode import get_wikicode
class CustomWtp(Wtp):
    """Wtp subclass whose page lookup falls back to a live fetch.

    Only titles containing "/translations" (and not "tracking") trigger the
    fallback; every other lookup behaves exactly like the parent class.
    """

    def get_page(
        self,
        title: str,
        namespace_id: Optional[int] = None,
        no_redirect: bool = False,
    ) -> Optional[Page]:
        """Return the page for *title*, fetching missing translation pages.

        The parent lookup is tried first. If it yields no page and the title
        refers to a "/translations" subpage, the wikicode is obtained via
        ``get_wikicode`` and wrapped in a new ``Page``.

        Args:
            title: Page title to look up.
            namespace_id: Optional namespace id, forwarded to the parent
                lookup and to the constructed ``Page``.
            no_redirect: Forwarded unchanged to the parent lookup.

        Returns:
            The page from the parent lookup, a freshly built ``Page`` for a
            missing translations subpage, or ``None`` otherwise.
        """
        page = super().get_page(title, namespace_id, no_redirect)
        # Identity check: a custom ``__eq__`` on Page must not be able to
        # make a real page look like a missing one.
        if page is not None:
            return page

        # The db is often called with titles like
        # "tracking/parameters/empty parameter". These seem to return None by
        # design and are not present in Wiktionary. Skip these.
        if "/translations" in title and "tracking" not in title:
            print(f"Page '{title}' not found in db. Fetching from live wiktionary.")
            # NOTE(review): get_wikicode is assumed to return the page's
            # wikitext as a string; confirm how it signals a failed fetch.
            body = get_wikicode(title, self.lang_code)
            return Page(title, namespace_id, body=body)

        return None
def get_wiktextract_context(wiktlang: str, wordlang: Optional[str] = None):
db_path = f"./sqlite-{wiktlang}.db"
......@@ -22,6 +46,6 @@ def get_wiktextract_context(wiktlang: str, wordlang: Optional[str] = None):
capture_descendants=True,
capture_inflections=True,
)
wxr = WiktextractContext(Wtp(db_path=db_path), config)
wxr = WiktextractContext(CustomWtp(db_path=db_path), config)
return wxr
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment