# wiktextract_wrapper.py (1.22 KiB)
# Empiriker authored 84b58dcc
from wiktextract import (
WiktextractContext,
WiktionaryConfig,
)
from wiktextract.wiktionary import page_handler
from wikitextprocessor import Wtp, Page
# Path passed to ``Wtp(db_path=...)`` whenever a ``Wiktextract`` instance is
# constructed; relative, so it resolves against the current working directory.
db_path: str = "./sqlite.db"
class Wiktextract:
    """Run wiktextract's ``page_handler`` on individual pages of wikitext.

    Each instance builds its own ``WiktionaryConfig`` / ``WiktextractContext``
    pair for one Wiktionary edition (``wiktlang``) and one captured word
    language (``wordlang``), with every ``capture_*`` option listed in
    ``__init__`` enabled.
    """

    def __init__(self, wiktlang: str, wordlang: str):
        """Create the extraction context.

        Args:
            wiktlang: Passed as ``dump_file_lang_code`` to ``WiktionaryConfig``.
            wordlang: The only entry of ``capture_language_codes``.
        """
        self.wiktlang = wiktlang
        self.wordlang = wordlang
        config = WiktionaryConfig(
            dump_file_lang_code=wiktlang,
            capture_language_codes=[wordlang],
            capture_translations=True,
            capture_pronunciation=True,
            capture_linkages=True,
            capture_compounds=True,
            capture_redirects=True,
            capture_examples=True,
            capture_etymologies=True,
            capture_descendants=True,
            capture_inflections=True,
        )
        # Keep the context on the instance.  ``page_handler`` is a single
        # module-level function shared by every ``Wiktextract`` object, so the
        # ``wxr`` attribute stored on it is overwritten whenever another
        # instance is created; the per-instance copy is the reliable one.
        self._wxr = WiktextractContext(Wtp(db_path=db_path), config)
        self.page_handler = page_handler
        self.page_handler.wxr = self._wxr

    def parse_page(self, title: str, wikicode: str):
        """Parse one page and return the data produced by ``page_handler``.

        Args:
            title: Title of the page.
            wikicode: Wikitext body of the page.

        Returns:
            The second element (``ret``) of the ``(success, ret, err)`` tuple
            returned by ``page_handler``.

        Raises:
            Exception: Carrying ``err`` when ``page_handler`` reports failure.
        """
        # Re-bind this instance's context before every call: another
        # ``Wiktextract`` instance may have replaced the attribute on the
        # shared handler function since this one was constructed.
        self.page_handler.wxr = self._wxr

        # add page to the database
        self._wxr.wtp.add_page(
            title=title, namespace_id=0, body=wikicode, model='wikitext'
        )

        # create a page object
        # NOTE(review): positional fields are presumably (title, namespace_id,
        # redirect_to, need_pre_expand, body, model) -- confirm against the
        # installed wikitextprocessor version.
        page = Page(title, 0, None, True, wikicode, 'wikitext')

        # parse the page
        success, ret, err = self.page_handler(page)
        if not success:
            raise Exception(err)
        return ret