# wiktextract_wrapper.py
from wiktextract import (
    WiktextractContext,
    WiktionaryConfig,
)
from wiktextract.wiktionary import page_handler
from wikitextprocessor import Wtp, Page

# Path of the SQLite database file handed to Wtp(db_path=...) below.
# It is relative, so it resolves against the current working directory.
db_path = "./sqlite.db"

class Wiktextract:
    """Parse individual Wiktionary pages with wiktextract.

    An instance is configured for one Wiktionary edition (``wiktlang``) and
    one entry language (``wordlang``), with every ``capture_*`` option
    listed in ``__init__`` switched on.

    Attributes:
        wiktlang: Value passed to the config as ``dump_file_lang_code``.
        wordlang: The single code passed in ``capture_language_codes``.
        wxr: The ``WiktextractContext`` owned by this instance.
        page_handler: wiktextract's module-level ``page_handler`` function.
    """

    def __init__(self, wiktlang: str, wordlang: str) -> None:
        """Build the wiktextract configuration and context.

        Args:
            wiktlang: Language code used as ``dump_file_lang_code``.
            wordlang: Language code used as the only entry of
                ``capture_language_codes``.
        """
        self.wiktlang = wiktlang
        self.wordlang = wordlang

        config = WiktionaryConfig(
            dump_file_lang_code=wiktlang,
            capture_language_codes=[wordlang],
            capture_translations=True,
            capture_pronunciation=True,
            capture_linkages=True,
            capture_compounds=True,
            capture_redirects=True,
            capture_examples=True,
            capture_etymologies=True,
            capture_descendants=True,
            capture_inflections=True,
        )

        # Keep the context on the instance. ``page_handler`` is one function
        # object shared by every ``Wiktextract``, so an attribute stored only
        # on it would be overwritten by the next instance that is created.
        self.wxr = WiktextractContext(Wtp(db_path=db_path), config)

        self.page_handler = page_handler
        # Still set here so code reading ``instance.page_handler.wxr`` right
        # after construction keeps working as before.
        self.page_handler.wxr = self.wxr

    def parse_page(self, title: str, wikicode: str):
        """Parse one page given its title and raw wikitext.

        The page is first added to the ``Wtp`` database and then handed to
        ``page_handler`` as a ``Page`` object.

        Args:
            title: Page title.
            wikicode: Wikitext source of the page.

        Returns:
            The second element of the ``(success, ret, err)`` tuple returned
            by ``page_handler``.

        Raises:
            RuntimeError: If ``page_handler`` reports failure; the error it
                returned is passed as the exception argument.
        """
        wxr = self.wxr
        handler = self.page_handler

        # Re-bind this instance's context before every call: another
        # instance may have replaced the attribute on the shared function
        # since this one was constructed.
        handler.wxr = wxr

        # add page to the database
        wxr.wtp.add_page(
            title=title, namespace_id=0, body=wikicode, model='wikitext'
        )

        # create a page object
        # NOTE(review): positional fields are presumably (title,
        # namespace_id, redirect_to, need_pre_expand, body, model) --
        # confirm against wikitextprocessor.Page.
        page = Page(title, 0, None, True, wikicode, 'wikitext')

        # parse the page
        success, ret, err = handler(page)

        if not success:
            # RuntimeError subclasses Exception, so callers that catch
            # Exception are unaffected.
            raise RuntimeError(err)
        return ret