from typing import Any, Optional

from wiktextract import (
    WiktextractContext,
    WiktionaryConfig,
)
from wiktextract.wiktionary import page_handler
from wikitextprocessor import Wtp, Page

# Default location of the SQLite database handed to ``Wtp``.
db_path = "./sqlite.db"


class WiktextractError(Exception):
    """Raised by ``Wiktextract.parse_page`` when the page handler fails."""


class Wiktextract:
    """Thin wrapper that parses single Wiktionary pages with wiktextract.

    Each instance owns its own ``WiktextractContext`` (``self.wxr``).  The
    module-level ``page_handler`` function reads its context from a ``wxr``
    attribute on the function object itself, which is shared process-wide, so
    the instance re-binds that attribute before every parse.
    """

    def __init__(
        self,
        wiktlang: str,
        wordlang: str,
        database_path: Optional[str] = None,
    ):
        """Build the extraction config and context.

        Args:
            wiktlang: Passed to ``WiktionaryConfig`` as
                ``dump_file_lang_code``.
            wordlang: The single entry of ``capture_language_codes``.
            database_path: Database path given to ``Wtp``.  When ``None``,
                the module-level ``db_path`` is used.
        """
        self.wiktlang = wiktlang
        self.wordlang = wordlang
        config = WiktionaryConfig(
            dump_file_lang_code=wiktlang,
            capture_language_codes=[wordlang],
            capture_translations=True,
            capture_pronunciation=True,
            capture_linkages=True,
            capture_compounds=True,
            capture_redirects=True,
            capture_examples=True,
            capture_etymologies=True,
            capture_descendants=True,
            capture_inflections=True,
        )
        if database_path is None:
            # Read the module global at call time, as the original code did.
            database_path = db_path
        # Keep the context on the instance: the attribute on the shared
        # ``page_handler`` function is overwritten by every new instance.
        self.wxr = WiktextractContext(Wtp(db_path=database_path), config)
        self.page_handler = page_handler
        # Still bind immediately so direct ``self.page_handler(page)`` calls
        # made right after construction keep working.
        self.page_handler.wxr = self.wxr

    def parse_page(self, title: str, wikicode: str) -> Any:
        """Store one page in the database and run the page handler on it.

        Args:
            title: Page title.
            wikicode: Wikitext body of the page.

        Returns:
            The second element of the ``(success, ret, err)`` tuple returned
            by ``page_handler`` (presumably the extracted data; confirm
            against the installed wiktextract version).

        Raises:
            WiktextractError: If ``page_handler`` reports failure; the
                handler's ``err`` value is the exception argument.
        """
        # Another instance may have re-bound the shared function attribute
        # since this one was created; restore this instance's context.
        self.page_handler.wxr = self.wxr

        # Add the page to the database.
        self.wxr.wtp.add_page(
            title=title,
            namespace_id=0,
            body=wikicode,
            model='wikitext',
        )
        # Create the page object with the same title, namespace id (0), body
        # and model as the stored page.
        page = Page(title, 0, None, True, wikicode, 'wikitext')
        # Parse the page.
        success, ret, err = self.page_handler(page)
        if not success:
            raise WiktextractError(err)
        return ret