Skip to content
Snippets Groups Projects
Commit 1f085125 authored by Empiriker
Browse files

remove page from db after parsing

parent 942aca68
No related branches found
No related tags found
No related merge requests found
...@@ -7,6 +7,10 @@ from wikitextprocessor import Wtp, Page ...@@ -7,6 +7,10 @@ from wikitextprocessor import Wtp, Page
db_path = "./sqlite.db" db_path = "./sqlite.db"
DEFAULT_PAGE_VALUES = {
"namespace_id": 0,
"model": 'wikitext',
}
class Wiktextract: class Wiktextract:
def __init__(self, wiktlang:str, wordlang:str): def __init__(self, wiktlang:str, wordlang:str):
self.wiktlang = wiktlang self.wiktlang = wiktlang
...@@ -30,17 +34,22 @@ class Wiktextract: ...@@ -30,17 +34,22 @@ class Wiktextract:
self.page_handler.wxr = wxr self.page_handler.wxr = wxr
def parse_page(self, title:str, wikicode:str): def parse_page(self, title:str, wikicode:str):
# add page to the database # add page to the database (making it accessible to LUA templates)
self.page_handler.wxr.wtp.add_page(title=title, namespace_id=0, body=wikicode, model='wikitext') self.page_handler.wxr.wtp.add_page(title=title, namespace_id=DEFAULT_PAGE_VALUES["namespace_id"], body=wikicode, model=DEFAULT_PAGE_VALUES["model"])
# create a page object # create a page object (for parsing)
page = Page(title, 0, None, True, wikicode, 'wikitext') page = Page(title, 0, None, True, wikicode, 'wikitext')
# parse the page # parse the page
success, ret, err = self.page_handler(page) success, ret, err = self.page_handler(page)
result, parsing_errors = ret
# remove the page from the database
self.page_handler.wxr.wtp.db_conn.execute("DELETE FROM pages WHERE title = ? AND model = ?", (title, DEFAULT_PAGE_VALUES["model"]))
self.page_handler.wxr.wtp.db_conn.commit()
if success: if success:
return ret return result
else: else:
raise Exception(err) raise Exception(err)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment