From ffe7de7e495cff32d83853a24961231cd6018af2 Mon Sep 17 00:00:00 2001 From: Empiriker <till.ueberfries@gmail.com> Date: Mon, 11 Sep 2023 12:44:47 +0200 Subject: [PATCH] set supported wiktlangs in config --- dumps/place_dump_files_here.xml.bz2 | 0 src/app.py | 47 +++++++++++------------------ src/config.py | 2 ++ src/load_templates.py | 9 ++++-- 4 files changed, 26 insertions(+), 32 deletions(-) create mode 100644 dumps/place_dump_files_here.xml.bz2 diff --git a/dumps/place_dump_files_here.xml.bz2 b/dumps/place_dump_files_here.xml.bz2 new file mode 100644 index 0000000..e69de29 diff --git a/src/app.py b/src/app.py index 9978ea0..df1abcf 100644 --- a/src/app.py +++ b/src/app.py @@ -8,7 +8,8 @@ from get_wikicode import get_wikicode from wiktextract_wrapper import Wiktextract from load_templates import load_templates -load_templates() +for wiktlang in config.supported_wiktlangs: + load_templates(wiktlang) app = Flask(__name__) CORS(app) @@ -23,44 +24,32 @@ def index(): @app.route("/search/<wiktlang>/<wordlang>/<word>", methods=["GET"]) def search(wiktlang, wordlang, word): + if wiktlang not in config.supported_wiktlangs: + return jsonify({"error": f"Language {wiktlang} not supported"}), 400 + wikicode = get_wikicode(word, wiktlang) if wikicode: - en_wiktextract = Wiktextract("en", wordlang) + wiktextractor = Wiktextract(wiktlang, wordlang) try: - resp = en_wiktextract.parse_page(word, wikicode) + resp = wiktextractor.parse_page(word, wikicode) return jsonify(resp) except Exception as e: print(e) - resp = f"""<!doctype html> - <html> - <head> - <title>Error</title> - </head> - <body> - <h1>{word}</h1> - <p>{e}</p> - </body> - </html>""" - status = 404 - mimetype = "text/html" + + return jsonify({"error": "Parsing page resulted in error: " + str(e)}), 500 finally: - en_wiktextract.page_handler.wxr.wtp.db_conn.close() + wiktextractor.page_handler.wxr.wtp.db_conn.close() else: - resp = f"""<!doctype html> - <html> - <head> - <title>Error</title> - </head> - <body> - 
<h1>{word}</h1> - <p>{word} is unknown in “{wordlang}” in {wiktlang}.wiktionary.org.</p> - </body> - </html>""" - status = 404 - mimetype = "text/html" - return Response(resp, status=status, mimetype=mimetype) + return ( + jsonify( + { + "error": f"{word} is unknown in “{wordlang}” in {wiktlang}.wiktionary.org." + } + ), + 404, + ) if __name__ == "__main__": diff --git a/src/config.py b/src/config.py index c7d3549..633fe19 100644 --- a/src/config.py +++ b/src/config.py @@ -1,3 +1,5 @@ host = "0.0.0.0" port = 80 debugging = True + +supported_wiktlangs = ["en"] diff --git a/src/load_templates.py b/src/load_templates.py index 3f06460..f326de7 100644 --- a/src/load_templates.py +++ b/src/load_templates.py @@ -37,7 +37,7 @@ def time_elapsed_indicator(): def get_most_recent_file(directory, lang_code): pattern = re.compile( - f"{lang_code}wiktionary-(\d+)-pages-articles-multistream.xml.bz2" + r"" + lang_code + r"wiktionary-(\d+)-pages-articles-multistream.xml.bz2" ) matching_files = [f for f in os.listdir(directory) if pattern.match(f)] @@ -46,7 +46,7 @@ def get_most_recent_file(directory, lang_code): return None most_recent_file = sorted( - matching_files, key=lambda x: pattern.match(x).group(1), reverse=True + matching_files, key=lambda x: pattern.match(x).group(1), reverse=True # type: ignore )[0] return os.path.join(directory, most_recent_file) @@ -72,7 +72,10 @@ def load_templates(wiktlang: str): dump_file, num_processes=1, phase1_only=True, - namespace_ids={10, 828}, + namespace_ids={ + 10, + 828, + }, # Template and Module namespaces; ToDo: Get the namespace IDs from the dump file out_f=None, # type: ignore ) wxr.wtp.db_conn.commit() -- GitLab