diff --git a/dumps/place_dump_files_here.xml.bz2 b/dumps/place_dump_files_here.xml.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/app.py b/src/app.py index 9978ea0667d141ead048e24deebcdbf12a7985b4..df1abcfe7836aa17f604930bfc2aa45e3e675e10 100644 --- a/src/app.py +++ b/src/app.py @@ -8,7 +8,8 @@ from get_wikicode import get_wikicode from wiktextract_wrapper import Wiktextract from load_templates import load_templates -load_templates() +for wiktlang in config.supported_wiktlangs: + load_templates(wiktlang) app = Flask(__name__) CORS(app) @@ -23,44 +24,32 @@ def index(): @app.route("/search/<wiktlang>/<wordlang>/<word>", methods=["GET"]) def search(wiktlang, wordlang, word): + if wiktlang not in config.supported_wiktlangs: + return jsonify({"error": f"Language {wiktlang} not supported"}), 400 + wikicode = get_wikicode(word, wiktlang) if wikicode: - en_wiktextract = Wiktextract("en", wordlang) + wiktextractor = Wiktextract(wiktlang, wordlang) try: - resp = en_wiktextract.parse_page(word, wikicode) + resp = wiktextractor.parse_page(word, wikicode) return jsonify(resp) except Exception as e: print(e) - resp = f"""<!doctype html> - <html> - <head> - <title>Error</title> - </head> - <body> - <h1>{word}</h1> - <p>{e}</p> - </body> - </html>""" - status = 404 - mimetype = "text/html" + + return jsonify({"error": "Parsing page resulted in error: " + str(e)}), 500 finally: - en_wiktextract.page_handler.wxr.wtp.db_conn.close() + wiktextractor.page_handler.wxr.wtp.db_conn.close() else: - resp = f"""<!doctype html> - <html> - <head> - <title>Error</title> - </head> - <body> - <h1>{word}</h1> - <p>{word} is unknown in “{wordlang}†in {wiktlang}.wiktionary.org.</p> - </body> - </html>""" - status = 404 - mimetype = "text/html" - return Response(resp, status=status, mimetype=mimetype) + return ( + jsonify( + { + "error": f"{word} is unknown in “{wordlang}†in {wiktlang}.wiktionary.org." + } + ), + 404, + ) if __name__ == "__main__": diff --git a/src/config.py b/src/config.py index c7d354923ac874bc7da5c2c0849a2ade714e5a79..633fe19286f1a4f4f20d14804a2e615ccff28875 100644 --- a/src/config.py +++ b/src/config.py @@ -1,3 +1,5 @@ host = "0.0.0.0" port = 80 debugging = True + +supported_wiktlangs = ["en"] diff --git a/src/load_templates.py b/src/load_templates.py index 3f064600c69cc84c172d4f94d4af64f25e630f35..f326de74a389da09fdc1d416bc88582fd470139e 100644 --- a/src/load_templates.py +++ b/src/load_templates.py @@ -37,7 +37,7 @@ def time_elapsed_indicator(): def get_most_recent_file(directory, lang_code): pattern = re.compile( - f"{lang_code}wiktionary-(\d+)-pages-articles-multistream.xml.bz2" + r"" + lang_code + r"wiktionary-(\d+)-pages-articles-multistream.xml.bz2" ) matching_files = [f for f in os.listdir(directory) if pattern.match(f)] @@ -46,7 +46,7 @@ def get_most_recent_file(directory, lang_code): return None most_recent_file = sorted( - matching_files, key=lambda x: pattern.match(x).group(1), reverse=True + matching_files, key=lambda x: pattern.match(x).group(1), reverse=True # type: ignore )[0] return os.path.join(directory, most_recent_file) @@ -72,7 +72,10 @@ def load_templates(wiktlang: str): dump_file, num_processes=1, phase1_only=True, - namespace_ids={10, 828}, + namespace_ids={ + 10, + 828, + }, # Template and Module namespaces; ToDo: Get the namespace IDs from the dump file out_f=None, # type: ignore ) wxr.wtp.db_conn.commit()