Skip to content
Snippets Groups Projects
Commit ffe7de7e authored by Empiriker's avatar Empiriker
Browse files

set supported wiktlangs in config

parent 482bb453
No related branches found
No related tags found
No related merge requests found
...@@ -8,7 +8,8 @@ from get_wikicode import get_wikicode ...@@ -8,7 +8,8 @@ from get_wikicode import get_wikicode
from wiktextract_wrapper import Wiktextract from wiktextract_wrapper import Wiktextract
from load_templates import load_templates from load_templates import load_templates
load_templates() for wiktlang in config.supported_wiktlangs:
load_templates(wiktlang)
app = Flask(__name__) app = Flask(__name__)
CORS(app) CORS(app)
...@@ -23,44 +24,32 @@ def index(): ...@@ -23,44 +24,32 @@ def index():
@app.route("/search/<wiktlang>/<wordlang>/<word>", methods=["GET"]) @app.route("/search/<wiktlang>/<wordlang>/<word>", methods=["GET"])
def search(wiktlang, wordlang, word): def search(wiktlang, wordlang, word):
if wiktlang not in config.supported_wiktlangs:
return jsonify({"error": f"Language {wiktlang} not supported"}), 400
wikicode = get_wikicode(word, wiktlang) wikicode = get_wikicode(word, wiktlang)
if wikicode: if wikicode:
en_wiktextract = Wiktextract("en", wordlang) wiktextractor = Wiktextract(wiktlang, wordlang)
try: try:
resp = en_wiktextract.parse_page(word, wikicode) resp = wiktextractor.parse_page(word, wikicode)
return jsonify(resp) return jsonify(resp)
except Exception as e: except Exception as e:
print(e) print(e)
resp = f"""<!doctype html>
<html> return jsonify({"error": "Parsing page resulted in error: " + str(e)}), 500
<head>
<title>Error</title>
</head>
<body>
<h1>{word}</h1>
<p>{e}</p>
</body>
</html>"""
status = 404
mimetype = "text/html"
finally: finally:
en_wiktextract.page_handler.wxr.wtp.db_conn.close() wiktextractor.page_handler.wxr.wtp.db_conn.close()
else: else:
resp = f"""<!doctype html> return (
<html> jsonify(
<head> {
<title>Error</title> "error": f"{word} is unknown in “{wordlang}” in {wiktlang}.wiktionary.org."
</head> }
<body> ),
<h1>{word}</h1> 404,
<p>{word} is unknown in “{wordlang}” in {wiktlang}.wiktionary.org.</p> )
</body>
</html>"""
status = 404
mimetype = "text/html"
return Response(resp, status=status, mimetype=mimetype)
if __name__ == "__main__": if __name__ == "__main__":
......
host = "0.0.0.0" host = "0.0.0.0"
port = 80 port = 80
debugging = True debugging = True
supported_wiktlangs = ["en"]
...@@ -37,7 +37,7 @@ def time_elapsed_indicator(): ...@@ -37,7 +37,7 @@ def time_elapsed_indicator():
def get_most_recent_file(directory, lang_code): def get_most_recent_file(directory, lang_code):
pattern = re.compile( pattern = re.compile(
f"{lang_code}wiktionary-(\d+)-pages-articles-multistream.xml.bz2" r"" + lang_code + r"wiktionary-(\d+)-pages-articles-multistream.xml.bz2"
) )
matching_files = [f for f in os.listdir(directory) if pattern.match(f)] matching_files = [f for f in os.listdir(directory) if pattern.match(f)]
...@@ -46,7 +46,7 @@ def get_most_recent_file(directory, lang_code): ...@@ -46,7 +46,7 @@ def get_most_recent_file(directory, lang_code):
return None return None
most_recent_file = sorted( most_recent_file = sorted(
matching_files, key=lambda x: pattern.match(x).group(1), reverse=True matching_files, key=lambda x: pattern.match(x).group(1), reverse=True # type: ignore
)[0] )[0]
return os.path.join(directory, most_recent_file) return os.path.join(directory, most_recent_file)
...@@ -72,7 +72,10 @@ def load_templates(wiktlang: str): ...@@ -72,7 +72,10 @@ def load_templates(wiktlang: str):
dump_file, dump_file,
num_processes=1, num_processes=1,
phase1_only=True, phase1_only=True,
namespace_ids={10, 828}, namespace_ids={
10,
828,
}, # Template and Module namespaces; ToDo: Get the namespace IDs from the dump file
out_f=None, # type: ignore out_f=None, # type: ignore
) )
wxr.wtp.db_conn.commit() wxr.wtp.db_conn.commit()
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment