Skip to content
Snippets Groups Projects
Commit ffe7de7e authored by Empiriker
Browse files

set supported wiktlangs in config

parent 482bb453
No related branches found
No related tags found
No related merge requests found
......@@ -8,7 +8,8 @@ from get_wikicode import get_wikicode
from wiktextract_wrapper import Wiktextract
from load_templates import load_templates
load_templates()
for wiktlang in config.supported_wiktlangs:
load_templates(wiktlang)
app = Flask(__name__)
CORS(app)
......@@ -23,44 +24,32 @@ def index():
@app.route("/search/<wiktlang>/<wordlang>/<word>", methods=["GET"])
def search(wiktlang, wordlang, word):
if wiktlang not in config.supported_wiktlangs:
return jsonify({"error": f"Language {wiktlang} not supported"}), 400
wikicode = get_wikicode(word, wiktlang)
if wikicode:
en_wiktextract = Wiktextract("en", wordlang)
wiktextractor = Wiktextract(wiktlang, wordlang)
try:
resp = en_wiktextract.parse_page(word, wikicode)
resp = wiktextractor.parse_page(word, wikicode)
return jsonify(resp)
except Exception as e:
print(e)
resp = f"""<!doctype html>
<html>
<head>
<title>Error</title>
</head>
<body>
<h1>{word}</h1>
<p>{e}</p>
</body>
</html>"""
status = 404
mimetype = "text/html"
return jsonify({"error": "Parsing page resulted in error: " + str(e)}), 500
finally:
en_wiktextract.page_handler.wxr.wtp.db_conn.close()
wiktextractor.page_handler.wxr.wtp.db_conn.close()
else:
resp = f"""<!doctype html>
<html>
<head>
<title>Error</title>
</head>
<body>
<h1>{word}</h1>
<p>{word} is unknown in “{wordlang}” in {wiktlang}.wiktionary.org.</p>
</body>
</html>"""
status = 404
mimetype = "text/html"
return Response(resp, status=status, mimetype=mimetype)
return (
jsonify(
{
"error": f"{word} is unknown in “{wordlang}” in {wiktlang}.wiktionary.org."
}
),
404,
)
if __name__ == "__main__":
......
# Runtime settings for the web service.
# NOTE(review): host, port and debugging are presumably passed to Flask's
# app.run() in the entry-point script; that call is not visible here — confirm.
host = "0.0.0.0"
port = 80
# NOTE(review): if this flag turns on Flask/Werkzeug debug mode while the server
# listens on 0.0.0.0, the interactive debugger would be reachable from the
# network — confirm this value is only used in development.
debugging = True
# Wiktionary edition codes the service accepts. The app module loads templates
# for each entry at import time, and the /search route answers HTTP 400 for any
# <wiktlang> that is not listed here.
supported_wiktlangs = ["en"]
......@@ -37,7 +37,7 @@ def time_elapsed_indicator():
def get_most_recent_file(directory, lang_code):
pattern = re.compile(
f"{lang_code}wiktionary-(\d+)-pages-articles-multistream.xml.bz2"
r"" + lang_code + r"wiktionary-(\d+)-pages-articles-multistream.xml.bz2"
)
matching_files = [f for f in os.listdir(directory) if pattern.match(f)]
......@@ -46,7 +46,7 @@ def get_most_recent_file(directory, lang_code):
return None
most_recent_file = sorted(
matching_files, key=lambda x: pattern.match(x).group(1), reverse=True
matching_files, key=lambda x: pattern.match(x).group(1), reverse=True # type: ignore
)[0]
return os.path.join(directory, most_recent_file)
......@@ -72,7 +72,10 @@ def load_templates(wiktlang: str):
dump_file,
num_processes=1,
phase1_only=True,
namespace_ids={10, 828},
namespace_ids={
10,
828,
}, # Template and Module namespaces; ToDo: Get the namespace IDs from the dump file
out_f=None, # type: ignore
)
wxr.wtp.db_conn.commit()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment