Skip to content
Snippets Groups Projects
Commit 4e2f3049 authored by Mathieu Loiseau's avatar Mathieu Loiseau
Browse files
parents 2156444c 05d25235
No related branches found
No related tags found
No related merge requests found
...@@ -5,10 +5,20 @@ This project provides a light-weight wrapper to the [wiktextract](https://github ...@@ -5,10 +5,20 @@ This project provides a light-weight wrapper to the [wiktextract](https://github
The FLASK app accepts GET request at the url The FLASK app accepts GET request at the url
``` ```
localhost:5000/search/<wiktlang>/<wordlang>/<word> localhost:5000/simplesearch/<lang>/<word>
localhost:5000/search/<wiktlang>/<wordlang>/<word>/<format>
``` ```
* `simplesearch` returns a JSON entry in wikstraktor format (non-ASCII output)
where `<wiktlang>` specifies the language of the desired Wiktionary edition, `<wordlang>` the language of the word, and `<word>` the word itself to be queried. The route returns the extracted JSON object for the given query. * `lang`: language both for the wiktionary and the word,
* `word`: the wordform to be queried.
* `search` returns a JSON-formatted entry
* `<wiktlang>`: specifies the language of the desired Wiktionary edition,
* `<wordlang>`: the language of the word,
* `<word>`: the word itself to be queried.
* `<format>`: the format of the output
* `wiktextract` or `xtr` : wiktextract native format
* `wikstraktor` or `strkt`: conversion to wikstraktor format
* the prefix `a_` can be prepended to the format (e.g. `a_xtr`, `a_wikstraktor`) to force ASCII-only output
## Local installation ## Local installation
......
from flask import Flask, Response, jsonify, request from flask import Flask, Response, json, jsonify, request
from flask_cors import CORS from flask_cors import CORS
import config import config
...@@ -15,12 +15,12 @@ def index(): ...@@ -15,12 +15,12 @@ def index():
return Response(response, 200) return Response(response, 200)
@app.route("/search/<wiktlang>/<wordlang>/<word>", methods=["GET"]) @app.route("/simplesearch/<lang>/<word>", methods=["GET"])
def search(wiktlang, wordlang, word): def search(lang, word):
if wiktlang not in config.supported_wiktlangs: if lang not in config.supported_wiktlangs:
return jsonify({"error": f"Language {wiktlang} not supported"}), 400 return jsonify({"error": f"Language {lang} not supported"}), 400
wiktextractor = Wiktextract(wiktlang, wordlang) wiktextractor = Wiktextract(lang, lang)
try: try:
resp = wiktextractor.parse_page(word) resp = wiktextractor.parse_page(word)
if resp: if resp:
...@@ -29,7 +29,7 @@ def search(wiktlang, wordlang, word): ...@@ -29,7 +29,7 @@ def search(wiktlang, wordlang, word):
return ( return (
jsonify( jsonify(
{ {
"error": f"{word} is unknown in “{wordlang}” in {wiktlang}.wiktionary.org." "error": f"{word} is unknown in “{lang}” in {lang}.wiktionary.org."
} }
), ),
404, 404,
...@@ -44,6 +44,47 @@ def search(wiktlang, wordlang, word): ...@@ -44,6 +44,47 @@ def search(wiktlang, wordlang, word):
if wiktextractor.wxr.thesaurus_db_conn: if wiktextractor.wxr.thesaurus_db_conn:
wiktextractor.wxr.thesaurus_db_conn.close() wiktextractor.wxr.thesaurus_db_conn.close()
@app.route("/search/<wiktlang>/<wordlang>/<word>/<format>", methods=["GET"])
def search_and_format(wiktlang, wordlang, word, format):
    """Look up ``word`` and return the entry in the requested output format.

    Args:
        wiktlang: Wiktionary edition to query; must be listed in
            ``config.supported_wiktlangs``.
        wordlang: Language of the queried word.
        word: The word form to look up.
        format: ``wiktextract``/``Wiktextract``/``xtr`` for the output of
            ``parse_page`` without conversion, or
            ``wikstraktor``/``Wikstraktor``/``strkt`` for the converted
            output. An ``a_``/``A_`` prefix forces ASCII-only JSON.

    Returns:
        A Flask response: the entry as JSON on success, or a JSON object
        with an ``error`` key and status 400 (unsupported language or
        unknown format), 404 (word not found) or 500 (parsing failed).
    """
    if wiktlang not in config.supported_wiktlangs:
        return jsonify({"error": f"Language {wiktlang} not supported"}), 400

    # The prefix is only honoured when something follows it, so a bare
    # "a_" is still rejected as an unknown format below.
    force_ascii = len(format) > 2 and format[:2] in ("a_", "A_")
    output_format = format[2:] if force_ascii else format

    # Validate the format before opening the extraction context so that a
    # bad request does not open (and immediately close) database connections.
    if output_format in ("wiktextract", "Wiktextract", "xtr"):
        to_wikstraktor = False
    elif output_format in ("wikstraktor", "Wikstraktor", "strkt"):
        to_wikstraktor = True
    else:
        return jsonify({"error": f"{output_format} is not expected"}), 400

    wiktextractor = Wiktextract(wiktlang, wordlang)
    try:
        resp = wiktextractor.parse_page(word, to_wikstraktor)
        if not resp:
            return (
                jsonify(
                    {
                        "error": f"{word} is unknown in “{wordlang}” in {wiktlang}.wiktionary.org."
                    }
                ),
                404,
            )
        if force_ascii:
            # Serialise explicitly: jsonify's escaping follows the app's
            # JSON configuration, whereas this path must always be ASCII.
            return Response(
                json.dumps(resp, ensure_ascii=True),
                mimetype="application/json",
            )
        return jsonify(resp)
    except Exception as e:
        # Top-level request boundary: log with traceback and report the
        # failure to the client instead of propagating.
        app.logger.exception("Parsing page %r failed", word)
        return jsonify({"error": "Parsing page resulted in error: " + str(e)}), 500
    finally:
        # Always release the connections held by the extraction context.
        wiktextractor.wxr.wtp.db_conn.close()
        if wiktextractor.wxr.thesaurus_db_conn:
            wiktextractor.wxr.thesaurus_db_conn.close()
if __name__ == "__main__": if __name__ == "__main__":
app.run(host=config.host, port=config.port, debug=config.debugging) app.run(host=config.host, port=config.port, debug=config.debugging)
...@@ -33,20 +33,18 @@ class Wiktextract: ...@@ -33,20 +33,18 @@ class Wiktextract:
self.wxr = get_wiktextract_context(wiktlang, wordlang) self.wxr = get_wiktextract_context(wiktlang, wordlang)
def parse_page(self, title: str): def parse_page(self, title: str, wikstraktor_format: bool = True):
page = self.wxr.wtp.get_page(title) page = self.wxr.wtp.get_page(title)
if not page: if not page:
return None return None
result = parse_page(self.wxr, title, page.body) result = parse_page(self.wxr, title, page.body)
if wikstraktor_format and result:
result = self.wikstraktor_format(result)
return result
converted_result = self.convert(result) def wikstraktor_format(self, data_format1):
return converted_result
#return result
def convert(self, data_format1):
transformed_data = [] transformed_data = []
for index, pos in enumerate(data_format1): for index, pos in enumerate(data_format1):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment