Skip to content
Snippets Groups Projects
Commit 05d25235 authored by Mathieu Loiseau's avatar Mathieu Loiseau
Browse files

api allows format choice

parent c70a61fa
No related branches found
No related tags found
No related merge requests found
......@@ -5,10 +5,20 @@ This project provides a light-weight wrapper to the [wiktextract](https://github
The Flask app accepts GET requests at the following URLs:
```
localhost:5000/search/<wiktlang>/<wordlang>/<word>
localhost:5000/simplesearch/<lang>/<word>
localhost:5000/search/<wiktlang>/<wordlang>/<word>/<format>
```
where `<wiktlang>` specifies the language of the desired Wiktionary edition, `<wordlang>` the language of the word, and `<word>` the word itself to be queried. The route returns the extracted JSON object for the given query.
* `simplesearch` returns a non-ASCII, wikstraktor-formatted JSON entry
* `<lang>`: the language used both for the Wiktionary edition and for the word,
* `<word>`: the word form to be queried.
* `search` returns a JSON-formatted entry
* `<wiktlang>`: specifies the language of the desired Wiktionary edition,
* `<wordlang>`: the language of the word,
* `<word>`: the word itself to be queried.
* `<format>`: the format of the output
* `wiktextract` or `xtr` : wiktextract native format
* `wikstraktor` or `strkt`: conversion to wikstraktor format
* the prefix `a_` (e.g. `a_xtr` or `a_wikstraktor`) can be added to the format name to force ASCII-only output
## Local installation
......
from flask import Flask, Response, jsonify, request
from flask import Flask, Response, json, jsonify, request
from flask_cors import CORS
import config
......@@ -15,12 +15,12 @@ def index():
return Response(response, 200)
@app.route("/search/<wiktlang>/<wordlang>/<word>", methods=["GET"])
def search(wiktlang, wordlang, word):
if wiktlang not in config.supported_wiktlangs:
return jsonify({"error": f"Language {wiktlang} not supported"}), 400
@app.route("/simplesearch/<lang>/<word>", methods=["GET"])
def search(lang, word):
if lang not in config.supported_wiktlangs:
return jsonify({"error": f"Language {lang} not supported"}), 400
wiktextractor = Wiktextract(wiktlang, wordlang)
wiktextractor = Wiktextract(lang, lang)
try:
resp = wiktextractor.parse_page(word)
if resp:
......@@ -29,7 +29,7 @@ def search(wiktlang, wordlang, word):
return (
jsonify(
{
"error": f"{word} is unknown in “{wordlang}” in {wiktlang}.wiktionary.org."
"error": f"{word} is unknown in “{lang}” in {lang}.wiktionary.org."
}
),
404,
......@@ -44,6 +44,47 @@ def search(wiktlang, wordlang, word):
if wiktextractor.wxr.thesaurus_db_conn:
wiktextractor.wxr.thesaurus_db_conn.close()
@app.route("/search/<wiktlang>/<wordlang>/<word>/<format>", methods=["GET"])
def search_and_format(wiktlang, wordlang, word, format):
    """Look up ``word`` and return the entry in the requested output format.

    Args:
        wiktlang: Wiktionary edition to query; must be listed in
            ``config.supported_wiktlangs``.
        wordlang: Language of the queried word.
        word: The word form to look up.
        format: Output format name. ``wiktextract``/``Wiktextract``/``xtr``
            select the native wiktextract output; ``wikstraktor``/
            ``Wikstraktor``/``strkt`` select the wikstraktor conversion.
            An ``a_``/``A_`` prefix requests ASCII-only JSON.

    Returns:
        A JSON response on success, or a ``(response, status)`` tuple with
        status 400 (unsupported language or unknown format), 404 (word not
        found) or 500 (parsing raised an exception).
    """
    # NOTE: ``format`` shadows the builtin, but the name is bound by the
    # route placeholder, so it is kept for interface compatibility.
    if wiktlang not in config.supported_wiktlangs:
        return jsonify({"error": f"Language {wiktlang} not supported"}), 400

    # A bare "a_" (nothing after the prefix) is deliberately not treated
    # as a prefix, hence the strict length check.
    force_ascii = len(format) > 2 and format[:2] in ("a_", "A_")
    if force_ascii:
        format = format[2:]

    # Validate the format before opening any database connection, so an
    # invalid request never pays for (or leaks) a wiktextract context.
    if format in ("wiktextract", "Wiktextract", "xtr"):
        to_wikstraktor = False
    elif format in ("wikstraktor", "Wikstraktor", "strkt"):
        to_wikstraktor = True
    else:
        return jsonify({"error": f"{format} is not expected"}), 400

    wiktextractor = Wiktextract(wiktlang, wordlang)
    try:
        resp = wiktextractor.parse_page(word, to_wikstraktor)
        if not resp:
            return (
                jsonify(
                    {
                        "error": f"{word} is unknown in “{wordlang}” in {wiktlang}.wiktionary.org."
                    }
                ),
                404,
            )
        if force_ascii:
            # Serialise by hand so non-ASCII characters are always escaped,
            # whatever the app-level JSON settings are.
            return Response(
                json.dumps(resp, ensure_ascii=True),
                mimetype="application/json",
            )
        return jsonify(resp)
    except Exception as e:
        # Top-level request boundary: report the failure to the client
        # instead of letting the exception propagate.
        print(e)
        return jsonify({"error": "Parsing page resulted in error: " + str(e)}), 500
    finally:
        # Always release the database connections held by the context.
        wiktextractor.wxr.wtp.db_conn.close()
        if wiktextractor.wxr.thesaurus_db_conn:
            wiktextractor.wxr.thesaurus_db_conn.close()
if __name__ == "__main__":
app.run(host=config.host, port=config.port, debug=config.debugging)
......@@ -33,20 +33,18 @@ class Wiktextract:
self.wxr = get_wiktextract_context(wiktlang, wordlang)
def parse_page(self, title: str):
def parse_page(self, title: str, wikstraktor_format: bool = True):
page = self.wxr.wtp.get_page(title)
if not page:
return None
result = parse_page(self.wxr, title, page.body)
if wikstraktor_format and result:
result = self.wikstraktor_format(result)
return result
converted_result = self.convert(result)
return converted_result
#return result
def convert(self, data_format1):
def wikstraktor_format(self, data_format1):
transformed_data = []
for index, pos in enumerate(data_format1):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment