Skip to content
Snippets Groups Projects
Commit 84b58dcc authored by Empiriker's avatar Empiriker
Browse files

add flask app with wiktextract wrapper

parent fc6b6263
No related branches found
No related tags found
No related merge requests found
/wikstraktor
/wiktextract
throttle.ctrl
apicache-py3
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
......
# live-query-wiktextract
## Installation
1. Install wikitextprocessor from source:
```
git clone https://github.com/tatuylonen/wikitextprocessor.git
cd wikitextprocessor
python -m pip install -U pip
python -m pip install --use-pep517 .
```
- Commit `e5296c16f2d715e62121f23cb5057374da48cda3` was used during development.
2. Clone wiktextract
```
git clone https://github.com/tatuylonen/wiktextract.git
```
- Commit `205c4a2d88c27113f0117e0095f466605976af81` was used during development.
pywikibot==8.3.2
Flask==2.3.3
flask_cors==4.0.0
\ No newline at end of file
from flask import Flask, jsonify
from flask import request
from flask import Response
from flask_cors import CORS
import config
from get_wikicode import get_wikicode
from wiktextract_wrapper import Wiktextract
# Create the Flask application object that the route decorators below attach to.
app = Flask(__name__)
# Apply the flask_cors extension to the app; no options are passed, so the
# extension's own defaults are used.
CORS(app)
@app.route('/', methods=['GET'])
def index():
    """Liveness endpoint: report that the server is up and echo the caller's IP.

    Returns:
        A 200 ``Response`` whose body is a one-paragraph HTML snippet
        containing ``request.remote_addr``.
    """
    body = f"<p>Server is running, your ip is {request.remote_addr}</p>"
    return Response(body, status=200)
def _error_page(heading, message):
    """Render the minimal HTML error document returned by ``search``.

    Both values are HTML-escaped before interpolation because they originate
    from the request URL (or from an exception message that may quote it) and
    the document is served as ``text/html``.

    Args:
        heading: Text for the ``<h1>`` element.
        message: Text for the ``<p>`` element; converted with ``str()`` first.

    Returns:
        The complete HTML document as a string.
    """
    # Function-scope import so this block does not depend on a file-level one.
    from html import escape

    return f"""<!doctype html>
<html>
<head>
<title>Error</title>
</head>
<body>
<h1>{escape(str(heading))}</h1>
<p>{escape(str(message))}</p>
</body>
</html>"""


@app.route('/search/<wiktlang>/<wordlang>/<word>', methods=['GET'])
def search(wiktlang, wordlang, word):
    """Fetch a Wiktionary page and return its wiktextract parse as JSON.

    Args:
        wiktlang: Wiktionary edition code; passed to ``get_wikicode``.
        wordlang: Language code of the word; only used in the "unknown word"
            message.
        word: Page title to look up.

    Returns:
        The JSON-encoded result of ``parse_page`` on success; otherwise an
        HTML error page with status 404, both when the page has no wikicode
        and when parsing or JSON encoding raises.
    """
    # NOTE(review): the extractor is always built for ("en", "en"), regardless
    # of the wiktlang/wordlang path parameters -- confirm this is intentional.
    en_wiktextract = Wiktextract("en", "en")
    wikicode = get_wikicode(word, wiktlang)

    if not wikicode:
        message = f"{word} is unknown in “{wordlang}” in {wiktlang}.wiktionary.org."
        return Response(_error_page(word, message), status=404, mimetype='text/html')

    try:
        # jsonify stays inside the try so serialization failures are reported
        # through the same error page as parse failures.
        return jsonify(en_wiktextract.parse_page(word, wikicode))
    except Exception as e:
        # Log with traceback instead of print() so failures reach the app log.
        app.logger.exception("Failed to parse page %r", word)
        return Response(_error_page(word, e), status=404, mimetype='text/html')
# Start the Flask server only when this file is executed directly.
if __name__ == "__main__":
    run_options = {
        "host": config.host,
        "port": config.port,
        "debug": config.debugging,
    }
    app.run(**run_options)
import os

# Network interface the server binds to ("0.0.0.0" = every interface).
host = "0.0.0.0"
# TCP port the server listens on.
port = 80
# Debug mode turns on Werkzeug's interactive debugger, which lets a client
# execute code on the server. Because `host` binds to every interface, it is
# off by default and must be enabled explicitly, e.g. FLASK_DEBUG=1.
debugging = os.environ.get("FLASK_DEBUG", "").strip().lower() in {"1", "true", "yes", "on"}
\ No newline at end of file
import pywikibot
def get_wikicode(title: str, wiktlang: str):
    """Return the text of a Wiktionary page via pywikibot.

    Args:
        title: Title of the page to load.
        wiktlang: Language code of the Wiktionary edition; used to build the
            ``"wiktionary:<wiktlang>"`` site identifier.

    Returns:
        The ``text`` attribute of the pywikibot ``Page``. Presumably this is
        empty when the page does not exist -- verify against pywikibot.
    """
    wiktionary = pywikibot.Site(f"wiktionary:{wiktlang}")
    return pywikibot.Page(wiktionary, title).text
\ No newline at end of file
from wiktextract import (
WiktextractContext,
WiktionaryConfig,
)
from wiktextract.wiktionary import page_handler
from wikitextprocessor import Wtp, Page
# Path passed as Wtp(db_path=...) whenever a Wiktextract wrapper is constructed.
db_path = "./sqlite.db"
class Wiktextract:
    """Wrapper that runs wiktextract's ``page_handler`` on one page at a time."""

    def __init__(self, wiktlang: str, wordlang: str):
        """Build a wiktextract context and attach it to ``page_handler``.

        Args:
            wiktlang: Passed to ``WiktionaryConfig`` as ``dump_file_lang_code``.
            wordlang: The single entry of ``capture_language_codes``.
        """
        self.wiktlang = wiktlang
        self.wordlang = wordlang

        # Every capture option used here is switched on.
        enabled_captures = dict.fromkeys(
            (
                "capture_translations",
                "capture_pronunciation",
                "capture_linkages",
                "capture_compounds",
                "capture_redirects",
                "capture_examples",
                "capture_etymologies",
                "capture_descendants",
                "capture_inflections",
            ),
            True,
        )
        wikt_config = WiktionaryConfig(
            dump_file_lang_code=wiktlang,
            capture_language_codes=[wordlang],
            **enabled_captures,
        )
        context = WiktextractContext(Wtp(db_path=db_path), wikt_config)

        # The context is stored as an attribute on the imported page_handler
        # function itself, so that one function object (and its `wxr`) is
        # shared by every Wiktextract instance.
        page_handler.wxr = context
        self.page_handler = page_handler

    def parse_page(self, title: str, wikicode: str):
        """Register a page with the processor and run ``page_handler`` on it.

        Args:
            title: Page title.
            wikicode: Raw wikitext of the page.

        Returns:
            The second element of the tuple returned by ``page_handler`` when
            its first element is truthy.

        Raises:
            Exception: Carrying the handler's third tuple element when the
                first element is falsy.
        """
        handler = self.page_handler

        # Add the page to the processor's database before parsing it.
        handler.wxr.wtp.add_page(
            title=title, namespace_id=0, body=wikicode, model='wikitext'
        )

        # Positional fields are kept exactly as in the original call.
        page = Page(title, 0, None, True, wikicode, 'wikitext')

        success, ret, err = handler(page)
        if not success:
            raise Exception(err)
        return ret
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment