add flask app with wiktextract wrapper

84b58dcc · Empiriker · fc6b6263 · 84b58dcc · 84b58dcc · 84b58dcc
Commit 84b58dcc authored 1 year ago by Empiriker
--- a/.gitignore
+++ b/.gitignore
+/wikstraktor
+/wiktextract
+throttle.ctrl
+apicache-py3
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

--- a/README.md
+++ b/README.md
 # live-query-wiktextract
+## Installation
+1. Install wikitextprocessor from source:
+```
+git clone https://github.com/tatuylonen/wikitextprocessor.git
+cd wikitextprocessor
+python -m pip install -U pip
+python -m pip install --use-pep517 .
+```
+- Commit `e5296c16f2d715e62121f23cb5057374da48cda3` was used during development.
+2. Clone wiktextract:
+```
+git clone https://github.com/tatuylonen/wiktextract.git
+```
+- Commit `205c4a2d88c27113f0117e0095f466605976af81` was used during development.
--- a/requirements.txt
+++ b/requirements.txt
+pywikibot==8.3.2
+Flask==2.3.3
+flask_cors==4.0.0
\ No newline at end of file
--- a/src/app.py
+++ b/src/app.py
+from flask import Flask, jsonify
+from flask import request
+from flask import Response 
+from flask_cors import CORS 
+import config
+from get_wikicode import get_wikicode
+from wiktextract_wrapper import Wiktextract
+# Flask application object; the route decorators in this module register
+# their view functions on it.
+app = Flask(__name__)
+# Apply flask_cors to the whole app. No options are passed, so the
+# extension's defaults are in effect.
+CORS(app)
+@app.route('/', methods=['GET'])
+def index():
+    c = request.remote_addr
+    response = f"<p>Server is running, your ip is {c}</p>"
+    return Response(response, 200)
+@app.route('/search/<wiktlang>/<wordlang>/<word>', methods=['GET'])
+def search(wiktlang, wordlang, word):
+    en_wiktextract = Wiktextract("en", "en")
+    wikicode = get_wikicode(word, wiktlang)
+    if wikicode:
+      try:
+        resp = en_wiktextract.parse_page(word, wikicode)
+        return jsonify(resp)
+      except Exception as e:
+        print(e)
+        resp =  f"""<!doctype html>
+                    <html>
+                    <head>
+                        <title>Error</title>
+                    </head> 
+                    <body>
+                        <h1>{word}</h1>
+                        <p>{e}</p>  
+                    </body>
+                    </html>"""
+        status = 404
+        mimetype='text/html'
+    else:
+      resp =  f"""<!doctype html>
+                  <html>
+                  <head>
+                      <title>Error</title>
+                  </head>
+                  <body>
+                      <h1>{word}</h1>
+                      <p>{word} is unknown in “{wordlang}” in {wiktlang}.wiktionary.org.</p>
+                  </body>
+                  </html>"""
+      status = 404
+      mimetype='text/html'
+    return Response(resp, status=status, mimetype=mimetype)
+if __name__ == "__main__":
+    app.run(host=config.host, port=config.port, debug=config.debugging)
--- a/src/config.py
+++ b/src/config.py
+# Server settings; src/app.py passes them to app.run(host=..., port=..., debug=...).
+# Bind address: 0.0.0.0 listens on all network interfaces.
+host = "0.0.0.0"
+# TCP port the server listens on.
+port = 80
+# Passed to app.run as Flask's debug flag.
+# NOTE(review): debug mode combined with a 0.0.0.0 bind makes the interactive
+# debugger reachable from the network -- confirm this is only ever used for
+# local development.
+debugging = True
\ No newline at end of file
--- a/src/get_wikicode.py
+++ b/src/get_wikicode.py
+import pywikibot
+def get_wikicode(title:str, wiktlang:str):
+  site = pywikibot.Site(f"wiktionary:{wiktlang}")
+  page = pywikibot.Page(site, title)
+  return page.text
\ No newline at end of file
--- a/src/wiktextract_wrapper.py
+++ b/src/wiktextract_wrapper.py
+from wiktextract import (
+    WiktextractContext,
+    WiktionaryConfig,
+)
+from wiktextract.wiktionary import page_handler
+from wikitextprocessor import Wtp, Page
+# Database path handed to Wtp(db_path=...) below. It is relative, so it is
+# resolved against the working directory of the running process.
+db_path = "./sqlite.db"
+class Wiktextract:
+  def __init__(self, wiktlang:str, wordlang:str):
+    self.wiktlang = wiktlang
+    self.wordlang = wordlang
+    config = WiktionaryConfig(
+      dump_file_lang_code=wiktlang,
+      capture_language_codes=[wordlang],
+      capture_translations=True,
+      capture_pronunciation=True,
+      capture_linkages=True,
+      capture_compounds=True,
+      capture_redirects=True,
+      capture_examples=True,
+      capture_etymologies=True,
+      capture_descendants=True,
+      capture_inflections=True,)
+    wxr = WiktextractContext(Wtp(db_path=db_path), config)
+    self.page_handler = page_handler
+    self.page_handler.wxr = wxr
+  def parse_page(self, title:str, wikicode:str):
+    # add page to the database
+    self.page_handler.wxr.wtp.add_page(title=title, namespace_id=0, body=wikicode, model='wikitext')
+    # create a page object
+    page = Page(title, 0, None, True, wikicode, 'wikitext')
+    # parse the page
+    success, ret, err = self.page_handler(page)
+    if success:
+      return ret
+    else:
+      raise Exception(err)