From 8354f1a7a2798f0154d8a8946040baf0f6f649c4 Mon Sep 17 00:00:00 2001 From: Mathieu Loiseau <mathieu.loiseau@liris.cnrs.fr> Date: Tue, 18 Jun 2024 14:33:31 +0200 Subject: [PATCH] =?UTF-8?q?temps=20d'ex=C3=A9=20optionnels=20dans=20wrappe?= =?UTF-8?q?r?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 22 ++++++++++++++++++++ src/wiktextract_wrapper.py | 41 ++++++++++++++++++++++++++++++-------- 2 files changed, 55 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index cb07af7..1288722 100755 --- a/README.md +++ b/README.md @@ -46,10 +46,12 @@ pip install -r requirements.txt _Since `wiktextract` and its dependency `wikitextprocessor` are not regularly published as a Python package, it's a challenge to fix them to a specific version. From `requirements.txt`, the latest version will always be installed. Attention: This might mean that after reinstalling, the output schema of `wiktextract` might have slightly changed._ ### 4. Congigure server + [config.py](https://gitlab.liris.cnrs.fr/lex-game/live-query-wiktextract/-/blob/main/src/config.py) contains : * server settings (`host`, `port` and `debug` (boolean)) * supported wiktionary language * working directory (this can be useful if the server is launched by another server using absolute paths to handle virtual environment) + ### 5. Load templates from dump files Run the script `src/load_dumps.py` to load the most recent dumpfile (for each [supported wiktionary language](https://gitlab.liris.cnrs.fr/lex-game/live-query-wiktextract/-/blob/main/src/config.py#L5)) into an sqlite database that will be used by `wiktextract`. 
@@ -73,6 +75,26 @@ You can run directly in your virtual environment using absolute paths (in case a sh -c nohup /var/www/live-query-wiktextract/lq-w-extr/bin/python3 /var/www/live-query-wiktextract/src/app.py ``` +## Use without server +usage: wiktextract_wrapper.py [-h] [-l WORD_LANGUAGE] [-w WIKT_LANGUAGE] + [-e ENTRY] [-z] [-A] [-t] +``` +Wiktextract wrapper + ex : + ‣python3 src/wiktextract_wrapper.py -l en -w fr -e yellow + +options: + -h, --help show this help message and exit + -l WORD_LANGUAGE, --word_language WORD_LANGUAGE + language of the sought entry + -w WIKT_LANGUAGE, --wikt_language WIKT_LANGUAGE + language of the wiktionary + -e ENTRY, --entry ENTRY + the entry + -z, --zero_config Don't use if you know how to configure a server (this changes the working directory) + -A, --force_ascii json avec que des caractères ascii + -t, --show_timings montrer les temps d'exécution +``` ## Using Docker diff --git a/src/wiktextract_wrapper.py b/src/wiktextract_wrapper.py index c458dc4..2d6bc48 100755 --- a/src/wiktextract_wrapper.py +++ b/src/wiktextract_wrapper.py @@ -10,6 +10,8 @@ if __name__ == "__main__": parser.add_argument("-e", "--entry", help="the entry", type=str, default=None) parser.add_argument("-z", "--zero_config", help="Don't use if you know how to configure a server (this changes the working directory)", action="store_true") parser.add_argument("-A", "--force_ascii", help="json avec que des caractères ascii", action="store_true") + parser.add_argument("-t", "--show_timings", help="montrer les temps d'exécution", action="store_true") + args = parser.parse_args() formerdir = None if args.zero_config: @@ -19,12 +21,20 @@ if __name__ == "__main__": formerdir = os.getcwd() os.chdir(pathlib.Path(__file__).parent.parent.resolve()) - +import time +t = time.time() from wiktextract.page import parse_page +if args.show_timings: + print(time.time()-t) from wiktextract_context import get_wiktextract_context import json from importlib import metadata import git 
+wiktextractime = -1 +convertime = -1 +startime = -1 +startparsetime = -1 +getpagetime = -1 class Wiktextract: def __init__(self, wiktlang: str, wordlang: str): @@ -33,18 +43,26 @@ class Wiktextract: self.wxr = get_wiktextract_context(wiktlang, wordlang) - def parse_page(self, title: str, wikstraktor_format: bool = True): + def parse_page(self, title: str): + global wiktextractime + global convertime + global startime + global startparsetime + global getpagetime + startparsetime = time.time() page = self.wxr.wtp.get_page(title) if not page: return None - - + getpagetime = time.time() result = parse_page(self.wxr, title, page.body) - if wikstraktor_format and result: - result = self.wikstraktor_format(result) - return result + wiktextractime = time.time() + converted_result = self.convert(result) + convertime = time.time() + + return converted_result + #return result - def wikstraktor_format(self, data_format1): + def convert(self, data_format1): transformed_data = [] for index, pos in enumerate(data_format1): @@ -108,10 +126,17 @@ class Wiktextract: if __name__ == "__main__": + top = time.time() if args.entry != None: + startime = time.time() wkstrkt = Wiktextract(args.wikt_language, args.word_language) + instantiatime = time.time() result = wkstrkt.parse_page(args.entry) print(json.dumps(result, ensure_ascii=args.force_ascii)) + endtime = time.time() + if args.show_timings: + print(f"Execution ({endtime-startime}”):\n\tinstantiation→{instantiatime-startime}\n\twiktextract get page → {getpagetime-startparsetime}\n\twiktextract parse → {wiktextractime-startparsetime}\n\tconversion → {convertime-wiktextractime}\n\tprint result → {endtime - convertime}") + print(time.time()-top) else: print("{'err':'You need to specify a word'}") if formerdir != None: -- GitLab