Skip to content
Snippets Groups Projects
Commit 8354f1a7 authored by Mathieu Loiseau's avatar Mathieu Loiseau
Browse files

temps d'exé optionnels dans wrapper

parent 99cef757
No related branches found
No related tags found
No related merge requests found
......@@ -46,10 +46,12 @@ pip install -r requirements.txt
_Since `wiktextract` and its dependency `wikitextprocessor` are not regularly published as Python packages, it is difficult to pin them to a specific version: installing from `requirements.txt` always fetches the latest version. Caution: this means that after reinstalling, the output schema of `wiktextract` may have changed slightly._
### 4. Configure server
[config.py](https://gitlab.liris.cnrs.fr/lex-game/live-query-wiktextract/-/blob/main/src/config.py) contains :
* server settings (`host`, `port` and `debug` (boolean))
* supported wiktionary language
* working directory (this can be useful if the server is launched by another server using absolute paths to handle virtual environment)
### 5. Load templates from dump files
Run the script `src/load_dumps.py` to load the most recent dumpfile (for each [supported wiktionary language](https://gitlab.liris.cnrs.fr/lex-game/live-query-wiktextract/-/blob/main/src/config.py#L5)) into an sqlite database that will be used by `wiktextract`.
......@@ -73,6 +75,26 @@ You can run directly in your virtual environment using absolute paths (in case a
sh -c nohup /var/www/live-query-wiktextract/lq-w-extr/bin/python3 /var/www/live-query-wiktextract/src/app.py
```
## Use without server
usage: wiktextract_wrapper.py [-h] [-l WORD_LANGUAGE] [-w WIKT_LANGUAGE]
[-e ENTRY] [-z] [-A] [-t]
```
Wiktextract wrapper
ex :
‣python3 src/wiktextract_wrapper.py -l en -w fr -e yellow
options:
-h, --help show this help message and exit
-l WORD_LANGUAGE, --word_language WORD_LANGUAGE
language of the sought entry
-w WIKT_LANGUAGE, --wikt_language WIKT_LANGUAGE
language of the wiktionary
-e ENTRY, --entry ENTRY
the entry
-z, --zero_config Don't use if you know how to configure a server (this changes the working directory)
-A, --force_ascii json avec que des caractères ascii
-t, --show_timings montrer les temps d'exécution
```
## Using Docker
......
......@@ -10,6 +10,8 @@ if __name__ == "__main__":
parser.add_argument("-e", "--entry", help="the entry", type=str, default=None)
parser.add_argument("-z", "--zero_config", help="Don't use if you know how to configure a server (this changes the working directory)", action="store_true")
parser.add_argument("-A", "--force_ascii", help="json avec que des caractères ascii", action="store_true")
parser.add_argument("-t", "--show_timings", help="montrer les temps d'exécution", action="store_true")
args = parser.parse_args()
formerdir = None
if args.zero_config:
......@@ -19,12 +21,20 @@ if __name__ == "__main__":
formerdir = os.getcwd()
os.chdir(pathlib.Path(__file__).parent.parent.resolve())
import time
t = time.time()
from wiktextract.page import parse_page
if args.show_timings:
print(time.time()-t)
from wiktextract_context import get_wiktextract_context
import json
from importlib import metadata
import git
wiktextractime = -1
convertime = -1
startime = -1
startparsetime = -1
getpagetime = -1
class Wiktextract:
def __init__(self, wiktlang: str, wordlang: str):
......@@ -33,18 +43,26 @@ class Wiktextract:
self.wxr = get_wiktextract_context(wiktlang, wordlang)
def parse_page(self, title: str, wikstraktor_format: bool = True):
def parse_page(self, title: str):
global wiktextractime
global convertime
global startime
global startparsetime
global getpagetime
startparsetime = time.time()
page = self.wxr.wtp.get_page(title)
if not page:
return None
getpagetime = time.time()
result = parse_page(self.wxr, title, page.body)
if wikstraktor_format and result:
result = self.wikstraktor_format(result)
return result
wiktextractime = time.time()
converted_result = self.convert(result)
convertime = time.time()
return converted_result
#return result
def wikstraktor_format(self, data_format1):
def convert(self, data_format1):
transformed_data = []
for index, pos in enumerate(data_format1):
......@@ -108,10 +126,17 @@ class Wiktextract:
if __name__ == "__main__":
top = time.time()
if args.entry != None:
startime = time.time()
wkstrkt = Wiktextract(args.wikt_language, args.word_language)
instantiatime = time.time()
result = wkstrkt.parse_page(args.entry)
print(json.dumps(result, ensure_ascii=args.force_ascii))
endtime = time.time()
if args.show_timings:
print(f"Execution ({endtime-startime}”):\n\tinstantiation→{instantiatime-startime}\n\twiktextract get page → {getpagetime-startparsetime}\n\twiktextract parse → {wiktextractime-startparsetime}\n\tconversion → {convertime-wiktextractime}\n\tprint result → {endtime - convertime}")
print(time.time()-top)
else:
print("{'err':'You need to specify a word'}")
if formerdir != None:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment