diff --git a/README.md b/README.md index 3b52a76f0bedcfb8c7e26c2cf8892c5b3e8c6018..0be09e83b179b0c9566699766d947769bfdcfa55 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,6 @@ This project does depend on python packages. python3 -m venv wikstraktorenv #optional for basic version . wikstraktorenv/bin/activate #activate environment (optional) pip install -r requirements.txt -./setup.py ``` ### Wikstraktor Server @@ -38,7 +37,6 @@ The following commands are extracted from the aforementionned documentation, it python3 -m venv wikstraktorenv #create wikstraktorenv environment . wikstraktorenv/bin/activate #activate environment pip install -r server_requirements.txt -./setup.py ``` ## Use @@ -56,13 +54,14 @@ str(f) #convert content to json #### Bash ``` usage: wikstraktor.py [-h] [-l LANGUAGE] [-w WIKI_LANGUAGE] [-m MOT] - [-f DESTINATION_FILE] [-A] [-C] + [-f DESTINATION_FILE] [-A] [-C] [-n] [-r] [-L LOG_FILE] Interroger un wiktionnaire ex : ‣./wikstraktor.py -m blue - ‣./wikstraktor.py -m blue -f blue.json -A -C - ‣./wikstraktor.py -l en -w fr -m blue -f blue.json -A -C + ‣./wikstraktor.py -m blue -f blue.json -AC + ‣./wikstraktor.py -l en -w fr -m yellow -L /var/log/wikstraktor.sqlite + ‣./wikstraktor.py -l en -w fr -m blue -f blue.json -n -ACr options: -h, --help show this help message and exit @@ -75,6 +74,14 @@ options: le fichier dans lequel stocker le résultat -A, --force_ascii json avec que des caractères ascii -C, --compact json sans indentation + -n, --no_id json sans id + -r, --follow_redirections + pour suivre les redirections (ex: did → do) + -L LOG_FILE, --log_file LOG_FILE + le fichier sqlite où stocker les log + (bien vérifier que l'utilisateur qui lance le script a + accès en écriture à ce fichier + et au dossier qui le contient) ``` ### Wikstraktor Server diff --git a/parsers/en_en.py b/parsers/en_en.py index 960944a0d658afcf5be6942a2f1154ac978810c3..febb56a627bd994c8b5924948acc8e45925ffea0 100644 --- a/parsers/en_en.py +++ b/parsers/en_en.py @@ -2,6 +2,7 @@ from wikstraktor import Wikstraktor, Pronunciation, Sense, SubSense, Definition from parsers.en_constants import string_values +import re debugEty = 0 @@ -13,6 +14,19 @@ class En_en_straktor(Wikstraktor): self.constants = string_values self.site = self.pwb.Site(f'wiktionary:en') + def process_audio_accent(self, audio, file_name = False): + if file_name: #on traite un nom de fichier + match = re.search(r'(e|E)(n|N)-(\w\w)-(.*)\.ogg', audio) + if match: + res = match.group(3).upper() + else: + res = None + else: + res = re.sub(r'((a|A)udio ?\(?)|\)?','',audio) + if res == "": + res = None + return res + def process_pronunciation(self, proContent): # TODO: ne marche que pour les listes à 2 niveaux, voir water pour 3 niveaux l = proContent.get_lists()[0] @@ -29,7 +43,11 @@ class En_en_straktor(Wikstraktor): elif t.normal_name() == self.constants['t_ipa']: p.set_transcription(t.arguments[1].value) elif t.normal_name() == self.constants['t_snd']: - p.add_sound(self.get_file_url(t.arguments[1].value), t.arguments[2].value) + if len(t.arguments) > 2: + acc = self.process_audio_accent(t.arguments[2].value) + if acc == None: + acc = self.process_audio_accent(t.arguments[1].value, True) + p.add_sound(self.get_file_url(t.arguments[1].value), acc) if p.ipa != None or p.has_accents() or p.has_sounds(): pronunciations.append(p) else: diff --git a/requirements.txt b/requirements.txt index 9a6b3d4292473c38ba84133651fa862ffb47e31b..665ffb7a40d2289f6dece5f881cc6cec9295457c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,4 @@ #necessary pywikibot>=8.1.2 wikitextparser>=0.51.0 -importlib>=1.0.4 - -#for setup script -GitPython==3.1.31 +GitPython>=3.1.31 diff --git a/setup.py b/setup.py deleted file mode 100755 index cd3142b21757a640147b51c8ad6c159f0814e1c9..0000000000000000000000000000000000000000 --- a/setup.py +++ /dev/null @@ -1,8 +0,0 @@ -#!/usr/bin/env python3 - -import git -sha = git.Repo(search_parent_directories=True).head.object.hexsha - -v = open("wikstraktor_version.py", "w") -v.write(f"version = '{sha}'") -v.close() diff --git a/wikstraklog.py b/wikstraklog.py index 239ee013491ab9d840eeeecdabd293aa10750146..74b912b5fd1dc128d7b302342431de1e37809e0a 100755 --- a/wikstraklog.py +++ b/wikstraklog.py @@ -42,7 +42,7 @@ class Wikstraklog: return res if __name__ == "__main__": - from wikstraktor_version import version as the_version - log = Wikstraklog(the_version, "en", "fr") + import git + log = Wikstraklog(git.Repo(search_parent_directories=True).head.object.hexsha, "en", "fr") log.set_context("blue", 123456789) log.add_log("exampleMethod", "no relevant content") diff --git a/wikstraktor.py b/wikstraktor.py index 35dfe17c83d3e27c88bb4a558b80ee3c63363717..a6830540e42c764cd5ea2942fd0e48f6f0506e03 100755 --- a/wikstraktor.py +++ b/wikstraktor.py @@ -3,7 +3,10 @@ import pywikibot import wikitextparser import importlib import json -from wikstraktor_version import version as the_version +#version +import git +the_version = git.Repo(search_parent_directories=True).head.object.hexsha +#logging from wikstraklog import Wikstraklog def get_list_string_level(wikitext): @@ -534,12 +537,12 @@ class ParserContext: class Wikstraktor: @classmethod - def get_instance(cls, wiki_language, entry_language): + def get_instance(cls, wiki_language, entry_language, logfile="wikstraktor.sqlite"): try: m_name = f"{wiki_language}_{entry_language}".capitalize() instance = getattr(importlib.import_module(f"parsers.{m_name.lower()}"), f"{m_name}_straktor")() instance.version = the_version - instance.log = Wikstraklog(the_version, entry_language, wiki_language) + instance.log = Wikstraklog(the_version, entry_language, wiki_language, logfile) except ModuleNotFoundError: print(f"parsers.{m_name.lower()} module not found or {m_name}_straktor not found in module") instance = None @@ -769,7 +772,8 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter, description="""Interroger un wiktionnaire \033[1m\033[32mex :\033[0m ‣\033[0m\033[32m./wikstraktor.py -m blue\033[0m - ‣\033[0m\033[32m./wikstraktor.py -m blue -f blue.json -A -C\033[0m + ‣\033[0m\033[32m./wikstraktor.py -m blue -f blue.json -AC\033[0m + ‣\033[0m\033[32m./wikstraktor.py -l en -w fr -m yellow -L /var/log/wikstraktor.sqlite\033[0m ‣\033[0m\033[32m./wikstraktor.py -l en -w fr -m blue -f blue.json -n -ACr\033[0m""") parser.add_argument("-l", "--language", help="la langue du mot", type=str, default = "en") parser.add_argument("-w", "--wiki_language", help="la langue du wiki", type=str, default = "en") @@ -779,10 +783,15 @@ if __name__ == "__main__": parser.add_argument("-C", "--compact", help="json sans indentation", action="store_true") parser.add_argument("-n", "--no_id", help="json sans id", action="store_true") parser.add_argument("-r", "--follow_redirections", help="pour suivre les redirections (ex: did → do)", action="store_true") + parser.add_argument("-L", "--log_file", help="le fichier sqlite où stocker les log\n(bien vérifier que l'utilisateur qui lance le script a\naccès en écriture à ce fichier\net au dossier qui le contient)", type=str, default=None) + args = parser.parse_args() if args.mot != None: - w = Wikstraktor.get_instance(args.wiki_language, args.language) + if args.log_file != None: + w = Wikstraktor.get_instance(args.wiki_language, args.language, args.log_file) + else: + w = Wikstraktor.get_instance(args.wiki_language, args.language) resp = None if w.fetch(args.mot, args.follow_redirections) > 0: resp = w.export(not args.no_id, args.force_ascii, args.compact)