Skip to content
Snippets Groups Projects
Commit b1025719 authored by Mathieu Loiseau's avatar Mathieu Loiseau
Browse files

Merge with sqlite

parents eaeb600b eaf3a4e8
No related branches found
No related tags found
No related merge requests found
...@@ -27,7 +27,6 @@ This project does depend on python packages. ...@@ -27,7 +27,6 @@ This project does depend on python packages.
python3 -m venv wikstraktorenv #optional for basic version python3 -m venv wikstraktorenv #optional for basic version
. wikstraktorenv/bin/activate #activate environment (optional) . wikstraktorenv/bin/activate #activate environment (optional)
pip install -r requirements.txt pip install -r requirements.txt
./setup.py
``` ```
### Wikstraktor Server ### Wikstraktor Server
...@@ -38,7 +37,6 @@ The following commands are extracted from the aforementioned documentation, it ...@@ -38,7 +37,6 @@ The following commands are extracted from the aforementioned documentation, it
python3 -m venv wikstraktorenv #create wikstraktorenv environment python3 -m venv wikstraktorenv #create wikstraktorenv environment
. wikstraktorenv/bin/activate #activate environment . wikstraktorenv/bin/activate #activate environment
pip install -r server_requirements.txt pip install -r server_requirements.txt
./setup.py
``` ```
## Use ## Use
...@@ -56,13 +54,14 @@ str(f) #convert content to json ...@@ -56,13 +54,14 @@ str(f) #convert content to json
#### Bash #### Bash
``` ```
usage: wikstraktor.py [-h] [-l LANGUAGE] [-w WIKI_LANGUAGE] [-m MOT] usage: wikstraktor.py [-h] [-l LANGUAGE] [-w WIKI_LANGUAGE] [-m MOT]
[-f DESTINATION_FILE] [-A] [-C] [-f DESTINATION_FILE] [-A] [-C] [-n] [-r] [-L LOG_FILE]
Interroger un wiktionnaire Interroger un wiktionnaire
ex : ex :
‣./wikstraktor.py -m blue ‣./wikstraktor.py -m blue
‣./wikstraktor.py -m blue -f blue.json -A -C ‣./wikstraktor.py -m blue -f blue.json -AC
‣./wikstraktor.py -l en -w fr -m blue -f blue.json -A -C ‣./wikstraktor.py -l en -w fr -m yellow -L /var/log/wikstraktor.sqlite
‣./wikstraktor.py -l en -w fr -m blue -f blue.json -n -ACr
options: options:
-h, --help show this help message and exit -h, --help show this help message and exit
...@@ -75,6 +74,14 @@ options: ...@@ -75,6 +74,14 @@ options:
le fichier dans lequel stocker le résultat le fichier dans lequel stocker le résultat
-A, --force_ascii json avec que des caractères ascii -A, --force_ascii json avec que des caractères ascii
-C, --compact json sans indentation -C, --compact json sans indentation
-n, --no_id json sans id
-r, --follow_redirections
pour suivre les redirections (ex: did → do)
-L LOG_FILE, --log_file LOG_FILE
le fichier sqlite où stocker les log
(bien vérifier que l'utilisateur qui lance le script a
accès en écriture à ce fichier
et au dossier qui le contient)
``` ```
### Wikstraktor Server ### Wikstraktor Server
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
from wikstraktor import Wikstraktor, Pronunciation, Sense, SubSense, Definition from wikstraktor import Wikstraktor, Pronunciation, Sense, SubSense, Definition
from parsers.en_constants import string_values from parsers.en_constants import string_values
import re
debugEty = 0 debugEty = 0
...@@ -13,6 +14,19 @@ class En_en_straktor(Wikstraktor): ...@@ -13,6 +14,19 @@ class En_en_straktor(Wikstraktor):
self.constants = string_values self.constants = string_values
self.site = self.pwb.Site(f'wiktionary:en') self.site = self.pwb.Site(f'wiktionary:en')
def process_audio_accent(self, audio, file_name = False):
    """Extract an accent label from an audio template argument.

    Args:
        audio: Either the audio label (e.g. "Audio (US)") or, when
            ``file_name`` is true, the name of the sound file
            (e.g. "en-us-blue.ogg").
        file_name: True when ``audio`` is a file name rather than a label.

    Returns:
        The accent as a string, or None when no accent could be found.
        For file names the two-character code is returned upper-cased.
    """
    if file_name:
        # File names are expected to look like "en-<xx>-<word>.ogg"; the
        # two characters after the language prefix are the accent code.
        match = re.search(r'(e|E)(n|N)-(\w\w)-(.*)\.ogg', audio)
        return match.group(3).upper() if match else None
    # Remove the "Audio (" wrapper and every closing parenthesis, then trim
    # surrounding whitespace so that padded labels such as " Audio (US) "
    # yield "US" and whitespace-only leftovers count as "no accent".
    res = re.sub(r'((a|A)udio ?\(?)|\)?', '', audio).strip()
    # An empty label means the caller should fall back to the file name.
    return res or None
def process_pronunciation(self, proContent): def process_pronunciation(self, proContent):
# TODO: ne marche que pour les listes à 2 niveaux, voir water pour 3 niveaux # TODO: ne marche que pour les listes à 2 niveaux, voir water pour 3 niveaux
l = proContent.get_lists()[0] l = proContent.get_lists()[0]
...@@ -29,7 +43,11 @@ class En_en_straktor(Wikstraktor): ...@@ -29,7 +43,11 @@ class En_en_straktor(Wikstraktor):
elif t.normal_name() == self.constants['t_ipa']: elif t.normal_name() == self.constants['t_ipa']:
p.set_transcription(t.arguments[1].value) p.set_transcription(t.arguments[1].value)
elif t.normal_name() == self.constants['t_snd']: elif t.normal_name() == self.constants['t_snd']:
p.add_sound(self.get_file_url(t.arguments[1].value), t.arguments[2].value) if len(t.arguments) > 2:
acc = self.process_audio_accent(t.arguments[2].value)
if acc == None:
acc = self.process_audio_accent(t.arguments[1].value, True)
p.add_sound(self.get_file_url(t.arguments[1].value), acc)
if p.ipa != None or p.has_accents() or p.has_sounds(): if p.ipa != None or p.has_accents() or p.has_sounds():
pronunciations.append(p) pronunciations.append(p)
else: else:
......
#necessary #necessary
pywikibot>=8.1.2 pywikibot>=8.1.2
wikitextparser>=0.51.0 wikitextparser>=0.51.0
importlib>=1.0.4 GitPython>=3.1.31
#for setup script
GitPython==3.1.31
#!/usr/bin/env python3
"""Write the hash of the current git commit to wikstraktor_version.py."""
import git

# Hash of the commit HEAD points to; the repository is looked up from the
# current directory upwards.
sha = git.Repo(search_parent_directories=True).head.object.hexsha
# The context manager closes the file even if the write fails.
with open("wikstraktor_version.py", "w") as v:
    v.write(f"version = '{sha}'")
...@@ -42,7 +42,7 @@ class Wikstraklog: ...@@ -42,7 +42,7 @@ class Wikstraklog:
return res return res
if __name__ == "__main__": if __name__ == "__main__":
from wikstraktor_version import version as the_version import git
log = Wikstraklog(the_version, "en", "fr") log = Wikstraklog(git.Repo(search_parent_directories=True).head.object.hexsha, "en", "fr")
log.set_context("blue", 123456789) log.set_context("blue", 123456789)
log.add_log("exampleMethod", "no relevant content") log.add_log("exampleMethod", "no relevant content")
...@@ -3,7 +3,10 @@ import pywikibot ...@@ -3,7 +3,10 @@ import pywikibot
import wikitextparser import wikitextparser
import importlib import importlib
import json import json
from wikstraktor_version import version as the_version #version
import git
the_version = git.Repo(search_parent_directories=True).head.object.hexsha
#logging
from wikstraklog import Wikstraklog from wikstraklog import Wikstraklog
def get_list_string_level(wikitext): def get_list_string_level(wikitext):
...@@ -534,12 +537,12 @@ class ParserContext: ...@@ -534,12 +537,12 @@ class ParserContext:
class Wikstraktor: class Wikstraktor:
@classmethod @classmethod
def get_instance(cls, wiki_language, entry_language): def get_instance(cls, wiki_language, entry_language, logfile="wikstraktor.sqlite"):
try: try:
m_name = f"{wiki_language}_{entry_language}".capitalize() m_name = f"{wiki_language}_{entry_language}".capitalize()
instance = getattr(importlib.import_module(f"parsers.{m_name.lower()}"), f"{m_name}_straktor")() instance = getattr(importlib.import_module(f"parsers.{m_name.lower()}"), f"{m_name}_straktor")()
instance.version = the_version instance.version = the_version
instance.log = Wikstraklog(the_version, entry_language, wiki_language) instance.log = Wikstraklog(the_version, entry_language, wiki_language, logfile)
except ModuleNotFoundError: except ModuleNotFoundError:
print(f"parsers.{m_name.lower()} module not found or {m_name}_straktor not found in module") print(f"parsers.{m_name.lower()} module not found or {m_name}_straktor not found in module")
instance = None instance = None
...@@ -769,7 +772,8 @@ if __name__ == "__main__": ...@@ -769,7 +772,8 @@ if __name__ == "__main__":
parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter, description="""Interroger un wiktionnaire parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter, description="""Interroger un wiktionnaire
\033[1m\033[32mex :\033[0m \033[1m\033[32mex :\033[0m
\033[0m\033[32m./wikstraktor.py -m blue\033[0m \033[0m\033[32m./wikstraktor.py -m blue\033[0m
\033[0m\033[32m./wikstraktor.py -m blue -f blue.json -A -C\033[0m \033[0m\033[32m./wikstraktor.py -m blue -f blue.json -AC\033[0m
\033[0m\033[32m./wikstraktor.py -l en -w fr -m yellow -L /var/log/wikstraktor.sqlite\033[0m
\033[0m\033[32m./wikstraktor.py -l en -w fr -m blue -f blue.json -n -ACr\033[0m""") \033[0m\033[32m./wikstraktor.py -l en -w fr -m blue -f blue.json -n -ACr\033[0m""")
parser.add_argument("-l", "--language", help="la langue du mot", type=str, default = "en") parser.add_argument("-l", "--language", help="la langue du mot", type=str, default = "en")
parser.add_argument("-w", "--wiki_language", help="la langue du wiki", type=str, default = "en") parser.add_argument("-w", "--wiki_language", help="la langue du wiki", type=str, default = "en")
...@@ -779,10 +783,15 @@ if __name__ == "__main__": ...@@ -779,10 +783,15 @@ if __name__ == "__main__":
parser.add_argument("-C", "--compact", help="json sans indentation", action="store_true") parser.add_argument("-C", "--compact", help="json sans indentation", action="store_true")
parser.add_argument("-n", "--no_id", help="json sans id", action="store_true") parser.add_argument("-n", "--no_id", help="json sans id", action="store_true")
parser.add_argument("-r", "--follow_redirections", help="pour suivre les redirections (ex: did → do)", action="store_true") parser.add_argument("-r", "--follow_redirections", help="pour suivre les redirections (ex: did → do)", action="store_true")
parser.add_argument("-L", "--log_file", help="le fichier sqlite où stocker les log\n(bien vérifier que l'utilisateur qui lance le script a\naccès en écriture à ce fichier\net au dossier qui le contient)", type=str, default=None)
args = parser.parse_args() args = parser.parse_args()
if args.mot != None: if args.mot != None:
w = Wikstraktor.get_instance(args.wiki_language, args.language) if args.log_file != None:
w = Wikstraktor.get_instance(args.wiki_language, args.language, args.log_file)
else:
w = Wikstraktor.get_instance(args.wiki_language, args.language)
resp = None resp = None
if w.fetch(args.mot, args.follow_redirections) > 0: if w.fetch(args.mot, args.follow_redirections) > 0:
resp = w.export(not args.no_id, args.force_ascii, args.compact) resp = w.export(not args.no_id, args.force_ascii, args.compact)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment