Skip to content
Snippets Groups Projects
Commit 6081dccb authored by Mathieu Loiseau's avatar Mathieu Loiseau
Browse files

Store sources

parent 2827aaf2
No related branches found
No related tags found
No related merge requests found
......@@ -9,4 +9,5 @@ KNM.csv
.~lock*
*.json
*.lwp
wikstraktor_version.py
wikstraktorenv
......@@ -17,8 +17,10 @@ This project does depend on python packages.
(maybe to be replaced by an automation of some sort, using a virtual environment might be better, see [server version](#wikstraktor-server))
* [```pip install pywikibot```](https://pypi.org/project/pywikibot/)
* [```pip install wikitextparser```](https://pypi.org/project/wikitextparser/)
* [```pip install gitpython```](https://gitpython.readthedocs.io/en/stable/)
* [```pip install importlib```](https://pypi.org/project/importlib/)
_Optional (for python 2.*, not tested)_
* run ``./setup.py`` (writes the current git commit hash to ``wikstraktor_version.py``, which is used to store the wikstraktor version in wiktionary extracts)
### Wikstraktor Server
If you want wikstraktor as a server, you need to install [flask](https://flask.palletsprojects.com/en/2.0.x/installation/) and [flask-cors](https://flask-cors.readthedocs.io/en/latest/) — to allow other domains to query —, and best practice is to do so in a [virtual environment](https://docs.python.org/3/library/venv.html#module-venv).
......
......@@ -56,7 +56,6 @@ class En_en_straktor(Wikstraktor):
if l[i].pattern == '\\# ':
theDef = self.wtp.parse(l[i].items[0]).plain_text().strip()
if theDef != "":
print(theDef)# DEBUG:
newSense = Sense(self.entry_language, theDef, self.wiki_language)
#newSence.add_translation()
elif l[i].pattern == '\\#:':
......
#!/usr/bin/env python3
"""Record the current git commit hash as the wikstraktor version.

Writes ``wikstraktor_version.py`` (relative to the current working
directory) containing a single ``version`` variable set to the hex SHA of
the repository's HEAD commit.
"""
import git

# Hex SHA of HEAD; the repository is looked up in parent directories too.
sha = git.Repo(search_parent_directories=True).head.object.hexsha
# The context manager closes the file even if the write fails.
with open("wikstraktor_version.py", "w") as v:
    v.write(f"version = '{sha}'")
......@@ -274,12 +274,12 @@ class SubSense(Sense):
class Entry:
#version_id: unique identifier of the Wiktionary page revision (pywikibot.Page.latest_revision_id)
def __init__(self, lemma, lang, wiki_lang, version_id):
def __init__(self, lemma, lang, wiki_lang, version_id, wkskt_version):
self.lemma = lemma
self.lang = lang
#Si un jour on mixe +ieurs données de plusieurs wiktionnaires, ce sera utile
self.sources = []
self.sources.append({wiki_lang:version_id})
self.sources.append({"wiktionary_language":wiki_lang,"permanentId":version_id,"wikstraktor_version":wkskt_version})
self.current_source = 0
self.pronunciations = []
self.pos = None
......@@ -368,11 +368,12 @@ class Entry:
return res
class ParserContext:
def __init__(self, entry, lang, wiki_lang, version_id):
def __init__(self, entry, lang, wiki_lang, wversion_id, version_id):
self.lemma = entry
self.lang = lang
self.wiki_lang = wiki_lang
self.version_id = version_id
self.page_version_id = wversion_id
self.wikstraktor_version = version_id
self.context = []
self.entries = []
......@@ -408,7 +409,7 @@ class ParserContext:
#Pb là dedans
def create_entry(self):
#Dans le dictionnaire de keys, il n'y a jamais de senses ou de POS
res = Entry(self.lemma, self.lang, self.wiki_lang, self.version_id)
res = Entry(self.lemma, self.lang, self.wiki_lang, self.page_version_id, self.wikstraktor_version)
for l in self.context:
if "pro" in l.keys():
res.set_pronunciations(l['pro'])
......@@ -446,6 +447,8 @@ class Wikstraktor:
try:
m_name = f"{wiki_language}_{entry_language}".capitalize()
instance = getattr(importlib.import_module(f"parsers.{m_name.lower()}"), f"{m_name}_straktor")()
from wikstraktor_version import version as v
instance.version = v
except ModuleNotFoundError:
print(f"parsers.{m_name.lower()} module not found or {m_name}_straktor not found in module")
instance = None
......@@ -486,7 +489,7 @@ class Wikstraktor:
return nb_entries_added
def parse(self, entry, v_id, sections):
self.parserContext = ParserContext(entry, self.entry_language, self.wiki_language, v_id)
self.parserContext = ParserContext(entry, self.entry_language, self.wiki_language, v_id, self.version)
for s in sections:
if s.title != None :
#handle wiki context
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment