Skip to content
Snippets Groups Projects
Commit 6081dccb authored by Mathieu Loiseau's avatar Mathieu Loiseau
Browse files

Store sources

parent 2827aaf2
No related branches found
No related tags found
No related merge requests found
...@@ -9,4 +9,5 @@ KNM.csv ...@@ -9,4 +9,5 @@ KNM.csv
.~lock* .~lock*
*.json *.json
*.lwp *.lwp
wikstraktor_version.py
wikstraktorenv wikstraktorenv
...@@ -17,8 +17,10 @@ This project does depend on python packages. ...@@ -17,8 +17,10 @@ This project does depend on python packages.
(maybe to be replaced by an automation of some sort, using a virtual environment might be better, see [server version](#wikstraktor-server)) (maybe to be replaced by an automation of some sort, using a virtual environment might be better, see [server version](#wikstraktor-server))
* [```pip install pywikibot```](https://pypi.org/project/pywikibot/) * [```pip install pywikibot```](https://pypi.org/project/pywikibot/)
* [```pip install wikitextparser```](https://pypi.org/project/wikitextparser/) * [```pip install wikitextparser```](https://pypi.org/project/wikitextparser/)
* [```pip install gitpython```](https://gitpython.readthedocs.io/en/stable/)
* [```pip install importlib```](https://pypi.org/project/importlib/) * [```pip install importlib```](https://pypi.org/project/importlib/)
_Optional (for python 2.*, not tested)_ _Optional (for python 2.*, not tested)_
* run ``./setup.py`` (used to store wikstraktor version in wiktionary extracts)
### Wikstraktor Server ### Wikstraktor Server
If you want wikstraktor as a server, you need to install [flask](https://flask.palletsprojects.com/en/2.0.x/installation/) and [flask-cors](https://flask-cors.readthedocs.io/en/latest/) — to allow other domains to query —, and best practice is to do so in a [virtual environment](https://docs.python.org/3/library/venv.html#module-venv). If you want wikstraktor as a server, you need to install [flask](https://flask.palletsprojects.com/en/2.0.x/installation/) and [flask-cors](https://flask-cors.readthedocs.io/en/latest/) — to allow other domains to query —, and best practice is to do so in a [virtual environment](https://docs.python.org/3/library/venv.html#module-venv).
......
...@@ -56,7 +56,6 @@ class En_en_straktor(Wikstraktor): ...@@ -56,7 +56,6 @@ class En_en_straktor(Wikstraktor):
if l[i].pattern == '\\# ': if l[i].pattern == '\\# ':
theDef = self.wtp.parse(l[i].items[0]).plain_text().strip() theDef = self.wtp.parse(l[i].items[0]).plain_text().strip()
if theDef != "": if theDef != "":
print(theDef)# DEBUG:
newSense = Sense(self.entry_language, theDef, self.wiki_language) newSense = Sense(self.entry_language, theDef, self.wiki_language)
#newSence.add_translation() #newSence.add_translation()
elif l[i].pattern == '\\#:': elif l[i].pattern == '\\#:':
......
#!/usr/bin/env python3
"""Generate ``wikstraktor_version.py`` from the current git commit.

Writes a one-line Python module of the form ``version = '<sha>'`` into the
current working directory, where ``<sha>`` is the hex SHA of the commit that
HEAD points to. Other modules can then ``from wikstraktor_version import
version`` to tag their output with the code revision that produced it.
"""
import git

# search_parent_directories=True lets the script be run from a subdirectory
# of the working tree, not only from the repository root.
sha = git.Repo(search_parent_directories=True).head.object.hexsha

# The context manager guarantees the file is flushed and closed even if the
# write fails, which the previous open()/close() pair did not.
with open("wikstraktor_version.py", "w") as v:
    v.write(f"version = '{sha}'")
...@@ -274,12 +274,12 @@ class SubSense(Sense): ...@@ -274,12 +274,12 @@ class SubSense(Sense):
class Entry: class Entry:
#version_id : l'identifiant unique de la version de la page du wiktionnaire (pywikibot.Page.latest_revision_id) #version_id : l'identifiant unique de la version de la page du wiktionnaire (pywikibot.Page.latest_revision_id)
def __init__(self, lemma, lang, wiki_lang, version_id): def __init__(self, lemma, lang, wiki_lang, version_id, wkskt_version):
self.lemma = lemma self.lemma = lemma
self.lang = lang self.lang = lang
#Si un jour on mixe +ieurs données de plusieurs wiktionnaires, ce sera utile #Si un jour on mixe +ieurs données de plusieurs wiktionnaires, ce sera utile
self.sources = [] self.sources = []
self.sources.append({wiki_lang:version_id}) self.sources.append({"wiktionary_language":wiki_lang,"permanentId":version_id,"wikstraktor_version":wkskt_version})
self.current_source = 0 self.current_source = 0
self.pronunciations = [] self.pronunciations = []
self.pos = None self.pos = None
...@@ -368,11 +368,12 @@ class Entry: ...@@ -368,11 +368,12 @@ class Entry:
return res return res
class ParserContext: class ParserContext:
def __init__(self, entry, lang, wiki_lang, version_id): def __init__(self, entry, lang, wiki_lang, wversion_id, version_id):
self.lemma = entry self.lemma = entry
self.lang = lang self.lang = lang
self.wiki_lang = wiki_lang self.wiki_lang = wiki_lang
self.version_id = version_id self.page_version_id = wversion_id
self.wikstraktor_version = version_id
self.context = [] self.context = []
self.entries = [] self.entries = []
...@@ -408,7 +409,7 @@ class ParserContext: ...@@ -408,7 +409,7 @@ class ParserContext:
#Pb là dedans #Pb là dedans
def create_entry(self): def create_entry(self):
#Dans le dictionnaire de keys, il n'y a jamais de senses ou de POS #Dans le dictionnaire de keys, il n'y a jamais de senses ou de POS
res = Entry(self.lemma, self.lang, self.wiki_lang, self.version_id) res = Entry(self.lemma, self.lang, self.wiki_lang, self.page_version_id, self.wikstraktor_version)
for l in self.context: for l in self.context:
if "pro" in l.keys(): if "pro" in l.keys():
res.set_pronunciations(l['pro']) res.set_pronunciations(l['pro'])
...@@ -446,6 +447,8 @@ class Wikstraktor: ...@@ -446,6 +447,8 @@ class Wikstraktor:
try: try:
m_name = f"{wiki_language}_{entry_language}".capitalize() m_name = f"{wiki_language}_{entry_language}".capitalize()
instance = getattr(importlib.import_module(f"parsers.{m_name.lower()}"), f"{m_name}_straktor")() instance = getattr(importlib.import_module(f"parsers.{m_name.lower()}"), f"{m_name}_straktor")()
from wikstraktor_version import version as v
instance.version = v
except ModuleNotFoundError: except ModuleNotFoundError:
print(f"parsers.{m_name.lower()} module not found or {m_name}_straktor not found in module") print(f"parsers.{m_name.lower()} module not found or {m_name}_straktor not found in module")
instance = None instance = None
...@@ -486,7 +489,7 @@ class Wikstraktor: ...@@ -486,7 +489,7 @@ class Wikstraktor:
return nb_entries_added return nb_entries_added
def parse(self, entry, v_id, sections): def parse(self, entry, v_id, sections):
self.parserContext = ParserContext(entry, self.entry_language, self.wiki_language, v_id) self.parserContext = ParserContext(entry, self.entry_language, self.wiki_language, v_id, self.version)
for s in sections: for s in sections:
if s.title != None : if s.title != None :
#handle wiki context #handle wiki context
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment