From 6081dccb19d28b14a167a21430fb9255a61c3564 Mon Sep 17 00:00:00 2001
From: Mathieu Loiseau <mathieu.loiseau@liris.cnrs.fr>
Date: Wed, 22 Mar 2023 18:05:20 +0100
Subject: [PATCH] Store sources

---
 .gitignore       |  1 +
 README.md        |  2 ++
 parsers/en_en.py |  1 -
 setup.py         |  8 ++++++++
 wikstraktor.py   | 15 +++++++++------
 5 files changed, 20 insertions(+), 7 deletions(-)
 create mode 100755 setup.py

diff --git a/.gitignore b/.gitignore
index 69e9892..7e96d59 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,4 +9,5 @@ KNM.csv
 .~lock*
 *.json
 *.lwp
+wikstraktor_version.py
 wikstraktorenv
diff --git a/README.md b/README.md
index 84975f0..f81037f 100644
--- a/README.md
+++ b/README.md
@@ -17,8 +17,10 @@ This project does depend on python packages.
 (maybe to be replaced by an automation of some sort, using a virtual environment might be better, see [server version](#wikstraktor-server))
 * [```pip install pywikibot```](https://pypi.org/project/pywikibot/)
 * [```pip install wikitextparser```](https://pypi.org/project/wikitextparser/)
+* [```pip install gitpython```](https://gitpython.readthedocs.io/en/stable/)
 * [```pip install importlib```](https://pypi.org/project/importlib/)  
 _Optional (for python 2.*, not tested)_
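+* run ``./setup.py`` after installing the packages above and after each code update (it writes the current git commit to `wikstraktor_version.py`, so that wiktionary extracts record the wikstraktor version that produced them)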
 
 ### Wikstraktor Server
 If you want wikstraktor as a server, you need to install [flask](https://flask.palletsprojects.com/en/2.0.x/installation/) and [flask-cors](https://flask-cors.readthedocs.io/en/latest/) — to allow other domains to query —, and best practice is to do so in a [virtual environment](https://docs.python.org/3/library/venv.html#module-venv).
diff --git a/parsers/en_en.py b/parsers/en_en.py
index cf93078..d840524 100644
--- a/parsers/en_en.py
+++ b/parsers/en_en.py
@@ -56,7 +56,6 @@ class En_en_straktor(Wikstraktor):
 			if l[i].pattern == '\\# ':
 				theDef = self.wtp.parse(l[i].items[0]).plain_text().strip()
 				if theDef != "":
-					print(theDef)# DEBUG:
 					newSense = Sense(self.entry_language, theDef, self.wiki_language)
 				#newSence.add_translation()
 			elif l[i].pattern == '\\#:':
diff --git a/setup.py b/setup.py
new file mode 100755
index 0000000..cd3142b
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,8 @@
+#!/usr/bin/env python3
+"""Store the SHA of the current git HEAD commit in wikstraktor_version.py."""
+import git
+
+sha = git.Repo(search_parent_directories=True).head.object.hexsha
+# The context manager closes the file even if the write fails.
+with open("wikstraktor_version.py", "w") as v:
+	v.write(f"version = '{sha}'")
diff --git a/wikstraktor.py b/wikstraktor.py
index c1e98fb..b567c94 100755
--- a/wikstraktor.py
+++ b/wikstraktor.py
@@ -274,12 +274,12 @@ class SubSense(Sense):
 
 class Entry:
 	#version_id : l'identifiant unique de la vesion de la page du wiktionnaire (pywikibot.Page.latest_revision_id)
-	def __init__(self, lemma, lang, wiki_lang, version_id):
+	def __init__(self, lemma, lang, wiki_lang, version_id, wkskt_version):
 		self.lemma = lemma
 		self.lang = lang
 		#Si un jour on mixe +ieurs données de plusieurs wiktionnaires, ce sera utile
 		self.sources = []
-		self.sources.append({wiki_lang:version_id})
+		self.sources.append({"wiktionary_language":wiki_lang,"permanentId":version_id,"wikstraktor_version":wkskt_version})
 		self.current_source = 0
 		self.pronunciations = []
 		self.pos = None
@@ -368,11 +368,12 @@ class Entry:
 		return res
 
 class ParserContext:
-	def __init__(self, entry, lang, wiki_lang, version_id):
+	def __init__(self, entry, lang, wiki_lang, wversion_id, version_id):
 		self.lemma = entry
 		self.lang = lang
 		self.wiki_lang = wiki_lang
-		self.version_id = version_id
+		self.page_version_id = wversion_id
+		self.wikstraktor_version = version_id
 		self.context = []
 		self.entries = []
 
@@ -408,7 +409,7 @@ class ParserContext:
 #Pb là dedans
 	def create_entry(self):
 		#Dans le dictionnaire de keys, il n'y a jamais de senses ou de POS
-		res = Entry(self.lemma, self.lang, self.wiki_lang, self.version_id)
+		res = Entry(self.lemma, self.lang, self.wiki_lang, self.page_version_id, self.wikstraktor_version)
 		for l in self.context:
 			if "pro" in l.keys():
 				res.set_pronunciations(l['pro'])
@@ -446,6 +447,8 @@ class Wikstraktor:
 		try:
 			m_name = f"{wiki_language}_{entry_language}".capitalize()
 			instance = getattr(importlib.import_module(f"parsers.{m_name.lower()}"), f"{m_name}_straktor")()
+			from wikstraktor_version import version as v
+			instance.version = v
 		except ModuleNotFoundError:
 			print(f"parsers.{m_name.lower()} module not found or {m_name}_straktor not found in module")
 			instance = None
@@ -486,7 +489,7 @@ class Wikstraktor:
 		return nb_entries_added
 
 	def parse(self, entry, v_id, sections):
-		self.parserContext = ParserContext(entry, self.entry_language, self.wiki_language, v_id)
+		self.parserContext = ParserContext(entry, self.entry_language, self.wiki_language, v_id, self.version)
 		for s in sections:
 			if s.title != None :
 				#handle wiki context
-- 
GitLab