Skip to content
Snippets Groups Projects
Commit bb67b48b authored by Mathieu Loiseau's avatar Mathieu Loiseau
Browse files

improving pron id

parent 7c707d79
No related branches found
No related tags found
No related merge requests found
......@@ -5,6 +5,7 @@ import importlib
import json
from wikstraktor_version import version as the_version
from wikstraklog import Wikstraklog
from copy import deepcopy as dc
import re
def get_list_string_level(wikitext):
......@@ -37,12 +38,17 @@ class SubInfo:
self.label = f"{prefix}_{self.__class__.prfx}{self.id}"
return self.label
def replace_src_in_id(self, former_src, new_src):
def replace_src_in_id(self, former_src, new_src, copy=False):
##Attention si on nettoie en mettant des sources partout, il faudra changer
res = None
if self.label != None and former_src != None and new_src != None :
self.label = re.sub(r'^([\w\.]+)-('+str(former_src)+')',r"\1-"+str(new_src), self.label)
res = self.label
if not copy:
self.label = re.sub(r'^([\w\.]+)-('+str(former_src)+')',r"\1-"+str(new_src), self.label)
res = self.label
else:
new_obj = dc(self)
new_obj.label = re.sub(r'^([\w\.]+)-('+str(former_src)+')',r"\1-"+str(new_src), self.label)
res = new_obj
return res
def get_src_from_id(self):
......@@ -242,7 +248,7 @@ class Sense(SubInfo):
return key in self.metadata.keys()
def get_id(self):
return self.id
return self.label
def set_domain(self, d):
self.domain = d
......@@ -272,13 +278,13 @@ class Sense(SubInfo):
self.translations.append(theTranslation)
def add_subsense(self, subsense):
if self.id!=None:
if self.label!=None:
subsense.set_id(self.set_id())
if subsense not in self.subsenses:
self.subsenses.append(subsense)
def __eq__(self, other):
res = isinstance(other, self.__class__) and self.id == other.id and len(self.definitions) == len(other.definitions) and len(self.examples) == len(other.examples) and len(self.translations) == len(other.translations) and self.domain == other.domain and len(other.metadata) == len(self.metadata) and other.regions == self.regions
res = isinstance(other, self.__class__) and len(self.definitions) == len(other.definitions) and len(self.examples) == len(other.examples) and len(self.translations) == len(other.translations) and self.domain == other.domain and len(other.metadata) == len(self.metadata) and other.regions == self.regions #and self.id == other.id
i = 0
while res and i < len(self.examples):
res = self.examples[i] in other.examples
......@@ -334,18 +340,17 @@ class Sense(SubInfo):
if len(self.subsenses) > 0:
res["Subsenses"] = {}
for t in self.subsenses:
res["Subsenses"][t.set_id(self.id)]= t.serializable(prefix)
res["Subsenses"][t.set_id(self.label)]= t.serializable(prefix)
return res
def __str__(self):
return json.dumps(self.serializable())
class SubSense(Sense):
def set_id(self, prefix=None):
if prefix != None and self.id == None:
self.id = f"{prefix}.{self.__class__.next_id}" #l'identifiant du sens
self.__class__.inc_n_id()
return self.id
def set_id(self, prefix, force = False):
if (self.label == None or force) and prefix != None:
self.label = f"{prefix}.{self.id}"
return self.label
class Entry:
#version_id : l'identifiant unique de la vesion de la page du wiktionnaire (pywikibot.Page.latest_revision_id)
......@@ -433,10 +438,10 @@ class Entry:
i += 1
for p in other.pronunciations:
src = p.get_src_from_id()
if src != None and src <= max_id and src_map[src] != src:
if src != None and src <= max_id and src < len(src_map) and src_map[src] != src:
#max_id, c'est parce qu'un même objet peut être
#à plusieurs endroits et avoir déjà été modifié
p.replace_src_in_id(src, src_map[src])
p=p.replace_src_in_id(src, src_map[src], True)
self.add_pronunciation(p)
for s in other.senses:
src = s.get_src_from_id()
......@@ -509,6 +514,10 @@ class ParserContext:
self.wikstraktor_version = version_id
self.context = []
self.entries = []
#reset counters
Sense.reset()
Sense.reset_sub_counters()
Pronunciation.reset()
def get_level(self):
if len(self.context) == 0:
......
No preview for this file type
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment