diff --git a/wikstraktor.py b/wikstraktor.py index 63e2ea2082094242f7b37b7f439ec07f47f45b4a..74e79aea39c9bdc47f078019ef18dbf20e9bc133 100755 --- a/wikstraktor.py +++ b/wikstraktor.py @@ -5,6 +5,7 @@ import importlib import json from wikstraktor_version import version as the_version from wikstraklog import Wikstraklog +from copy import deepcopy as dc import re def get_list_string_level(wikitext): @@ -37,12 +38,17 @@ class SubInfo: self.label = f"{prefix}_{self.__class__.prfx}{self.id}" return self.label - def replace_src_in_id(self, former_src, new_src): + def replace_src_in_id(self, former_src, new_src, copy=False): ##Attention si on nettoie en mettant des sources partout, il faudra changer res = None if self.label != None and former_src != None and new_src != None : - self.label = re.sub(r'^([\w\.]+)-('+str(former_src)+')',r"\1-"+str(new_src), self.label) - res = self.label + if not copy: + self.label = re.sub(r'^([\w\.]+)-('+str(former_src)+')',r"\1-"+str(new_src), self.label) + res = self.label + else: + new_obj = dc(self) + new_obj.label = re.sub(r'^([\w\.]+)-('+str(former_src)+')',r"\1-"+str(new_src), self.label) + res = new_obj return res def get_src_from_id(self): @@ -242,7 +248,7 @@ class Sense(SubInfo): return key in self.metadata.keys() def get_id(self): - return self.id + return self.label def set_domain(self, d): self.domain = d @@ -272,13 +278,13 @@ class Sense(SubInfo): self.translations.append(theTranslation) def add_subsense(self, subsense): - if self.id!=None: + if self.label!=None: subsense.set_id(self.set_id()) if subsense not in self.subsenses: self.subsenses.append(subsense) def __eq__(self, other): - res = isinstance(other, self.__class__) and self.id == other.id and len(self.definitions) == len(other.definitions) and len(self.examples) == len(other.examples) and len(self.translations) == len(other.translations) and self.domain == other.domain and len(other.metadata) == len(self.metadata) and other.regions == self.regions + res = 
isinstance(other, self.__class__) and len(self.definitions) == len(other.definitions) and len(self.examples) == len(other.examples) and len(self.translations) == len(other.translations) and self.domain == other.domain and len(other.metadata) == len(self.metadata) and other.regions == self.regions #and self.id == other.id i = 0 while res and i < len(self.examples): res = self.examples[i] in other.examples @@ -334,18 +340,17 @@ class Sense(SubInfo): if len(self.subsenses) > 0: res["Subsenses"] = {} for t in self.subsenses: - res["Subsenses"][t.set_id(self.id)]= t.serializable(prefix) + res["Subsenses"][t.set_id(self.label)]= t.serializable(prefix) return res def __str__(self): return json.dumps(self.serializable()) class SubSense(Sense): - def set_id(self, prefix=None): - if prefix != None and self.id == None: - self.id = f"{prefix}.{self.__class__.next_id}" #l'identifiant du sens - self.__class__.inc_n_id() - return self.id + def set_id(self, prefix, force = False): + if (self.label == None or force) and prefix != None: + self.label = f"{prefix}.{self.id}" + return self.label class Entry: #version_id : l'identifiant unique de la vesion de la page du wiktionnaire (pywikibot.Page.latest_revision_id) @@ -433,10 +438,10 @@ class Entry: i += 1 for p in other.pronunciations: src = p.get_src_from_id() - if src != None and src <= max_id and src_map[src] != src: + if src != None and src <= max_id and src < len(src_map) and src_map[src] != src: #max_id, c'est parce qu'un même objet peut être #à plusieurs endroits et avoir déjà été modifié - p.replace_src_in_id(src, src_map[src]) + p=p.replace_src_in_id(src, src_map[src], True) self.add_pronunciation(p) for s in other.senses: src = s.get_src_from_id() @@ -509,6 +514,10 @@ class ParserContext: self.wikstraktor_version = version_id self.context = [] self.entries = [] + #reset counters + Sense.reset() + Sense.reset_sub_counters() + Pronunciation.reset() def get_level(self): if len(self.context) == 0: diff --git 
a/wikstraktor.sqlite b/wikstraktor.sqlite index 03e32cc16f9aa765795c30f9feed594c88275095..38eec4ec2e202f310450075b1d066c36372dcce7 100644 Binary files a/wikstraktor.sqlite and b/wikstraktor.sqlite differ