From 3d184fb5c5885b185840c8c0af1857eb0d45dc05 Mon Sep 17 00:00:00 2001 From: Mathieu Loiseau <mathieu.loiseau@liris.cnrs.fr> Date: Tue, 30 May 2023 10:59:22 +0200 Subject: [PATCH] =?UTF-8?q?Gestion=20des=20d=C3=A9finitions=20vides?= =?UTF-8?q?=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- wikstraktor.py | 77 ++++++++++++++++++++++++++++++++------------------ 1 file changed, 49 insertions(+), 28 deletions(-) diff --git a/wikstraktor.py b/wikstraktor.py index 8fa0ff7..6339324 100755 --- a/wikstraktor.py +++ b/wikstraktor.py @@ -117,9 +117,15 @@ class Definition(SubInfo): if text != "": self.lang = lang self.text = text + self.metadata = {} else: raise ValueError(f"Definition.__init__: “{text}” empty definition.") + def add_metadata(self, key, value): + if key in self.metadata.keys(): + self.log.add_log("Definition.add_metadata", f"for {self.text} replaced {key}:“{self.metadata['key']}” by {key}:“{value}”") + self.metadata["key"]=value + def __eq__(self, other): return isinstance(other, self.__class__) and self.lang == other.lang and self.text == other.text @@ -127,6 +133,8 @@ class Definition(SubInfo): res = super().serializable(prefix) res["lang"] = self.lang res[self.__class__.key] = self.text + if len(self.metadata.keys()) > 0 : + res["metadata"] = self.metadata return res class Translation(Definition): @@ -587,34 +595,44 @@ class Wikstraktor: res = self.wtp.parse(example_wiki_text).plain_text().strip() return res + #can be overloaded + def parse_definition(self, definition): + return self.wtp.parse(definition).plain_text().strip() + #can be overloaded def process_definition(self, definition, sub_items, def_level = True): - if def_level: - newSense = Sense(self.entry_language, self.wtp.parse(definition).plain_text().strip(), self.wiki_language) - pattern_ex = self.constants['sense_pattern'][0]["ex"] - pattern_subdef = self.constants['sense_pattern'][0]["add_subdef"] + 
self.constants['sense_pattern'][0]["def"] - else: - newSense = SubSense(self.entry_language, self.wtp.parse(definition).plain_text().strip(), self.wiki_language) - pattern_subdef = None - pattern_ex = self.constants['sense_pattern'][0]["add_subdef"] + self.constants['sense_pattern'][0]["ex"] - #Process examples - a = 0 - for item_list in sub_items: - if item_list.pattern == pattern_ex: - for item in item_list.items: - newSense.add_example(self.process_example(item)) - #Si on veut traiter les sous items (ex traductions), on peut utiliser - if def_level and item_list.pattern == pattern_subdef: - b = 0 - for item in item_list.items: - try: - sub_sub = item_list.sublists(b) - except IndexError as err: - sub_sub = [] - print(f"There is an error in the selection of subitems:\n\t{b}th item of\n\t{itm_list.sublists()}\ntriggered {err}") - newSense.add_subsense(self.process_definition(item, sub_sub, False)) - b += 1 - a += 1 + #does not process wk_en quotations + try: + if def_level: + newSense = Sense(self.entry_language, self.parse_definition(definition), self.wiki_language) + pattern_ex = self.constants['sense_pattern'][0]["ex"] + pattern_subdef = self.constants['sense_pattern'][0]["add_subdef"] + self.constants['sense_pattern'][0]["def"] + else: + newSense = SubSense(self.entry_language, self.parse_definition(definition), self.wiki_language) + pattern_subdef = None + pattern_ex = self.constants['sense_pattern'][0]["add_subdef"] + self.constants['sense_pattern'][0]["ex"] + #Process examples + a = 0 + for item_list in sub_items: + if item_list.pattern == pattern_ex: + for item in item_list.items: + newSense.add_example(self.process_example(item)) + #Si on veut traiter les sous items (ex traductions), on peut utiliser + if def_level and item_list.pattern == pattern_subdef: + b = 0 + for item in item_list.items: + try: + sub_sub = item_list.sublists(b) + except IndexError as err: + sub_sub = [] + print(f"There is an error in the selection of subitems:\n\t{b}th item 
of\n\t{itm_list.sublists()}\ntriggered {err}") + newSense.add_subsense(self.process_definition(item, sub_sub, False)) + b += 1 + a += 1 + except ValueError as err: + self.log.add_log("Wikstraktor.process_definition", f"“{definition}” processed as empty") + raise ValueError(f"Wikstraktor.process_definition with empty definition\n{err}") + return newSense def process_senses(self, sensesContent): @@ -626,8 +644,11 @@ class Wikstraktor: if l.pattern == self.constants['sense_pattern'][0]["def"]: i = 0 for item in l.items: - senses.append(self.process_definition(item, l.sublists(i))) - i += 1 + try: + senses.append(self.process_definition(item, l.sublists(i))) + i += 1 + except ValueError as err: + print("Skipped empty definition") return senses def __str__(self): -- GitLab