From c1e56ea4e6b643368836c01057e1b278afbd2778 Mon Sep 17 00:00:00 2001 From: Mathieu Loiseau <mathieu.loiseau@liris.cnrs.fr> Date: Fri, 9 Jun 2023 18:08:14 +0200 Subject: [PATCH] Allow pronounciation-less entries --- wikstraktor.py | 14 ++++++++++---- wikstraktor.sqlite | Bin 20480 -> 20480 bytes 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/wikstraktor.py b/wikstraktor.py index 4db70fb..7c1803a 100755 --- a/wikstraktor.py +++ b/wikstraktor.py @@ -377,7 +377,8 @@ class Entry: self.senses.append(s) def is_valid(self): - return self.lemma != None and len(self.pronunciations) > 0 and self.pos != None and len(self.senses) > 0 + return self.lemma != None and self.pos != None and len(self.senses) > 0 + # and len(self.pronunciations) > 0 ← must work without pronounciations def __eq__(self, other): res = isinstance(other, self.__class__) and self.lemma == other.lemma and self.lang == other.lang and self.pos ==other.pos and len(self.pronunciations) == len(other.pronunciations) and len(self.senses) == len(other.senses) @@ -473,10 +474,11 @@ class ParserContext: pass #On ignore l'étymologie pour le moment else: tmp[k]=v - if(pro!=None and len(tmp)>0): + if len(tmp)>0 : #There can be no pronounciations for pos,senses in tmp.items(): e = Entry(self.lemma, self.lang, self.wiki_lang, self.page_version_id, self.wikstraktor_version) - e.set_pronunciations(pro) + if pro != None: + e.set_pronunciations(pro) e.set_pos(pos) e.set_senses(senses) #an improvement would be to remove that sense from context, but we test not to add doubles @@ -558,7 +560,9 @@ class Wikstraktor: if not found: i += 1 if found: - nb_entries_added = self.parse(page.title(), page.latest_revision_id, sections[i].sections)#self.wtp.parse(s.contents).sections) + nb_entries_added = self.parse(page.title(), page.latest_revision_id, sections[i].sections) + else: + self.log.add_log("Wikstraktor.fetch", f"“{graphy}” page not found") return nb_entries_added def parse(self, entry, v_id, sections): @@ -690,6 
+694,8 @@ class Wikstraktor: senses = [] if len(l) > 1: self.log.add_log("Wikstraktor.process_senses", f"===== WARNING ======\nmore than one sense list, make sure we don't forget anything\nignored lists : \n{l[1:]}\n===================") + elif len(l) == 0: + self.log.add_log("Wikstraktor.process_senses", f"===== WARNING ======\nno sense") l = l[0] #l now contains a list of list items if l.pattern == self.constants['sense_pattern'][0]["def"]: i = 0 diff --git a/wikstraktor.sqlite b/wikstraktor.sqlite index f83e39e04000619fc360339dce8bec4a227f9bff..4bd4ff7ed7864b28501c509097276c377e190f0f 100644 GIT binary patch delta 343 zcmZozz}T>Wae_3X!9*Enegg)*a4%j41_nmH0}OlzHVX;_@ohf9=VHji!mGi+@60!u z_bIQ&#zG0+=te7jF5_51L1h-tMo}XJBV%0yGhG8q1w&&i6H6;Yvy@~bljPLIR0~rJ zGc$`cbMvHB6O$xE14{!#%M_DT(-bp9W8)+XlhiyQtSCt>%1g}2EKX(dG>n^EXl~C1 z7pt0l&{8^34{Vt*&@y&Z%Z$yej7><ftSB{wC4PAb(303dUAU>-R5JCTS=?k7|2+T& C6kPQH delta 55 zcmZozz}T>Wae_3X?nD`9eq9DV`<uKB3=E8X2N?JcY!(y<;@f<H&&809k>8nt-x(-i K$v@fIe=h)yd=3i$ -- GitLab