diff --git a/wikstraktor.py b/wikstraktor.py index 4db70fbe7ddb598b04d44a74794902d3103f07b0..7c1803afdcf12618b7f61db8b74f7d0cdbd46296 100755 --- a/wikstraktor.py +++ b/wikstraktor.py @@ -377,7 +377,8 @@ class Entry: self.senses.append(s) def is_valid(self): - return self.lemma != None and len(self.pronunciations) > 0 and self.pos != None and len(self.senses) > 0 + return self.lemma != None and self.pos != None and len(self.senses) > 0 + # and len(self.pronunciations) > 0 ← must work without pronunciations def __eq__(self, other): res = isinstance(other, self.__class__) and self.lemma == other.lemma and self.lang == other.lang and self.pos ==other.pos and len(self.pronunciations) == len(other.pronunciations) and len(self.senses) == len(other.senses) @@ -473,10 +474,11 @@ class ParserContext: pass #On ignore l'étymologie pour le moment else: tmp[k]=v - if(pro!=None and len(tmp)>0): + if len(tmp)>0 : #There can be no pronunciations for pos,senses in tmp.items(): e = Entry(self.lemma, self.lang, self.wiki_lang, self.page_version_id, self.wikstraktor_version) - e.set_pronunciations(pro) + if pro != None: + e.set_pronunciations(pro) e.set_pos(pos) e.set_senses(senses) #an improvement would be to remove that sense from context, but we test not to add doubles @@ -558,7 +560,9 @@ class Wikstraktor: if not found: i += 1 if found: - nb_entries_added = self.parse(page.title(), page.latest_revision_id, sections[i].sections)#self.wtp.parse(s.contents).sections) + nb_entries_added = self.parse(page.title(), page.latest_revision_id, sections[i].sections) + else: + self.log.add_log("Wikstraktor.fetch", f"“{graphy}” page not found") return nb_entries_added def parse(self, entry, v_id, sections): @@ -690,6 +694,8 @@ class Wikstraktor: senses = [] if len(l) > 1: self.log.add_log("Wikstraktor.process_senses", f"===== WARNING ======\nmore than one sense list, make sure we don't forget anything\nignored lists : \n{l[1:]}\n===================") + elif len(l) == 0: + 
self.log.add_log("Wikstraktor.process_senses", f"===== WARNING ======\nno sense") l = l[0] #l now contains a list of list items if l.pattern == self.constants['sense_pattern'][0]["def"]: i = 0 diff --git a/wikstraktor.sqlite b/wikstraktor.sqlite index f83e39e04000619fc360339dce8bec4a227f9bff..4bd4ff7ed7864b28501c509097276c377e190f0f 100644 Binary files a/wikstraktor.sqlite and b/wikstraktor.sqlite differ