Skip to content
Snippets Groups Projects
Commit c1e56ea4 authored by Mathieu Loiseau
Browse files

Allow pronunciation-less entries

parent dc24ceae
No related branches found
No related tags found
No related merge requests found
...@@ -377,7 +377,8 @@ class Entry: ...@@ -377,7 +377,8 @@ class Entry:
self.senses.append(s) self.senses.append(s)
def is_valid(self): def is_valid(self):
return self.lemma != None and len(self.pronunciations) > 0 and self.pos != None and len(self.senses) > 0 return self.lemma != None and self.pos != None and len(self.senses) > 0
# and len(self.pronunciations) > 0 ← must work without pronounciations
def __eq__(self, other): def __eq__(self, other):
res = isinstance(other, self.__class__) and self.lemma == other.lemma and self.lang == other.lang and self.pos ==other.pos and len(self.pronunciations) == len(other.pronunciations) and len(self.senses) == len(other.senses) res = isinstance(other, self.__class__) and self.lemma == other.lemma and self.lang == other.lang and self.pos ==other.pos and len(self.pronunciations) == len(other.pronunciations) and len(self.senses) == len(other.senses)
...@@ -473,10 +474,11 @@ class ParserContext: ...@@ -473,10 +474,11 @@ class ParserContext:
pass #On ignore l'étymologie pour le moment pass #On ignore l'étymologie pour le moment
else: else:
tmp[k]=v tmp[k]=v
if(pro!=None and len(tmp)>0): if len(tmp)>0 : #There can be no pronounciations
for pos,senses in tmp.items(): for pos,senses in tmp.items():
e = Entry(self.lemma, self.lang, self.wiki_lang, self.page_version_id, self.wikstraktor_version) e = Entry(self.lemma, self.lang, self.wiki_lang, self.page_version_id, self.wikstraktor_version)
e.set_pronunciations(pro) if pro != None:
e.set_pronunciations(pro)
e.set_pos(pos) e.set_pos(pos)
e.set_senses(senses) e.set_senses(senses)
#an improvement would be to remove that sense from context, but we test not to add doubles #an improvement would be to remove that sense from context, but we test not to add doubles
...@@ -558,7 +560,9 @@ class Wikstraktor: ...@@ -558,7 +560,9 @@ class Wikstraktor:
if not found: if not found:
i += 1 i += 1
if found: if found:
nb_entries_added = self.parse(page.title(), page.latest_revision_id, sections[i].sections)#self.wtp.parse(s.contents).sections) nb_entries_added = self.parse(page.title(), page.latest_revision_id, sections[i].sections)
else:
self.log.add_log("Wikstraktor.fetch", f"{graphy}” page not found")
return nb_entries_added return nb_entries_added
def parse(self, entry, v_id, sections): def parse(self, entry, v_id, sections):
...@@ -690,6 +694,8 @@ class Wikstraktor: ...@@ -690,6 +694,8 @@ class Wikstraktor:
senses = [] senses = []
if len(l) > 1: if len(l) > 1:
self.log.add_log("Wikstraktor.process_senses", f"===== WARNING ======\nmore than one sense list, make sure we don't forget anything\nignored lists : \n{l[1:]}\n===================") self.log.add_log("Wikstraktor.process_senses", f"===== WARNING ======\nmore than one sense list, make sure we don't forget anything\nignored lists : \n{l[1:]}\n===================")
elif len(l) == 0:
self.log.add_log("Wikstraktor.process_senses", f"===== WARNING ======\nno sense")
l = l[0] #l now contains a list of list items l = l[0] #l now contains a list of list items
if l.pattern == self.constants['sense_pattern'][0]["def"]: if l.pattern == self.constants['sense_pattern'][0]["def"]:
i = 0 i = 0
......
No preview for this file type
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment