From 6b914e4ed160cf02597a6bf3780b55202647385d Mon Sep 17 00:00:00 2001 From: Mathieu Loiseau <mathieu.loiseau@liris.cnrs.fr> Date: Sat, 15 Jul 2023 11:55:31 +0200 Subject: [PATCH] no duplicate ids --- parsers/Structure_json.json | 63 -------------------------------- parsers/Structure_minimale.json | 8 ---- wikstraktor.py | 22 +++++++---- wikstraktor.sqlite | Bin 28672 -> 32768 bytes 4 files changed, 15 insertions(+), 78 deletions(-) delete mode 100644 parsers/Structure_json.json delete mode 100644 parsers/Structure_minimale.json diff --git a/parsers/Structure_json.json b/parsers/Structure_json.json deleted file mode 100644 index bb0cd9b..0000000 --- a/parsers/Structure_json.json +++ /dev/null @@ -1,63 +0,0 @@ -{ - "Headword":"sleep", - "Items":[ - { - "PartOfSpeech":"verb", - "Pronunciation":[ - { - "api":"\\ˈsliËp\\ ", - "hiragana":"", - "katakana":"", - "bopomofo":"", - "pinyin":"", - "romaji":"", - "accent1":"RP", - "url1":"https://upload.wikimedia.org/wikipedia/commons/1/19/LL-Q1860_%28eng%29-Back_ache-water.wav" - } - ], - "Senses":{ - "v1":{ - "Translations":[ - "translation1", - "...", - "translationn" - ], - "Image":[ - "Stilles Mineralwasser.jpg", - "..." - ], - "Definition":{"lang":"fr", "definition" : "blabla"}, - "Examples":[ - "blabla", - "blabli", - "blablou" - ], - "SubSenses":[ - { - "Definition":{"lang":"en", "definition" : "whatnot"}, - "Examples":[ - "subexa", - "subexb", - "subexz" - ] - } - ] - } - } - } - ] -} - -/*adapter à la généricité des données ? - \"User-defined fields 1\" - \"User-defined fields 2\" - \"User-defined fields 3\" - \"User-defined fields 4\" - \"User-defined fields 5\" - \"Supplementary field for devs 1\" - \"Supplementary field for devs 2\" - \"Supplementary field for devs 3\" - \"Supplementary field for devs 4\" - \"Supplementary field for devs 5\" - ... - \"Supplementary field for devs 10\ */ diff --git a/parsers/Structure_minimale.json b/parsers/Structure_minimale.json deleted file mode 100644 index f990a19..0000000 --- a/parsers/Structure_minimale.json +++ /dev/null @@ -1,8 +0,0 @@ -"lemma" -"partOfSpeech" : -{ - "sense" : - { - "definition" - } -} diff --git a/wikstraktor.py b/wikstraktor.py index ad85137..dae7979 100755 --- a/wikstraktor.py +++ b/wikstraktor.py @@ -64,6 +64,7 @@ class Sound: class Pronunciation(SubInfo): prfx = "prn" + next_id = 1 def __init__(self, prefix = None): super().__init__(prefix) @@ -122,6 +123,7 @@ class Pronunciation(SubInfo): class Definition(SubInfo): prfx = "def" key = "definition" + next_id = 1 def __init__(self, lang, text, prefix=None): super().__init__(prefix) @@ -143,9 +145,11 @@ class Definition(SubInfo): class Translation(Definition): prfx = "trad" key = "translation" + next_id = 1 class Example(SubInfo): prfx = "ex" + next_id = 1 def __init__(self, transcript, source=None, url=None, prefix=None): super().__init__(prefix) @@ -171,18 +175,12 @@ class Example(SubInfo): class Sense(SubInfo): prfx = "" + next_id = 1 def __init__(self, lang=None, definition=None, wiki_lang=None, prefix=None): self.lang = lang self.label = None self.set_id(prefix) - #On réinitialise les identifiants des sous-éléments - if not isinstance(self, SubSense): - Definition.reset() - Example.reset() - Translation.reset() - SubSense.reset() - self.definitions = [] #liste des définitions (elles auront une langue et un texte) self.subsenses = [] #liste des sous-définitions (récursif…) self.examples = [] #liste des exemples (un texte obligatoire, source et url sont optionnels) @@ -196,6 +194,15 @@ class Sense(SubInfo): except ValueError as err: raise ValueError(f"Sense.__init__() with empty definition\n{err}") + @classmethod + def reset(cls): + #On réinitialise les identifiants des sous-éléments + if cls.__name__ != "SubSense": + Definition.reset() + Example.reset() + Translation.reset() + SubSense.reset() + def add_metadata(self, key, value): if self.metadata_exists(key): print("Definition.add_metadata", f"for {self.text} replaced {key}:“{self.metadata[key]}†by {key}:“{value}â€") @@ -323,6 +330,7 @@ class Sense(SubInfo): return json.dumps(self.serializable()) class SubSense(Sense): + next_id = 1 def set_id(self, prefix=None): if prefix != None and self.label == None: self.label = f"{prefix}.{self.__class__.next_id}" #l'identifiant du sens diff --git a/wikstraktor.sqlite b/wikstraktor.sqlite index 1f34a516a0c391dab44d1401ff4ad34c2ac2e550..660ec27727267941a1d46949f21b24669ef6d801 100644 GIT binary patch delta 1994 zcmb`|Jxjwt7{KwgCVkU-Dx!j*6$A$<TyjZcvgjr{DJu9@HBEE%wV()sBGfM+tp&kB zCudRmEkyhbz6>rdE-oT^i%;Phd#)LN{O|6$OX(?8Duv6v+P0!7u}W$*w0*aBO*%C4 zCO#-J@<l$$2YDxNwSl%rQkjZ`qbs%d{+_YHHsxGty-KEA_L?h+V*E6A6TOc-Yu{R@ zN}I;Re&|p+uk04*PP~-_%eJ?7t8*v5K@FCsc3QVm)NvVeO(W;n-12=h@8vRfKpo4m zsF%<ALBN6$i)HejuV*;tK`|&Ec>BSgdUUy8?O!j?@+`nR2sT_@ZxkM`9#1E#TLo`s z8GIZr<BRYmfKj+%yVQ{nUj!e8OZg&v0bm?1bCmTRp9dd@%lPHm3-61cQME6rPAH_i zl_e)zc=K`MB7Pfth`vUCB3+uHPTcJdi6P}%5P=qHnXdT{W3>rBTBMXOPG<vP94_XS z%8%7L_&8j~7vTkfQMk!m{U7cF2Oot?`64_37>66;b(7!mJoq?V#uwo}z&M<`mMtHi o10RRW_#)f`7=^PD*J$`>Qh$Ngz(?Uyz6f6h7>CO|ZGYjbzsfaA6951J delta 85 zcmV-b0IL6hfC7Nv0gxL7SO5S32azB@0a&qMqz@1T59|N{000LM`w#XH@ek~?ArQ$A rk$@7j?;jOF76}I701loFEevuCkqT}JjtH&?$_C-FamNO;;btERKsg&J -- GitLab