From f9f10d4f8f04f216cffd4c408680048e60895120 Mon Sep 17 00:00:00 2001 From: Enzo Simonnet <enzosim@laposte.net> Date: Wed, 17 Jan 2024 15:14:54 -0500 Subject: [PATCH] =?UTF-8?q?version=20finalis=C3=A9e?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/wiktextract_wrapper.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/wiktextract_wrapper.py b/src/wiktextract_wrapper.py index b3761d8..3703823 100644 --- a/src/wiktextract_wrapper.py +++ b/src/wiktextract_wrapper.py @@ -30,8 +30,9 @@ class Wiktextract: #print(json.dumps(result), "\n\n\n") converted_result = self.convert(result) - print(json.dumps(converted_result)) - return json.dumps(converted_result) + print("Conv result: ", json.dumps(converted_result, ensure_ascii=False, indent=2)) + + return converted_result #return result def convert(self, data_format1): @@ -47,7 +48,7 @@ class Wiktextract: } ], "id": f"{pos.get('lang_code', '')}-{index}.{pos.get('pos', '')}.{pos.get('word', '')}", - f"{pos.get('pos', '')}": { + f"{pos.get('word', '')}": { "pos": pos.get("pos", ""), "pronunciations": [], "senses": {} @@ -68,16 +69,16 @@ class Wiktextract: "url": pron.get("mp3_url", pron.get("ogg_url", "")) } pronunciation["sounds"].append(sound) - transformed_pos[pos.get('pos', '')]['pronunciations'].append(pronunciation) + transformed_pos[pos.get('word', '')]['pronunciations'].append(pronunciation) pron_counter += 1 # Iterate through senses in the entry for sense_index, sense in enumerate(pos.get("senses", []), start=1): sense_id = f"{pos.get('lang_code', '')}-{index}.{pos.get('pos', '')}.{pos.get('word', '')}_{pos.get('pos', '')}{sense_index}" - transformed_pos[pos.get('pos', '')]["senses"][sense_id] = { + transformed_pos[pos.get('word', '')]["senses"][sense_id] = { "Definitions": [ { - "id": f"{pos.get('lang_code', '')}-{pos.get('word', '')}{pos.get('pos', '')}_def{index}", + "id": f"{pos.get('lang_code', '')}-{pos.get('word', '')}_{pos.get('pos', '')}_def{index}", "lang": pos.get('lang_code', ''), "definition": gloss } for index, gloss in enumerate(sense.get('glosses', []) + sense.get('raw_glosses', []), start=1) @@ -109,6 +110,9 @@ if __name__ == "__main__": args = parser.parse_args() if args.entry != None: wkstrkt = Wiktextract(args.wikt_language, args.word_language) - print(wkstrkt.parse_page(args.entry)) + result = wkstrkt.parse_page(args.entry) + print(json.dumps(result, ensure_ascii=False)) else: print("{'err':'You need to specify a word'}") + + -- GitLab