diff --git a/src/wiktextract_wrapper.py b/src/wiktextract_wrapper.py index 13cfdf0f70e3d666bc23f07e521f6d2650151311..0120311a06bc96869e8accefe5fba080b3bc2ea9 100644 --- a/src/wiktextract_wrapper.py +++ b/src/wiktextract_wrapper.py @@ -26,7 +26,7 @@ class Wiktextract: transformed_pos = { "sources": [ { - "wiktionary_language": pos["lang_code"], # Assuming this key is present in your data + "wiktionary_language": pos.get("lang_code", ""), # Falls back to "" when lang_code is missing from the entry "wiktextract_version": metadata.version("wiktextract"), "wrapper_version": git.Repo(search_parent_directories=True).head.object.hexsha } @@ -49,7 +49,7 @@ class Wiktextract: } if "audio" in pron: sound = { - "accent": pron.get("tags", [])[0], # Assuming the first tag is the accent + "accent": pron.get("tags", [])[0] if pron.get("tags") else None, # Assuming the first tag is the accent "url": pron.get("mp3_url", pron.get("ogg_url", "")) } pronunciation["sounds"].append(sound) @@ -62,7 +62,7 @@ class Wiktextract: transformed_pos[pos.get('pos', '')]["senses"][sense_id] = { "Definitions": [ { - "id": f"{pos['lang_code']}-{pos['word']}{pos['pos']}_def{index}", + "id": f"{pos.get('lang_code', '')}-{pos.get('word', '')}{pos.get('pos', '')}_def{index}", "lang": pos.get('lang_code', ''), "definition": gloss } for index, gloss in enumerate(sense.get('glosses', []) + sense.get('raw_glosses', []), start=1) @@ -70,7 +70,7 @@ class Wiktextract: "Examples": [ { "id": f"{pos.get('lang_code', '')}-{pos.get('word', '')}{pos.get('pos', '')}_ex{index}", - "example": example['text'] + "example": example.get('text', '') } for index, example in enumerate(sense.get('examples', []), start=1) ] } @@ -82,6 +82,7 @@ class Wiktextract: return transformed_data + if __name__ == "__main__": import argparse from argparse import RawTextHelpFormatter #pour le formattage de l'aide