diff --git a/parsers/fr_constants.py b/parsers/fr_constants.py index 73d68129734122ee52b397f66357b76bceb7452d..b769bba6893ad9fc9506941ff7b2ef6e9141a6e7 100644 --- a/parsers/fr_constants.py +++ b/parsers/fr_constants.py @@ -9,11 +9,50 @@ string_values = { "t_ipa":"pron", #template for transcription "t_snd":"écouter", #template for audio "t_acc":["US", "UK"], #template for accents +"regions":{ + "UK":"United Kingdom", + "United Kingdom":"United Kingdom", + "British":"Great Britain", + "GB":"Great Britain", + "Great Britain":"Great Britain", + "Scot":"Scotland", + "Scottish":"Scotland", + "Scotland":"Scotland", + "Irl":"Ireland", + "Irish":"Ireland", + "Ireland":"Ireland", + "Ulst":"Northern Ireland", + "Ulster":"Northern Ireland", + "Northern Ireland":"Northern Ireland", + "Wls":"Wales", + "Welsh":"Wales", + "Wales":"Wales", + "English":"England", + "Eng":"England", + "En":"England", + "England":"England", + "Canada":"Canada", + "Canadian":"Canada", + 'North American':'North America', + 'North America':"North America", + "US":"United States of America", + "USA":"United States of America", + "United States":"United States of America", + "United States of America":"United States of America", + "NZ":"New Zealand", + "New Zealand":"New Zealand", + "Au":"Australia", + "AU":"Australia", + "Australia":"Australia", + "India":"India", + "Indian":"India", + "Nigeria":"Nigeria", + "Nigerian":"Nigeria"}, "sense_pattern":[ ## structure(s) for sense patterns add_subdef is to be added to def patterns {"def":"\\#", "ex":"\\#\\*", "add_subdef":"\\#"} ], "POS":{ - "adjectif":["adjectif","adjectif qualificatif","adj"], + "Adj":["adjectif","adjectif qualificatif","adj"], "adjectif démonstratif":["adjectif démonstratif","adj-dém","adjectif dém"], "adjectif exclamatif":["adjectif exclamatif","adj-excl","adjectif exc"], "adjectif indéfini":["adjectif indéfini","adjectif ind","adj-indéf"], @@ -21,47 +60,47 @@ string_values = { "adjectif numéral":["adjectif numéral","adjectif num","adj-num"], "adjectif possessif":["adjectif possessif","adjectif pos","adj-pos"], "adjectif relatif":["adjectif relatif","adjectif rel","adj-rel"], - "adverbe":["Adverbe","adv"], + "Adv":["Adverbe","adv"], "adverbe indéfini":["adverbe indéfini","adv-ind","adverbe ind"], "adverbe interrogatif":["adverbe interrogatif","dverbe int","adv-int"], "adverbe pronominal":["adverbe pronominal","adv-pron","adverbe pro"], "adverbe relatif":["adverbe relatif","adv-rel","adverbe rel"], - "affixe":["affixe","aff"], - "article":["article","art"], + "Aff":["affixe","aff"], + "Art":["article","art"], "article défini":["article défini","article déf","art-déf"], "article indéfini":["article indéfini","art-indéf","article ind"], "article partitif":["article partitif","art-part","article par"], "circonfixe":["circonfixe","circon","circonf"], "classificateur":["classificateur","class","classif"], - "conjonction":["conjonction","conj"], + "Conj":["conjonction","conj"], "conjonction de coordination":["conjonction de coordination","conj-coord","conjonction coo"], "copule":["copule"], - "déterminant":["déterminant","dét"], + "Det":["déterminant","dét"], "enclitique":["cnclitique","encl"], "gismu":["gismu"], "infixe":["infixe","inf"], "interfixe":["interfixe","interf"], - "interjection":["interjection","interj"], + "Interj":["interjection","interj"], "lettre":["lettre"], "locution":["locution","loc"], "locution-phrase":["locution-phrase","loc-phr","phrase locution","phrase","locution-phrase"], - "nom commun":["nom","nom commun","substantif"], + "N":["nom","nom commun","substantif"], "nom de famille":["nom de famille","nom-fam"], - "nom propre":["nom propre","nom-pr"], + "NP":["nom propre","nom-pr"], "nom scientifique":["nom scientifique","nom-sciences","nom scient","nom science"], - "numéral":["numéral","num","numér"], + "Num":["numéral","num","numér"], "onomatopée":["onomatopée","onoma","onom"], - "particule":["particule","part"], + "Particle":["particule","part"], "particule numérale":["particule numérale","part-num","particule num"], "patronyme":["patronyme"], - "postposition":["postposition","postpos","post"], + "Postp":["postposition","postpos","post"], "pré-nom":["pré-nom"], "pré-verbe":["pré-verbe"], "préfixe":["préfixe","préf"], "prénom":["prénom"], - "préposition":["préposition","prép"], + "Prep":["préposition","prép"], "proclitique":["proclitique","procl"], - "pronom":["pronom"], + "Pro":["pronom"], "pronom démonstratif":["pronom démonstratif","pronom dém","pronom-dém"], "pronom indéfini":["pronom indéfini","pronom ind","pronom-indéf"], "pronom interrogatif":["pronom interrogatif","pronom int","pronom-int"], @@ -78,6 +117,6 @@ string_values = { "symbole":["symbole","symb"], "variante par contrainte typographique":["variante typographique","variante typo","variante par contrainte typographique","var-typo"], "verbe pronominal":["verbe pronominal","verb-pr","verbe pr"], - "verbe":["verbe","verb"] + "V":["verbe","verb"] } } diff --git a/test_wikstraktor.py b/test_wikstraktor.py index 00d62280ee1bf3a8a0904ec8f91e8be141fb94b9..a13c3fe07a1451bba5cd843508caf99368bd272d 100644 --- a/test_wikstraktor.py +++ b/test_wikstraktor.py @@ -5,7 +5,7 @@ if __name__ == "__main__": # print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat----parent.wav")) # print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat-parent.wav")) #e.fetch("water") - f.fetch("water") + f.fetch("blue") # print(e.fetch("test"), "entries added") #print(e) file_path = 'test.json' diff --git a/wikstraktor.sqlite b/wikstraktor.sqlite index cf340034be915fbe4d55478064deb092757f3e55..f83e39e04000619fc360339dce8bec4a227f9bff 100644 Binary files a/wikstraktor.sqlite and b/wikstraktor.sqlite differ