From f391b7f3b6c2a322f2eca90384ff6038851ba541 Mon Sep 17 00:00:00 2001
From: Enzo Simonnet <enzosim@laposte.net>
Date: Wed, 31 May 2023 11:12:11 +0200
Subject: [PATCH] =?UTF-8?q?Homog=C3=A9n=C3=A9isation=20des=20pos=20(rapide?=
 =?UTF-8?q?)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 parsers/fr_constants.py |  69 +++++++++++++++++++++++++++++++---------
 test_wikstraktor.py     |   2 +-
 wikstraktor.sqlite      | Bin 16384 -> 20480 bytes
 3 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/parsers/fr_constants.py b/parsers/fr_constants.py
index 73d6812..b769bba 100644
--- a/parsers/fr_constants.py
+++ b/parsers/fr_constants.py
@@ -9,11 +9,50 @@ string_values = {
 "t_ipa":"pron", #template for transcription
 "t_snd":"écouter", #template for audio
 "t_acc":["US", "UK"], #template for accents
+"regions":{
+		"UK":"United Kingdom",
+		"United Kingdom":"United Kingdom",
+		"British":"Great Britain",
+		"GB":"Great Britain",
+		"Great Britain":"Great Britain",
+		"Scot":"Scotland",
+		"Scottish":"Scotland",
+		"Scotland":"Scotland",
+		"Irl":"Ireland",
+		"Irish":"Ireland",
+		"Ireland":"Ireland",
+		"Ulst":"Northern Ireland",
+		"Ulster":"Northern Ireland",
+		"Northern Ireland":"Northern Ireland",
+		"Wls":"Wales",
+		"Welsh":"Wales",
+		"Wales":"Wales",
+		"English":"England",
+		"Eng":"England",
+		"En":"England",
+		"England":"England",
+		"Canada":"Canada",
+		"Canadian":"Canada",
+		'North American':'North America',
+		'North America':"North America",
+		"US":"United States of America",
+		"USA":"United States of America",
+		"United States":"United States of America",
+		"United States of America":"United States of America",
+		"NZ":"New Zealand",
+		"New Zealand":"New Zealand",
+		"Au":"Australia",
+		"AU":"Australia",
+		"Australia":"Australia",
+		"India":"India",
+		"Indian":"India",
+		"Nigeria":"Nigeria",
+		"Nigerian":"Nigeria"},
 "sense_pattern":[ ## structure(s) for sense patterns add_subdef is to be added to def patterns
 	{"def":"\\#", "ex":"\\#\\*", "add_subdef":"\\#"}
 ],
 "POS":{
-	"adjectif":["adjectif","adjectif qualificatif","adj"],
+	"Adj":["adjectif","adjectif qualificatif","adj"],
 	"adjectif démonstratif":["adjectif démonstratif","adj-dém","adjectif dém"],
 	"adjectif exclamatif":["adjectif exclamatif","adj-excl","adjectif exc"],
 	"adjectif indéfini":["adjectif indéfini","adjectif ind","adj-indéf"],
@@ -21,47 +60,47 @@ string_values = {
 	"adjectif numéral":["adjectif numéral","adjectif num","adj-num"],
 	"adjectif possessif":["adjectif possessif","adjectif pos","adj-pos"],
 	"adjectif relatif":["adjectif relatif","adjectif rel","adj-rel"],
-	"adverbe":["Adverbe","adv"],
+	"Adv":["Adverbe","adv"],
 	"adverbe indéfini":["adverbe indéfini","adv-ind","adverbe ind"],
 	"adverbe interrogatif":["adverbe interrogatif","dverbe int","adv-int"],
 	"adverbe pronominal":["adverbe pronominal","adv-pron","adverbe pro"],
 	"adverbe relatif":["adverbe relatif","adv-rel","adverbe rel"],
-	"affixe":["affixe","aff"],
-	"article":["article","art"],
+	"Aff":["affixe","aff"],
+	"Art":["article","art"],
 	"article défini":["article défini","article déf","art-déf"],
 	"article indéfini":["article indéfini","art-indéf","article ind"],
 	"article partitif":["article partitif","art-part","article par"],
 	"circonfixe":["circonfixe","circon","circonf"],
 	"classificateur":["classificateur","class","classif"],
-	"conjonction":["conjonction","conj"],
+	"Conj":["conjonction","conj"],
 	"conjonction de coordination":["conjonction de coordination","conj-coord","conjonction coo"],
 	"copule":["copule"],
-	"déterminant":["déterminant","dét"],
+	"Det":["déterminant","dét"],
 	"enclitique":["cnclitique","encl"],
 	"gismu":["gismu"],
 	"infixe":["infixe","inf"],
 	"interfixe":["interfixe","interf"],
-	"interjection":["interjection","interj"],
+	"Interj":["interjection","interj"],
 	"lettre":["lettre"],
 	"locution":["locution","loc"],
 	"locution-phrase":["locution-phrase","loc-phr","phrase locution","phrase","locution-phrase"],
-	"nom commun":["nom","nom commun","substantif"],
+	"N":["nom","nom commun","substantif"],
 	"nom de famille":["nom de famille","nom-fam"],
-	"nom propre":["nom propre","nom-pr"],
+	"NP":["nom propre","nom-pr"],
 	"nom scientifique":["nom scientifique","nom-sciences","nom scient","nom science"],
-	"numéral":["numéral","num","numér"],
+	"Num":["numéral","num","numér"],
 	"onomatopée":["onomatopée","onoma","onom"],
-	"particule":["particule","part"],
+	"Particle":["particule","part"],
 	"particule numérale":["particule numérale","part-num","particule num"],
 	"patronyme":["patronyme"],
-	"postposition":["postposition","postpos","post"],
+	"Postp":["postposition","postpos","post"],
 	"pré-nom":["pré-nom"],
 	"pré-verbe":["pré-verbe"],
 	"préfixe":["préfixe","préf"],
 	"prénom":["prénom"],
-	"préposition":["préposition","prép"],
+	"Prep":["préposition","prép"],
 	"proclitique":["proclitique","procl"],
-	"pronom":["pronom"],
+	"Pro":["pronom"],
 	"pronom démonstratif":["pronom démonstratif","pronom dém","pronom-dém"],
 	"pronom indéfini":["pronom indéfini","pronom ind","pronom-indéf"],
 	"pronom interrogatif":["pronom interrogatif","pronom int","pronom-int"],
@@ -78,6 +117,6 @@ string_values = {
 	"symbole":["symbole","symb"],
 	"variante par contrainte typographique":["variante typographique","variante typo","variante par contrainte typographique","var-typo"],
 	"verbe pronominal":["verbe pronominal","verb-pr","verbe pr"],
-	"verbe":["verbe","verb"]
+	"V":["verbe","verb"]
 	}
 }
diff --git a/test_wikstraktor.py b/test_wikstraktor.py
index 00d6228..a13c3fe 100644
--- a/test_wikstraktor.py
+++ b/test_wikstraktor.py
@@ -5,7 +5,7 @@ if __name__ == "__main__":
 	# print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat----parent.wav"))
 	# print(e.get_file_url("File:LL-Q1860 (eng)-Nattes à chat-parent.wav"))
 	#e.fetch("water")
-	f.fetch("water")
+	f.fetch("blue")
 	# print(e.fetch("test"), "entries added")
 	#print(e)
 	file_path = 'test.json'
diff --git a/wikstraktor.sqlite b/wikstraktor.sqlite
index cf340034be915fbe4d55478064deb092757f3e55..f83e39e04000619fc360339dce8bec4a227f9bff 100644
GIT binary patch
delta 582
zcma)(&2G~`6ou{JP^e1W2vq{As<(iYHjN{v?hlQGDik(ItdLlsDvL>c?T$2_X=Wxw
zQC*R?FF^4N@Bl23!UKdiU<sS<d4X=aVcee`6724rGjs2E?u<tE=(GKCzUWz&RXl|q
z*R$$7zwVinU7A>>$+$fGrZk?EUfMGS_uR-<(Y<ibjr`j!IIH7}#k;e&=B%@9%C9(u
zY3uB?_XUnuR;Pn@xjg5*I<eRMwZQW?yugRXMsxjHv$38v9@p+L@gAM_{h!(Q&8Hu@
z;9ksxQV?Q9In`9~^sT}WhyuNbu)EteHsx`<U4srrLgKZqXv~T3OUxY+0Wo(OYK<}%
zQ0-%v62^w+l^!5+h=m9tk`#HUKuCy4vOtZWVA4UUOcM#6;br%s=<~Rq^(qe+2i2QM
z#|!@vsTy_#W1`nLv(uJH62S*ZnBF@)goA^>#(e>XImshj!5JhHh7y%Qk{A;iGE-mr
zh|H`))w8ALNA^m#v^Ago2;OC1m$%2?g4Z*jeCM|9wk)@0{%PBVqh{>}{^-d+;d@(^
iDwqW)De{zw2vhxB3)q*0Dy)G*#!?#QQeK+nJop2vgSVIf

delta 100
zcmZozz}V2hI6<0Ki-CcGWuk&TzZQdDxEC*w$H;epf$zX(L4hE?%?J2g7`Yhve*=XX
zSonVf1z+$_4B*)OPo7PHS(uq+GP{BL#=>h%%#GTblNHUSCtom*oV-E9a`P5rA07a;
CFdD@G

-- 
GitLab