Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
ff2balex
Manage
Activity
Members
Labels
Plan
Issues
9
Issue boards
Milestones
Wiki
External wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Lex gaMe
ff2balex
Commits
9783dc2b
Commit
9783dc2b
authored
4 months ago
by
Prénom Nom
Browse files
Options
Downloads
Patches
Plain Diff
amélioration worker
parent
18b1902a
No related branches found
Branches containing commit
No related tags found
1 merge request
!9
Affichage stats
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/workers/pyodide_worker.js
+183
-187
183 additions, 187 deletions
src/workers/pyodide_worker.js
with
183 additions
and
187 deletions
src/workers/pyodide_worker.js
+
183
−
187
View file @
9783dc2b
...
...
@@ -19,149 +19,146 @@ let authToken = null; // Stockage local du token
let
includeStopwords
=
false
;
// Stocker l'état de l'inclusion des mots outils
let
stoplistsByLang
=
{};
// Objet stockant les stoplists par langue
// --- Attente de la mise à jour de la stoplist ---
let
stoplistReady
=
new
Promise
((
resolve
)
=>
resolve
());
let
stoplist
s
Ready
=
new
Promise
((
resolve
)
=>
resolve
());
// Écouteur des messages reçus du background script
self
.
onmessage
=
async
(
event
)
=>
{
const
data
=
event
.
data
;
console
.
log
(
"
[WebWorker] Message reçu du Background:
"
,
data
);
if
(
data
.
command
===
"
pyodide-simplemma
"
)
{
if
(
pyodideLoaded
&&
simplemmaLoaded
)
{
console
.
log
(
"
[Worker] Pyodide et Simplemma déjà chargés.
"
);
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
already_loaded
"
,
message
:
"
Pyodide et Simplemma déjà en mémoire
"
});
return
;
}
try
{
if
(
!
pyodideLoaded
)
{
console
.
log
(
"
[Worker] Chargement de Pyodide...
"
);
try
{
importScripts
(
`
${
LATEST_BASE_URL
}
pyodide.js`
);
}
catch
(
err
)
{
console
.
error
(
"
[Worker] Erreur lors de l'import de pyodide.js :
"
,
err
);
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
error
"
,
message
:
err
.
toString
()
});
return
;
}
pyodide
=
await
loadPyodide
({
indexURL
:
LATEST_BASE_URL
});
await
pyodide
.
loadPackage
(
"
lzma
"
);
await
pyodide
.
loadPackage
(
"
micropip
"
);
pyodideLoaded
=
true
;
console
.
log
(
"
[Worker] Pyodide chargé avec succès !
"
);
}
if
(
!
simplemmaLoaded
)
{
console
.
log
(
"
[Worker] Installation de simplemma...
"
);
// On encapsule la logique dans une fonction asynchrone pour faciliter l'usage d'await
await
pyodide
.
runPythonAsync
(
`
import micropip
import asyncio
async def main():
print("Installation de simplemma...")
await micropip.install("simplemma")
print("Installation réussie.")
import simplemma
print("simplemma importé avec succès.")
# Test simple : extraction de tokens et lemmatisation
import re
def tokenize(text):
return re.findall(r"
\\
b
\\
w+
\\
b", text.lower())
phrase = "Simplemma est prêt"
tokens = tokenize(phrase)
print("Tokens extraits :", tokens)
lemmatized_tokens = [simplemma.lemmatize(token, lang="fr") for token in tokens]
print("Tokens lemmatisés :", lemmatized_tokens)
return lemmatized_tokens
await main()
`
);
simplemmaLoaded
=
true
;
console
.
log
(
"
[Worker] Simplemma installé avec succès !
"
);
}
// Envoyer confirmation au background script
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
success
"
,
message
:
"
Pyodide et Simplemma chargés
"
});
}
catch
(
error
)
{
console
.
error
(
"
[Worker] Erreur lors du chargement de Pyodide ou Simplemma :
"
,
error
);
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
error
"
,
message
:
error
.
toString
()
});
}
}
// --- Traitement du texte envoyé par stats.js ---
if
(
data
.
command
===
"
process-text
"
)
{
if
(
!
pyodideLoaded
)
{
console
.
log
(
"
[Worker] Pyodide non chargé.
"
);
self
.
postMessage
({
type
:
"
process-text
"
,
status
:
"
error
"
,
message
:
"
Pyodide pas encore chargé
"
});
return
;
}
console
.
log
(
"
[Worker] Texte reçu pour analyse :
"
,
data
.
text
);
try
{
const
result
=
await
pyodide
.
runPythonAsync
(
`
import json
import re
import simplemma
from simplemma import langdetect
def detect_language(text):
lang_scores = simplemma.langdetect(text, lang=("fr", "en", "es", "de", "it", "pt"))
return lang_scores[0][0] if lang_scores else "unk"
def tokenize(text):
return re.findall(r"
\\
b[a-zA-ZÀ-ÿ'-]+
\\
b", text.lower())
text = """
${
data
.
text
.
replace
(
/
\"
/g
,
'
\\
"
'
)}
"""
detected_lang = detect_language(text)
if detected_lang == "unk":
detected_lang = "other"
tokens = tokenize(text)
lemmatized_tokens = [simplemma.lemmatize(token, lang=detected_lang) for token in tokens]
const
{
command
,
...
data
}
=
event
.
data
;
console
.
log
(
"
[WebWorker] Message reçu du Background:
"
,
command
,
data
);
freq = {}
for token in lemmatized_tokens:
freq[token] = freq.get(token, 0) + 1
switch
(
command
)
{
json.dumps({"lang": detected_lang, "frequencies": freq}, ensure_ascii=False)
`
);
const
parsedResult
=
JSON
.
parse
(
result
);
const
detectedLang
=
parsedResult
.
lang
;
if
(
!
storedFrequencies
[
detectedLang
])
{
storedFrequencies
[
detectedLang
]
=
{};
case
"
pyodide-simplemma
"
:
if
(
pyodideLoaded
&&
simplemmaLoaded
)
{
console
.
log
(
"
[Worker] Pyodide et Simplemma déjà chargés.
"
);
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
already_loaded
"
,
message
:
"
Pyodide et Simplemma déjà en mémoire
"
});
return
;
}
for
(
const
[
word
,
count
]
of
Object
.
entries
(
parsedResult
.
frequencies
))
{
storedFrequencies
[
detectedLang
][
word
]
=
(
storedFrequencies
[
detectedLang
][
word
]
||
0
)
+
count
;
try
{
if
(
!
pyodideLoaded
)
{
console
.
log
(
"
[Worker] Chargement de Pyodide...
"
);
try
{
importScripts
(
`
${
LATEST_BASE_URL
}
pyodide.js`
);
}
catch
(
err
)
{
console
.
error
(
"
[Worker] Erreur lors de l'import de pyodide.js :
"
,
err
);
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
error
"
,
message
:
err
.
toString
()
});
return
;
}
pyodide
=
await
loadPyodide
({
indexURL
:
LATEST_BASE_URL
});
await
pyodide
.
loadPackage
(
"
lzma
"
);
await
pyodide
.
loadPackage
(
"
micropip
"
);
pyodideLoaded
=
true
;
console
.
log
(
"
[Worker] Pyodide chargé avec succès !
"
);
}
if
(
!
simplemmaLoaded
)
{
console
.
log
(
"
[Worker] Installation de simplemma...
"
);
await
pyodide
.
runPythonAsync
(
`
import micropip
import asyncio
async def main():
print("Installation de simplemma...")
await micropip.install("simplemma")
print("Installation réussie.")
import simplemma
print("simplemma importé avec succès.")
# Test simple : extraction de tokens et lemmatisation
import re
def tokenize(text):
return re.findall(r"
\\
b
\\
w+
\\
b", text.lower())
phrase = "Simplemma est prêt"
tokens = tokenize(phrase)
print("Tokens extraits :", tokens)
lemmatized_tokens = [simplemma.lemmatize(token, lang="fr") for token in tokens]
print("Tokens lemmatisés :", lemmatized_tokens)
return lemmatized_tokens
await main()
`
);
simplemmaLoaded
=
true
;
console
.
log
(
"
[Worker] Simplemma installé avec succès !
"
);
}
// Envoyer confirmation au background script
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
success
"
,
message
:
"
Pyodide et Simplemma chargés
"
});
}
catch
(
error
)
{
console
.
error
(
"
[Worker] Erreur lors du chargement de Pyodide ou Simplemma :
"
,
error
);
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
error
"
,
message
:
error
.
toString
()
});
}
self
.
postMessage
({
type
:
"
update-frequencies
"
,
frequencies
:
storedFrequencies
});
if
(
autoAddEnabled
)
{
checkThreshold
(
detectedLang
);
break
;
case
"
process-text
"
:
if
(
!
pyodideLoaded
)
{
console
.
log
(
"
[Worker] Pyodide non chargé.
"
);
self
.
postMessage
({
type
:
"
process-text
"
,
status
:
"
error
"
,
message
:
"
Pyodide pas encore chargé
"
});
return
;
}
}
catch
(
error
)
{
console
.
error
(
"
[Worker] Erreur dans l'analyse du texte :
"
,
error
);
}
}
if
(
data
.
command
===
"
update-preferences
"
)
{
userThreshold
=
data
.
threshold
;
trackedLanguages
=
data
.
trackedLanguages
;
autoAddEnabled
=
data
.
autoAdd
;
isAuthenticated
=
data
.
isAuthenticated
;
console
.
log
(
"
[Worker] Mise à jour des préférences :
"
,
{
userThreshold
,
trackedLanguages
,
autoAddEnabled
,
isAuthenticated
});
}
if
(
data
.
command
===
"
update-lexicons
"
)
{
console
.
log
(
"
[Worker] Texte reçu pour analyse :
"
,
data
.
text
);
try
{
const
result
=
await
pyodide
.
runPythonAsync
(
`
import json
import re
import simplemma
from simplemma import langdetect
def detect_language(text):
lang_scores = simplemma.langdetect(text, lang=("fr", "en", "es", "de", "it", "pt"))
return lang_scores[0][0] if lang_scores else "unk"
def tokenize(text):
return re.findall(r"
\\
b[a-zA-ZÀ-ÿ'-]+
\\
b", text.lower())
text = """
${
data
.
text
.
replace
(
/
\"
/g
,
'
\\
"
'
)}
"""
detected_lang = detect_language(text)
if detected_lang == "unk":
detected_lang = "other"
tokens = tokenize(text)
lemmatized_tokens = [simplemma.lemmatize(token, lang=detected_lang) for token in tokens]
freq = {}
for token in lemmatized_tokens:
freq[token] = freq.get(token, 0) + 1
json.dumps({"lang": detected_lang, "frequencies": freq}, ensure_ascii=False)
`
);
const
parsedResult
=
JSON
.
parse
(
result
);
const
detectedLang
=
parsedResult
.
lang
;
if
(
!
storedFrequencies
[
detectedLang
])
{
storedFrequencies
[
detectedLang
]
=
{};
}
for
(
const
[
word
,
count
]
of
Object
.
entries
(
parsedResult
.
frequencies
))
{
storedFrequencies
[
detectedLang
][
word
]
=
(
storedFrequencies
[
detectedLang
][
word
]
||
0
)
+
count
;
}
self
.
postMessage
({
type
:
"
update-frequencies
"
,
frequencies
:
storedFrequencies
});
if
(
autoAddEnabled
)
{
checkThreshold
(
detectedLang
);
}
}
catch
(
error
)
{
console
.
error
(
"
[Worker] Erreur dans l'analyse du texte :
"
,
error
);
}
break
;
case
"
update-preferences
"
:
userThreshold
=
data
.
threshold
;
trackedLanguages
=
data
.
trackedLanguages
;
autoAddEnabled
=
data
.
autoAdd
;
isAuthenticated
=
data
.
isAuthenticated
;
console
.
log
(
"
[Worker] Mise à jour des préférences :
"
,
{
userThreshold
,
trackedLanguages
,
autoAddEnabled
,
isAuthenticated
});
break
;
case
"
update-lexicons
"
:
userLexicons
=
data
.
lexicons
;
console
.
log
(
"
[Worker] Lexiques mis à jour :
"
,
userLexicons
);
}
break
;
if
(
data
.
command
===
"
update-auth-token
"
)
{
case
"
update-auth-token
"
:
authToken
=
data
.
accessToken
;
console
.
log
(
"
[Worker] Token mis à jour :
"
,
authToken
?
"
Disponible
"
:
"
Aucun token reçu
"
);
}
break
;
if
(
data
.
command
===
"
update-stoplist
"
)
{
case
"
update-stoplist
"
:
stoplistsReady
=
new
Promise
((
resolve
)
=>
{
if
(
data
.
stoplists
&&
typeof
data
.
stoplists
===
"
object
"
)
{
stoplistsByLang
=
{};
...
...
@@ -172,80 +169,80 @@ json.dumps({"lang": detected_lang, "frequencies": freq}, ensure_ascii=False)
}
else
{
console
.
warn
(
"
[Worker] ⚠ Stoplists reçues incorrectes ou vides.
"
);
}
resolve
();
//Stoplist prête
resolve
();
//
Stoplist prête
});
}
if
(
data
.
command
===
"
update-include-stopwords
"
)
{
break
;
case
"
update-include-stopwords
"
:
includeStopwords
=
data
.
includeStopwords
;
console
.
log
(
`[Worker] Mise à jour de includeStopwords :
${
includeStopwords
}
`
);
}
break
;
}
};
// --- Vérification du seuil et notification ---
let
pendingWords
=
{};
// Stocker temporairement les mots en attente d'ajout
let
addWordTimeout
=
null
;
// Timer pour regrouper les ajouts
async
function
checkThreshold
(
lang
)
{
// // Vérifier si la stoplist est définie et contient des mots
await
stoplistReady
;
// Attendre que la stoplist soit chargée
await
stoplistsReady
;
// Attendre que les stoplists soient chargées
if
(
!
autoAddEnabled
||
!
isAuthenticated
)
{
console
.
log
(
"
[Worker] Auto-Add désactivé ou utilisateur non connecté.
"
);
return
;
}
if
(
!
trackedLanguages
.
includes
(
lang
))
{
console
.
log
(
`[Worker] La langue
${
lang
}
n'est pas suivie.`
);
return
;
}
console
.
log
(
`[Worker] Vérification des fréquences pour la langue
${
lang
}
...`
);
if
(
!
storedFrequencies
[
lang
])
return
;
//Utiliser la bonne stoplist
const
stoplist
=
stoplistsByLang
[
lang
]
||
new
Set
();
const
shouldFilterStopwords
=
stoplist
.
size
>
0
&&
includeStopwords
;
console
.
log
(
`[Worker] Stoplist pour '
${
lang
}
' :
${
shouldFilterStopwords
?
"
Appliquée
"
:
"
Non appliquée
"
}
`
);
const
exceededWords
=
Object
.
entries
(
storedFrequencies
[
lang
])
.
filter
(([
word
,
count
])
=>
count
>=
userThreshold
&&
!
(
notifiedWords
[
lang
]
&&
notifiedWords
[
lang
].
includes
(
word
)))
.
map
(([
word
])
=>
word
);
// Appliquer le filtrage si nécessaire
const
finalWords
=
exceededWords
.
filter
(
word
=>
{
if
(
shouldFilterStopwords
)
{
const
isInStoplist
=
stoplist
.
has
(
word
);
console
.
log
(
`[Worker] Mot "
${
word
}
"
${
isInStoplist
?
"
EXCLU (dans la stoplist)
"
:
"
CONSERVÉ (pas dans la stoplist)
"
}
`
);
return
!
isInStoplist
;
}
return
true
;
// Si on ne filtre pas, garder tous les mots
});
console
.
log
(
"
[Worker] ⚠ Auto-Add désactivé ou utilisateur non connecté.
"
);
}
else
if
(
!
trackedLanguages
.
includes
(
lang
))
{
console
.
log
(
`[Worker] ⚠ La langue '
${
lang
}
' n'est pas suivie.`
);
}
else
{
console
.
log
(
`[Worker] Vérification des fréquences pour la langue '
${
lang
}
'...`
);
if
(
finalWords
.
length
===
0
)
return
;
if
(
!
notifiedWords
[
lang
])
notifiedWords
[
lang
]
=
[];
notifiedWords
[
lang
].
push
(...
finalWords
);
console
.
log
(
"
Mots dépassant le seuil :
"
,
finalWords
);
self
.
postMessage
({
type
:
"
threshold-exceeded
"
,
wordsAboveThreshold
:
finalWords
});
// Stocker les mots détectés pour un ajout groupé
if
(
!
pendingWords
[
lang
])
pendingWords
[
lang
]
=
[];
pendingWords
[
lang
].
push
(...
finalWords
);
// Déclencher un envoi groupé après un délai (3 secondes)
if
(
!
addWordTimeout
)
{
addWordTimeout
=
setTimeout
(
async
()
=>
{
await
processPendingWords
();
},
3000
);
const
stoplist
=
stoplistsByLang
[
lang
]
||
new
Set
();
const
shouldFilterStopwords
=
stoplist
.
size
>
0
&&
includeStopwords
;
console
.
log
(
`[Worker] 📝 Stoplist pour '
${
lang
}
' :
${
shouldFilterStopwords
?
"
Appliquée
"
:
"
Non appliquée
"
}
`
);
const
wordsFrequencies
=
storedFrequencies
[
lang
]
||
{};
const
notifiedSet
=
new
Set
(
notifiedWords
[
lang
]
||
[]);
// Filtrer les mots qui dépassent le seuil
const
exceededWords
=
Object
.
entries
(
wordsFrequencies
)
.
filter
(([
word
,
count
])
=>
count
>=
userThreshold
&&
!
notifiedSet
.
has
(
word
))
.
map
(([
word
])
=>
word
);
if
(
exceededWords
.
length
===
0
)
{
console
.
log
(
`[Worker] Aucun mot dépassant le seuil pour '
${
lang
}
'.`
);
}
else
{
// Filtrer selon la stoplist si nécessaire
const
finalWords
=
shouldFilterStopwords
?
exceededWords
.
filter
(
word
=>
{
const
isInStoplist
=
stoplist
.
has
(
word
);
if
(
isInStoplist
)
console
.
log
(
`[Worker] Mot "
${
word
}
" exclu (stoplist)`
);
return
!
isInStoplist
;
})
:
exceededWords
;
if
(
finalWords
.
length
===
0
)
{
console
.
log
(
`[Worker] Tous les mots dépassant le seuil pour '
${
lang
}
' sont dans la stoplist.`
);
}
else
{
// Ajouter les mots aux sets et logs
notifiedWords
[
lang
]
=
notifiedSet
;
finalWords
.
forEach
(
word
=>
notifiedSet
.
add
(
word
));
console
.
log
(
"
Mots dépassant le seuil :
"
,
finalWords
);
self
.
postMessage
({
type
:
"
threshold-exceeded
"
,
wordsAboveThreshold
:
finalWords
});
// Ajout aux mots en attente pour un envoi groupé
if
(
!
pendingWords
[
lang
])
pendingWords
[
lang
]
=
[];
pendingWords
[
lang
].
push
(...
finalWords
);
// Regrouper les ajouts en une seule tâche différée
if
(
!
addWordTimeout
)
{
addWordTimeout
=
setTimeout
(
processPendingWords
,
3000
);
}
}
}
}
}
//Traiter les ajouts groupés
async
function
processPendingWords
()
{
console
.
log
(
"
Traitement des mots à ajouter en lot...
"
);
...
...
@@ -262,7 +259,6 @@ async function processPendingWords() {
addWordTimeout
=
null
;
}
async
function
addWordToLexicon
(
lang
,
word
)
{
if
(
!
authToken
)
{
console
.
warn
(
"
Impossible d'ajouter le mot : Aucun token d’authentification.
"
);
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment