Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
ff2balex
Manage
Activity
Members
Labels
Plan
Issues
9
Issue boards
Milestones
Wiki
External wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Lex gaMe
ff2balex
Commits
9783dc2b
Commit
9783dc2b
authored
3 months ago
by
Prénom Nom
Browse files
Options
Downloads
Patches
Plain Diff
amélioration worker
parent
18b1902a
No related branches found
Branches containing commit
No related tags found
1 merge request
!9
Affichage stats
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/workers/pyodide_worker.js
+183
-187
183 additions, 187 deletions
src/workers/pyodide_worker.js
with
183 additions
and
187 deletions
src/workers/pyodide_worker.js
+
183
−
187
View file @
9783dc2b
...
@@ -19,149 +19,146 @@ let authToken = null; // Stockage local du token
...
@@ -19,149 +19,146 @@ let authToken = null; // Stockage local du token
let
includeStopwords
=
false
;
// Stocker l'état de l'inclusion des mots outils
let
includeStopwords
=
false
;
// Stocker l'état de l'inclusion des mots outils
let
stoplistsByLang
=
{};
// Objet stockant les stoplists par langue
let
stoplistsByLang
=
{};
// Objet stockant les stoplists par langue
// --- Attente de la mise à jour de la stoplist ---
// --- Attente de la mise à jour de la stoplist ---
let
stoplistReady
=
new
Promise
((
resolve
)
=>
resolve
());
let
stoplist
s
Ready
=
new
Promise
((
resolve
)
=>
resolve
());
// Écouteur des messages reçus du background script
// Écouteur des messages reçus du background script
self
.
onmessage
=
async
(
event
)
=>
{
self
.
onmessage
=
async
(
event
)
=>
{
const
data
=
event
.
data
;
const
{
command
,
...
data
}
=
event
.
data
;
console
.
log
(
"
[WebWorker] Message reçu du Background:
"
,
data
);
console
.
log
(
"
[WebWorker] Message reçu du Background:
"
,
command
,
data
);
if
(
data
.
command
===
"
pyodide-simplemma
"
)
{
if
(
pyodideLoaded
&&
simplemmaLoaded
)
{
console
.
log
(
"
[Worker] Pyodide et Simplemma déjà chargés.
"
);
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
already_loaded
"
,
message
:
"
Pyodide et Simplemma déjà en mémoire
"
});
return
;
}
try
{
if
(
!
pyodideLoaded
)
{
console
.
log
(
"
[Worker] Chargement de Pyodide...
"
);
try
{
importScripts
(
`
${
LATEST_BASE_URL
}
pyodide.js`
);
}
catch
(
err
)
{
console
.
error
(
"
[Worker] Erreur lors de l'import de pyodide.js :
"
,
err
);
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
error
"
,
message
:
err
.
toString
()
});
return
;
}
pyodide
=
await
loadPyodide
({
indexURL
:
LATEST_BASE_URL
});
await
pyodide
.
loadPackage
(
"
lzma
"
);
await
pyodide
.
loadPackage
(
"
micropip
"
);
pyodideLoaded
=
true
;
console
.
log
(
"
[Worker] Pyodide chargé avec succès !
"
);
}
if
(
!
simplemmaLoaded
)
{
console
.
log
(
"
[Worker] Installation de simplemma...
"
);
// On encapsule la logique dans une fonction asynchrone pour faciliter l'usage d'await
await
pyodide
.
runPythonAsync
(
`
import micropip
import asyncio
async def main():
print("Installation de simplemma...")
await micropip.install("simplemma")
print("Installation réussie.")
import simplemma
print("simplemma importé avec succès.")
# Test simple : extraction de tokens et lemmatisation
import re
def tokenize(text):
return re.findall(r"
\\
b
\\
w+
\\
b", text.lower())
phrase = "Simplemma est prêt"
tokens = tokenize(phrase)
print("Tokens extraits :", tokens)
lemmatized_tokens = [simplemma.lemmatize(token, lang="fr") for token in tokens]
print("Tokens lemmatisés :", lemmatized_tokens)
return lemmatized_tokens
await main()
`
);
simplemmaLoaded
=
true
;
console
.
log
(
"
[Worker] Simplemma installé avec succès !
"
);
}
// Envoyer confirmation au background script
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
success
"
,
message
:
"
Pyodide et Simplemma chargés
"
});
}
catch
(
error
)
{
console
.
error
(
"
[Worker] Erreur lors du chargement de Pyodide ou Simplemma :
"
,
error
);
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
error
"
,
message
:
error
.
toString
()
});
}
}
// --- Traitement du texte envoyé par stats.js ---
if
(
data
.
command
===
"
process-text
"
)
{
if
(
!
pyodideLoaded
)
{
console
.
log
(
"
[Worker] Pyodide non chargé.
"
);
self
.
postMessage
({
type
:
"
process-text
"
,
status
:
"
error
"
,
message
:
"
Pyodide pas encore chargé
"
});
return
;
}
console
.
log
(
"
[Worker] Texte reçu pour analyse :
"
,
data
.
text
);
try
{
const
result
=
await
pyodide
.
runPythonAsync
(
`
import json
import re
import simplemma
from simplemma import langdetect
def detect_language(text):
lang_scores = simplemma.langdetect(text, lang=("fr", "en", "es", "de", "it", "pt"))
return lang_scores[0][0] if lang_scores else "unk"
def tokenize(text):
return re.findall(r"
\\
b[a-zA-ZÀ-ÿ'-]+
\\
b", text.lower())
text = """
${
data
.
text
.
replace
(
/
\"
/g
,
'
\\
"
'
)}
"""
detected_lang = detect_language(text)
if detected_lang == "unk":
detected_lang = "other"
tokens = tokenize(text)
lemmatized_tokens = [simplemma.lemmatize(token, lang=detected_lang) for token in tokens]
freq = {}
switch
(
command
)
{
for token in lemmatized_tokens:
freq[token] = freq.get(token, 0) + 1
case
"
pyodide-simplemma
"
:
json.dumps({"lang": detected_lang, "frequencies": freq}, ensure_ascii=False)
if
(
pyodideLoaded
&&
simplemmaLoaded
)
{
`
);
console
.
log
(
"
[Worker] Pyodide et Simplemma déjà chargés.
"
);
const
parsedResult
=
JSON
.
parse
(
result
);
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
already_loaded
"
,
message
:
"
Pyodide et Simplemma déjà en mémoire
"
});
const
detectedLang
=
parsedResult
.
lang
;
return
;
if
(
!
storedFrequencies
[
detectedLang
])
{
storedFrequencies
[
detectedLang
]
=
{};
}
}
for
(
const
[
word
,
count
]
of
Object
.
entries
(
parsedResult
.
frequencies
))
{
try
{
storedFrequencies
[
detectedLang
][
word
]
=
(
storedFrequencies
[
detectedLang
][
word
]
||
0
)
+
count
;
if
(
!
pyodideLoaded
)
{
console
.
log
(
"
[Worker] Chargement de Pyodide...
"
);
try
{
importScripts
(
`
${
LATEST_BASE_URL
}
pyodide.js`
);
}
catch
(
err
)
{
console
.
error
(
"
[Worker] Erreur lors de l'import de pyodide.js :
"
,
err
);
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
error
"
,
message
:
err
.
toString
()
});
return
;
}
pyodide
=
await
loadPyodide
({
indexURL
:
LATEST_BASE_URL
});
await
pyodide
.
loadPackage
(
"
lzma
"
);
await
pyodide
.
loadPackage
(
"
micropip
"
);
pyodideLoaded
=
true
;
console
.
log
(
"
[Worker] Pyodide chargé avec succès !
"
);
}
if
(
!
simplemmaLoaded
)
{
console
.
log
(
"
[Worker] Installation de simplemma...
"
);
await
pyodide
.
runPythonAsync
(
`
import micropip
import asyncio
async def main():
print("Installation de simplemma...")
await micropip.install("simplemma")
print("Installation réussie.")
import simplemma
print("simplemma importé avec succès.")
# Test simple : extraction de tokens et lemmatisation
import re
def tokenize(text):
return re.findall(r"
\\
b
\\
w+
\\
b", text.lower())
phrase = "Simplemma est prêt"
tokens = tokenize(phrase)
print("Tokens extraits :", tokens)
lemmatized_tokens = [simplemma.lemmatize(token, lang="fr") for token in tokens]
print("Tokens lemmatisés :", lemmatized_tokens)
return lemmatized_tokens
await main()
`
);
simplemmaLoaded
=
true
;
console
.
log
(
"
[Worker] Simplemma installé avec succès !
"
);
}
// Envoyer confirmation au background script
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
success
"
,
message
:
"
Pyodide et Simplemma chargés
"
});
}
catch
(
error
)
{
console
.
error
(
"
[Worker] Erreur lors du chargement de Pyodide ou Simplemma :
"
,
error
);
self
.
postMessage
({
type
:
"
pyodide-simplemma
"
,
status
:
"
error
"
,
message
:
error
.
toString
()
});
}
}
self
.
postMessage
({
type
:
"
update-frequencies
"
,
frequencies
:
storedFrequencies
});
break
;
if
(
autoAddEnabled
)
{
checkThreshold
(
detectedLang
);
case
"
process-text
"
:
if
(
!
pyodideLoaded
)
{
console
.
log
(
"
[Worker] Pyodide non chargé.
"
);
self
.
postMessage
({
type
:
"
process-text
"
,
status
:
"
error
"
,
message
:
"
Pyodide pas encore chargé
"
});
return
;
}
}
}
catch
(
error
)
{
console
.
error
(
"
[Worker] Erreur dans l'analyse du texte :
"
,
error
);
}
}
if
(
data
.
command
===
"
update-preferences
"
)
{
console
.
log
(
"
[Worker] Texte reçu pour analyse :
"
,
data
.
text
);
userThreshold
=
data
.
threshold
;
try
{
trackedLanguages
=
data
.
trackedLanguages
;
const
result
=
await
pyodide
.
runPythonAsync
(
`
autoAddEnabled
=
data
.
autoAdd
;
import json
isAuthenticated
=
data
.
isAuthenticated
;
import re
console
.
log
(
"
[Worker] Mise à jour des préférences :
"
,
{
userThreshold
,
trackedLanguages
,
autoAddEnabled
,
isAuthenticated
});
import simplemma
}
from simplemma import langdetect
if
(
data
.
command
===
"
update-lexicons
"
)
{
def detect_language(text):
lang_scores = simplemma.langdetect(text, lang=("fr", "en", "es", "de", "it", "pt"))
return lang_scores[0][0] if lang_scores else "unk"
def tokenize(text):
return re.findall(r"
\\
b[a-zA-ZÀ-ÿ'-]+
\\
b", text.lower())
text = """
${
data
.
text
.
replace
(
/
\"
/g
,
'
\\
"
'
)}
"""
detected_lang = detect_language(text)
if detected_lang == "unk":
detected_lang = "other"
tokens = tokenize(text)
lemmatized_tokens = [simplemma.lemmatize(token, lang=detected_lang) for token in tokens]
freq = {}
for token in lemmatized_tokens:
freq[token] = freq.get(token, 0) + 1
json.dumps({"lang": detected_lang, "frequencies": freq}, ensure_ascii=False)
`
);
const
parsedResult
=
JSON
.
parse
(
result
);
const
detectedLang
=
parsedResult
.
lang
;
if
(
!
storedFrequencies
[
detectedLang
])
{
storedFrequencies
[
detectedLang
]
=
{};
}
for
(
const
[
word
,
count
]
of
Object
.
entries
(
parsedResult
.
frequencies
))
{
storedFrequencies
[
detectedLang
][
word
]
=
(
storedFrequencies
[
detectedLang
][
word
]
||
0
)
+
count
;
}
self
.
postMessage
({
type
:
"
update-frequencies
"
,
frequencies
:
storedFrequencies
});
if
(
autoAddEnabled
)
{
checkThreshold
(
detectedLang
);
}
}
catch
(
error
)
{
console
.
error
(
"
[Worker] Erreur dans l'analyse du texte :
"
,
error
);
}
break
;
case
"
update-preferences
"
:
userThreshold
=
data
.
threshold
;
trackedLanguages
=
data
.
trackedLanguages
;
autoAddEnabled
=
data
.
autoAdd
;
isAuthenticated
=
data
.
isAuthenticated
;
console
.
log
(
"
[Worker] Mise à jour des préférences :
"
,
{
userThreshold
,
trackedLanguages
,
autoAddEnabled
,
isAuthenticated
});
break
;
case
"
update-lexicons
"
:
userLexicons
=
data
.
lexicons
;
userLexicons
=
data
.
lexicons
;
console
.
log
(
"
[Worker] Lexiques mis à jour :
"
,
userLexicons
);
console
.
log
(
"
[Worker] Lexiques mis à jour :
"
,
userLexicons
);
}
break
;
if
(
data
.
command
===
"
update-auth-token
"
)
{
case
"
update-auth-token
"
:
authToken
=
data
.
accessToken
;
authToken
=
data
.
accessToken
;
console
.
log
(
"
[Worker] Token mis à jour :
"
,
authToken
?
"
Disponible
"
:
"
Aucun token reçu
"
);
console
.
log
(
"
[Worker] Token mis à jour :
"
,
authToken
?
"
Disponible
"
:
"
Aucun token reçu
"
);
}
break
;
if
(
data
.
command
===
"
update-stoplist
"
)
{
case
"
update-stoplist
"
:
stoplistsReady
=
new
Promise
((
resolve
)
=>
{
stoplistsReady
=
new
Promise
((
resolve
)
=>
{
if
(
data
.
stoplists
&&
typeof
data
.
stoplists
===
"
object
"
)
{
if
(
data
.
stoplists
&&
typeof
data
.
stoplists
===
"
object
"
)
{
stoplistsByLang
=
{};
stoplistsByLang
=
{};
...
@@ -172,80 +169,80 @@ json.dumps({"lang": detected_lang, "frequencies": freq}, ensure_ascii=False)
...
@@ -172,80 +169,80 @@ json.dumps({"lang": detected_lang, "frequencies": freq}, ensure_ascii=False)
}
else
{
}
else
{
console
.
warn
(
"
[Worker] ⚠ Stoplists reçues incorrectes ou vides.
"
);
console
.
warn
(
"
[Worker] ⚠ Stoplists reçues incorrectes ou vides.
"
);
}
}
resolve
();
//Stoplist prête
resolve
();
//
Stoplist prête
});
});
}
break
;
if
(
data
.
command
===
"
update-include-stopwords
"
)
{
case
"
update-include-stopwords
"
:
includeStopwords
=
data
.
includeStopwords
;
includeStopwords
=
data
.
includeStopwords
;
console
.
log
(
`[Worker] Mise à jour de includeStopwords :
${
includeStopwords
}
`
);
console
.
log
(
`[Worker] Mise à jour de includeStopwords :
${
includeStopwords
}
`
);
}
break
;
}
};
};
// --- Vérification du seuil et notification ---
// --- Vérification du seuil et notification ---
let
pendingWords
=
{};
// Stocker temporairement les mots en attente d'ajout
let
pendingWords
=
{};
// Stocker temporairement les mots en attente d'ajout
let
addWordTimeout
=
null
;
// Timer pour regrouper les ajouts
let
addWordTimeout
=
null
;
// Timer pour regrouper les ajouts
async
function
checkThreshold
(
lang
)
{
async
function
checkThreshold
(
lang
)
{
// // Vérifier si la stoplist est définie et contient des mots
await
stoplistsReady
;
// Attendre que les stoplists soient chargées
await
stoplistReady
;
// Attendre que la stoplist soit chargée
if
(
!
autoAddEnabled
||
!
isAuthenticated
)
{
if
(
!
autoAddEnabled
||
!
isAuthenticated
)
{
console
.
log
(
"
[Worker] Auto-Add désactivé ou utilisateur non connecté.
"
);
console
.
log
(
"
[Worker] ⚠ Auto-Add désactivé ou utilisateur non connecté.
"
);
return
;
}
else
if
(
!
trackedLanguages
.
includes
(
lang
))
{
}
console
.
log
(
`[Worker] ⚠ La langue '
${
lang
}
' n'est pas suivie.`
);
if
(
!
trackedLanguages
.
includes
(
lang
))
{
}
else
{
console
.
log
(
`[Worker] La langue
${
lang
}
n'est pas suivie.`
);
console
.
log
(
`[Worker] Vérification des fréquences pour la langue '
${
lang
}
'...`
);
return
;
}
console
.
log
(
`[Worker] Vérification des fréquences pour la langue
${
lang
}
...`
);
if
(
!
storedFrequencies
[
lang
])
return
;
//Utiliser la bonne stoplist
const
stoplist
=
stoplistsByLang
[
lang
]
||
new
Set
();
const
shouldFilterStopwords
=
stoplist
.
size
>
0
&&
includeStopwords
;
console
.
log
(
`[Worker] Stoplist pour '
${
lang
}
' :
${
shouldFilterStopwords
?
"
Appliquée
"
:
"
Non appliquée
"
}
`
);
const
exceededWords
=
Object
.
entries
(
storedFrequencies
[
lang
])
.
filter
(([
word
,
count
])
=>
count
>=
userThreshold
&&
!
(
notifiedWords
[
lang
]
&&
notifiedWords
[
lang
].
includes
(
word
)))
.
map
(([
word
])
=>
word
);
// Appliquer le filtrage si nécessaire
const
finalWords
=
exceededWords
.
filter
(
word
=>
{
if
(
shouldFilterStopwords
)
{
const
isInStoplist
=
stoplist
.
has
(
word
);
console
.
log
(
`[Worker] Mot "
${
word
}
"
${
isInStoplist
?
"
EXCLU (dans la stoplist)
"
:
"
CONSERVÉ (pas dans la stoplist)
"
}
`
);
return
!
isInStoplist
;
}
return
true
;
// Si on ne filtre pas, garder tous les mots
});
const
stoplist
=
stoplistsByLang
[
lang
]
||
new
Set
();
const
shouldFilterStopwords
=
stoplist
.
size
>
0
&&
includeStopwords
;
if
(
finalWords
.
length
===
0
)
return
;
console
.
log
(
`[Worker] 📝 Stoplist pour '
${
lang
}
' :
${
shouldFilterStopwords
?
"
Appliquée
"
:
"
Non appliquée
"
}
`
);
if
(
!
notifiedWords
[
lang
])
notifiedWords
[
lang
]
=
[];
notifiedWords
[
lang
].
push
(...
finalWords
);
const
wordsFrequencies
=
storedFrequencies
[
lang
]
||
{};
const
notifiedSet
=
new
Set
(
notifiedWords
[
lang
]
||
[]);
console
.
log
(
"
Mots dépassant le seuil :
"
,
finalWords
);
self
.
postMessage
({
type
:
"
threshold-exceeded
"
,
wordsAboveThreshold
:
finalWords
});
// Filtrer les mots qui dépassent le seuil
const
exceededWords
=
Object
.
entries
(
wordsFrequencies
)
// Stocker les mots détectés pour un ajout groupé
.
filter
(([
word
,
count
])
=>
count
>=
userThreshold
&&
!
notifiedSet
.
has
(
word
))
if
(
!
pendingWords
[
lang
])
pendingWords
[
lang
]
=
[];
.
map
(([
word
])
=>
word
);
pendingWords
[
lang
].
push
(...
finalWords
);
if
(
exceededWords
.
length
===
0
)
{
// Déclencher un envoi groupé après un délai (3 secondes)
console
.
log
(
`[Worker] Aucun mot dépassant le seuil pour '
${
lang
}
'.`
);
if
(
!
addWordTimeout
)
{
}
else
{
addWordTimeout
=
setTimeout
(
async
()
=>
{
// Filtrer selon la stoplist si nécessaire
await
processPendingWords
();
const
finalWords
=
shouldFilterStopwords
},
3000
);
?
exceededWords
.
filter
(
word
=>
{
const
isInStoplist
=
stoplist
.
has
(
word
);
if
(
isInStoplist
)
console
.
log
(
`[Worker] Mot "
${
word
}
" exclu (stoplist)`
);
return
!
isInStoplist
;
})
:
exceededWords
;
if
(
finalWords
.
length
===
0
)
{
console
.
log
(
`[Worker] Tous les mots dépassant le seuil pour '
${
lang
}
' sont dans la stoplist.`
);
}
else
{
// Ajouter les mots aux sets et logs
notifiedWords
[
lang
]
=
notifiedSet
;
finalWords
.
forEach
(
word
=>
notifiedSet
.
add
(
word
));
console
.
log
(
"
Mots dépassant le seuil :
"
,
finalWords
);
self
.
postMessage
({
type
:
"
threshold-exceeded
"
,
wordsAboveThreshold
:
finalWords
});
// Ajout aux mots en attente pour un envoi groupé
if
(
!
pendingWords
[
lang
])
pendingWords
[
lang
]
=
[];
pendingWords
[
lang
].
push
(...
finalWords
);
// Regrouper les ajouts en une seule tâche différée
if
(
!
addWordTimeout
)
{
addWordTimeout
=
setTimeout
(
processPendingWords
,
3000
);
}
}
}
}
}
}
}
//Traiter les ajouts groupés
//Traiter les ajouts groupés
async
function
processPendingWords
()
{
async
function
processPendingWords
()
{
console
.
log
(
"
Traitement des mots à ajouter en lot...
"
);
console
.
log
(
"
Traitement des mots à ajouter en lot...
"
);
...
@@ -262,7 +259,6 @@ async function processPendingWords() {
...
@@ -262,7 +259,6 @@ async function processPendingWords() {
addWordTimeout
=
null
;
addWordTimeout
=
null
;
}
}
async
function
addWordToLexicon
(
lang
,
word
)
{
async
function
addWordToLexicon
(
lang
,
word
)
{
if
(
!
authToken
)
{
if
(
!
authToken
)
{
console
.
warn
(
"
Impossible d'ajouter le mot : Aucun token d’authentification.
"
);
console
.
warn
(
"
Impossible d'ajouter le mot : Aucun token d’authentification.
"
);
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment