From c402d135bd44b924ab6f3e818b8ec6df1a07b9da Mon Sep 17 00:00:00 2001 From: Alice BRENON <alice.brenon@ens-lyon.fr> Date: Wed, 15 Jun 2022 22:07:54 +0200 Subject: [PATCH] Add a clean prototype of the classical tree approach --- lib/Text/Takkyuu.hs | 6 ++ lib/Text/Takkyuu/French.hs | 69 ++++++++++++ lib/Text/Takkyuu/Proto.hs | 171 ++++++++++++++++++++++++++++++ lib/Text/Takkyuu/Visualization.hs | 33 ++++++ takkyuu.cabal | 8 +- 5 files changed, 284 insertions(+), 3 deletions(-) create mode 100644 lib/Text/Takkyuu/French.hs create mode 100644 lib/Text/Takkyuu/Proto.hs create mode 100644 lib/Text/Takkyuu/Visualization.hs diff --git a/lib/Text/Takkyuu.hs b/lib/Text/Takkyuu.hs index c053c98..f41b2e6 100644 --- a/lib/Text/Takkyuu.hs +++ b/lib/Text/Takkyuu.hs @@ -1,3 +1,9 @@ module Text.Takkyuu ( + module Text.Takkyuu.Proto + , module Text.Takkyuu.French + , module Text.Takkyuu.Visualization ) where +import Text.Takkyuu.Proto +import Text.Takkyuu.Visualization +import Text.Takkyuu.French diff --git a/lib/Text/Takkyuu/French.hs b/lib/Text/Takkyuu/French.hs new file mode 100644 index 0000000..ba3ee0e --- /dev/null +++ b/lib/Text/Takkyuu/French.hs @@ -0,0 +1,69 @@ +{-# LANGUAGE NamedFieldPuns #-} +module Text.Takkyuu.French ( + POS(..) + , Gender(..) + , MorphoLabel(..) + , french + , tag + , syntax + ) where + +import Data.Char (isSpace, toLower) +import qualified Data.Map as Map (fromList) +import qualified Data.Set as Set (fromList) +import Text.Takkyuu.Proto (Game, Labeled(..), Label(..), dico, labelizer, rules) + +data POS = + Determiner + | Noun + | Particip + deriving (Eq, Ord, Show) + +data Gender = + Masculine + | Feminine + deriving (Eq, Ord, Show) + +data Number = + Singular + | Plural + deriving (Eq, Ord, Show) + +data MorphoLabel = MorphoLabel { + pos :: POS + , gender :: Gender + , number :: Number + } + deriving (Eq, Ord, Show) + +type LabeledWord = Labeled String MorphoLabel + +french :: Game Char -> Game String +french = dico id isSpace $ Set.fromList ["le", "la", "chat", "lait", "marché"] + +tag :: Game String -> Game LabeledWord +tag = labelizer (fmap toLower) $ Map.fromList [ + ("le", + [MorphoLabel {pos = Determiner, gender = Masculine, number = Singular}]) + , ("la", + [MorphoLabel {pos = Determiner, gender = Feminine, number = Singular}]) + , ("les", [ + MorphoLabel {pos = Determiner, gender = Masculine, number = Plural} + , MorphoLabel {pos = Determiner, gender = Feminine, number = Plural} + ]) + , ("chat", [MorphoLabel {pos = Noun, gender = Masculine, number = Singular}]) + , ("lait", [MorphoLabel {pos = Noun, gender = Masculine, number = Singular}]) + , ("marché", [ + MorphoLabel {pos = Noun, gender = Masculine, number = Singular} + , MorphoLabel {pos = Particip, gender = Masculine, number = Singular} + ]) + ] + +syntax :: Game LabeledWord -> Game [LabeledWord] +syntax = rules label . Set.fromList $ agreeSn <$> combinations + where + combinations = [(g, n) | g <- [Masculine, Feminine], n <- [Singular, Plural]] + agreeSn (gender, number) = [ + Label $ MorphoLabel {pos = Determiner, gender, number} + , Label $ MorphoLabel {pos = Noun, gender, number} + ] diff --git a/lib/Text/Takkyuu/Proto.hs b/lib/Text/Takkyuu/Proto.hs new file mode 100644 index 0000000..79e90be --- /dev/null +++ b/lib/Text/Takkyuu/Proto.hs @@ -0,0 +1,171 @@ +{-# LANGUAGE NamedFieldPuns #-} +module Text.Takkyuu.Proto ( + Game(..) + , Label(..) + , Labeled(..) + , game + , oneOf + , fuzz + , keep + , ignore + , dico + , labelizer + , fixTypo + , rules + ) where + +import Control.Applicative (Alternative(..)) +import Data.Map (Map, findWithDefault, insertWith) +import qualified Data.Map as Map (empty, lookup) +import Data.Set as Set (Set) +import qualified Data.Set as Set (empty, fromList, member) + +data Game a = + Over Bool + | On { + token :: a + , next :: Game a + , fallback :: Game a + } + deriving Show + +instance Semigroup (Game a) where + Over True <> g = g + o@(Over False) <> _ = o + o <> g = o { + next = next o <> g + , fallback = fallback o <> g + } + +instance Monoid (Game a) where + mempty = Over True + +instance Functor Game where + fmap _ (Over b) = Over b + fmap f o = o { + token = f $ token o + , next = f <$> next o + , fallback = f <$> fallback o + } + +instance Applicative Game where + pure token = On { + token + , next = mempty + , fallback = empty + } + + Over b <*> _ = Over b + _ <*> Over b = Over b + f <*> g = On { + token = (token f) (token g) + , next = (next f) <*> (next g) + , fallback = (fallback f) <*> (fallback g) + } + +instance Alternative Game where + empty = Over False + Over False <|> g = g + o@(Over True) <|> _ = o + o <|> g = o { + fallback = fallback o <|> g + } + +instance Monad Game where + Over b >>= _ = Over b + o >>= f = (f (token o) <> ((next o) >>= f)) <|> ((fallback o) >>= f) + +game :: [a] -> Game a +game = foldr ((<>) . pure) mempty + +oneOf :: [a] -> Game a +oneOf = foldr ((<|>) . pure) empty + +fuzz :: (Eq a, Ord a) => Set [a] -> Game a -> Game a +fuzz classes = (>>= alternatives) + where + alternatives t = oneOf (findWithDefault [t] t equiv) + equiv = foldl (\tmp0 l -> + foldl (\tmp1 x -> + insertWith (++) x l tmp1 + ) tmp0 l + ) Map.empty classes + +data Label l = + Unknown + | Label l + deriving (Eq, Ord, Show) + +data Labeled a l = Labeled { + item :: a + , label :: Label l + } + deriving (Eq, Ord, Show) + + +keep :: (a -> Bool) -> Game a -> Game a +keep predicate = (>>= select) + where + select a + | predicate a = pure a + | otherwise = Over False + +ignore :: (a -> Bool) -> Game a -> Game a +ignore p = keep (not . p) + +dico :: (Eq k, Ord k) => (a -> k) -> (k -> Bool) -> Set [k] -> Game a -> Game [a] +dico projector isBlank vocabulary = continueFrom start + where + start = ([], vocabulary) + continueFrom (stack@(_:_), candidates) (Over True) + | not $ Set.member [] candidates = pure (reverse stack) + continueFrom _ (Over b) = Over b + continueFrom state@(stack, candidates) (On {token, next, fallback}) + | isBlank (projector token) = + ((if null stack || Set.member [] candidates then empty else pure (reverse stack)) + <> continueFrom start next) + <|> continueFrom state next + <|> continueFrom state fallback + | otherwise = + let projection = projector token in + let newCandidates = foldMap (startingWith projection) candidates in + case (null newCandidates, Set.member [] newCandidates) of + (True, _) -> continueFrom state fallback <|> continueFrom (token:stack, newCandidates) next + (_, False) -> continueFrom (token:stack, newCandidates) next + <|> continueFrom state fallback + _ -> (pure (reverse $ token:stack) <> continueFrom start next) + <|> continueFrom (token:stack, newCandidates) next + <|> continueFrom state fallback + startingWith e (x:xs) = Set.fromList $ if e == x then [xs] else [] + startingWith _ [] = Set.empty + +rules :: (Eq k, Ord k) => (a -> k) -> Set [k] -> Game a -> Game [a] +rules projector vocabulary = continueFrom start + where + start = ([], vocabulary) + continueFrom _ (Over b) = Over b + continueFrom state@(stack, candidates) (On {token, next, fallback}) = + let projection = projector token in + let newCandidates = foldMap (startingWith projection) candidates in + case (null newCandidates, Set.member [] newCandidates) of + (True, _) -> continueFrom state fallback + (_, False) -> continueFrom (token:stack, newCandidates) next + <|> continueFrom state fallback + _ -> (pure (reverse $ token:stack) <> continueFrom start next) + <|> continueFrom (token:stack, newCandidates) next + <|> continueFrom state fallback + startingWith e (x:xs) = Set.fromList $ if e == x then [xs] else [] + startingWith _ [] = Set.empty + +labelizer :: Ord k => (a -> k) -> (Map k [b]) -> Game a -> Game (Labeled a b) +labelizer projector labels = (>>= alternatives) + where + alternatives t = + let possibleLabels = Map.lookup (projector t) labels in + oneOf $ maybe [Labeled t Unknown] (Labeled t . Label <$>) possibleLabels + +fixTypo :: Game Char -> Game Char +fixTypo = fuzz $ Set.fromList [ + "bh" + , "il1" + ] diff --git a/lib/Text/Takkyuu/Visualization.hs b/lib/Text/Takkyuu/Visualization.hs new file mode 100644 index 0000000..edc3d10 --- /dev/null +++ b/lib/Text/Takkyuu/Visualization.hs @@ -0,0 +1,33 @@ +{-# LANGUAGE NamedFieldPuns #-} +module Text.Takkyuu.Visualization ( + dump + , explore + , render + ) where + +import Text.Takkyuu.Proto (Game(..)) +import Text.Printf (printf) + +data Tree a = + Leaf Bool + | Tree { + unTree :: [(a, Tree a)] + } + deriving Show + +explore :: Game a -> Tree a +explore (Over b) = Leaf b +explore (On {token, next, fallback}) = + case explore fallback of + Leaf _ -> Tree [(token, explore next)] + Tree l -> Tree $ (token, explore next):l + +dump :: Show a => Tree a -> String +dump = unlines . dumpAux + where + dumpAux (Leaf b) = [if b then "o" else "x"] + dumpAux (Tree l) = l >>= f + f (a, tree) = printf "%s — %s" (show a) <$> dumpAux tree + +render :: Show a => Game a -> IO () +render = putStr . dump . explore diff --git a/takkyuu.cabal b/takkyuu.cabal index 762f4cc..fedc628 100644 --- a/takkyuu.cabal +++ b/takkyuu.cabal @@ -19,13 +19,15 @@ extra-source-files: CHANGELOG.md library exposed-modules: Text.Takkyuu - , Text.Takkyuu.Data - , Text.Takkyuu.IO - -- other-modules: + other-modules: Text.Takkyuu.French + , Text.Takkyuu.Proto + , Text.Takkyuu.Visualization -- other-extensions: build-depends: base >=4.14 && <4.15 + , containers >=0.6 && <0.7 hs-source-dirs: lib default-language: Haskell2010 + ghc-options: -Wall executable takkyuu main-is: Main.hs -- GitLab