diff --git a/scripts/textometry/compute-profile.hs b/scripts/textometry/compute-profile.hs index 4ac3480f2683af28120cd433ca071c9f00c05296..04aa4e2c818a4e5bd577f5d7a34f6c1c0ebe938d 100755 --- a/scripts/textometry/compute-profile.hs +++ b/scripts/textometry/compute-profile.hs @@ -2,11 +2,12 @@ {-# LANGUAGE DeriveGeneric, ExplicitNamespaces #-} import Conllu.Tree - ( Feat(..), IndexedDocument(..), IndexedSentence(..), IndexedWord(..) + ( Feat(..), IndexedDocument(..), IndexedSentence(..), IndexedToken(..) , positions ) import Control.Applicative ((<**>)) import Control.Monad.IO.Class (liftIO) import Control.Monad.Reader (ReaderT, asks, runReaderT) +import Conllu.Tree.Count (Count(..)) import Data.Csv (DefaultOrdered(..), ToNamedRecord(..)) import Data.ByteString as ByteString (readFile) import Data.List (sort) @@ -63,11 +64,13 @@ profile articleRecord = do analyse = pure . fmap (glue articleRecord) . findOccurrences findOccurrences :: IndexedDocument -> [Occurrence] -findOccurrences (IndexedDocument {_total, _sentences}) = +findOccurrences (IndexedDocument {_total = Count {tokens}, _sentences}) = groupOccurrences [] (_sentences >>= imperativeVerb) where + occurrence (position, size) = + Occurrence {event = "Imperative", position, size, totalSize = tokens} groupOccurrences :: [(Int, Int)] -> [Int] -> [Occurrence] - groupOccurrences stack [] = uncurry (Occurrence _total) <$> reverse stack + groupOccurrences stack [] = occurrence <$> reverse stack groupOccurrences [] (p:ps) = groupOccurrences [(p, 1)] ps groupOccurrences (tmp@(first, tmpLength):stack) (p:ps) | p == first + tmpLength =