Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • abrenon/outillage
1 result
Show changes
Commits on Source (3)
......@@ -3,6 +3,7 @@ module System.Script
, try
, warn ) where
import Control.Monad.IO.Class (MonadIO(..))
import System.Exit (die)
import System.Environment (getProgName)
import System.IO (hPutStrLn, stderr)
......@@ -13,8 +14,8 @@ syntax s = do
this <- getProgName
die $ printf "Syntax: %s %s" this s
try :: IO (Either String a) -> IO a
try = (>>= either die pure)
try :: MonadIO m => m (Either String a) -> m a
try = (>>= either (liftIO . die) pure)
warn :: String -> IO ()
warn = hPutStrLn stderr
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib" --ghc-arg="-fprint-potential-instances"
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib/haskell" --ghc-arg="-fprint-potential-instances"
import Data.Text as Text (Text)
import Data.Text.IO as Text (getContents, writeFile)
import System.Environment (getArgs)
......
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib/haskell"
{-# LANGUAGE DeriveGeneric, ExplicitNamespaces #-}
import Conllu.Parse (parseConllu)
import Conllu.Type (AW, CW(..), Doc, Feat(..), ID(..), Rel(..), Sent(..))
import Control.Applicative ((<**>))
import Control.Monad.IO.Class (liftIO)
import Control.Monad.Reader (ReaderT, asks, runReaderT)
import Data.ByteString.Lazy as ByteString (putStr)
import Data.Csv (ToNamedRecord(..), encodeWith)
import Data.List (find, foldl', partition, sort)
import Data.Tree (Forest, Tree(..))
import GEODE.Metadata
( type(@), ArticleRecord, DefaultFields(..), HasDefaultHeader(..), Record(..)
, WithDefaultHeader(..), glue, readNamedTsv, toTsv, tsvLines )
import GEODE.Metadata.TSV.Header (for, getHeader)
import GHC.Generics (Generic)
import Options.Applicative
( Parser, execParser, fullDesc, help, helper, info, long, metavar, progDesc
, short, strOption )
import System.FilePath ((</>))
import System.Script (try, warn)
data Config = Config
{ inputRoot :: FilePath
, inputTsv :: FilePath }
configParser :: Parser Config
configParser = Config
<$> strOption root
<*> strOption inputTsvArg
where
root = short 'r' <> long "root" <> metavar "CONLLU_DIRECTORY"
<> help "path to the base directory containing the syntax annotations"
inputTsvArg = short 'i' <> long "input" <> metavar "INPUT_TSV"
<> help "TSV file containing the article records to process"
getConfig :: IO Config
getConfig = execParser
(info
(configParser <**> helper)
(fullDesc
<> progDesc "A textometric tool to draw discursive profiles"))
data Position = Position
{ totalSize :: Int
, position :: Int } deriving Generic
instance ToNamedRecord Position
instance HasDefaultHeader Position where
defaultFields = DefaultFields ["position", "totalSize"]
type Measure = ArticleRecord @ Position
profile :: ArticleRecord -> ReaderT Config IO [WithDefaultHeader Measure]
profile articleRecord = do
path <- asks ((</> relativePath articleRecord "conllu") . inputRoot)
liftIO $ readFile path >>= either skipAndWarn analyse . parseConllu path
where
skipAndWarn msg = [] <$ warn msg
analyse = pure . fmap (glue articleRecord) . findOccurrences
findOccurrences :: Doc -> [Position]
findOccurrences doc =
Position total <$> (zip (reverse offsets) doc >>= imperativeVerb)
where
(offsets, total) = foldl' next ([], 0) $ _words <$> doc
next (tmpOffsets, partial) s = (partial:tmpOffsets, partial + length s)
getSID :: ID -> Maybe Int
getSID (SID n) = Just n
getSID _ = Nothing
imperativeVerb :: (Int, Sent) -> [Int]
imperativeVerb (offset, sentence) = sort (syntax sentence >>= findSpans False)
where
findSpans isImperative (Node {rootLabel, subForest}) =
case getMoods $ _feats rootLabel of
Just moods -> let nowImperative = moods == ["Imp"] in
consIf rootLabel nowImperative (findSpans nowImperative =<< subForest)
Nothing ->
consIf rootLabel isImperative (findSpans isImperative =<< subForest)
consIf a b = if b then (maybe id ((:) . (offset+)) $ getSID (_id a)) else id
getMoods = fmap _featValues . find (("Mood" ==) . _feat)
type Syntax = Forest (CW AW)
syntax :: Sent -> Syntax
syntax = build 0 . _words
where
build n cws =
let (pointing, others) = partition (pointingTo n) cws in
recurseOn others <$> pointing
recurseOn cws cw = Node
{ rootLabel = cw, subForest = maybe [] (`build` cws) $ getSID (_id cw) }
pointingTo n cw = maybe False (n ==) (getSID . _head =<< _rel cw)
main :: IO ()
main = getConfig >>= runReaderT chain
where
chain = try (asks inputTsv >>= liftIO . readNamedTsv) >>= searchAndDisplay
searchAndDisplay rows = do
liftIO . ByteString.putStr $ encodeWith toTsv [getHeader (for :: Measure)]
mapM_ (\ar -> profile ar >>= liftIO . tsvLines) rows