Skip to content
Snippets Groups Projects
Commit 0124d259 authored by Alice Brenon's avatar Alice Brenon
Browse files

Fix Tree indexer script + add 2 textometry scripts

parent 9ab20ad2
No related branches found
No related tags found
No related merge requests found
...@@ -8,13 +8,13 @@ import Control.Monad.IO.Class (MonadIO(..)) ...@@ -8,13 +8,13 @@ import Control.Monad.IO.Class (MonadIO(..))
import Control.Monad.Reader (MonadReader, asks, runReaderT) import Control.Monad.Reader (MonadReader, asks, runReaderT)
import Data.ByteString as ByteString (writeFile) import Data.ByteString as ByteString (writeFile)
import Data.Serialize (encode) import Data.Serialize (encode)
import GEODE.Metadata ( ArticleRecord, Record(..) , readNamedTsv ) import GEODE.Metadata (ArticleRecord, Document(..), Record(..), ReadTSV(..))
import Options.Applicative import Options.Applicative
( Parser, execParser, fullDesc, help, helper, info, long, metavar, progDesc ( Parser, execParser, fullDesc, help, helper, info, long, metavar, progDesc
, short, strOption ) , short, strOption )
import System.Directory (createDirectoryIfMissing) import System.Directory (createDirectoryIfMissing)
import System.FilePath ((</>), takeDirectory) import System.FilePath ((</>), takeDirectory)
import System.Script (try, warn) import System.Script (warn)
data Config = Config data Config = Config
{ inputRoot :: FilePath { inputRoot :: FilePath
...@@ -55,4 +55,4 @@ toTree articleRecord = do ...@@ -55,4 +55,4 @@ toTree articleRecord = do
main :: IO () main :: IO ()
main = getConfig >>= runReaderT chain main = getConfig >>= runReaderT chain
where where
chain = try (asks inputTsv >>= liftIO . readNamedTsv) >>= mapM_ toTree chain = asks inputTsv >>= liftIO . readTSV >>= mapM_ toTree . rows
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib/haskell" --ghc-arg="-i ../ghc-geode/lib"
{-# LANGUAGE ExplicitNamespaces #-}
import Conllu.Tree (IndexedDocument(..))
import Data.ByteString as ByteString (readFile)
import Data.Csv (DefaultOrdered(..), ToNamedRecord(..))
import Data.Serialize (decode)
import GEODE.Metadata
( type(@)(..), ArticleRecord(..), Document(..), ReadTSV(..), Record(..)
, WriteTSV(..), for, getHeader, glue )
import GHC.Generics (Generic)
import System.Environment (getArgs)
import System.FilePath ((</>))
import System.Script (syntax, warn)
newtype Size = Size { size :: Int } deriving (Eq, Generic, Ord, Show)
instance DefaultOrdered Size
instance ToNamedRecord Size
type Result = ArticleRecord @ Size
measureIn :: FilePath -> ArticleRecord -> IO ()
measureIn inputRoot article =
ByteString.readFile path >>= either warn measure . decode
where
path = inputRoot </> relativePath article "tree"
--skipAndWarn msg = Nothing <$ warn msg
measure (IndexedDocument {_total}) = writeTSV () [glue article $ Size _total]
main :: IO ()
main = getArgs >>= run
where
run [inputRoot] = do
Document {rows} <- readTSV ()
writeTSV () [getHeader (for :: Result)]
mapM_ (measureIn inputRoot) rows
run _ = syntax "INPUT_ROOT"
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-ilib/haskell" --ghc-arg="-i/home/alice/Logiciel/ghc-geode/lib/GEODE/Metadata"
{-# LANGUAGE ExplicitNamespaces, OverloadedStrings #-}
import Control.Monad (foldM)
import Data.Csv (DefaultOrdered(..), ToNamedRecord(..))
import Data.Text as Text (Text, split, unpack)
import Data.Text.IO as Text (readFile)
import Data.Vector as Vector (fromList)
import GEODE.Metadata (type(@)(..), ArticleRecord(..), Document(..), Record(..), WriteTSV(..), for, getHeader)
import GHC.Generics (Generic)
import System.Environment (getArgs)
import System.Script (syntax, warn)
import Text.Filter (Editable(..))
import Text.Printf (printf)
newtype Result = Result { result :: Text } deriving (Eq, Generic, Ord, Show)
instance DefaultOrdered Result
instance ToNamedRecord Result
type Row = ArticleRecord @ Result
getResults :: [Text] -> IO (Document Row)
getResults = fmap buildDoc . foldM parseRow [] . drop 1
where
buildDoc stack = Document
{ header = getHeader (for :: Row)
, rows = Vector.fromList $ reverse stack }
parseRow :: [Row] -> Text -> IO [Row]
parseRow rows row = pickColumns (Text.split (== '\t') row)
where
pickColumns (uid: _: pivot: _) =
buildRow (onlyTarget pivot) . fromUID $ Text.unpack uid
pickColumns _ =
rows <$ warn (printf "Found less than 3 columns in row: %s" row)
buildRow _ (Left errorMessage) = rows <$ warn errorMessage
buildRow result (Right article) = pure $ (article :@: Result {result}):rows
onlyTarget t =
let l = Text.split (`elem` ['[', ']']) t in
if length l > 2 then l !! 1 else t
main :: IO ()
main = getArgs >>= run
where
run [input] =
Text.readFile input >>= getResults . enter >>= writeTSV ()
run _ = syntax "INPUT_TSV"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment