Skip to content
Snippets Groups Projects
Commit 0124d259 authored by Alice Brenon's avatar Alice Brenon
Browse files

Fix Tree indexer script + add 2 textometry scripts

parent 9ab20ad2
No related branches found
No related tags found
No related merge requests found
......@@ -8,13 +8,13 @@ import Control.Monad.IO.Class (MonadIO(..))
import Control.Monad.Reader (MonadReader, asks, runReaderT)
import Data.ByteString as ByteString (writeFile)
import Data.Serialize (encode)
import GEODE.Metadata ( ArticleRecord, Record(..) , readNamedTsv )
import GEODE.Metadata (ArticleRecord, Document(..), Record(..), ReadTSV(..))
import Options.Applicative
( Parser, execParser, fullDesc, help, helper, info, long, metavar, progDesc
, short, strOption )
import System.Directory (createDirectoryIfMissing)
import System.FilePath ((</>), takeDirectory)
import System.Script (try, warn)
import System.Script (warn)
data Config = Config
{ inputRoot :: FilePath
......@@ -55,4 +55,4 @@ toTree articleRecord = do
main :: IO ()
main = getConfig >>= runReaderT chain
where
chain = try (asks inputTsv >>= liftIO . readNamedTsv) >>= mapM_ toTree
chain = asks inputTsv >>= liftIO . readTSV >>= mapM_ toTree . rows
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib/haskell" --ghc-arg="-i ../ghc-geode/lib"
{-# LANGUAGE ExplicitNamespaces #-}
import Conllu.Tree (IndexedDocument(..))
import Data.ByteString as ByteString (readFile)
import Data.Csv (DefaultOrdered(..), ToNamedRecord(..))
import Data.Serialize (decode)
import GEODE.Metadata
( type(@)(..), ArticleRecord(..), Document(..), ReadTSV(..), Record(..)
, WriteTSV(..), for, getHeader, glue )
import GHC.Generics (Generic)
import System.Environment (getArgs)
import System.FilePath ((</>))
import System.Script (syntax, warn)
newtype Size = Size { size :: Int } deriving (Eq, Generic, Ord, Show)
instance DefaultOrdered Size
instance ToNamedRecord Size
type Result = ArticleRecord @ Size
measureIn :: FilePath -> ArticleRecord -> IO ()
measureIn inputRoot article =
ByteString.readFile path >>= either warn measure . decode
where
path = inputRoot </> relativePath article "tree"
--skipAndWarn msg = Nothing <$ warn msg
measure (IndexedDocument {_total}) = writeTSV () [glue article $ Size _total]
main :: IO ()
main = getArgs >>= run
where
run [inputRoot] = do
Document {rows} <- readTSV ()
writeTSV () [getHeader (for :: Result)]
mapM_ (measureIn inputRoot) rows
run _ = syntax "INPUT_ROOT"
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-ilib/haskell" --ghc-arg="-i/home/alice/Logiciel/ghc-geode/lib/GEODE/Metadata"
{-# LANGUAGE ExplicitNamespaces, OverloadedStrings #-}
import Control.Monad (foldM)
import Data.Csv (DefaultOrdered(..), ToNamedRecord(..))
import Data.Text as Text (Text, split, unpack)
import Data.Text.IO as Text (readFile)
import Data.Vector as Vector (fromList)
import GEODE.Metadata (type(@)(..), ArticleRecord(..), Document(..), Record(..), WriteTSV(..), for, getHeader)
import GHC.Generics (Generic)
import System.Environment (getArgs)
import System.Script (syntax, warn)
import Text.Filter (Editable(..))
import Text.Printf (printf)
newtype Result = Result { result :: Text } deriving (Eq, Generic, Ord, Show)
instance DefaultOrdered Result
instance ToNamedRecord Result
type Row = ArticleRecord @ Result
getResults :: [Text] -> IO (Document Row)
getResults = fmap buildDoc . foldM parseRow [] . drop 1
where
buildDoc stack = Document
{ header = getHeader (for :: Row)
, rows = Vector.fromList $ reverse stack }
parseRow :: [Row] -> Text -> IO [Row]
parseRow rows row = pickColumns (Text.split (== '\t') row)
where
pickColumns (uid: _: pivot: _) =
buildRow (onlyTarget pivot) . fromUID $ Text.unpack uid
pickColumns _ =
rows <$ warn (printf "Found less than 3 columns in row: %s" row)
buildRow _ (Left errorMessage) = rows <$ warn errorMessage
buildRow result (Right article) = pure $ (article :@: Result {result}):rows
onlyTarget t =
let l = Text.split (`elem` ['[', ']']) t in
if length l > 2 then l !! 1 else t
main :: IO ()
main = getArgs >>= run
where
run [input] =
Text.readFile input >>= getResults . enter >>= writeTSV ()
run _ = syntax "INPUT_TSV"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment