#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib"
{-# LANGUAGE DeriveGeneric, ExplicitNamespaces, OverloadedStrings #-}

-- Script: for every row of a paragraph-metadata TSV file, read the matching
-- text file from a source directory and print the paragraph (text + metadata)
-- as one JSON document per line on standard output.

import Data.Aeson (ToJSON(..), defaultOptions, encode, genericToEncoding)
import Data.ByteString.Lazy.Char8 as ByteString (putStrLn)
import Data.Text (Text)
import Data.Text.IO as Text (readFile)
import GEODE.Metadata (type (@)(..), Record(..), readNamedTsv)
import GEODE.Metadata.ProdigyMeta (ParagraphMeta)
import GHC.Generics (Generic)
import System.Environment (getArgs)
import System.FilePath ((</>))
import System.Script (syntax, try)

-- | A paragraph ready for export: its textual content together with the
-- metadata row that describes it.
data Paragraph = Paragraph
  { text :: Text           -- ^ content read from the paragraph's file
  , meta :: ParagraphMeta  -- ^ metadata row the paragraph was loaded for
  } deriving Generic

-- | JSON encoding derived generically with aeson's 'defaultOptions', so the
-- emitted object keys are the record field names (@text@ and @meta@).
instance ToJSON Paragraph where
  toEncoding = genericToEncoding defaultOptions

-- | Load the content of the paragraph described by a metadata row.
--
-- The file that is read lives under @source@, at the location returned by
-- 'relativePath' for the record component of the metadata with the @"txt"@
-- argument (presumably the file extension; see "GEODE.Metadata" to confirm).
--
-- The fields are assigned explicitly rather than with record puns: the file
-- does not enable @NamedFieldPuns@, and distinct local names avoid shadowing
-- the 'text' and 'meta' selectors under @-Wall@.
loadParagraph :: FilePath -> ParagraphMeta -> IO Paragraph
loadParagraph source paragraphMeta@(paragraphRecord :@: _) = do
  contents <- Text.readFile (source </> relativePath paragraphRecord "txt")
  pure $ Paragraph {text = contents, meta = paragraphMeta}

-- | Entry point.
--
-- Expects exactly two command-line arguments: the path of the metadata TSV
-- file and the directory containing the paragraph files. Any other argument
-- list is handed to 'syntax' with the usage string (presumably it reports
-- the expected usage; see "System.Script" to confirm).
main :: IO ()
main = getArgs >>= run
  where
    -- Read the metadata rows (failures are handled by 'try' from
    -- "System.Script") and export each paragraph in turn.
    run [inputMeta, source] =
      try (readNamedTsv inputMeta) >>= mapM_ (printParagraph source)
    run _ = syntax "INPUT_METADATA SOURCE_DIRECTORY"

    -- Load one paragraph and write its JSON encoding followed by a newline.
    -- Deliberately not called 'toJSON', which would shadow the 'ToJSON'
    -- class method imported from "Data.Aeson".
    printParagraph source parMeta =
      loadParagraph source parMeta >>= ByteString.putStrLn . encode