diff --git a/scripts/ML/prodigy-corpus.hs b/scripts/ML/prodigy-corpus.hs
new file mode 100755
index 0000000000000000000000000000000000000000..360e67a6f9759e5d585380dde2ef839d55286fd8
--- /dev/null
+++ b/scripts/ML/prodigy-corpus.hs
@@ -0,0 +1,34 @@
+#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib"
+{-# LANGUAGE DeriveGeneric #-}
+import Data.Aeson (ToJSON(..), defaultOptions, encode, genericToEncoding)
+import Data.ByteString.Lazy.Char8 as ByteString (putStrLn)
+import Data.Text (Text)
+import Data.Text.IO as Text (readFile)
+import GEODE.Metadata (readNamedTsv)
+import GEODE.Metadata.File (relativePath)
+import GEODE.Metadata.PrimaryKey.Paragraph (ParagraphPK)
+import GHC.Generics (Generic)
+import System.Environment (getArgs)
+import System.FilePath ((</>))
+import System.Script (syntax, try)
+
+-- | A paragraph's contents ('text') together with the 'ParagraphPK' stored alongside it ('meta').
+data Paragraph = Paragraph { text :: Text, meta :: ParagraphPK }
+  deriving Generic
+
+-- | Generic encoding with aeson's 'defaultOptions': the record field names are used as JSON keys.
+instance ToJSON Paragraph where toEncoding = genericToEncoding defaultOptions
+
+-- | Read @source </> relativePath pK "txt"@ and pair its contents with @pK@ (no field shadowing).
+loadParagraph :: FilePath -> ParagraphPK -> IO Paragraph
+loadParagraph source pK = build <$> Text.readFile (source </> relativePath pK "txt")
+  where build contents = Paragraph { text = contents, meta = pK }
+
+-- | Expects exactly two arguments; any other argument list is handed to 'syntax' with the usage
+-- string. Each entry yielded by 'readNamedTsv' is loaded and printed as one JSON line on stdout.
+main :: IO ()
+main = getArgs >>= \args -> case args of
+  [inputMeta, source] -> do
+    paragraphKeys <- try (readNamedTsv inputMeta)
+    mapM_ (\key -> loadParagraph source key >>= ByteString.putStrLn . encode) paragraphKeys
+  _ -> syntax "INPUT_METADATA SOURCE_DIRECTORY"