diff --git a/scripts/subcorpus/get-files.hs b/scripts/subcorpus/get-files.hs index d97d1d539423e59f8538f0d94789eed0ec2cf9dc..a034604e4dcca1e74a32904330ec3df32b2bea53 100755 --- a/scripts/subcorpus/get-files.hs +++ b/scripts/subcorpus/get-files.hs @@ -1,14 +1,17 @@ #!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib" -import GEODE.Metadata (ArticleRecord(..), readNamedTsv, relativePath) +import Data.Csv (FromNamedRecord) +import Data.Vector (Vector) +import GEODE.Metadata (ArticleRecord, Record(..), readNamedTsv) +import GEODE.Metadata.ParagraphRecord (ParagraphRecord) import System.Directory (createDirectoryIfMissing, doesFileExist) import System.Environment (getArgs) import System.FilePath ((</>), takeDirectory) import System.Posix.Files (createLink) import System.Script (syntax, try) -link :: String -> FilePath -> FilePath -> ArticleRecord -> IO () -link extension sourceRoot targetRoot article = do +link :: Record a => String -> FilePath -> FilePath -> a -> IO () +link extension sourceRoot targetRoot record = do fileExists <- doesFileExist sourceFile if fileExists then do @@ -17,13 +20,23 @@ link extension sourceRoot targetRoot article = do else putStrLn sourceFile where - fileName = relativePath article extension + fileName = relativePath record extension sourceFile = sourceRoot </> fileName targetFile = targetRoot </> fileName +type As a = a -> IO () + +run :: [String] -> IO () +run [files, extension, source, target] = + tsvData >>= either reTryAsArticles (mapM_ (mkLink :: As ParagraphRecord)) + where + tsvData :: (FromNamedRecord a, Record a) => IO (Either String (Vector a)) + tsvData = readNamedTsv files + mkLink :: Record a => As a + mkLink = link extension source target + reTryAsArticles _ = try tsvData >>= (mapM_ (mkLink :: As ArticleRecord)) + +run _ = syntax "FILES.tsv EXTENSION SOURCE_DIRECTORY TARGET_DIRECTORY" + main :: IO () main = getArgs >>= run - where - run [files, extension, source, target] = - try (readNamedTsv files) >>= mapM_ (link extension source target) - run _ = syntax "FILES.tsv EXTENSION SOURCE_DIRECTORY TARGET_DIRECTORY"