Skip to content
Snippets Groups Projects
Commit ada94702 authored by Alice Brenon's avatar Alice Brenon
Browse files

Temporary hack to be able to extract paragraph subcorpora as well as article ones...

Temporary hack to be able to extract paragraph subcorpora as well as article ones (TODO: expose something similar to readNamedTsv returning Either (Vector ParagraphRecord) (Vector ArticleRecord))
parent 4f70645a
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib"
import GEODE.Metadata (ArticleRecord(..), readNamedTsv, relativePath)
import Data.Csv (FromNamedRecord)
import Data.Vector (Vector)
import GEODE.Metadata (ArticleRecord, Record(..), readNamedTsv)
import GEODE.Metadata.ParagraphRecord (ParagraphRecord)
import System.Directory (createDirectoryIfMissing, doesFileExist)
import System.Environment (getArgs)
import System.FilePath ((</>), takeDirectory)
import System.Posix.Files (createLink)
import System.Script (syntax, try)
link :: String -> FilePath -> FilePath -> ArticleRecord -> IO ()
link extension sourceRoot targetRoot article = do
link :: Record a => String -> FilePath -> FilePath -> a -> IO ()
link extension sourceRoot targetRoot record = do
fileExists <- doesFileExist sourceFile
if fileExists
then do
......@@ -17,13 +20,23 @@ link extension sourceRoot targetRoot article = do
else
putStrLn sourceFile
where
fileName = relativePath article extension
fileName = relativePath record extension
sourceFile = sourceRoot </> fileName
targetFile = targetRoot </> fileName
type As a = a -> IO ()
-- | Process the command-line arguments of the script.
--
-- With exactly four arguments (TSV file, extension, source directory,
-- target directory) the TSV file is read with 'readNamedTsv' and 'link' is
-- applied to every record it contains. The rows are first decoded as
-- 'ParagraphRecord's; if that decoding fails, the file is read a second time
-- and decoded as 'ArticleRecord's instead.
--
-- Any other number of arguments is handed to 'syntax' together with the
-- usage string.
run :: [String] -> IO ()
run [files, extension, source, target] = do
  asParagraphs <- tsvData
  case asParagraphs of
    Right paragraphs -> mapM_ (mkLink :: As ParagraphRecord) paragraphs
    -- The paragraph decoding error is deliberately dropped: the file is
    -- simply re-read as articles. NOTE(review): 'try' presumably aborts
    -- with the error when this second decoding fails too — confirm against
    -- System.Script.
    Left _ -> do
      articles <- try tsvData
      mapM_ (mkLink :: As ArticleRecord) articles
  where
    -- Kept polymorphic so that the same action can be run once per
    -- candidate record type.
    tsvData :: (FromNamedRecord a, Record a) => IO (Either String (Vector a))
    tsvData = readNamedTsv files
    -- 'link' specialised to the extension and directories from the
    -- command line.
    mkLink :: Record a => As a
    mkLink = link extension source target
run _ = syntax "FILES.tsv EXTENSION SOURCE_DIRECTORY TARGET_DIRECTORY"
main :: IO ()
main = getArgs >>= run
where
run [files, extension, source, target] =
try (readNamedTsv files) >>= mapM_ (link extension source target)
run _ = syntax "FILES.tsv EXTENSION SOURCE_DIRECTORY TARGET_DIRECTORY"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment