Skip to content
Snippets Groups Projects
Commit 669b56e6 authored by Alice Brenon's avatar Alice Brenon
Browse files

Add script to split train and test coming from prodigy

parent 1c0964af
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib" --ghc-arg="-fprint-potential-instances"
import Data.Text as Text (Text)
import Data.Text.IO as Text (getContents, writeFile)
import System.Environment (getArgs)
import System.Script (syntax, try)
import Text.Filter (Editable(..))
import Text.Read (readEither)
-- | Parse a ratio from its textual form.
--
-- Two notations are accepted: a plain number (@"0.8"@) or a percentage
-- marked by a trailing @%@ (@"80%"@), which is divided by 100.
--
-- The resulting value must satisfy @0 <= r < 1@; otherwise a 'Left' with an
-- explanatory message is returned. Input that cannot be read as a number
-- yields the 'Left' produced by 'readEither'.
parseRatio :: String -> Either String Float
parseRatio input = do
  ratio <- parsed
  -- Keep the test in its positive form so that NaN is rejected as well.
  if ratio >= 0 && ratio < 1
    then Right ratio
    else Left "Ratio must represent a number between 0 and 1"
  where
    -- Look at the last character by reversing: a trailing '%' switches to
    -- percentage notation.
    parsed = case reverse input of
      '%' : reversedDigits -> fmap (/ 100) (readEither (reverse reversedDigits))
      _ -> readEither input
-- | Cut a list of texts in two at a position proportional to its length.
--
-- The first component holds the leading @round (ratio * length texts)@
-- elements, the second component holds the rest; order is preserved.
-- Note that 'round' rounds halves to the nearest even integer.
split :: Float -> [Text] -> ([Text], [Text])
split ratio texts = (take cutLine texts, drop cutLine texts)
  where
    total = fromIntegral (length texts)
    cutLine = round (ratio * total)
-- | Entry point: @TRAIN_RATIO TRAIN_PATH TEST_PATH@.
--
-- Reads the whole standard input, splits it according to the ratio given as
-- first argument, and writes the first part to @TRAIN_PATH@ and the second
-- part to @TEST_PATH@. With any other number of arguments, 'syntax' is
-- invoked with the usage string.
main :: IO ()
main = do
  args <- getArgs
  case args of
    [trainRatio, trainPath, testPath] -> do
      -- NOTE(review): 'try' presumably aborts the script with the message
      -- carried by a 'Left' — confirm in System.Script.
      ratio <- try (pure (parseRatio trainRatio))
      input <- Text.getContents
      -- NOTE(review): 'enter' / 'leave' presumably convert between the raw
      -- text and its list of records — confirm in Text.Filter.
      let (train, test) = split ratio (enter input)
      Text.writeFile trainPath (leave train)
      Text.writeFile testPath (leave test)
    _ -> syntax "TRAIN_RATIO TRAIN_PATH TEST_PATH"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment