From 63c08d182d600dce0bfceabeaa3310f60ef43ede Mon Sep 17 00:00:00 2001 From: Alice BRENON <alice.brenon@ens-lyon.fr> Date: Tue, 27 Jun 2023 20:04:44 +0200 Subject: [PATCH] Add script to make output from TXM useable --- scripts/textometry/scoresByDomain.hs | 50 ++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100755 scripts/textometry/scoresByDomain.hs diff --git a/scripts/textometry/scoresByDomain.hs b/scripts/textometry/scoresByDomain.hs new file mode 100755 index 0000000..226dc01 --- /dev/null +++ b/scripts/textometry/scoresByDomain.hs @@ -0,0 +1,50 @@ +#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib" +{-# LANGUAGE OverloadedStrings #-} + +import Data.Text as Text + ( Text, cons, drop, dropEnd, intercalate, isPrefixOf, length, lines, replace + , snoc, splitOn ) +import Data.Text.IO as Text (getLine, interact, putStrLn) +import System.Exit (die) + +data ColumnSelector = ColumnSelector + { position :: Int + , name :: Text } deriving Show +newtype Header = Header [ColumnSelector] deriving Show + +getColumns :: Text -> [Text] +getColumns = splitOn "\t" + +getHeader :: [Text] -> IO Header +getHeader [] = die "no columns" +getHeader (first:otherColumns) = pure $ + Header (ColumnSelector 0 first:foldr keepScores [] (zip [1..] otherColumns)) + where + keepScores (position, name) tmp + | score `isPrefixOf` name = + ColumnSelector + { position + , name = Text.drop (Text.length score) $ dropEnd 1 name } + : tmp + | otherwise = tmp + score = "score_:" + +toTsv :: [Text] -> Text +toTsv [] = "" +toTsv (first:otherColumns) = intercalate "\t" (escape first:otherColumns) + where + escape s = '"' `cons` replace "\"" "\"\"" s `snoc` '"' + +filterLine :: [Int] -> Text -> Text +filterLine positions = toTsv . extract . getColumns + where + extract columns = (columns !!) <$> positions + +main :: IO () +main = do + Header header <- getHeader.getColumns =<< Text.getLine + Text.putStrLn . toTsv $ name <$> header + Text.interact + (textUnlines . map (filterLine $ position <$> header) . Text.lines) + where + textUnlines = Text.intercalate "\n" -- GitLab