From 63c08d182d600dce0bfceabeaa3310f60ef43ede Mon Sep 17 00:00:00 2001
From: Alice BRENON <alice.brenon@ens-lyon.fr>
Date: Tue, 27 Jun 2023 20:04:44 +0200
Subject: [PATCH] Add script to make output from TXM useable

---
 scripts/textometry/scoresByDomain.hs | 50 ++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100755 scripts/textometry/scoresByDomain.hs

diff --git a/scripts/textometry/scoresByDomain.hs b/scripts/textometry/scoresByDomain.hs
new file mode 100755
index 0000000..226dc01
--- /dev/null
+++ b/scripts/textometry/scoresByDomain.hs
@@ -0,0 +1,50 @@
+#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib"
+{-# LANGUAGE OverloadedStrings #-}
+
+import Data.Text as Text
+  ( Text, cons, drop, dropEnd, intercalate, isPrefixOf, length, lines, replace
+  , snoc, splitOn )
+import Data.Text.IO as Text (getLine, interact, putStrLn)
+import System.Exit (die)
+
+data ColumnSelector = ColumnSelector
+  { position :: Int
+  , name :: Text } deriving Show
+newtype Header = Header [ColumnSelector] deriving Show
+
+getColumns :: Text -> [Text]
+getColumns = splitOn "\t"
+
+getHeader :: [Text] -> IO Header
+getHeader [] = die "no columns"
+getHeader (first:otherColumns) = pure $
+  Header (ColumnSelector 0 first:foldr keepScores [] (zip [1..] otherColumns))
+  where
+    keepScores (position, name) tmp
+      | score `isPrefixOf` name =
+        ColumnSelector
+          { position
+          , name = Text.drop (Text.length score) $ dropEnd 1 name }
+        : tmp
+      | otherwise = tmp
+    score = "score_:"
+
+toTsv :: [Text] -> Text
+toTsv [] = ""
+toTsv (first:otherColumns) = intercalate "\t" (escape first:otherColumns)
+  where
+    escape s = '"' `cons` replace "\"" "\"\"" s `snoc` '"'
+
+filterLine :: [Int] -> Text -> Text
+filterLine positions = toTsv . extract . getColumns
+  where
+    extract columns = (columns !!) <$> positions
+
+main :: IO ()
+main = do
+  Header header <- getHeader.getColumns =<< Text.getLine
+  Text.putStrLn . toTsv $ name <$> header
+  Text.interact
+    (textUnlines . map (filterLine $ position <$> header) . Text.lines)
+  where
+    textUnlines = Text.intercalate "\n"
-- 
GitLab