Compare revisions

Changes are shown as if the source revision was being merged into the target revision (project: abrenon/outillage).
Showing 272 additions and 165 deletions
+from GEODE import uid
+
+def TwoAnnotations(text, first, second):
+    textUID = text if type(text) == str else uid(text)
+    return f"Found two annotations for {textUID}: '{first}' and '{second}'"
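These message helpers only format diagnostics; as a minimal sketch of how the new TwoAnnotations behaves on a plain-string identifier (the identifier below is made up):

# Hypothetical identifier; uid() is only called for non-string inputs.
print(TwoAnnotations('volume01-article42-paragraph7', 'History', 'Geography'))
# Found two annotations for volume01-article42-paragraph7: 'History' and 'Geography'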
-#!/usr/bin/env python3
-from Corpus import corpus
+#!/usr/bin/env -S PYTHONPATH=lib/python python3
+from GEODE import corpus
import sys

if __name__ == '__main__':
......
-#!/usr/bin/env python3
-from EDdA.classification import heatmap
-from EDdA.store import preparePath
-import GEODE.discursive as discursive
+#!/usr/bin/env -S PYTHONPATH=lib/python python3
+from GEODE.Visualisation import heatmap
+from GEODE.Classification import discursiveFunctions, knowledgeDomains
import pandas
from sklearn.metrics import classification_report, confusion_matrix
from sys import argv
@@ -9,11 +9,13 @@ from sys import argv
def evaluate(truth, predictions, outputDirectory):
    matrix = confusion_matrix(truth,
                              predictions,
-                             labels=list(discursive.functions),
+                             labels=knowledgeDomains,
+                             #labels=discursiveFunctions
                              normalize='true')
    heatmap(matrix,
-            preparePath(f"{outputDirectory}/confusion.png"),
-            labels=discursive.functions)
+            f"{outputDirectory}/confusion.png",
+            labels=knowledgeDomains)
+            #labels=discursiveFunctions)
    with open(f"{outputDirectory}/report.json", 'w') as json:
        print(classification_report(truth, predictions, output_dict=True),
              file=json)
@@ -24,4 +26,4 @@ def evaluate(truth, predictions, outputDirectory):
if __name__ == '__main__':
    truth = pandas.read_csv(argv[1], sep='\t')
    predictions = pandas.read_csv(argv[2], sep='\t')
-    evaluate(truth['paragraphFunction'], predictions['label'], argv[3])
+    evaluate(truth['super_domain'], predictions['label'], argv[3])
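The evaluation itself is standard scikit-learn; here is a self-contained sketch of what evaluate() computes, with toy labels standing in for the GEODE knowledge domains (the domain names below are invented):

from sklearn.metrics import classification_report, confusion_matrix

# Hypothetical domain labels and predictions, for illustration only.
labels = ['Geography', 'History', 'Physics']
truth = ['Geography', 'History', 'History', 'Physics']
predictions = ['Geography', 'History', 'Physics', 'Physics']

# Row-normalised confusion matrix, as passed to heatmap() above.
matrix = confusion_matrix(truth, predictions, labels=labels, normalize='true')
# Per-class precision/recall/F1 as a dict, as written to report.json above.
report = classification_report(truth, predictions, output_dict=True)
print(matrix)
print(report['History'])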
+import numpy
+import random
+import torch
+
+def set_random():
+    seed_value = 42
+    random.seed(seed_value)
+    numpy.random.seed(seed_value)
+    torch.manual_seed(seed_value)
+    torch.cuda.manual_seed_all(seed_value)
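Seeding every library is necessary but not sufficient for bit-identical GPU runs; if full reproducibility matters, recent PyTorch versions also expose the following switches (a sketch, not part of the diff above):

import torch

# Ask PyTorch to prefer deterministic kernels (raises on unsupported ops).
torch.use_deterministic_algorithms(True)
# Disable cuDNN autotuning, which may pick different kernels across runs.
torch.backends.cudnn.benchmark = False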
-#!/usr/bin/env python3
+#!/usr/bin/env -S PYTHONPATH=lib/python python3
from BERT import Classifier
-from Corpus import corpus
-import GEODE.discursive as discursive
+from GEODE import corpus, discursiveFunctions, toTSV
import pandas
from sys import argv
@@ -10,12 +10,12 @@ def rateClass(name, answer, score):
def combine(row):
    classes = [(name, row[name], row[name + 'Score'])
-               for name in discursive.functions]
+               for name in discursiveFunctions]
    return max(classes, key=lambda c: rateClass(*c))[0]

def label(modelsRoot, source):
    records = pandas.DataFrame(source.get_all('key'))
-    for name in discursive.functions:
+    for name in discursiveFunctions:
        classify = Classifier(f"{modelsRoot}/{name}")
        content = source.get_all('content')
        records[name], records[name + 'Score'] = classify(content)
@@ -23,4 +23,4 @@ def label(modelsRoot, source):
    return records

if __name__ == '__main__':
-    label(argv[1], corpus(argv[2])).to_csv(argv[3], sep='\t', index=False)
+    toTSV(argv[3], label(argv[1], corpus(argv[2])))
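rateClass is defined in a part of the file this diff does not show; a plausible stand-in makes the selection in combine() concrete (both the stand-in and the row values below are hypothetical):

def rateClass(name, answer, score):
    # Stand-in: favour accepted classes, penalise rejected ones.
    return score if answer == 'accept' else -score

row = {'A': 'accept', 'AScore': 0.7, 'B': 'reject', 'BScore': 0.9}
classes = [(name, row[name], row[name + 'Score']) for name in ['A', 'B']]
# combine() keeps the best-rated class name: here 'A' (0.7 beats -0.9).
print(max(classes, key=lambda c: rateClass(*c))[0])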
-#!/usr/bin/env python3
+#!/usr/bin/env -S PYTHONPATH=lib/python python3
from BERT import Classifier
+from GEODE import corpus, toTSV
import pandas
-from Corpus import corpus
from sys import argv

def label(classify, source, name='label'):
@@ -26,6 +27,4 @@ def label(classify, source, name='label'):
    return records

if __name__ == '__main__':
-    classify = Classifier(argv[1])
-    source = corpus(argv[2])
-    label(classify, source).to_csv(argv[3], sep='\t', index=False)
+    toTSV(argv[3], label(Classifier(argv[1]), corpus(argv[2])))
#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib" --ghc-arg="-fprint-potential-instances"
{-# LANGUAGE ExplicitNamespaces, OverloadedStrings #-}

-import Data.Aeson ((.:), FromJSON(..), Value(..), encode, withArray, withText, eitherDecode)
-import Data.Aeson.Types (prependFailure, typeMismatch)
+import Control.Applicative ((<|>))
+import Control.Monad.Except (MonadError(..), ExceptT(..), runExceptT)
+import Data.Aeson ((.:), FromJSON(..), Object, Value(..), encode, withArray, withObject, withText, eitherDecode)
+import Data.Aeson.Types (Parser, prependFailure, typeMismatch)
import Data.ByteString.Lazy as BS (null, readFile, split)
import Data.ByteString.Lazy.Char8 as BS (unpack)
+import Data.String (IsString(..))
+import Data.Text as Text (Text)
import Data.Vector as Vector (head)
-import GEODE.Metadata (type (@)(..), tsvFile)
+import GEODE.Metadata (type (@)(..), Record(..), tsvFile)
import GEODE.Metadata.ProdigyMeta (Classification(..), ClassifiedParagraph)
import System.Environment (getArgs)
import System.Script (try, syntax, warn)
+import Text.Printf (printf)
data Row = Unclassified String | Full ClassifiedParagraph

instance {-# OVERLAPS #-} FromJSON Row where
+  {-
+  parseJSON o@(Object v) = do
+      paragraphMeta@(paragraphRecord :@: _) <- v .: "meta"
+      --classified <- v .: "accept" >>= parseClassification
+      classified <- v .: "label"
+      pure $ either (\_ -> Unclassified $ debug paragraphRecord) (Full . (paragraphMeta :@:)) classified
+    where
+      parseClassification = withArray "Classification" singleValue
+      singleValue a
+        | not $ Prelude.null a =
+            withText "domain" (pure . Right . Classification) (Vector.head a)
+      singleValue _ = pure $ Left
+        ("Looks like " ++ BS.unpack (encode o) ++ " was not classified, ignoring for now")
+      debug record =
+        "Looks like " ++ uid record ++ " was not classified, ignoring for now"
-  parseJSON o@(Object v) = do
-      paragraphMeta <- v .: "meta" >>= parseJSON
-      classified <- v .: "accept" >>= parseClassification
@@ -24,10 +45,47 @@ instance {-# OVERLAPS #-} FromJSON Row where
-      singleValue _ = pure $ Left
-        ("Looks like " ++ debug ++ " was not classified, ignoring for now")
-      debug = BS.unpack $ encode o
+  -}
+  parseJSON = withObject "Row" parseRow
+    where
+      parseRow o = do
+        paragraphMeta <- o .: "meta"
+        getRow paragraphMeta
+          <$> runExceptT (classification o)
+      getRow paragraphMeta@(paragraphRecord :@: _) = either
+        (Unclassified . debug paragraphRecord)
+        (Full . (paragraphMeta :@:) . Classification)
+      classification :: Object -> ExceptT String Parser Text
+      classification o = do
+        getTextField "answer" o >>= isAccept
+        getTextField "label" o
+        --o .: "label" >>= withText "label" pure
+      --checkAnswer o = ExceptT
+      --  ((o .: "answer" >>= withText "answer" (pure . isAccept))
+      --    <|> pure (Left "answer field is missing"))
+      isAccept "accept" = pure ()
+      isAccept s = throwError $ printf "answer was \"%s\" and not \"accept\"" s
+      --isAccept s = Left $ printf "answer was \"%s\" and not \"accept\"" s
+      debug record = printf "Ignoring %s: %s" (uid record)
+      getTextField :: String -> Object -> ExceptT String Parser Text
+      getTextField name o = getField >>= ensureIsText
+        where
+          getField :: ExceptT String Parser Value
+          getField = ExceptT $
+            (Right <$> (o .: fromString name)) <|> catch "is missing"
+          ensureIsText :: Value -> ExceptT String Parser Text
+          ensureIsText v = ExceptT $
+            withText name (pure . Right) v <|> catch "is not text"
+          catch :: String -> Parser (Either String a)
+          catch = pure . Left . printf "%s field %s" name
+  {-
  parseJSON invalid =
-    prependFailure "parsing ClassifiedParagraph failed, "
+    prependFailure "parsing Row failed, "
      (typeMismatch "Object" invalid)
+  -}
logIgnored :: [Row] -> IO [ClassifiedParagraph]
logIgnored = foldr keepFull (pure [])
......
-#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib"
+#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib/haskell"
{-# LANGUAGE DeriveGeneric, ExplicitNamespaces, OverloadedStrings #-}

import Data.Aeson (ToJSON(..), defaultOptions, encode, genericToEncoding)
import Data.ByteString.Lazy.Char8 as ByteString (putStrLn)
+import Data.Csv (FromNamedRecord(..), ToNamedRecord(..))
import Data.Text (Text)
import Data.Text.IO as Text (readFile)
-import GEODE.Metadata (type (@)(..), Record(..), readNamedTsv)
-import GEODE.Metadata.ProdigyMeta
-  (Classification(..), ClassifiedParagraph, ParagraphMeta)
+import GEODE.Metadata
+  ( type (@)(..), DefaultFields(..), HasDefaultHeader(..), Record(..)
+  , readNamedTsv )
+import GEODE.Metadata.ProdigyMeta (ParagraphMeta)
+-- (Classification(..), ClassifiedParagraph, ParagraphMeta)
import GHC.Generics (Generic)
import System.Environment (getArgs)
import System.FilePath ((</>))
@@ -15,21 +18,51 @@ import System.Script (syntax, try)
data Paragraph = Paragraph
  { text :: Text
  , meta :: ParagraphMeta
-  , accept :: [Text] } deriving Generic
+  --, label :: Text
+  } deriving Generic
+
+newtype DatasetContent = DatasetContent { content :: Text } deriving Generic
+
+instance ToNamedRecord DatasetContent
+instance FromNamedRecord DatasetContent
+instance HasDefaultHeader DatasetContent where
+  defaultFields = DefaultFields ["content"]
+
+type DatasetRow = ParagraphMeta @ DatasetContent
+
+{-
+data Paragraph = Paragraph
+  { text :: Text
+  , meta :: ParagraphMeta
+  , accept :: [Text]
+  } deriving Generic
+-}

instance ToJSON Paragraph where
  toEncoding = genericToEncoding defaultOptions

-loadParagraph :: FilePath -> ClassifiedParagraph -> IO Paragraph
-loadParagraph source (meta@(paragraphRecord :@: _) :@: classification) = do
+--loadParagraph :: FilePath -> ClassifiedParagraph -> IO Paragraph
+fromFile :: FilePath -> ParagraphMeta -> IO Paragraph
+--loadParagraph source (meta@(paragraphRecord :@: _) :@: classification) = do
+fromFile source meta@(paragraphRecord :@: _) = do
  text <- Text.readFile (source </> relativePath paragraphRecord "txt")
-  pure $ Paragraph {text, meta, accept = [paragraphFunction classification]}
+  --pure $ Paragraph {text, meta, accept = []}
+  --pure $ Paragraph {text, meta, accept = [paragraphFunction classification], answer = "accept"}
+  --pure $ Paragraph {text, meta, label = paragraphFunction classification}
+  pure $ Paragraph {text, meta}
+
+fromRow :: DatasetRow -> IO Paragraph
+fromRow (meta :@: (DatasetContent {content})) =
+  pure $ Paragraph {text = content, meta}
main :: IO ()
main = getArgs >>= run
  where
-    run [inputMeta, source] =
-      try (readNamedTsv inputMeta) >>= mapM_ (printJSON source)
-    run _ = syntax "INPUT_METADATA SOURCE_DIRECTORY"
-    printJSON source parMeta =
-      loadParagraph source parMeta >>= ByteString.putStrLn . encode
+    run [dataset] = f fromRow dataset
+    run [inputMeta, source] = f (fromFile source) inputMeta
+    --  try (readNamedTsv inputMeta) >>= mapM_ (printJSON source)
+    run _ = syntax "INPUT_METADATA SOURCE_DIRECTORY | TSV_DATASET"
+    f loader input =
+      try (readNamedTsv input) >>= mapM_ (\row -> loader row >>= ByteString.putStrLn . encode)
+    --printJSON source parMeta =
+    --  loadParagraph source parMeta >>= ByteString.putStrLn . encode
-#!/usr/bin/env python3
-from GEODE import toKey
+#!/usr/bin/env -S PYTHONPATH=lib/python python3
+from GEODE import toKey, toTSV
import pandas
import JSONL
import sys

-def tsv_row(annotation):
+def toRow(annotation):
    return {'work': annotation['meta']['work'],
            'volume': annotation['meta']['volume'],
            'article': annotation['meta']['article'],
@@ -15,9 +15,9 @@ def tsv_row(annotation):
            }

def acceptedToTSV(inputJSONL, outputTSV):
-    annotations = pandas.DataFrame(
-        sorted([tsv_row(a) for a in inputJSONL], key=toKey))
-    annotations.to_csv(outputTSV, sep='\t', index=False)
+    toTSV(outputTSV, [toRow(a) for a in inputJSONL if a['answer'] == 'accept'])
+    #toTSV(outputTSV, [toRow(a) for a in inputJSONL if a['answer'] == 'accept'],
+    #      sortBy=None)

if __name__ == '__main__':
    acceptedToTSV(JSONL.load(sys.argv[1]), sys.argv[2])
-#!/usr/bin/env python3
-from Corpus import Directory
-from GEODE import toKey, uid
-import GEODE.discursive as discursive
-from GEODE.util import initialise
-import pandas
+#!/usr/bin/env -S PYTHONPATH=lib/python python3
+from GEODE import Directory, discursiveFunctions, toKey, toTSV, uid
+from split import initialise, toIterator
+from split.Error import Contradiction, NoLabelLeft, TwoAnnotations, UnknownAnswer
+from unbalanceLimiter import unbalanceLimiter
import JSONL
import sys

-def subDict(d, keys):
-    return {key: d[key] for key in keys}
-
def initialiseTexts(texts, key, annotation):
    initialise(texts,
               key,
@@ -26,7 +22,7 @@ def byLabel(annotations):
        answer = annotation['answer']
        initialise(labels, label, {'accept': [], 'reject': [], 'ignore': []})
        if answer not in labels[label]:
-            print(f"Unsupported answer '{answer}' for annotation {annotation}")
+            print(UnknownAnswer(annotation, answer))
        else:
            labels[label][answer].append(annotation)
    return labels
@@ -34,7 +30,6 @@ def byLabel(annotations):
-def erase(texts, error, key, reason):
+def erase(texts, error, key):
    error[key] = texts[key]['row']
    del texts[key]
-    print(reason)

def accept(texts, errors, label, accepted):
    for annotation in accepted:
@@ -43,8 +38,8 @@ def accept(texts, errors, label, accepted):
        initialiseTexts(texts, key, annotation)
        previous = texts[key]['accept']
        if previous is not None:
-            reason = f"Found two annotations for {uid(annotation['meta'])}: '{label}' and '{previous}'"
-            erase(texts, errors, key, reason)
+            print(TwoAnnotations(annotation, previous, label))
+            erase(texts, errors, key)
        else:
            texts[key]['accept'] = label
@@ -55,19 +50,21 @@ def reject(texts, errors, label, rejected):
        initialiseTexts(texts, key, annotation)
        previous = texts[key]['accept']
        if previous is not None and previous == label:
-            erase(texts, errors, key, f"Contradiction found for {uid(annotation['meta'])}: function {label} should be both accepted and rejected")
+            print(Contradiction(annotation, label))
+            erase(texts, errors, key)
        else:
            texts[key]['reject'].add(label)

def checkRejects(texts, errors):
    for key, text in texts.items():
        countRejected = len(text['reject'])
-        countFunctions = len(discursive.functions)
+        countFunctions = len(discursiveFunctions)
        if countRejected == countFunctions:
-            reason = f"No possible function left for {uid(text['row'])}"
-            erase(texts, errors, key, reason)
+            print(NoLabelLeft(text['row']))
+            erase(texts, errors, key)
        elif text['accept'] is None and countRejected == countFunctions - 1:
-            text['accept'] = discursive.functions.difference(text['reject']).pop()
+            free = set(discursiveFunctions).difference(text['reject'])
+            text['accept'] = free.pop()
            print(f"Inferred {uid(text['row'])} to be {text['accept']}, only discursive function left unrejected")
def byText(byLabelAnnotations):
@@ -79,19 +76,9 @@ def byText(byLabelAnnotations):
    checkRejects(texts, errors)
    return texts.values(), errors.values()

-def toTsv(filePath, data):
-    rows = sorted(data, key=toKey)
-    pandas.DataFrame(rows).to_csv(filePath, sep='\t', index=False)
-
-def toIterator(*args):
-    for arg in args:
-        for elem in arg:
-            yield elem
-
def exportCorpus(rootDirectory, texts, errors):
    corpus = Directory(rootDirectory)
-    corpus.save(sorted(toIterator([t['row'] for t in texts], errors),
-                       key=toKey))
+    corpus.save(toIterator([t['row'] for t in texts], errors))

def indexByKey(annotations):
    return {toKey(annotation['meta']): annotation for annotation in annotations}
@@ -110,16 +97,19 @@ def toRow(answer):
def exportLabels(rootDirectory, labels):
    for label, answers in labels.items():
-        toTsv(f"{rootDirectory}/{label}.tsv",
-              toIterator(map(toRow('accept'), answers['accept']),
-                         map(toRow('reject'), allRejects(labels, label))))
+        toTSV(f"{rootDirectory}/{label}.tsv",
+              unbalanceLimiter(
+                  toIterator(map(toRow('accept'), answers['accept']),
+                             map(toRow('reject'), allRejects(labels, label))),
+                  maxRatio=4,
+                  attribute='answer'))
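unbalanceLimiter comes from a project module whose source is not part of this diff; judging from the call above, a minimal sketch of the intended behaviour could look like this (the implementation below is an assumption, not the project's code):

def unbalanceLimiterSketch(rows, maxRatio, attribute):
    # Group rows by the value of `attribute` (here: 'accept' vs 'reject').
    byClass = {}
    for row in rows:
        byClass.setdefault(row[attribute], []).append(row)
    # Cap every class at maxRatio times the size of the smallest class,
    # so a 4:1 limit keeps the reject/accept imbalance bounded.
    cap = maxRatio * min(len(group) for group in byClass.values())
    return [row for group in byClass.values() for row in group[:cap]]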
def multiJSONLToDirectory(jsonl, outputDirectory):
    byLabelAnnotations = byLabel(jsonl)
    texts, errors = byText(byLabelAnnotations)
    exportCorpus(outputDirectory, texts, errors)
    if len(errors) > 0:
-        toTsv(f"{outputDirectory}/errors.tsv", errors)
+        toTSV(f"{outputDirectory}/errors.tsv", errors)
    exportLabels(outputDirectory, byLabelAnnotations)
if __name__ == '__main__':
......
-#!/usr/bin/env python3
-from Corpus import Directory, SelfContained
-from GEODE import fromKey, toKey
-import GEODE.discursive as discursive
-from prodigyAcceptedJSONLToTSV import acceptedToTSV
-from sys import argv
-
-def isAccepted(key, row):
-    return row['answer'] == 'accept'
-
-def withLabel(corpus, label):
-    return lambda key, row: dict(**corpus.full(key, row)
-                                , paragraphFunction=label)
-
-def simpleTrainOfMulti(multiDirectory, outputTSV):
-    annotations = []
-    for className in discursive.functions:
-        corpus = Directory(multiDirectory, tsv_filename=className)
-        p = withLabel(corpus, className)
-        annotations += list(corpus.get_all(projector=p, where=isAccepted))
-    output = SelfContained(outputTSV)
-    output.save(sorted(annotations, key=toKey))
-
-if __name__ == '__main__':
-    simpleTrainOfMulti(argv[1], argv[2])
-#!/usr/bin/env python3
-from Corpus import Directory
+#!/usr/bin/env -S PYTHONPATH=lib/python python3
from GEODE import toKey
-from GEODE.Error import TwoAnnotations
-from GEODE.util import initialise, parseRatio
+from split import initialise, parseRatio
+from split.Error import TwoAnnotations
import JSONL
from random import shuffle
from sys import argv, stdin
-from prodigyAcceptedJSONLToTSV import acceptedToTSV
-from prodigyMultiJSONLToDirectory import multiJSONLToDirectory

def getTexts(inputJSONL):
    texts = {}
@@ -16,19 +14,20 @@ def getTexts(inputJSONL):
        key = toKey(annotation['meta'])
        if key not in errors:
            initialise(texts, key, {'accept': None, 'reject': []})
-            if annotation['answer'] == 'accept':
-                previous = texts[key]['accept']
-                if previous is None:
-                    texts[key]['accept'] = annotation
-                else:
-                    print(TwoAnnotations(annotations['meta'],
-                                         previous['label'],
-                                         texts[key]['label']))
-                    errors.add(key)
-            else:
-                texts[key]['reject'].append(annotation)
+            sortByAnswer(texts, errors, key, annotation)
    return texts

+def sortByAnswer(texts, errors, key, annotation):
+    if annotation['answer'] == 'accept':
+        previous = texts[key]['accept']
+        if previous is None:
+            texts[key]['accept'] = annotation
+        else:
+            print(TwoAnnotations(annotation, previous['label'], annotation['label']))
+            errors.add(key)
+    else:
+        texts[key]['reject'].append(annotation)

def getTest(texts, trainRatio):
    accepted = [key for key, t in texts.items() if t['accept'] is not None]
    shuffle(accepted)
@@ -42,16 +41,22 @@ def allAnnotations(text):
    return [text['accept']] + text['reject']

def getTrain(texts, test):
-    return [annotation
-            for key in sorted(texts.keys()) if key not in test
-            for annotation in allAnnotations(texts[key])]
+    train = []
+    waste = []
+    for key in sorted(texts.keys()):
+        if key not in test:
+            train += allAnnotations(texts[key])
+        else:
+            waste += texts[key]['reject']
+    return train, waste

def splitMulti(jsonl, trainRatio, trainOutput, testOutput):
    texts = getTexts(jsonl)
    test = getTest(texts, trainRatio)
-    train = getTrain(texts, test)
-    multiJSONLToDirectory(train, trainOutput)
-    acceptedToTSV(test.values(), testOutput)
+    train, waste = getTrain(texts, test)
+    print(f"{len(waste)} negative annotations on texts in the test set have been discarded from the training set")
+    JSONL.save(trainOutput, train)
+    JSONL.save(testOutput, test.values())

if __name__ == '__main__':
    splitMulti(JSONL.load(stdin), parseRatio(argv[1]), argv[2], argv[3])
-#!/usr/bin/env python3
-from BERT import Trainer
-from LabeledData import LabeledData
-import sys
-
-if __name__ == '__main__':
-    labeled_data = LabeledData(sys.argv[1])
-    trainer = Trainer(sys.argv[2], labeled_data)
-    trainer()
-#!/usr/bin/env python3
-from BERT import BERT, Trainer
-from Corpus import Directory
-import GEODE.discursive as discursive
-from LabeledData import LabeledData
+#!/usr/bin/env -S PYTHONPATH=lib/python python3
+from BERT import BERT, LabeledData, Trainer
+from GEODE import Directory, discursiveFunctions
import os
import sys
@@ -20,5 +19,5 @@ def trainSubClassifier(trainRoot, modelRoot, className):
    trainer()

if __name__ == '__main__':
-    for className in discursive.functions:
+    for className in discursiveFunctions:
        trainSubClassifier(sys.argv[1], sys.argv[2], className)
-#!/usr/bin/env python3
-from Corpus import corpus
-from BERT import Trainer
-from LabeledData import LabeledData
+#!/usr/bin/env -S PYTHONPATH=lib/python python3
+from BERT import Trainer, LabeledData
+from GEODE import corpus
+import os
import sys

if __name__ == '__main__':
    labeled_data = LabeledData(corpus(sys.argv[1]), "paragraphFunction")
-    trainer = Trainer(sys.argv[2], labeled_data)
+    modelPath = sys.argv[2]
+    os.makedirs(modelPath, exist_ok=True)
+    trainer = Trainer(modelPath, labeled_data)
    trainer()
#!/bin/sh

-BASE_DIR="${0%/*}"
-source ${BASE_DIR}/lib.sh
+BASE_DIR="${0%%/*}"
+source ${BASE_DIR}/lib/bash.sh
if [ "$#" != 2 ]
then
@@ -16,5 +16,5 @@ fi
FILES_TSV="${TARGET}/files.tsv"

printf "book tome rank headWord name page\n" > "${FILES_TSV}"
-${BASE_DIR}/EDdA/extract-from-source.sh "${SOURCE}/EDdA/ARTFL" ${TARGET} >> "${FILES_TSV}"
-${BASE_DIR}/LGE/extract-from-source.sh "${SOURCE}/LGE/BnF" ${TARGET} >> "${FILES_TSV}"
+${BASE_DIR}/scripts/EDdA/extract-from-source.sh "${SOURCE}/EDdA/ARTFL" ${TARGET} >> "${FILES_TSV}"
+${BASE_DIR}/scripts/LGE/extract-from-source.sh "${SOURCE}/LGE/BnF" ${TARGET} >> "${FILES_TSV}"
+#!/usr/bin/env -S runhaskell --ghc-arg="-Wall" --ghc-arg="-i lib/haskell"
+
+import Control.Applicative ((<**>), (<|>))
+import Data.List (foldl')
+import Data.Text (Text)
+import Data.Text.IO as Text (getContents, writeFile)
+import Options.Applicative
+  ( Parser, execParser, flag', fullDesc, help, helper, info, long, metavar
+  , progDesc, short, strArgument, strOption)
+import System.Directory (createDirectoryIfMissing)
+import System.FilePath (takeDirectory)
+import Text.Filter (Editable(..))
+import Text.Printf (printf)
+import Text.Regex.TDFA ((=~))
+
+data Mode = Discard | StartWith | EndWith
+
+data Config = Config
+  { splitPattern :: String
+  , mode :: Mode
+  , outputPattern :: String }
+
+configParser :: Parser Config
+configParser = Config
+  <$> strOption on
+  <*> (flag' StartWith startWith <|> flag' EndWith endWith <|> pure Discard)
+  <*> strArgument outputPattern
+  where
+    on = short 'o' <> long "on" <> metavar "REGEX"
+      <> help "pattern of the lines on which to split"
+    outputPattern =
+      metavar "OUTPUT_PATTERN" <> help "pattern of the output files"
+    startWith =
+      short 's' <> long "start" <> help "a part begins with the pattern"
+    endWith = short 'e' <> long "end" <> help "a part ends with the pattern"
+
+getConfig :: IO Config
+getConfig = execParser
+  (info
+    (configParser <**> helper)
+    (fullDesc
+      <> progDesc "A tool to split a textual flow on a predefined line or prefix"))
+
+split :: Config -> [Text] -> [[Text]]
+split (Config {splitPattern, mode}) = reverse . close . foldl' aggregate ([], [])
+  where
+    close (currentPart, previousParts) = reverse currentPart:previousParts
+    aggregate tmp@(currentPart, previousParts) line
+      | line =~ splitPattern =
+          case mode of
+            Discard -> ([], close tmp)
+            StartWith -> ([line], close tmp)
+            EndWith -> ([], close (line:currentPart, previousParts))
+      | otherwise = (line:currentPart, previousParts)
+
+create :: Editable a => Config -> [a] -> IO ()
+create (Config {outputPattern}) = mapM_ createFile . zip [1..] . fmap leave
+  where
+    createFile :: (Int, Text) -> IO ()
+    createFile (i, content) =
+      let path = printf outputPattern i in do
+        createDirectoryIfMissing True (takeDirectory path)
+        Text.writeFile path content
+
+main :: IO ()
+main = do
+  config <- getConfig
+  Text.getContents >>= create config . split config . enter
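For readers more at home in Python, here is a compact sketch of the same three splitting modes (Discard, StartWith, EndWith); it illustrates the algorithm above and is not part of the diff:

import re

def split_flow(lines, pattern, mode='discard'):
    parts, current = [], []
    for line in lines:
        if re.search(pattern, line):
            if mode == 'start':      # a part begins with the matching line
                parts.append(current)
                current = [line]
            elif mode == 'end':      # a part ends with the matching line
                parts.append(current + [line])
                current = []
            else:                    # 'discard': drop the separator line
                parts.append(current)
                current = []
        else:
            current.append(line)
    return parts + [current]

print(split_flow(['a', '==', 'b', '==', 'c'], r'^==$'))
# [['a'], ['b'], ['c']]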