Skip to content
Snippets Groups Projects
Commit 50dc0c6e authored by Alice Brenon's avatar Alice Brenon
Browse files

First draft

parents
No related branches found
No related tags found
No related merge requests found
# ---> Haskell
dist
dist-*
cabal-dev
*.o
*.hi
*.chi
*.chs.h
*.dyn_o
*.dyn_hi
.hpc
.hsenv
.cabal-sandbox/
cabal.sandbox.config
*.prof
*.aux
*.hp
*.eventlog
.stack-work/
cabal.project.local
cabal.project.local~
.HTF/
.ghc.environment.*
# Revision history for InvisiXML
## 0.1.0.0 -- YYYY-mm-dd
* First version. Released on an unsuspecting world.
cabal-version: >=1.10
name: InvisiXML
version: 0.1.0.0
synopsis: XML without the tags
description:
InvisiXML provides a way to represent an XML file as two separate components:
its tree structure and its (normalized) raw text. This makes it possible to
process the content of an XML file with tools expecting raw text and to easily
combine multiple XML annotations of the same text.
homepage: https://gitlab.liris.cnrs.fr/abrenon/invisixml
-- bug-reports:
license: BSD3
license-file: LICENSE
author: Alice BRENON
maintainer: alice.brenon@ens-lyon.fr
-- copyright:
category: Text
build-type: Simple
extra-source-files: CHANGELOG.md
library
exposed-modules: Text.InvisiXML
, Text.InvisiXML.Error
, Text.InvisiXML.Namespace
, Text.XML.Light.Serializer
other-modules: Data.PositionTree
build-depends: base >=4.12 && <4.15
, containers
, mtl
, text
, xml
hs-source-dirs: lib
default-language: Haskell2010
ghc-options: -Wall
executable invisiXML
main-is: Main.hs
build-depends: base >=4.12 && <4.15
, InvisiXML
, mtl
, text
hs-source-dirs: app
default-language: Haskell2010
ghc-options: -Wall
test-suite InvisiXML-test
  default-language:    Haskell2010
  type:                exitcode-stdio-1.0
  hs-source-dirs:      test
  main-is:             MyLibTest.hs
  -- Keep the same base range as the library and executable stanzas so the
  -- test suite can be built with every compiler the package itself accepts.
  build-depends:       base >=4.12 && <4.15
  ghc-options:         -Wall
LICENSE 0 → 100644
Copyright (c) 2021, Alice BRENON
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of Alice BRENON nor the names of other
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import Distribution.Simple
-- | Entry point of the @Setup@ script: hands control over to Cabal's
-- 'defaultMain'.
main :: IO ()
main = defaultMain
module Main where
import Control.Monad.Except (runExceptT)
import qualified Data.Text.IO as Text (putStr)
import Text.InvisiXML (InvisiXML(..), parse)
import Text.XML.Light.Serializer (encode)
-- | Read an XML document from standard input, split it with 'parse', then
-- print the extracted text followed by the serialized structure on standard
-- output. A parsing error aborts the program through 'fail', using the shown
-- error as message.
main :: IO ()
main = do
  source <- getContents
  outcome <- runExceptT (parse source)
  case outcome of
    Left parseError -> fail (show parseError)
    Right result -> do
      Text.putStr (text result)
      putStr (encode (structure result))
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleContexts #-}
module Data.PositionTree (
Node(..)
, Position(..)
, PositionTree(..)
, addSibling
, empty
, merge
, offset
, origin
) where
import Control.Monad.Except (MonadError)
import Data.Map as Map (Map, alter, foldrWithKey)
import qualified Data.Map as Map (empty)
import qualified Data.Map.Merge.Strict as Map (merge, preserveMissing, zipWithMatched)
import Data.Text as Text (Text, length)
import Text.XML.Light.Serializer (FromXML(..), ToXML(..), (.=))
import Text.InvisiXML.Error (StructureError)
import Text.InvisiXML.Namespace (addAttr, ixml, setChildren)
-- | An offset, counted in characters, used as key to anchor nodes.
newtype Position = Position
  { getPosition :: Int
  }
  deriving (Show, Read, Eq, Ord)

-- | The position with offset 0.
origin :: Position
origin = Position 0

-- | Move a position forward by the length of the given text.
offset :: Text -> Position -> Position
offset input start = Position (getPosition start + Text.length input)
-- | A value attached to a 'PositionTree'.
--
-- A 'Point' only carries its value; a 'Range' additionally records the
-- position where it ends and the tree of nodes nested inside it.
data Node a
  = Point a
  | Range
      { to :: Position -- ^ position where the range ends
      , value :: a -- ^ payload of the range
      , children :: PositionTree a -- ^ nodes nested inside the range
      }
  deriving (Show)
-- | A 'Point' is serialized like its value. A 'Range' is serialized like its
-- value too, but every resulting content first receives an @ixml:to@
-- attribute holding the end offset, then gets its children replaced by the
-- serialized nested tree.
instance ToXML a => ToXML (Node a) where
  toXML (Point p) = toXML p
  toXML (Range {to = Position end, value = payload, children = subTree}) =
    decorate <$> toXML payload
    where
      decorate = setChildren (toXML subTree) . addAttr (ixml "to" .= end)
-- | Nodes indexed by the position where they start; several nodes may share
-- the same position, hence the list.
newtype PositionTree a = PositionTree (Map Position [Node a])
  deriving (Show)

-- | Serialize the nodes in key order, tagging each resulting content with an
-- @ixml:at@ attribute holding the offset of the key it was stored under.
instance ToXML a => ToXML (PositionTree a) where
  toXML (PositionTree m) = foldrWithKey prepend [] m
    where
      prepend (Position start) nodes rest =
        map (addAttr (ixml "at" .= start)) (concatMap toXML nodes) ++ rest
-- | Register a node at the given position, after any node already stored
-- at that same position.
addSibling :: Position -> Node a -> PositionTree a -> PositionTree a
addSibling at node (PositionTree m) = PositionTree (alter (Just . enqueue) at m)
  where
    -- No entry yet: start a new list; otherwise append at the end.
    enqueue = maybe [node] (++ [node])

-- | A tree without any node.
empty :: PositionTree a
empty = PositionTree Map.empty
-- | Combine two trees into one.
--
-- Positions present in only one of the trees are kept as they are. When both
-- trees hold nodes at the same position, the nodes of the second tree are
-- appended after those of the first one, the same way 'addSibling' orders
-- nodes sharing a position. (The previous draft used 'undefined' as combining
-- function, which made the merge crash as soon as a position was shared.)
--
-- NOTE(review): no overlap detection is performed yet, so this function never
-- actually raises a 'StructureError'; the constraint is kept so the signature
-- stays stable for callers.
merge :: MonadError StructureError m =>
  PositionTree a -> PositionTree a -> m (PositionTree a)
merge (PositionTree m0) (PositionTree m1) =
  return . PositionTree $
    Map.merge
      Map.preserveMissing
      Map.preserveMissing
      (Map.zipWithMatched concatSiblings)
      m0
      m1
  where
    -- The shared key itself is irrelevant, only the two node lists matter.
    concatSiblings _ nodes0 nodes1 = nodes0 ++ nodes1
{-# LANGUAGE NamedFieldPuns #-}
{-# LANGUAGE FlexibleContexts #-}
module Text.InvisiXML (
InvisiXML(..)
, Structure(..)
, merge
, parse
) where
import Control.Monad.Except (MonadError(..))
import Control.Monad.State (StateT(..), execStateT, gets, modify, state)
import Data.Char (isSpace)
import Data.List (uncons)
import Data.PositionTree as PositionTree (
Node(..), PositionTree, Position, addSibling, children, empty
, offset, origin
)
import qualified Data.PositionTree as PositionTree (merge)
import Data.Text (Text)
import qualified Data.Text as Text (concat, null, pack)
import Text.InvisiXML.Error (Occurrence(..), StructureError(..), XMLError(..))
import Text.InvisiXML.Namespace (prefix, uRI, ixml)
import Text.XML.Light (
Attr(..), Content(..), Element(..), Line, QName(..), node, showCData
, showContent
)
import Text.XML.Light.Lexer (Token(..), XmlSource, tokens)
import Text.XML.Light.Serializer (FromXML(..), ToXML(..))
-- | An XML element reduced to its name and attributes (no content).
data FrozenElement = FrozenElement
  { frozenName :: QName
  , frozenAttrs :: [Attr]
  }
  deriving (Show)

-- | A frozen element is serialized as a single element with the same name and
-- attributes, no content and no line information.
instance ToXML FrozenElement where
  toXML (FrozenElement name attrs) = [Elem element]
    where
      element =
        Element
          { elName = name
          , elAttribs = attrs
          , elContent = []
          , elLine = Nothing
          }
-- | The tree of frozen elements extracted from a document.
data Structure = Structure
  { positionTree :: PositionTree FrozenElement
  }
  deriving (Show)

-- | The structure is serialized as a single @ixml:structure@ element which
-- declares the @ixml@ namespace and contains the serialized tree.
instance ToXML Structure where
  toXML (Structure tree) = [Elem root]
    where
      root = node (ixml "structure") ([namespaceDecl], toXML tree)
      -- The @xmlns:ixml@ attribute binding the prefix to the namespace URI.
      namespaceDecl =
        Attr
          { attrKey = QName {qName = prefix, qURI = Just uRI, qPrefix = Just "xmlns"}
          , attrVal = uRI
          }
-- | Apply a transformation to the tree wrapped inside a 'Structure'.
onTree :: (PositionTree FrozenElement -> PositionTree FrozenElement) -> Structure -> Structure
onTree f wrapped = Structure (f (positionTree wrapped))
-- | The two components a document is split into.
data InvisiXML = InvisiXML
  { structure :: Structure -- ^ the tree of elements
  , text :: Text -- ^ the text found between the tags
  }
-- | A tag name together with the line the lexer reported for it.
data TagOccurrence = TagOccurrence
  { tag :: QName
  , openLine :: Line
  }

-- | The tag currently open, if any.
type Context = Maybe TagOccurrence
-- | Everything the parser keeps track of while consuming tokens.
data ParsingState = ParsingState
  { input :: [Token] -- ^ tokens not consumed yet
  , at :: Position -- ^ position reached in the extracted text
  , context :: Context -- ^ tag currently open, if any
  , stack :: [Text] -- ^ text chunks read so far, most recent first
  , subStructure :: Structure -- ^ structure built at the current nesting level
  }
-- | Initial parsing state for a stream of tokens: positioned at 'origin',
-- outside of any tag, with no text read and an empty structure.
openStream :: [Token] -> ParsingState
openStream tokenStream =
  ParsingState
    { input = tokenStream
    , at = origin
    , context = Nothing
    , stack = []
    , subStructure = Structure empty
    }
-- | Parsers are stateful computations over a 'ParsingState'.
type Parser = StateT ParsingState

-- | Take the next token off the input, if there is one. The state is left
-- untouched when the input is exhausted.
pop :: Monad m => Parser m (Maybe Token)
pop = state $ \parsingState ->
  case uncons (input parsingState) of
    Nothing -> (Nothing, parsingState)
    Just (token, rest) -> (Just token, parsingState {input = rest})
-- | Record a chunk of text: the chunk is pushed on the stack and the current
-- position moves forward by its length. Chunks which end up empty are
-- ignored.
--
-- Before being recorded the chunk is unindented: the whitespace following a
-- line break is dropped (further line breaks included), while the line break
-- itself is always kept, even when only whitespace comes after it.
appendText :: Monad m => String -> Parser m ()
appendText s = modify $ \parsingState ->
  if Text.null chunk
    then parsingState
    else
      parsingState
        { at = offset chunk (at parsingState)
        , stack = chunk : stack parsingState
        }
  where
    chunk = Text.pack (unindent s)
    unindent [] = []
    unindent ('\n' : rest) = '\n' : unindent (dropWhile isSpace rest)
    unindent (c : rest) = c : unindent rest
-- | Add a node to the structure under construction, either at the forced
-- position when one is given or at the current position otherwise.
appendNode :: Monad m => Maybe Position -> Node FrozenElement -> Parser m ()
appendNode forcedAt n = modify insert
  where
    insert parsingState =
      parsingState
        { subStructure = onTree (addSibling position n) (subStructure parsingState)
        }
      where
        position = case forcedAt of
          Nothing -> at parsingState
          Just forced -> forced
-- | Step inside a newly opened tag: the tag becomes the context and the
-- structure under construction is reset to an empty one. Returns the current
-- position along with the context and structure which were replaced, so that
-- they can later be handed back to 'restore'.
enter :: Monad m => TagOccurrence -> Parser m (Position, (Context, Structure))
enter newTag = state suspend
  where
    suspend parsingState =
      let saved = (context parsingState, subStructure parsingState)
          fresh = parsingState {context = Just newTag, subStructure = Structure empty}
       in ((at parsingState, saved), fresh)
-- | Put back a previously saved context and structure, and return the
-- position reached at that moment.
restore :: Monad m => (Context, Structure) -> Parser m Position
restore (savedContext, savedStructure) = state $ \parsingState ->
  ( at parsingState
  , parsingState {context = savedContext, subStructure = savedStructure}
  )
-- | Check that what closes the current level matches what was opened.
--
-- The argument is the closing tag just met, or 'Nothing' when the input is
-- exhausted. It is compared to the current context:
--
-- * a closing tag while nothing is open raises 'ClosingUnopen';
-- * no closing tag while a tag is open raises 'Unclosed';
-- * a closing tag whose name differs from the open one raises
--   'ClosingDoesntMatch';
-- * anything else is accepted.
checkout :: MonadError XMLError m => Context -> Parser m ()
checkout actual = gets context >>= compareWith
  where
    compareWith Nothing = case actual of
      Nothing -> return ()
      Just found ->
        throwError $ ClosingUnopen (tag found) (Line $ openLine found)
    compareWith (Just expected) = case actual of
      Nothing ->
        throwError $ Unclosed (tag expected) (Line $ openLine expected)
      Just found
        | tag expected /= tag found ->
            throwError $
              ClosingDoesntMatch (tag expected) (tag found) (Line $ openLine found)
        | otherwise -> return ()
-- | Split an XML source into its text and its structure.
--
-- The source is tokenized, then the tokens are consumed one at a time:
-- text and character references feed the text, tags feed the structure.
-- Mismatched, unopened or unclosed tags are reported as 'XMLError's.
parse :: (XmlSource s, MonadError XMLError m) => s -> m InvisiXML
parse source = collectState <$> execStateT fillStructure (openStream (tokens source))
  where
    -- The chunks were stacked most recent first, hence the 'reverse'.
    collectState finalState =
      InvisiXML
        { structure = subStructure finalState
        , text = Text.concat . reverse $ stack finalState
        }

    -- Consume tokens until the current level gets closed (or input ends).
    fillStructure :: MonadError XMLError m => Parser m ()
    fillStructure = do
      next <- pop
      case next of
        Nothing -> checkout Nothing
        Just token -> handle token

    handle :: MonadError XMLError m => Token -> Parser m ()
    handle (TokStart line qName attrs selfClosing)
      | selfClosing = do
          appendNode Nothing . Point $ FrozenElement qName attrs
          fillStructure
      | otherwise = do
          (start, saved) <- enter TagOccurrence {tag = qName, openLine = line}
          -- Returns once the matching closing tag has been checked out.
          fillStructure
          Structure inner <- gets subStructure
          end <- restore saved
          appendNode
            (Just start)
            Range {to = end, value = FrozenElement qName attrs, children = inner}
          fillStructure
    -- A closing tag ends the current level: no further token is consumed here.
    handle (TokEnd line qName) =
      checkout $ Just TagOccurrence {tag = qName, openLine = line}
    handle (TokCRef s) = do
      appendText (showContent (CRef s))
      fillStructure
    handle (TokText t) = do
      appendText (showCData t)
      fillStructure
-- | Merge two structures by merging the trees they wrap, delegating to
-- 'PositionTree.merge'.
merge :: MonadError StructureError m => Structure -> Structure -> m Structure
merge s0 s1 = Structure <$> PositionTree.merge tree0 tree1
  where
    tree0 = positionTree s0
    tree1 = positionTree s1
module Text.InvisiXML.Error (
InvisiXMLError(..)
, Occurrence(..)
, StructureError(..)
, XMLError(..)
) where
import Text.XML.Light (Line, QName)
-- | Where in the input an error was noticed: on a given line, or at the end
-- of the input.
data Occurrence
  = Line Line
  | EOF
  deriving (Show)

-- | Errors about the tags of the XML input.
data XMLError
  = -- | a closing tag was met while no tag was open
    ClosingUnopen QName Occurrence
  | -- | the closing tag (second name) differs from the open one (first name)
    ClosingDoesntMatch QName QName Occurrence
  | -- | a tag was still open when nothing was left to close it
    Unclosed QName Occurrence
  deriving (Show)
-- | Errors about structures themselves, as opposed to the XML they come from.
data StructureError
  = DifferentTextError
  | OverlappingStructureError
  deriving (Show)

-- | Any error of the library, wrapping either of the two kinds above.
data InvisiXMLError
  = XMLError XMLError
  | StructureError StructureError
  deriving (Show)
{-# LANGUAGE NamedFieldPuns #-}
module Text.InvisiXML.Namespace (
uRI
, prefix
, ixml
--, at
--, to
, addAttr
, setChildren
) where
--import Data.PositionTree (Position(..))
import Text.XML.Light (Attr(..), Content(..), Element(..), QName(..), add_attr)
-- | URI identifying the InvisiXML namespace.
uRI :: String
uRI = "https://gitlab.liris.cnrs.fr/abrenon/InvisiXML"

-- | Prefix under which names of the InvisiXML namespace are written.
prefix :: String
prefix = "ixml"
-- | Qualify a local name with the InvisiXML namespace URI and prefix.
ixml :: String -> QName
ixml localName =
  QName
    { qName = localName
    , qURI = Just uRI
    , qPrefix = Just prefix
    }
{-
at :: Position -> Attr
at (Position p) = Attr (ixml "at") (show p)
to :: Position -> Attr
to (Position p) = Attr (ixml "to") (show p)
-}
-- | Apply a transformation to the element held by a content; contents which
-- are not elements are returned unchanged.
onContent :: (Element -> Element) -> Content -> Content
onContent f content = case content of
  Elem e -> Elem (f e)
  other -> other
-- | Add an attribute to a content when it is an element.
addAttr :: Attr -> Content -> Content
addAttr attr = onContent (add_attr attr)

-- | Replace the children of a content when it is an element.
setChildren :: [Content] -> Content -> Content
setChildren newChildren = onContent replaceChildren
  where
    replaceChildren e = e {elContent = newChildren}
module Text.XML.Light.Serializer (
FromXML(..)
, ToXML(..)
, (.=)
, decode
, encode
) where
import Data.List (intercalate)
import Text.XML.Light (Attr(..), Content, QName, parseXML, ppContent)
import Text.XML.Light.Lexer (XmlSource)
-- | Values which can be read from a list of XML contents, with a message
-- explaining the failure otherwise.
class FromXML a where
  fromXML :: [Content] -> Either String a

-- | Values which can be turned into a list of XML contents.
class ToXML a where
  toXML :: a -> [Content]
-- | Pretty-print a value as XML, one content after the other, separated by
-- line breaks.
encode :: ToXML a => a -> String
encode value = intercalate "\n" (map ppContent (toXML value))
-- | Like 'eitherDecode' but the error message is discarded.
decode :: (XmlSource s, FromXML a) => s -> Maybe a
decode source = case eitherDecode source of
  Left _ -> Nothing
  Right value -> Just value

-- | Parse an XML source and read a value from the resulting contents.
eitherDecode :: (XmlSource s, FromXML a) => s -> Either String a
eitherDecode source = fromXML (parseXML source)
-- | Build an attribute from a name and a value, the value being rendered
-- with 'show'.
(.=) :: Show v => QName -> v -> Attr
key .= val = Attr {attrKey = key, attrVal = show val}
module Main (main) where
-- | Placeholder test suite: it only prints a notice that no test exists yet.
main :: IO ()
main = putStrLn "Test suite not yet implemented."
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment