1 {-# LANGUAGE OverloadedStrings #-} 2 {-# LANGUAGE PatternGuards #-} 3 4 module Text.XmlHtml.HTML.Render where 5 6 import Blaze.ByteString.Builder 7 import Control.Applicative 8 import Data.Maybe 9 import Data.Monoid 10 import qualified Text.Parsec as P 11 import Text.XmlHtml.Common 12 import Text.XmlHtml.TextParser 13 import Text.XmlHtml.HTML.Meta 14 import qualified Text.XmlHtml.HTML.Parse as P 15 import Text.XmlHtml.XML.Render (docTypeDecl, entity) 16 17 import Data.Text (Text) 18 import qualified Data.Text as T 19 20 import qualified Data.Set as S 21 22 ------------------------------------------------------------------------------ 23 -- | And, the rendering code. 24 render :: Encoding -> Maybe DocType -> [Node] -> Builder 25 render e dt ns = byteOrder 26 `mappend` docTypeDecl e dt 27 `mappend` nodes 28 where byteOrder | isUTF16 e = fromText e "\xFEFF" -- byte order mark 29 | otherwise = mempty 30 nodes | null ns = mempty 31 | otherwise = firstNode e (head ns) 32 `mappend` (mconcat $ map (node e) (tail ns)) 33 34 35 ------------------------------------------------------------------------------ 36 -- | HTML allows & so long as it is not "ambiguous" (i.e., looks like an 37 -- entity). So we have a special case for that. 38 escaped :: [Char] -> Encoding -> Text -> Builder 39 escaped _ _ "" = mempty 40 escaped bad e t = 41 let (p,s) = T.break (`elem` bad) t 42 r = T.uncons s 43 in fromText e p `mappend` case r of 44 Nothing 45 -> mempty 46 Just ('&',ss) | isLeft (parseText ambigAmp "" s) 47 -> fromText e "&" `mappend` escaped bad e ss 48 Just (c,ss) 49 -> entity e c `mappend` escaped bad e ss 50 where isLeft = either (const True) (const False) 51 ambigAmp = P.char '&' *> 52 (P.finishCharRef *> return () <|> P.finishEntityRef *> return ()) 53 54 55 ------------------------------------------------------------------------------ 56 node :: Encoding -> Node -> Builder 57 node e (TextNode t) = escaped "<>&" e t 58 node e (Comment t) | "--" `T.isInfixOf` t = error "Invalid comment" 59 | "-" `T.isSuffixOf` t = error "Invalid comment" 60 | otherwise = fromText e "<!--" 61 `mappend` fromText e t 62 `mappend` fromText e "-->" 63 node e (Element t a c) = 64 let tbase = T.toLower $ snd $ T.breakOnEnd ":" t 65 in element e t tbase a c 66 67 68 ------------------------------------------------------------------------------ 69 -- | Process the first node differently to encode leading whitespace. This 70 -- lets us be sure that @parseHTML@ is a left inverse to @render@. 71 firstNode :: Encoding -> Node -> Builder 72 firstNode e (Comment t) = node e (Comment t) 73 firstNode e (Element t a c) = node e (Element t a c) 74 firstNode _ (TextNode "") = mempty 75 firstNode e (TextNode t) = let (c,t') = fromJust $ T.uncons t 76 in escaped "<>& \t\r\n" e (T.singleton c) 77 `mappend` node e (TextNode t') 78 79 80 ------------------------------------------------------------------------------ 81 -- XXX: Should do something to avoid concatting large CDATA sections before 82 -- writing them to the output. 83 element :: Encoding -> Text -> Text -> [(Text, Text)] -> [Node] -> Builder 84 element e t tb a c 85 | tb `S.member` voidTags && null c = 86 fromText e "<" 87 `mappend` fromText e t 88 `mappend` (mconcat $ map (attribute e) a) 89 `mappend` fromText e " />" 90 | tb `S.member` voidTags = 91 error $ T.unpack t ++ " must be empty" 92 | tb `S.member` rawTextTags, 93 all isTextNode c, 94 let s = T.concat (map nodeText c), 95 not ("</" `T.append` t `T.isInfixOf` s) = 96 fromText e "<" 97 `mappend` fromText e t 98 `mappend` (mconcat $ map (attribute e) a) 99 `mappend` fromText e ">" 100 `mappend` fromText e s 101 `mappend` fromText e "</" 102 `mappend` fromText e t 103 `mappend` fromText e ">" 104 | tb `S.member` rawTextTags, 105 [ TextNode _ ] <- c = 106 error $ T.unpack t ++ " cannot contain text looking like its end tag" 107 | tb `S.member` rawTextTags = 108 error $ T.unpack t ++ " cannot contain child elements or comments" 109 | otherwise = 110 fromText e "<" 111 `mappend` fromText e t 112 `mappend` (mconcat $ map (attribute e) a) 113 `mappend` fromText e ">" 114 `mappend` (mconcat $ map (node e) c) 115 `mappend` fromText e "</" 116 `mappend` fromText e t 117 `mappend` fromText e ">" 118 119 120 ------------------------------------------------------------------------------ 121 attribute :: Encoding -> (Text, Text) -> Builder 122 attribute e (n,v) 123 | v == "" = 124 fromText e " " 125 `mappend` fromText e n 126 | not ("\'" `T.isInfixOf` v) = 127 fromText e " " 128 `mappend` fromText e n 129 `mappend` fromText e "=\'" 130 `mappend` escaped "&" e v 131 `mappend` fromText e "\'" 132 | otherwise = 133 fromText e " " 134 `mappend` fromText e n 135 `mappend` fromText e "=\"" 136 `mappend` escaped "&\"" e v 137 `mappend` fromText e "\"" 138