1 {-# LANGUAGE OverloadedStrings #-} 2 3 module Text.XmlHtml.XML.Render where 4 5 import Blaze.ByteString.Builder 6 import Data.Char 7 import Data.Maybe 8 import Data.Monoid 9 import Text.XmlHtml.Common 10 11 import Data.Text (Text) 12 import qualified Data.Text as T 13 14 15 ------------------------------------------------------------------------------ 16 render :: Encoding -> Maybe DocType -> [Node] -> Builder 17 render e dt ns = byteOrder 18 `mappend` xmlDecl e 19 `mappend` docTypeDecl e dt 20 `mappend` nodes 21 where byteOrder | isUTF16 e = fromText e "\xFEFF" -- byte order mark 22 | otherwise = mempty 23 nodes | null ns = mempty 24 | otherwise = firstNode e (head ns) 25 `mappend` (mconcat $ map (node e) (tail ns)) 26 27 28 ------------------------------------------------------------------------------ 29 xmlDecl :: Encoding -> Builder 30 xmlDecl e = fromText e "<?xml version=\"1.0\" encoding=\"" 31 `mappend` fromText e (encodingName e) 32 `mappend` fromText e "\"?>\n" 33 34 35 ------------------------------------------------------------------------------ 36 docTypeDecl :: Encoding -> Maybe DocType -> Builder 37 docTypeDecl _ Nothing = mempty 38 docTypeDecl e (Just (DocType tag ext int)) = fromText e "<!DOCTYPE " 39 `mappend` fromText e tag 40 `mappend` externalID e ext 41 `mappend` internalSubset e int 42 `mappend` fromText e ">\n" 43 44 45 ------------------------------------------------------------------------------ 46 externalID :: Encoding -> ExternalID -> Builder 47 externalID _ NoExternalID = mempty 48 externalID e (System sid) = fromText e " SYSTEM " 49 `mappend` sysID e sid 50 externalID e (Public pid sid) = fromText e " PUBLIC " 51 `mappend` pubID e pid 52 `mappend` fromText e " " 53 `mappend` sysID e sid 54 55 56 ------------------------------------------------------------------------------ 57 internalSubset :: Encoding -> InternalSubset -> Builder 58 internalSubset _ NoInternalSubset = mempty 59 internalSubset e (InternalText t) = fromText e " " `mappend` fromText e t 60 61 62 ------------------------------------------------------------------------------ 63 sysID :: Encoding -> Text -> Builder 64 sysID e sid | not ("\'" `T.isInfixOf` sid) = fromText e "\'" 65 `mappend` fromText e sid 66 `mappend` fromText e "\'" 67 | not ("\"" `T.isInfixOf` sid) = fromText e "\"" 68 `mappend` fromText e sid 69 `mappend` fromText e "\"" 70 | otherwise = error "SYSTEM id is invalid" 71 72 73 ------------------------------------------------------------------------------ 74 pubID :: Encoding -> Text -> Builder 75 pubID e sid | not ("\"" `T.isInfixOf` sid) = fromText e "\"" 76 `mappend` fromText e sid 77 `mappend` fromText e "\"" 78 | otherwise = error "PUBLIC id is invalid" 79 80 81 ------------------------------------------------------------------------------ 82 node :: Encoding -> Node -> Builder 83 node e (TextNode t) = escaped "<>&" e t 84 node e (Comment t) | "--" `T.isInfixOf` t = error "Invalid comment" 85 | "-" `T.isSuffixOf` t = error "Invalid comment" 86 | otherwise = fromText e "<!--" 87 `mappend` fromText e t 88 `mappend` fromText e "-->" 89 node e (Element t a c) = element e t a c 90 91 92 ------------------------------------------------------------------------------ 93 -- | Process the first node differently to encode leading whitespace. This 94 -- lets us be sure that @parseXML@ is a left inverse to @render@. 95 firstNode :: Encoding -> Node -> Builder 96 firstNode e (Comment t) = node e (Comment t) 97 firstNode e (Element t a c) = node e (Element t a c) 98 firstNode _ (TextNode "") = mempty 99 firstNode e (TextNode t) = let (c,t') = fromJust $ T.uncons t 100 in escaped "<>& \t\r\n" e (T.singleton c) 101 `mappend` node e (TextNode t') 102 103 104 ------------------------------------------------------------------------------ 105 escaped :: [Char] -> Encoding -> Text -> Builder 106 escaped _ _ "" = mempty 107 escaped bad e t = let (p,s) = T.break (`elem` bad) t 108 r = T.uncons s 109 in fromText e p `mappend` case r of 110 Nothing -> mempty 111 Just (c,ss) -> entity e c `mappend` escaped bad e ss 112 113 114 ------------------------------------------------------------------------------ 115 entity :: Encoding -> Char -> Builder 116 entity e '&' = fromText e "&" 117 entity e '<' = fromText e "<" 118 entity e '>' = fromText e ">" 119 entity e '\"' = fromText e """ 120 entity e c = fromText e "&#" 121 `mappend` fromText e (T.pack (show (ord c))) 122 `mappend` fromText e ";" 123 124 125 ------------------------------------------------------------------------------ 126 element :: Encoding -> Text -> [(Text, Text)] -> [Node] -> Builder 127 element e t a [] = fromText e "<" 128 `mappend` fromText e t 129 `mappend` (mconcat $ map (attribute e) a) 130 `mappend` fromText e "/>" 131 element e t a c = fromText e "<" 132 `mappend` fromText e t 133 `mappend` (mconcat $ map (attribute e) a) 134 `mappend` fromText e ">" 135 `mappend` (mconcat $ map (node e) c) 136 `mappend` fromText e "</" 137 `mappend` fromText e t 138 `mappend` fromText e ">" 139 140 141 ------------------------------------------------------------------------------ 142 attribute :: Encoding -> (Text, Text) -> Builder 143 attribute e (n,v) | not ("\'" `T.isInfixOf` v) = fromText e " " 144 `mappend` fromText e n 145 `mappend` fromText e "=\'" 146 `mappend` escaped "<&" e v 147 `mappend` fromText e "\'" 148 | otherwise = fromText e " " 149 `mappend` fromText e n 150 `mappend` fromText e "=\"" 151 `mappend` escaped "<&\"" e v 152 `mappend` fromText e "\"" 153