1 {-# LANGUAGE OverloadedStrings #-}
    2 
    3 module Text.XmlHtml.XML.Render where
    4 
    5 import           Blaze.ByteString.Builder
    6 import           Data.Char
    7 import           Data.Maybe
    8 import           Data.Monoid
    9 import           Text.XmlHtml.Common
   10 
   11 import           Data.Text (Text)
   12 import qualified Data.Text as T
   13 
   14 
   15 ------------------------------------------------------------------------------
   16 render :: Encoding -> Maybe DocType -> [Node] -> Builder
   17 render e dt ns = byteOrder
   18        `mappend` xmlDecl e
   19        `mappend` docTypeDecl e dt
   20        `mappend` nodes
   21     where byteOrder | isUTF16 e = fromText e "\xFEFF" -- byte order mark
   22                     | otherwise = mempty
   23           nodes | null ns   = mempty
   24                 | otherwise = firstNode e (head ns)
   25                     `mappend` (mconcat $ map (node e) (tail ns))
   26 
   27 
   28 ------------------------------------------------------------------------------
   29 xmlDecl :: Encoding -> Builder
   30 xmlDecl e = fromText e "<?xml version=\"1.0\" encoding=\""
   31             `mappend` fromText e (encodingName e)
   32             `mappend` fromText e "\"?>\n"
   33 
   34 
   35 ------------------------------------------------------------------------------
   36 docTypeDecl :: Encoding -> Maybe DocType -> Builder
   37 docTypeDecl _ Nothing                      = mempty
   38 docTypeDecl e (Just (DocType tag ext int)) = fromText e "<!DOCTYPE "
   39                                    `mappend` fromText e tag
   40                                    `mappend` externalID e ext
   41                                    `mappend` internalSubset e int
   42                                    `mappend` fromText e ">\n"
   43 
   44 
   45 ------------------------------------------------------------------------------
   46 externalID :: Encoding -> ExternalID -> Builder
   47 externalID _ NoExternalID     = mempty
   48 externalID e (System sid)     = fromText e " SYSTEM "
   49                                 `mappend` sysID e sid
   50 externalID e (Public pid sid) = fromText e " PUBLIC "
   51                                 `mappend` pubID e pid
   52                                 `mappend` fromText e " "
   53                                 `mappend` sysID e sid
   54 
   55 
   56 ------------------------------------------------------------------------------
   57 internalSubset :: Encoding -> InternalSubset -> Builder
   58 internalSubset _ NoInternalSubset = mempty
   59 internalSubset e (InternalText t) = fromText e " " `mappend` fromText e t
   60 
   61 
   62 ------------------------------------------------------------------------------
   63 sysID :: Encoding -> Text -> Builder
   64 sysID e sid | not ("\'" `T.isInfixOf` sid) = fromText e "\'"
   65                                              `mappend` fromText e sid
   66                                              `mappend` fromText e "\'"
   67             | not ("\"" `T.isInfixOf` sid) = fromText e "\""
   68                                              `mappend` fromText e sid
   69                                              `mappend` fromText e "\""
   70             | otherwise               = error "SYSTEM id is invalid"
   71 
   72 
   73 ------------------------------------------------------------------------------
   74 pubID :: Encoding -> Text -> Builder
   75 pubID e sid | not ("\"" `T.isInfixOf` sid) = fromText e "\""
   76                                              `mappend` fromText e sid
   77                                              `mappend` fromText e "\""
   78             | otherwise               = error "PUBLIC id is invalid"
   79 
   80 
   81 ------------------------------------------------------------------------------
   82 node :: Encoding -> Node -> Builder
   83 node e (TextNode t)                        = escaped "<>&" e t
   84 node e (Comment t) | "--" `T.isInfixOf` t  = error "Invalid comment"
   85                    | "-" `T.isSuffixOf` t  = error "Invalid comment"
   86                    | otherwise             = fromText e "<!--"
   87                                              `mappend` fromText e t
   88                                              `mappend` fromText e "-->"
   89 node e (Element t a c)                     = element e t a c
   90 
   91 
   92 ------------------------------------------------------------------------------
   93 -- | Process the first node differently to encode leading whitespace.  This
   94 -- lets us be sure that @parseXML@ is a left inverse to @render@.
   95 firstNode :: Encoding -> Node -> Builder
   96 firstNode e (Comment t)     = node e (Comment t)
   97 firstNode e (Element t a c) = node e (Element t a c)
   98 firstNode _ (TextNode "")   = mempty
   99 firstNode e (TextNode t)    = let (c,t') = fromJust $ T.uncons t
  100                               in escaped "<>& \t\r\n" e (T.singleton c)
  101                                  `mappend` node e (TextNode t')
  102 
  103 
  104 ------------------------------------------------------------------------------
  105 escaped :: [Char] -> Encoding -> Text -> Builder
  106 escaped _   _ "" = mempty
  107 escaped bad e t  = let (p,s) = T.break (`elem` bad) t
  108                        r     = T.uncons s
  109                    in  fromText e p `mappend` case r of
  110                          Nothing     -> mempty
  111                          Just (c,ss) -> entity e c `mappend` escaped bad e ss
  112 
  113 
  114 ------------------------------------------------------------------------------
  115 entity :: Encoding -> Char -> Builder
  116 entity e '&'  = fromText e "&amp;"
  117 entity e '<'  = fromText e "&lt;"
  118 entity e '>'  = fromText e "&gt;"
  119 entity e '\"' = fromText e "&quot;"
  120 entity e c    = fromText e "&#"
  121                 `mappend` fromText e (T.pack (show (ord c)))
  122                 `mappend` fromText e ";"
  123 
  124 
  125 ------------------------------------------------------------------------------
  126 element :: Encoding -> Text -> [(Text, Text)] -> [Node] -> Builder
  127 element e t a [] = fromText e "<"
  128         `mappend` fromText e t
  129         `mappend` (mconcat $ map (attribute e) a)
  130         `mappend` fromText e "/>"
  131 element e t a c = fromText e "<"
  132         `mappend` fromText e t
  133         `mappend` (mconcat $ map (attribute e) a)
  134         `mappend` fromText e ">"
  135         `mappend` (mconcat $ map (node e) c)
  136         `mappend` fromText e "</"
  137         `mappend` fromText e t
  138         `mappend` fromText e ">"
  139 
  140 
  141 ------------------------------------------------------------------------------
  142 attribute :: Encoding -> (Text, Text) -> Builder
  143 attribute e (n,v) | not ("\'" `T.isInfixOf` v) = fromText e " "
  144                                        `mappend` fromText e n
  145                                        `mappend` fromText e "=\'"
  146                                        `mappend` escaped "<&" e v
  147                                        `mappend` fromText e "\'"
  148                   | otherwise                  = fromText e " "
  149                                        `mappend` fromText e n
  150                                        `mappend` fromText e "=\""
  151                                        `mappend` escaped "<&\"" e v
  152                                        `mappend` fromText e "\""
  153