1 {-# LANGUAGE OverloadedStrings #-}
    2 {-# LANGUAGE PatternGuards     #-}
    3 
    4 module Text.XmlHtml.HTML.Render where
    5 
    6 import           Blaze.ByteString.Builder
    7 import           Control.Applicative
    8 import           Data.Maybe
    9 import           Data.Monoid
   10 import qualified Text.Parsec as P
   11 import           Text.XmlHtml.Common
   12 import           Text.XmlHtml.TextParser
   13 import           Text.XmlHtml.HTML.Meta
   14 import qualified Text.XmlHtml.HTML.Parse as P
   15 import           Text.XmlHtml.XML.Render (docTypeDecl, entity)
   16 
   17 import           Data.Text (Text)
   18 import qualified Data.Text as T
   19 
   20 import qualified Data.Set as S
   21 
   22 ------------------------------------------------------------------------------
   23 -- | And, the rendering code.
   24 render :: Encoding -> Maybe DocType -> [Node] -> Builder
   25 render e dt ns = byteOrder
   26        `mappend` docTypeDecl e dt
   27        `mappend` nodes
   28     where byteOrder | isUTF16 e = fromText e "\xFEFF" -- byte order mark
   29                     | otherwise = mempty
   30           nodes | null ns   = mempty
   31                 | otherwise = firstNode e (head ns)
   32                     `mappend` (mconcat $ map (node e) (tail ns))
   33 
   34 
   35 ------------------------------------------------------------------------------
   36 -- | HTML allows & so long as it is not "ambiguous" (i.e., looks like an
   37 -- entity).  So we have a special case for that.
   38 escaped :: [Char] -> Encoding -> Text -> Builder
   39 escaped _   _ "" = mempty
   40 escaped bad e t  =
   41     let (p,s) = T.break (`elem` bad) t
   42         r     = T.uncons s
   43     in  fromText e p `mappend` case r of
   44             Nothing
   45                 -> mempty
   46             Just ('&',ss) | isLeft (parseText ambigAmp "" s)
   47                 -> fromText e "&" `mappend` escaped bad e ss
   48             Just (c,ss)
   49                 -> entity e c `mappend` escaped bad e ss
   50   where isLeft   = either (const True) (const False)
   51         ambigAmp = P.char '&' *>
   52             (P.finishCharRef *> return () <|> P.finishEntityRef *> return ())
   53 
   54 
   55 ------------------------------------------------------------------------------
   56 node :: Encoding -> Node -> Builder
   57 node e (TextNode t)                        = escaped "<>&" e t
   58 node e (Comment t) | "--" `T.isInfixOf`  t = error "Invalid comment"
   59                    | "-"  `T.isSuffixOf` t = error "Invalid comment"
   60                    | otherwise             = fromText e "<!--"
   61                                              `mappend` fromText e t
   62                                              `mappend` fromText e "-->"
   63 node e (Element t a c)                     =
   64     let tbase = T.toLower $ snd $ T.breakOnEnd ":" t
   65     in  element e t tbase a c
   66 
   67 
   68 ------------------------------------------------------------------------------
   69 -- | Process the first node differently to encode leading whitespace.  This
   70 -- lets us be sure that @parseHTML@ is a left inverse to @render@.
   71 firstNode :: Encoding -> Node -> Builder
   72 firstNode e (Comment t)     = node e (Comment t)
   73 firstNode e (Element t a c) = node e (Element t a c)
   74 firstNode _ (TextNode "")   = mempty
   75 firstNode e (TextNode t)    = let (c,t') = fromJust $ T.uncons t
   76                               in escaped "<>& \t\r\n" e (T.singleton c)
   77                                  `mappend` node e (TextNode t')
   78 
   79 
   80 ------------------------------------------------------------------------------
   81 -- XXX: Should do something to avoid concatting large CDATA sections before
   82 -- writing them to the output.
   83 element :: Encoding -> Text -> Text -> [(Text, Text)] -> [Node] -> Builder
   84 element e t tb a c
   85     | tb `S.member` voidTags && null c         =
   86         fromText e "<"
   87         `mappend` fromText e t
   88         `mappend` (mconcat $ map (attribute e) a)
   89         `mappend` fromText e " />"
   90     | tb `S.member` voidTags                   =
   91         error $ T.unpack t ++ " must be empty"
   92     | tb `S.member` rawTextTags,
   93       all isTextNode c,
   94       let s = T.concat (map nodeText c),
   95       not ("</" `T.append` t `T.isInfixOf` s) =
   96         fromText e "<"
   97         `mappend` fromText e t
   98         `mappend` (mconcat $ map (attribute e) a)
   99         `mappend` fromText e ">"
  100         `mappend` fromText e s
  101         `mappend` fromText e "</"
  102         `mappend` fromText e t
  103         `mappend` fromText e ">"
  104     | tb `S.member` rawTextTags,
  105       [ TextNode _ ] <- c                     =
  106         error $ T.unpack t ++ " cannot contain text looking like its end tag"
  107     | tb `S.member` rawTextTags                =
  108         error $ T.unpack t ++ " cannot contain child elements or comments"
  109     | otherwise =
  110         fromText e "<"
  111         `mappend` fromText e t
  112         `mappend` (mconcat $ map (attribute e) a)
  113         `mappend` fromText e ">"
  114         `mappend` (mconcat $ map (node e) c)
  115         `mappend` fromText e "</"
  116         `mappend` fromText e t
  117         `mappend` fromText e ">"
  118 
  119 
  120 ------------------------------------------------------------------------------
  121 attribute :: Encoding -> (Text, Text) -> Builder
  122 attribute e (n,v)
  123     | v == ""                    =
  124         fromText e " "
  125         `mappend` fromText e n
  126     | not ("\'" `T.isInfixOf` v) =
  127         fromText e " "
  128         `mappend` fromText e n
  129         `mappend` fromText e "=\'"
  130         `mappend` escaped "&" e v
  131         `mappend` fromText e "\'"
  132     | otherwise                  =
  133         fromText e " "
  134         `mappend` fromText e n
  135         `mappend` fromText e "=\""
  136         `mappend` escaped "&\"" e v
  137         `mappend` fromText e "\""
  138