1 ------------------------------------------------------------------------------
    2 -- | Parsers and renderers for XML and HTML 5.  Although the formats are
    3 --   treated differently, the data types used by each are the same, which
    4 --   makes it easy to write code that works with the element structure of
    5 --   either XML or HTML 5 documents.
    6 --
    7 --   Limitations:
    8 --
    9 --   * The XML parser does not parse internal DOCTYPE subsets.  They are just
   10 --     stored as blocks of text, with minimal scanning done to match quotes
   11 --     and brackets to determine the end.
   12 --
   13 --   * Since DTDs are not parsed, the XML parser fails on entity references,
   14 --     except for those defined internally.  You cannot use this library for
   15 --     parsing XML documents with entity references outside the predefined
   16 --     set.
   17 --
   18 --   * The HTML 5 parser is not a compliant HTML parser.  Instead, it is a
   19 --     parser for valid HTML 5 content.  It should only be used on content
   20 --     that you have reason to believe is probably correct, since the
   21 --     compatibility features of HTML 5 are missing.  This is the wrong
   22 --     library on which to build a web spider.
   23 --
   24 --   * Both parsers accept fragments of documents, by which is meant that
   25 --     they do not enforce the top-level structure of the document.  Files
   26 --     may contain more than one root element, for example.
   27 module Text.XmlHtml (
   28     -- * Types
   29     Document(..),
   30     Node(..),
   31     DocType(..),
   32     ExternalID(..),
   33     InternalSubset(..),
   34     Encoding(..),
   35 
   36     -- * Manipulating documents
   37     isTextNode,
   38     isComment,
   39     isElement,
   40     tagName,
   41     getAttribute,
   42     hasAttribute,
   43     setAttribute,
   44     nodeText,
   45     childNodes,
   46     childElements,
   47     childElementsTag,
   48     childElementTag,
   49     descendantNodes,
   50     descendantElements,
   51     descendantElementsTag,
   52     descendantElementTag,
   53 
   54     -- * Parsing
   55     parseXML,
   56     parseHTML,
   57 
   58     -- * Rendering
   59     render
   60     ) where
   61 
   62 ------------------------------------------------------------------------------
   63 import           Blaze.ByteString.Builder (Builder)
   64 import           Data.ByteString (ByteString)
   65 
   66 import           Text.XmlHtml.Common
   67 import           Text.XmlHtml.TextParser
   68 
   69 import qualified Text.XmlHtml.XML.Parse as XML
   70 import qualified Text.XmlHtml.XML.Render as XML
   71 
   72 import qualified Text.XmlHtml.HTML.Parse as HTML
   73 import qualified Text.XmlHtml.HTML.Render as HTML
   74 
   75 
   76 ------------------------------------------------------------------------------
   77 -- | Parses the given XML fragment.
   78 parseXML :: String
   79          -- ^ Name of document source (perhaps a filename) for error messages
   80          -> ByteString
   81          -- ^ Document contents
   82          -> Either String Document
   83          -- ^ The document or an error message
   84 parseXML = parse XML.docFragment
   85 
   86 
   87 ------------------------------------------------------------------------------
   88 -- | Parses the given HTML fragment.  This enables HTML quirks mode, which
   89 --   changes the parsing algorithm to parse valid HTML 5 documents correctly.
   90 parseHTML :: String
   91           -- ^ Name of document source (perhaps a filename) for error messages
   92           -> ByteString
   93           -- ^ Document contents
   94           -> Either String Document
   95           -- ^ The document or an error message
   96 parseHTML = parse HTML.docFragment
   97 
   98 
   99 ------------------------------------------------------------------------------
  100 -- | Renders a 'Document'.
  101 render :: Document -> Builder
  102 render (XmlDocument  e dt ns) = XML.render  e dt ns
  103 render (HtmlDocument e dt ns) = HTML.render e dt ns
  104