1 {-# LANGUAGE OverloadedStrings #-}
    2 
    3 module Text.XmlHtml.XML.Parse where
    4 
    5 import           Control.Applicative
    6 import           Control.Monad
    7 import           Data.Char
    8 import           Data.List
    9 import           Data.Maybe
   10 import           Text.XmlHtml.Common
   11 import           Text.XmlHtml.TextParser
   12 
   13 import qualified Text.Parsec as P
   14 
   15 import           Data.Map (Map)
   16 import qualified Data.Map as M
   17 
   18 import           Data.Text (Text)
   19 import qualified Data.Text as T
   20 
   21 
   22 ------------------------------------------------------------------------------
   23 -- | This is my best guess as to the best rule for handling document fragments
   24 -- for processing.  It is essentially modeled after document, but allowing
   25 -- multiple nodes.
   26 docFragment :: Encoding -> Parser Document
   27 docFragment e = do
   28     (dt, nodes1) <- prolog
   29     nodes2       <- content
   30     return $ XmlDocument e dt (nodes1 ++ nodes2)
   31 
   32 
   33 ------------------------------------------------------------------------------
   34 -- Everything from here forward is translated from the XML specification.   --
   35 ------------------------------------------------------------------------------
   36 
   37 {-
   38     Map from numbered productions in the XML specification to symbols here:
   39 
   40     PROD  SPEC NAME          PARSER NAME           NOTES
   41     -----|------------------|---------------------|-------
   42     [1]   document           document
   43     [2]   Char                                     {2}
   44     [3]   S                  whiteSpace
   45     [4]   NameStartChar      isNameStartChar       {1}
   46     [4a]  NameChar           isNameChar            {1}
   47     [5]   Name               name
   48     [6]   Names              names
   49     [7]   Nmtoken            nmtoken
   50     [8]   Nmtokens           nmtokens
   51     [9]   EntityValue                              {4}
   52     [10]  AttValue           attrValue
   53     [11]  SystemLiteral      systemLiteral
   54     [12]  PubidLiteral       pubIdLiteral
   55     [13]  PubidChar          isPubIdChar           {1}
   56     [14]  CharData           charData
   57     [15]  Comment            comment
   58     [16]  PI                 processingInstruction
   59     [17]  PITarget           piTarget
   60     [18]  CDSect             cdSect
   61     [19]  CDStart            cdSect                {3}
   62     [20]  CData              cdSect                {3}
   63     [21]  CDEnd              cdSect                {3}
   64     [22]  prolog             prolog
   65     [23]  XMLDecl            xmlDecl
   66     [24]  VersionInfo        versionInfo
   67     [25]  Eq                 eq
   68     [26]  VersionNum         versionInfo           {3}
   69     [27]  Misc               misc
   70     [28]  doctypedecl        docTypeDecl
   71     [28a] DeclSep                                  {4}
   72     [28b] intSubset                                {4}
   73     [29]  markupdecl                               {4}
   74     [30]  extSubset                                {4}
   75     [31]  extSubsetDecl                            {4}
   76     [32]  SDDecl             sdDecl
   77     [39]  element            element
   78     [40]  STag               emptyOrStartTag
   79     [41]  Attribute          attribute
   80     [42]  ETag               endTag
   81     [43]  content            content
   82     [44]  EmptyElemTag       emptyOrStartTag
   83     [45]  elementDecl                              {4}
   84     [46]  contentSpec                              {4}
   85     [47]  children                                 {4}
   86     [48]  cp                                       {4}
   87     [49]  choice                                   {4}
   88     [50]  seq                                      {4}
   89     [51]  Mixed                                    {4}
   90     [52]  AttlistDecl                              {4}
   91     [53]  AttDef                                   {4}
   92     [54]  AttType                                  {4}
   93     [55]  StringType                               {4}
   94     [56]  TokenizedType                            {4}
   95     [57]  EnumeratedType                           {4}
   96     [58]  NotationType                             {4}
   97     [59]  Enumeration                              {4}
   98     [60]  DefaultDecl                              {4}
   99     [61]  conditionalSect                          {4}
  100     [62]  includeSect                              {4}
  101     [63]  ignoreSect                               {4}
  102     [64]  ignoreSectContents                       {4}
  103     [65]  Ignore                                   {4}
  104     [66]  CharRef            charRef
  105     [67]  Reference          reference
  106     [68]  EntityRef          entityRef
  107     [69]  PEReference                              {4}
  108     [70]  EntityDecl                               {4}
  109     [71]  GEDecl                                   {4}
  110     [72]  PEDecl                                   {4}
  111     [73]  EntityDef                                {4}
  112     [74]  PEDef                                    {4}
  113     [75]  ExternalID         externalID
  114     [76]  NDataDecl                                {4}
  115     [77]  TextDecl           textDecl
  116     [78]  extParsedEnt       extParsedEnt
  117     [80]  EncodingDecl       encodingDecl
  118     [81]  EncName            encodingDecl          {3}
  119     [82]  NotationDecl                             {4}
  120     [83]  PublicID                                 {4}
  121     [84]  Letter                                   {5}
  122     [85]  BaseChar                                 {5}
  123     [86]  Ideographic                              {5}
  124     [87]  CombiningChar                            {5}
  125     [88]  Digit                                    {5}
  126     [89]  Extender                                 {5}
  127 
  128     Notes:
  129         {1} - These productions match single characters, and so are
  130               implemented as predicates instead of parsers.
  131         {3} - Denotes a production which is not exposed as a top-level symbol
  132               because it is trivial and included in another definition.
  133         {4} - This module does not contain a parser for the DTD subsets, so
  134               grammar that occurs only in DTD subsets is not defined.
  135         {5} - These are orphaned productions for character classes.
  136 -}
  137 
  138 
  139 ------------------------------------------------------------------------------
  140 whiteSpace :: Parser ()
  141 whiteSpace = some (P.satisfy (`elem` " \t\r\n")) *> return ()
  142 
  143 
  144 ------------------------------------------------------------------------------
  145 isNameStartChar :: Char -> Bool
  146 isNameStartChar c | c == ':'                         = True
  147                   | c == '_'                         = True
  148                   | c >= 'a'       && c <= 'z'       = True
  149                   | c >= 'A'       && c <= 'Z'       = True
  150                   | c >= '\xc0'    && c <= '\xd6'    = True
  151                   | c >= '\xd8'    && c <= '\xf6'    = True
  152                   | c >= '\xf8'    && c <= '\x2ff'   = True
  153                   | c >= '\x370'   && c <= '\x37d'   = True
  154                   | c >= '\x37f'   && c <= '\x1fff'  = True
  155                   | c >= '\x200c'  && c <= '\x200d'  = True
  156                   | c >= '\x2070'  && c <= '\x218f'  = True
  157                   | c >= '\x2c00'  && c <= '\x2fef'  = True
  158                   | c >= '\x3001'  && c <= '\xd7ff'  = True
  159                   | c >= '\xf900'  && c <= '\xfdcf'  = True
  160                   | c >= '\xfdf0'  && c <= '\xfffd'  = True
  161                   | c >= '\x10000' && c <= '\xeffff' = True
  162                   | otherwise                        = False
  163 
  164 
  165 ------------------------------------------------------------------------------
  166 isNameChar :: Char -> Bool
  167 isNameChar c | isNameStartChar c                = True
  168              | c == '-'                         = True
  169              | c == '.'                         = True
  170              | c == '\xb7'                      = True
  171              | c >= '0'       && c <= '9'       = True
  172              | c >= '\x300'   && c <= '\x36f'   = True
  173              | c >= '\x203f'  && c <= '\x2040'  = True
  174              | otherwise                        = False
  175 
  176 
  177 ------------------------------------------------------------------------------
  178 name :: Parser Text
  179 name = do
  180     c <- P.satisfy isNameStartChar
  181     r <- takeWhile0 isNameChar
  182     return $ T.cons c r
  183 
  184 
  185 ------------------------------------------------------------------------------
  186 attrValue :: Parser Text
  187 attrValue = fmap T.concat (singleQuoted <|> doubleQuoted)
  188   where
  189     singleQuoted = P.char '\'' *> refTill "<&\'" <* P.char '\''
  190     doubleQuoted = P.char '\"' *> refTill "<&\"" <* P.char '\"'
  191     refTill end = many (takeWhile1 (not . (`elem` end)) <|> reference)
  192 
  193 
  194 ------------------------------------------------------------------------------
  195 systemLiteral :: Parser Text
  196 systemLiteral = singleQuoted <|> doubleQuoted
  197   where
  198     singleQuoted = do
  199         _ <- P.char '\''
  200         x <- takeWhile0 (not . (== '\''))
  201         _ <- P.char '\''
  202         return x
  203     doubleQuoted = do
  204         _ <- P.char '\"'
  205         x <- takeWhile0 (not . (== '\"'))
  206         _ <- P.char '\"'
  207         return x
  208 
  209 
  210 ------------------------------------------------------------------------------
  211 pubIdLiteral :: Parser Text
  212 pubIdLiteral = singleQuoted <|> doubleQuoted
  213   where
  214     singleQuoted = do
  215         _ <- P.char '\''
  216         x <- takeWhile0 (\c -> isPubIdChar c && c /= '\'')
  217         _ <- P.char '\''
  218         return x
  219     doubleQuoted = do
  220         _ <- P.char '\"'
  221         x <- takeWhile0 isPubIdChar
  222         _ <- P.char '\"'
  223         return x
  224 
  225 
  226 ------------------------------------------------------------------------------
  227 isPubIdChar :: Char -> Bool
  228 isPubIdChar c | c >= 'a' && c <= 'z'                 = True
  229               | c >= 'A' && c <= 'Z'                 = True
  230               | c >= '0' && c <= '9'                 = True
  231               | c `elem` " \r\n-\'()+,./:=?;!*#@$_%" = True
  232               | otherwise                            = False
  233 
  234 
  235 ------------------------------------------------------------------------------
  236 -- | The requirement to not contain "]]>" is for SGML compatibility.  We
  237 -- deliberately choose to not enforce it.  This makes the parser accept
  238 -- strictly more documents than a standards-compliant parser.
  239 charData :: Parser Node
  240 charData = TextNode <$> takeWhile1 (not . (`elem` "<&"))
  241 
  242 
  243 ------------------------------------------------------------------------------
  244 comment :: Parser (Maybe Node)
  245 comment = text "<!--" *> (Just <$> Comment <$> commentText) <* text "-->"
  246   where
  247     commentText = fmap T.concat $ many $
  248         nonDash <|> P.try (T.cons <$> P.char '-' <*> nonDash)
  249     nonDash = takeWhile1 (not . (== '-'))
  250 
  251 
  252 ------------------------------------------------------------------------------
  253 -- | Always returns Nothing since there's no representation for a PI in the
  254 -- document tree.
  255 processingInstruction :: Parser (Maybe Node)
  256 processingInstruction = do
  257     _ <- text "<?"
  258     _ <- piTarget
  259     _ <- emptyEnd <|> contentEnd
  260     return Nothing
  261   where
  262     emptyEnd   = P.try (P.string "?>")
  263     contentEnd = P.try $ do
  264         _ <- whiteSpace
  265         P.manyTill P.anyChar (P.try $ text "?>")
  266 
  267 ------------------------------------------------------------------------------
  268 piTarget :: Parser ()
  269 piTarget = do
  270     n <- name
  271     when (T.map toLower n == "xml") $ fail "xml declaration can't occur here"
  272 
  273 
  274 ------------------------------------------------------------------------------
  275 cdata :: [Char] -> Parser a -> Parser Node
  276 cdata cs end = TextNode <$> T.concat <$> P.manyTill part end
  277   where part = takeWhile1 (not . (`elem` cs))
  278              <|> T.singleton <$> P.anyChar
  279 
  280 
  281 ------------------------------------------------------------------------------
  282 cdSect :: Parser (Maybe Node)
  283 cdSect = Just <$> do
  284     _ <- text "<![CDATA["
  285     cdata "]" (text "]]>")
  286 
  287 
  288 ------------------------------------------------------------------------------
  289 prolog :: Parser (Maybe DocType, [Node])
  290 prolog = do
  291     _      <- optional xmlDecl
  292     nodes1 <- many misc
  293     rest   <- optional $ do
  294         dt     <- docTypeDecl
  295         nodes2 <- many misc
  296         return (dt, nodes2)
  297     case rest of
  298         Nothing           -> return (Nothing, catMaybes nodes1)
  299         Just (dt, nodes2) -> return (Just dt, catMaybes (nodes1 ++ nodes2))
  300 
  301 
  302 ------------------------------------------------------------------------------
  303 -- | Return value is the encoding, if present.
  304 xmlDecl :: Parser (Maybe Text)
  305 xmlDecl = do
  306     _ <- text "<?xml"
  307     _ <- versionInfo
  308     e <- optional encodingDecl
  309     _ <- optional sdDecl
  310     _ <- optional whiteSpace
  311     _ <- text "?>"
  312     return e
  313 
  314 
  315 ------------------------------------------------------------------------------
  316 versionInfo :: Parser ()
  317 versionInfo = do
  318     whiteSpace *> text "version" *> eq *> (singleQuoted <|> doubleQuoted)
  319   where
  320     singleQuoted = P.char '\'' *> versionNum <* P.char '\''
  321     doubleQuoted = P.char '\"' *> versionNum <* P.char '\"'
  322     versionNum   = do
  323         _ <- text "1."
  324         _ <- some (P.satisfy (\c -> c >= '0' && c <= '9'))
  325         return ()
  326 
  327 
  328 ------------------------------------------------------------------------------
  329 eq :: Parser ()
  330 eq = optional whiteSpace *> P.char '=' *> optional whiteSpace *> return ()
  331 
  332 
  333 ------------------------------------------------------------------------------
  334 misc :: Parser (Maybe Node)
  335 misc = comment <|> processingInstruction <|> (whiteSpace *> return Nothing)
  336 
  337 
  338 ------------------------------------------------------------------------------
  339 -- | Internal subset is parsed, but ignored since we don't have data types to
  340 -- store it.
  341 docTypeDecl :: Parser DocType
  342 docTypeDecl = do
  343     _      <- text "<!DOCTYPE"
  344     whiteSpace
  345     tag    <- name
  346     _      <- optional whiteSpace
  347     extid  <- externalID
  348     _      <- optional whiteSpace
  349     intsub <- internalDoctype
  350     _      <- P.char '>'
  351     return (DocType tag extid intsub)
  352 
  353 
  354 ------------------------------------------------------------------------------
  355 -- | States for the DOCTYPE internal subset state machine.
  356 data InternalDoctypeState = IDSStart
  357                           | IDSScanning Int
  358                           | IDSInQuote Int Char
  359                           | IDSCommentS1 Int
  360                           | IDSCommentS2 Int
  361                           | IDSCommentS3 Int
  362                           | IDSComment Int
  363                           | IDSCommentD1 Int
  364                           | IDSCommentE1 Int
  365 
  366 
  367 ------------------------------------------------------------------------------
  368 -- | Internal DOCTYPE subset.  We don't actually parse this; just scan through
  369 -- and look for the end, and store it in a block of text.
  370 internalDoctype :: Parser InternalSubset
  371 internalDoctype = InternalText <$> T.pack <$> scanText (dfa IDSStart)
  372               <|> return NoInternalSubset
  373   where dfa IDSStart '[' = ScanNext (dfa (IDSScanning 0))
  374         dfa IDSStart _   = ScanFail "Not a DOCTYPE internal subset"
  375         dfa (IDSInQuote n c) d
  376           | c == d                = ScanNext (dfa (IDSScanning n))
  377           | otherwise             = ScanNext (dfa (IDSInQuote n c))
  378         dfa (IDSScanning n) '['   = ScanNext (dfa (IDSScanning (n+1)))
  379         dfa (IDSScanning 0) ']'   = ScanFinish
  380         dfa (IDSScanning n) ']'   = ScanNext (dfa (IDSScanning (n-1)))
  381         dfa (IDSScanning n) '\''  = ScanNext (dfa (IDSInQuote n '\''))
  382         dfa (IDSScanning n) '\"'  = ScanNext (dfa (IDSInQuote n '\"'))
  383         dfa (IDSScanning n) '<'   = ScanNext (dfa (IDSCommentS1 n))
  384         dfa (IDSScanning n) _     = ScanNext (dfa (IDSScanning n))
  385         dfa (IDSCommentS1 n) '['  = ScanNext (dfa (IDSScanning (n+1)))
  386         dfa (IDSCommentS1 0) ']'  = ScanFinish
  387         dfa (IDSCommentS1 n) ']'  = ScanNext (dfa (IDSScanning (n-1)))
  388         dfa (IDSCommentS1 n) '\'' = ScanNext (dfa (IDSInQuote n '\''))
  389         dfa (IDSCommentS1 n) '\"' = ScanNext (dfa (IDSInQuote n '\"'))
  390         dfa (IDSCommentS1 n) '!'  = ScanNext (dfa (IDSCommentS2 n))
  391         dfa (IDSCommentS1 n) _    = ScanNext (dfa (IDSScanning n))
  392         dfa (IDSCommentS2 n) '['  = ScanNext (dfa (IDSScanning (n+1)))
  393         dfa (IDSCommentS2 0) ']'  = ScanFinish
  394         dfa (IDSCommentS2 n) ']'  = ScanNext (dfa (IDSScanning (n-1)))
  395         dfa (IDSCommentS2 n) '\'' = ScanNext (dfa (IDSInQuote n '\''))
  396         dfa (IDSCommentS2 n) '\"' = ScanNext (dfa (IDSInQuote n '\"'))
  397         dfa (IDSCommentS2 n) '-'  = ScanNext (dfa (IDSCommentS3 n))
  398         dfa (IDSCommentS2 n) _    = ScanNext (dfa (IDSScanning n))
  399         dfa (IDSCommentS3 n) '['  = ScanNext (dfa (IDSScanning (n+1)))
  400         dfa (IDSCommentS3 0) ']'  = ScanFinish
  401         dfa (IDSCommentS3 n) ']'  = ScanNext (dfa (IDSScanning (n-1)))
  402         dfa (IDSCommentS3 n) '\'' = ScanNext (dfa (IDSInQuote n '\''))
  403         dfa (IDSCommentS3 n) '\"' = ScanNext (dfa (IDSInQuote n '\"'))
  404         dfa (IDSCommentS3 n) '-'  = ScanNext (dfa (IDSComment n))
  405         dfa (IDSCommentS3 n) _    = ScanNext (dfa (IDSScanning n))
  406         dfa (IDSComment n) '-'    = ScanNext (dfa (IDSCommentD1 n))
  407         dfa (IDSComment n) _      = ScanNext (dfa (IDSComment n))
  408         dfa (IDSCommentD1 n) '-'  = ScanNext (dfa (IDSCommentE1 n))
  409         dfa (IDSCommentD1 n) _    = ScanNext (dfa (IDSComment n))
  410         dfa (IDSCommentE1 n) '>'  = ScanNext (dfa (IDSScanning n))
  411         dfa (IDSCommentE1 _) _    = ScanFail "Poorly formatted comment"
  412 
  413 
  414 ------------------------------------------------------------------------------
  415 sdDecl :: Parser ()
  416 sdDecl = do
  417     _ <- P.try $ whiteSpace *> text "standalone"
  418     eq
  419     _ <- single <|> double
  420     return ()
  421   where
  422     single = P.char '\'' *> yesno <* P.char '\''
  423     double = P.char '\"' *> yesno <* P.char '\"'
  424     yesno  = text "yes" <|> text "no"
  425 
  426 
  427 ------------------------------------------------------------------------------
  428 element :: Parser Node
  429 element = do
  430     (t,a,b) <- emptyOrStartTag
  431     if b then return (Element t a [])
  432          else nonEmptyElem t a
  433   where
  434     nonEmptyElem t a = do
  435         c <- content
  436         endTag t
  437         return (Element t a c)
  438 
  439 
  440 ------------------------------------------------------------------------------
  441 -- | Results are (tag name, attributes, isEmpty)
  442 emptyOrStartTag :: Parser (Text, [(Text, Text)], Bool)
  443 emptyOrStartTag = do
  444     t <- P.try $ P.char '<' *> name
  445     a <- many $ P.try $ do
  446         whiteSpace
  447         attribute
  448     when (hasDups a) $ fail "Duplicate attribute names in element"
  449     _ <- optional whiteSpace
  450     e <- optional (P.char '/')
  451     _ <- P.char '>'
  452     return (t, a, isJust e)
  453   where
  454     hasDups a = length (nub (map fst a)) < length a
  455 
  456 
  457 ------------------------------------------------------------------------------
  458 attribute :: Parser (Text, Text)
  459 attribute = do
  460     n <- name
  461     eq
  462     v <- attrValue
  463     return (n,v)
  464 
  465 
  466 ------------------------------------------------------------------------------
  467 endTag :: Text -> Parser ()
  468 endTag s = do
  469     _ <- text "</"
  470     t <- name
  471     when (s /= t) $ fail $ "mismatched tags: </" ++ T.unpack t ++
  472                            "> found inside <" ++ T.unpack s ++ "> tag"
  473     _ <- optional whiteSpace
  474     _ <- text ">"
  475     return ()
  476 
  477 
  478 ------------------------------------------------------------------------------
  479 content :: Parser [Node]
  480 content = do
  481     n  <- optional charData
  482     ns <- fmap concat $ many $ do
  483         s <- ((Just <$> TextNode <$> reference)
  484                <|> cdSect
  485                <|> processingInstruction
  486                <|> comment
  487                <|> fmap Just element)
  488         t <- optional charData
  489         return [s,t]
  490     return $ coalesceText $ catMaybes (n:ns)
  491   where
  492     coalesceText (TextNode s : TextNode t : ns)
  493         = coalesceText (TextNode (T.append s t) : ns)
  494     coalesceText (n:ns)
  495         = n : coalesceText ns
  496     coalesceText []
  497         = []
  498 
  499 
  500 ------------------------------------------------------------------------------
  501 charRef :: Parser Text
  502 charRef = hexCharRef <|> decCharRef
  503   where
  504     decCharRef = do
  505         _ <- text "&#"
  506         ds <- some digit
  507         _ <- P.char ';'
  508         let c = chr $ foldl' (\a b -> 10 * a + b) 0 ds
  509         when (not (isValidChar c)) $ fail $
  510             "Reference is not a valid character"
  511         return $ T.singleton c
  512       where
  513         digit = do
  514             d <- P.satisfy (\c -> c >= '0' && c <= '9')
  515             return (ord d - ord '0')
  516     hexCharRef = do
  517         _ <- text "&#x"
  518         ds <- some digit
  519         _ <- P.char ';'
  520         let c = chr $ foldl' (\a b -> 16 * a + b) 0 ds
  521         when (not (isValidChar c)) $ fail $
  522             "Reference is not a valid character"
  523         return $ T.singleton c
  524       where
  525         digit = num <|> upper <|> lower
  526         num = do
  527             d <- P.satisfy (\c -> c >= '0' && c <= '9')
  528             return (ord d - ord '0')
  529         upper = do
  530             d <- P.satisfy (\c -> c >= 'A' && c <= 'F')
  531             return (10 + ord d - ord 'A')
  532         lower = do
  533             d <- P.satisfy (\c -> c >= 'a' && c <= 'f')
  534             return (10 + ord d - ord 'a')
  535 
  536 
  537 ------------------------------------------------------------------------------
  538 reference :: Parser Text
  539 reference = charRef <|> entityRef
  540 
  541 
  542 ------------------------------------------------------------------------------
  543 entityRef :: Parser Text
  544 entityRef = do
  545     _ <- P.char '&'
  546     n <- name
  547     _ <- P.char ';'
  548     case M.lookup n entityRefLookup of
  549         Nothing -> fail $ "Unknown entity reference: " ++ T.unpack n
  550         Just t  -> return t
  551   where
  552     entityRefLookup :: Map Text Text
  553     entityRefLookup = M.fromList [
  554         ("amp", "&"),
  555         ("lt", "<"),
  556         ("gt", ">"),
  557         ("apos", "\'"),
  558         ("quot", "\"")
  559         ]
  560 
  561 
  562 ------------------------------------------------------------------------------
  563 externalID :: Parser ExternalID
  564 externalID = systemID <|> publicID <|> return NoExternalID
  565   where
  566     systemID = do
  567         _ <- text "SYSTEM"
  568         whiteSpace
  569         fmap System systemLiteral
  570     publicID = do
  571         _ <- text "PUBLIC"
  572         whiteSpace
  573         pid <- pubIdLiteral
  574         whiteSpace
  575         sid <- systemLiteral
  576         return (Public pid sid)
  577 
  578 
  579 ------------------------------------------------------------------------------
  580 encodingDecl :: Parser Text
  581 encodingDecl = do
  582     _ <- P.try $ whiteSpace *> text "encoding"
  583     _ <- eq
  584     singleQuoted <|> doubleQuoted
  585   where
  586     singleQuoted = P.char '\'' *> encName <* P.char '\''
  587     doubleQuoted = P.char '\"' *> encName <* P.char '\"'
  588     encName      = do
  589         c  <- P.satisfy isEncStart
  590         cs <- takeWhile0 isEnc
  591         return (T.cons c cs)
  592     isEncStart c | c >= 'A' && c <= 'Z' = True
  593                  | c >= 'a' && c <= 'z' = True
  594                  | otherwise = False
  595     isEnc      c | c >= 'A' && c <= 'Z' = True
  596                  | c >= 'a' && c <= 'z' = True
  597                  | c >= '0' && c <= '9' = True
  598                  | c `elem` "._-"       = True
  599                  | otherwise = False
  600