1 {-# LANGUAGE OverloadedStrings #-}
    2 
    3 module Snap.Internal.Parsing where
    4 
    5 import           Control.Arrow (first)
    6 import           Data.ByteString.Char8 (ByteString)
    7 import qualified Data.ByteString.Char8 as S
    8 import qualified Data.ByteString.Lazy.Char8 as L
    9 import qualified Data.CaseInsensitive as CI
   10 import           Data.CaseInsensitive (CI)
   11 import           Data.Char (isAlpha, isAscii, isControl)
   12 import           Control.Applicative
   13 import           Control.Monad
   14 import           Data.Attoparsec.Char8 hiding (Done, many)
   15 import qualified Data.Attoparsec.Char8 as Atto
   16 import           Data.ByteString.Nums.Careless.Int (int)
   17 import           Data.Int
   18 import qualified Data.Vector.Unboxed as Vec
   19 import           Data.Vector.Unboxed (Vector)
   20 import           Prelude hiding (head, take, takeWhile)
   21 
   22 
   23 ------------------------------------------------------------------------------
   24 fullyParse :: ByteString -> Parser a -> Either String a
   25 fullyParse s p =
   26     case r' of
   27       (Fail _ _ e)    -> Left e
   28       (Partial _)     -> Left "parse failed"
   29       (Atto.Done _ x) -> Right x
   30   where
   31     r  = parse p s
   32     r' = feed r ""
   33 
   34 
   35 ------------------------------------------------------------------------------
   36 parseNum :: Parser Int64
   37 parseNum = liftM int $ Atto.takeWhile1 Atto.isDigit
   38 
   39 
   40 ------------------------------------------------------------------------------
   41 -- | Parsers for different tokens in an HTTP request.
   42 sp, digit, letter :: Parser Char
   43 sp       = char ' '
   44 digit    = satisfy isDigit
   45 letter   = satisfy isAlpha
   46 
   47 
   48 ------------------------------------------------------------------------------
   49 untilEOL :: Parser ByteString
   50 untilEOL = takeWhile notend
   51   where
   52     notend c = not $ c == '\r' || c == '\n'
   53 
   54 
   55 ------------------------------------------------------------------------------
   56 crlf :: Parser ByteString
   57 crlf = string "\r\n"
   58 
   59 
   60 ------------------------------------------------------------------------------
   61 -- | Parser for zero or more spaces.
   62 spaces :: Parser [Char]
   63 spaces = many sp
   64 
   65 
   66 ------------------------------------------------------------------------------
   67 pSpaces :: Parser ByteString
   68 pSpaces = takeWhile isSpace
   69 
   70 
   71 ------------------------------------------------------------------------------
   72 fieldChars :: Parser ByteString
   73 fieldChars = takeWhile isFieldChar
   74   where
   75     isFieldChar c = (Vec.!) fieldCharTable (fromEnum c)
   76 
   77 
   78 ------------------------------------------------------------------------------
   79 fieldCharTable :: Vector Bool
   80 fieldCharTable = Vec.generate 256 f
   81   where
   82     f d = let c=toEnum d in (isDigit c) || (isAlpha c) || c == '-' || c == '_'
   83 
   84 
   85 ------------------------------------------------------------------------------
   86 -- | Parser for request headers.
   87 pHeaders :: Parser [(ByteString, ByteString)]
   88 pHeaders = many header
   89   where
   90     header = {-# SCC "pHeaders/header" #-}
   91              liftA2 (,)
   92                  fieldName
   93                  (char ':' *> spaces *> contents)
   94 
   95     fieldName = {-# SCC "pHeaders/fieldName" #-}
   96                 liftA2 S.cons letter fieldChars
   97 
   98     contents = {-# SCC "pHeaders/contents" #-}
   99                liftA2 S.append
  100                    (untilEOL <* crlf)
  101                    (continuation <|> pure S.empty)
  102 
  103     isLeadingWS w = {-# SCC "pHeaders/isLeadingWS" #-}
  104                     elem w wstab
  105 
  106     wstab = " \t"
  107 
  108     leadingWhiteSpace = {-# SCC "pHeaders/leadingWhiteSpace" #-}
  109                         takeWhile1 isLeadingWS
  110 
  111     continuation = {-# SCC "pHeaders/continuation" #-}
  112                    liftA2 S.cons
  113                           (leadingWhiteSpace *> pure ' ')
  114                           contents
  115 
  116 
  117 ------------------------------------------------------------------------------
  118 -- unhelpfully, the spec mentions "old-style" cookies that don't have quotes
  119 -- around the value. wonderful.
  120 pWord :: Parser ByteString
  121 pWord = pQuotedString <|> (takeWhile (/= ';'))
  122 
  123 
  124 ------------------------------------------------------------------------------
  125 pQuotedString :: Parser ByteString
  126 pQuotedString = q *> quotedText <* q
  127   where
  128     quotedText = (S.concat . reverse) <$> f []
  129 
  130     f soFar = do
  131         t <- takeWhile qdtext
  132 
  133         let soFar' = t:soFar
  134 
  135         -- RFC says that backslash only escapes for <">
  136         choice [ string "\\\"" *> f ("\"" : soFar')
  137                , pure soFar' ]
  138 
  139 
  140     q = char '\"'
  141 
  142     qdtext = matchAll [ isRFCText, (/= '\"'), (/= '\\') ]
  143 
  144 
  145 ------------------------------------------------------------------------------
  146 {-# INLINE isRFCText #-}
  147 isRFCText :: Char -> Bool
  148 isRFCText = not . isControl
  149 
  150 
  151 ------------------------------------------------------------------------------
  152 {-# INLINE matchAll #-}
  153 matchAll :: [ Char -> Bool ] -> Char -> Bool
  154 matchAll x c = and $ map ($ c) x
  155 
  156 
  157 ------------------------------------------------------------------------------
  158 pAvPairs :: Parser [(ByteString, ByteString)]
  159 pAvPairs = do
  160     a <- pAvPair
  161     b <- many (pSpaces *> char ';' *> pSpaces *> pAvPair)
  162 
  163     return $ a:b
  164 
  165 
  166 ------------------------------------------------------------------------------
  167 pAvPair :: Parser (ByteString, ByteString)
  168 pAvPair = do
  169     key <- pToken <* pSpaces
  170     val <- liftM trim (option "" $ char '=' *> pSpaces *> pWord)
  171 
  172     return (key, val)
  173 
  174 
  175 ------------------------------------------------------------------------------
  176 pParameter :: Parser (ByteString, ByteString)
  177 pParameter = do
  178     key <- pToken <* pSpaces
  179     val <- liftM trim (char '=' *> pSpaces *> pWord)
  180     return (trim key, val)
  181 
  182 
  183 ------------------------------------------------------------------------------
  184 trim :: ByteString -> ByteString
  185 trim = snd . S.span isSpace . fst . S.spanEnd isSpace
  186 
  187 
  188 ------------------------------------------------------------------------------
  189 pValueWithParameters :: Parser (ByteString, [(CI ByteString, ByteString)])
  190 pValueWithParameters = do
  191     value  <- liftM trim (pSpaces *> takeWhile (/= ';'))
  192     params <- many pParam
  193     return (value, map (first CI.mk) params)
  194   where
  195     pParam = pSpaces *> char ';' *> pSpaces *> pParameter
  196 
  197 ------------------------------------------------------------------------------
  198 pContentTypeWithParameters ::
  199     Parser (ByteString, [(CI ByteString, ByteString)])
  200 pContentTypeWithParameters = do
  201     value  <- liftM trim (pSpaces *> takeWhile (not . isSep))
  202     params <- many (pSpaces *> satisfy isSep *> pSpaces *> pParameter)
  203     return (value, map (first CI.mk) params)
  204   where
  205     isSep c = c == ';' || c == ','
  206 
  207 ------------------------------------------------------------------------------
  208 pToken :: Parser ByteString
  209 pToken = takeWhile isToken
  210 
  211 
  212 ------------------------------------------------------------------------------
  213 {-# INLINE isToken #-}
  214 isToken :: Char -> Bool
  215 isToken c = (Vec.!) tokenTable (fromEnum c)
  216   where
  217     tokenTable :: Vector Bool
  218     tokenTable = Vec.generate 256 (f . toEnum)
  219 
  220     f = matchAll [ isAscii
  221                  , not . isControl
  222                  , not . isSpace
  223                  , not . flip elem [ '(', ')', '<', '>', '@', ',', ';'
  224                                    , ':', '\\', '\"', '/', '[', ']'
  225                                    , '?', '=', '{', '}' ]
  226                  ]
  227 
  228 
  229 ------------------------------------------------------------------------------
  230 -- utility functions
  231 ------------------------------------------------------------------------------
  232 
  233 
  234 ------------------------------------------------------------------------------
  235 strictize :: L.ByteString -> ByteString
  236 strictize         = S.concat . L.toChunks