1 {-# LANGUAGE BangPatterns       #-}
    2 {-# LANGUAGE DeriveDataTypeable #-}
    3 {-# LANGUAGE OverloadedStrings  #-}
    4 {-# LANGUAGE PackageImports     #-}
    5 {-# LANGUAGE RankNTypes         #-}
    6 {-# LANGUAGE ViewPatterns       #-}
    7 
    8 module Snap.Internal.Http.Parser
    9   ( IRequest(..)
   10   , HttpParseException
   11   , parseRequest
   12   , readChunkedTransferEncoding
   13   , iterParser
   14   , parseCookie
   15   , parseUrlEncoded
   16   , strictize
   17   ) where
   18 
   19 
   20 ------------------------------------------------------------------------------
   21 import           Control.Arrow (second)
   22 import           Control.Exception
   23 import           Control.Monad (liftM)
   24 import           Control.Monad.Trans
   25 import           Data.Attoparsec hiding (many, Result(..))
   26 import           Data.Attoparsec.Enumerator
   27 import           Data.ByteString.Char8 (ByteString)
   28 import qualified Data.ByteString.Char8 as S
   29 import qualified Data.ByteString.Unsafe as S
   30 import           Data.ByteString.Internal (w2c)
   31 import qualified Data.ByteString.Lazy.Char8 as L
   32 import qualified Data.ByteString.Nums.Careless.Hex as Cvt
   33 import           Data.Char
   34 import           Data.List (foldl')
   35 import           Data.Int
   36 import           Data.Map (Map)
   37 import qualified Data.Map as Map
   38 import           Data.Maybe (catMaybes)
   39 import           Data.Typeable
   40 import           Prelude hiding (head, take, takeWhile)
   41 ----------------------------------------------------------------------------
   42 import           Snap.Internal.Http.Types
   43 import           Snap.Internal.Debug
   44 import           Snap.Internal.Iteratee.Debug
   45 import           Snap.Internal.Parsing hiding (pHeaders)
   46 import           Snap.Iteratee hiding (map, take)
   47 
   48 
   49 ------------------------------------------------------------------------------
   50 -- | an internal version of the headers part of an HTTP request
   51 data IRequest = IRequest
   52     { iMethod :: Method
   53     , iRequestUri :: ByteString
   54     , iHttpVersion :: (Int,Int)
   55     , iRequestHeaders :: [(ByteString, ByteString)]
   56     }
   57 
   58 
   59 ------------------------------------------------------------------------------
   60 instance Show IRequest where
   61     show (IRequest m u v r) =
   62         concat [ show m
   63                , " "
   64                , show u
   65                , " "
   66                , show v
   67                , " "
   68                , show r ]
   69 
   70 
   71 ------------------------------------------------------------------------------
   72 data HttpParseException = HttpParseException String deriving (Typeable, Show)
   73 instance Exception HttpParseException
   74 
   75 ------------------------------------------------------------------------------
   76 parseRequest :: (Monad m) => Iteratee ByteString m (Maybe IRequest)
   77 parseRequest = do
   78     eof <- isEOF
   79     if eof
   80       then return Nothing
   81       else do
   82         line <- pLine
   83         if S.null line
   84           then parseRequest
   85           else do
   86             let (!mStr,!s)   = bSp line
   87             let (!uri,!vStr) = bSp s
   88 
   89             !method <- methodFromString mStr
   90 
   91             let ver@(!_,!_) = pVer vStr
   92 
   93             hdrs    <- pHeaders
   94             return $ Just $ IRequest method uri ver hdrs
   95 
   96   where
   97     pVer s = if S.isPrefixOf "HTTP/" s
   98                then let (a,b) = bDot $ S.drop 5 s
   99                     in (read $ S.unpack a, read $ S.unpack b)
  100                else (1,0)
  101 
  102     isSp  = (== ' ')
  103     bSp   = splitWith isSp
  104     isDot = (== '.')
  105     bDot  = splitWith isDot
  106 
  107 
  108 ------------------------------------------------------------------------------
  109 pLine :: (Monad m) => Iteratee ByteString m ByteString
  110 pLine = continue $ k S.empty
  111   where
  112     k _ EOF = throwError $
  113               HttpParseException "parse error: expected line ending in crlf"
  114     k !pre (Chunks xs) =
  115         if S.null b
  116           then continue $ k a
  117           else yield a (Chunks [S.drop 2 b])
  118       where
  119         (!a,!b) = S.breakSubstring "\r\n" s
  120         !s      = S.append pre s'
  121         !s'     = S.concat xs
  122 
  123 
  124 ------------------------------------------------------------------------------
  125 splitWith :: (Char -> Bool) -> ByteString -> (ByteString,ByteString)
  126 splitWith !f !s = let (!a,!b) = S.break f s
  127                       !b'     = S.dropWhile f b
  128                   in (a, b')
  129 
  130 
  131 ------------------------------------------------------------------------------
  132 pHeaders :: Monad m => Iteratee ByteString m [(ByteString,ByteString)]
  133 pHeaders = do
  134     f <- go id
  135     return $! f []
  136   where
  137     go !dlistSoFar = {-# SCC "pHeaders/go" #-} do
  138         line <- pLine
  139         if S.null line
  140           then return dlistSoFar
  141           else do
  142             let (!k,!v) = pOne line
  143             vf <- pCont id
  144             let vs = vf []
  145             let !v' = S.concat (v:vs)
  146             go (dlistSoFar . ((k,v'):))
  147 
  148       where
  149         pOne s = let (k,v) = splitWith (== ':') s
  150                  in (trim k, trim v)
  151 
  152         isCont c = c == ' ' || c == '\t'
  153 
  154         pCont !dlist = do
  155             mbS  <- peek
  156             maybe (return dlist)
  157                   (\s -> if S.null s
  158                            then head >> pCont dlist
  159                            else if isCont $ w2c $ S.unsafeHead s
  160                                   then procCont dlist
  161                                   else return dlist)
  162                   mbS
  163 
  164         procCont !dlist = do
  165             line <- pLine
  166             let !t = trim line
  167             pCont (dlist . (" ":) . (t:))
  168 
  169 
  170 ------------------------------------------------------------------------------
  171 methodFromString :: (Monad m) => ByteString -> Iteratee ByteString m Method
  172 methodFromString "GET"     = return GET
  173 methodFromString "POST"    = return POST
  174 methodFromString "HEAD"    = return HEAD
  175 methodFromString "PUT"     = return PUT
  176 methodFromString "DELETE"  = return DELETE
  177 methodFromString "TRACE"   = return TRACE
  178 methodFromString "OPTIONS" = return OPTIONS
  179 methodFromString "CONNECT" = return CONNECT
  180 methodFromString s         = 
  181     throwError $ HttpParseException $ "Bad method '" ++ S.unpack s ++ "'"
  182 
  183 
  184 ------------------------------------------------------------------------------
  185 readChunkedTransferEncoding :: (MonadIO m) =>
  186                                Enumeratee ByteString ByteString m a
  187 readChunkedTransferEncoding =
  188     chunkParserToEnumeratee $
  189     iterateeDebugWrapper "pGetTransferChunk" $
  190     iterParser pGetTransferChunk
  191 
  192 
  193 ------------------------------------------------------------------------------
  194 chunkParserToEnumeratee :: (MonadIO m) =>
  195                            Iteratee ByteString m (Maybe ByteString)
  196                         -> Enumeratee ByteString ByteString m a
  197 chunkParserToEnumeratee getChunk client = do
  198     mbB <- getChunk
  199     maybe finishIt sendBS mbB
  200 
  201   where
  202     sendBS s = do
  203         step <- lift $ runIteratee $ enumBS s client
  204         chunkParserToEnumeratee getChunk step
  205 
  206     finishIt = lift $ runIteratee $ enumEOF client
  207 
  208 
  209 ------------------------------------------------------------------------------
  210 -- parse functions
  211 ------------------------------------------------------------------------------
  212 
  213 ------------------------------------------------------------------------------
  214 pGetTransferChunk :: Parser (Maybe ByteString)
  215 pGetTransferChunk = do
  216     !hex <- liftM fromHex $ (takeWhile (isHexDigit . w2c))
  217     takeTill ((== '\r') . w2c)
  218     crlf
  219     if hex <= 0
  220       then return Nothing
  221       else do
  222           x <- take hex
  223           crlf
  224           return $ Just x
  225   where
  226     fromHex :: ByteString -> Int
  227     fromHex s = Cvt.hex (L.fromChunks [s])
  228 
  229 
  230 ------------------------------------------------------------------------------
  231 -- COOKIE PARSING
  232 ------------------------------------------------------------------------------
  233 
  234 -- these definitions try to mirror RFC-2068 (the HTTP/1.1 spec) and RFC-2109
  235 -- (cookie spec): please point out any errors!
  236 
  237 ------------------------------------------------------------------------------
  238 pCookies :: Parser [Cookie]
  239 pCookies = do
  240     -- grab kvps and turn to strict bytestrings
  241     kvps <- pAvPairs
  242 
  243     return $ map toCookie $ filter (not . S.isPrefixOf "$" . fst) kvps
  244 
  245   where
  246     toCookie (nm,val) = Cookie nm val Nothing Nothing Nothing
  247 
  248 
  249 ------------------------------------------------------------------------------
  250 parseCookie :: ByteString -> Maybe [Cookie]
  251 parseCookie = parseToCompletion pCookies
  252 
  253 
  254 ------------------------------------------------------------------------------
  255 -- application/x-www-form-urlencoded
  256 ------------------------------------------------------------------------------
  257 
  258 ------------------------------------------------------------------------------
  259 parseUrlEncoded :: ByteString -> Map ByteString [ByteString]
  260 parseUrlEncoded s = foldl' (\m (k,v) -> Map.insertWith' (++) k [v] m)
  261                            Map.empty
  262                            decoded
  263   where
  264     breakApart = (second (S.drop 1)) . S.break (== '=')
  265 
  266     parts :: [(ByteString,ByteString)]
  267     parts = map breakApart $ S.split '&' s
  268 
  269     urldecode = parseToCompletion pUrlEscaped
  270 
  271     decodeOne (a,b) = do
  272         a' <- urldecode a
  273         b' <- urldecode b
  274         return (a',b')
  275 
  276     decoded = catMaybes $ map decodeOne parts
  277 
  278