1 {-# LANGUAGE OverloadedStrings #-} 2 3 module Snap.Internal.Parsing where 4 5 import Control.Arrow (first) 6 import Data.ByteString.Char8 (ByteString) 7 import qualified Data.ByteString.Char8 as S 8 import qualified Data.ByteString.Lazy.Char8 as L 9 import qualified Data.CaseInsensitive as CI 10 import Data.CaseInsensitive (CI) 11 import Data.Char (isAlpha, isAscii, isControl) 12 import Control.Applicative 13 import Control.Monad 14 import Data.Attoparsec.Char8 hiding (Done, many) 15 import qualified Data.Attoparsec.Char8 as Atto 16 import Data.ByteString.Nums.Careless.Int (int) 17 import Data.Int 18 import qualified Data.Vector.Unboxed as Vec 19 import Data.Vector.Unboxed (Vector) 20 import Prelude hiding (head, take, takeWhile) 21 22 23 ------------------------------------------------------------------------------ 24 fullyParse :: ByteString -> Parser a -> Either String a 25 fullyParse s p = 26 case r' of 27 (Fail _ _ e) -> Left e 28 (Partial _) -> Left "parse failed" 29 (Atto.Done _ x) -> Right x 30 where 31 r = parse p s 32 r' = feed r "" 33 34 35 ------------------------------------------------------------------------------ 36 parseNum :: Parser Int64 37 parseNum = liftM int $ Atto.takeWhile1 Atto.isDigit 38 39 40 ------------------------------------------------------------------------------ 41 -- | Parsers for different tokens in an HTTP request. 42 sp, digit, letter :: Parser Char 43 sp = char ' ' 44 digit = satisfy isDigit 45 letter = satisfy isAlpha 46 47 48 ------------------------------------------------------------------------------ 49 untilEOL :: Parser ByteString 50 untilEOL = takeWhile notend 51 where 52 notend c = not $ c == '\r' || c == '\n' 53 54 55 ------------------------------------------------------------------------------ 56 crlf :: Parser ByteString 57 crlf = string "\r\n" 58 59 60 ------------------------------------------------------------------------------ 61 -- | Parser for zero or more spaces. 62 spaces :: Parser [Char] 63 spaces = many sp 64 65 66 ------------------------------------------------------------------------------ 67 pSpaces :: Parser ByteString 68 pSpaces = takeWhile isSpace 69 70 71 ------------------------------------------------------------------------------ 72 fieldChars :: Parser ByteString 73 fieldChars = takeWhile isFieldChar 74 where 75 isFieldChar c = (Vec.!) fieldCharTable (fromEnum c) 76 77 78 ------------------------------------------------------------------------------ 79 fieldCharTable :: Vector Bool 80 fieldCharTable = Vec.generate 256 f 81 where 82 f d = let c=toEnum d in (isDigit c) || (isAlpha c) || c == '-' || c == '_' 83 84 85 ------------------------------------------------------------------------------ 86 -- | Parser for request headers. 87 pHeaders :: Parser [(ByteString, ByteString)] 88 pHeaders = many header 89 where 90 header = {-# SCC "pHeaders/header" #-} 91 liftA2 (,) 92 fieldName 93 (char ':' *> spaces *> contents) 94 95 fieldName = {-# SCC "pHeaders/fieldName" #-} 96 liftA2 S.cons letter fieldChars 97 98 contents = {-# SCC "pHeaders/contents" #-} 99 liftA2 S.append 100 (untilEOL <* crlf) 101 (continuation <|> pure S.empty) 102 103 isLeadingWS w = {-# SCC "pHeaders/isLeadingWS" #-} 104 elem w wstab 105 106 wstab = " \t" 107 108 leadingWhiteSpace = {-# SCC "pHeaders/leadingWhiteSpace" #-} 109 takeWhile1 isLeadingWS 110 111 continuation = {-# SCC "pHeaders/continuation" #-} 112 liftA2 S.cons 113 (leadingWhiteSpace *> pure ' ') 114 contents 115 116 117 ------------------------------------------------------------------------------ 118 -- unhelpfully, the spec mentions "old-style" cookies that don't have quotes 119 -- around the value. wonderful. 120 pWord :: Parser ByteString 121 pWord = pQuotedString <|> (takeWhile (/= ';')) 122 123 124 ------------------------------------------------------------------------------ 125 pQuotedString :: Parser ByteString 126 pQuotedString = q *> quotedText <* q 127 where 128 quotedText = (S.concat . reverse) <$> f [] 129 130 f soFar = do 131 t <- takeWhile qdtext 132 133 let soFar' = t:soFar 134 135 -- RFC says that backslash only escapes for <"> 136 choice [ string "\\\"" *> f ("\"" : soFar') 137 , pure soFar' ] 138 139 140 q = char '\"' 141 142 qdtext = matchAll [ isRFCText, (/= '\"'), (/= '\\') ] 143 144 145 ------------------------------------------------------------------------------ 146 {-# INLINE isRFCText #-} 147 isRFCText :: Char -> Bool 148 isRFCText = not . isControl 149 150 151 ------------------------------------------------------------------------------ 152 {-# INLINE matchAll #-} 153 matchAll :: [ Char -> Bool ] -> Char -> Bool 154 matchAll x c = and $ map ($ c) x 155 156 157 ------------------------------------------------------------------------------ 158 pAvPairs :: Parser [(ByteString, ByteString)] 159 pAvPairs = do 160 a <- pAvPair 161 b <- many (pSpaces *> char ';' *> pSpaces *> pAvPair) 162 163 return $ a:b 164 165 166 ------------------------------------------------------------------------------ 167 pAvPair :: Parser (ByteString, ByteString) 168 pAvPair = do 169 key <- pToken <* pSpaces 170 val <- liftM trim (option "" $ char '=' *> pSpaces *> pWord) 171 172 return (key, val) 173 174 175 ------------------------------------------------------------------------------ 176 pParameter :: Parser (ByteString, ByteString) 177 pParameter = do 178 key <- pToken <* pSpaces 179 val <- liftM trim (char '=' *> pSpaces *> pWord) 180 return (trim key, val) 181 182 183 ------------------------------------------------------------------------------ 184 trim :: ByteString -> ByteString 185 trim = snd . S.span isSpace . fst . S.spanEnd isSpace 186 187 188 ------------------------------------------------------------------------------ 189 pValueWithParameters :: Parser (ByteString, [(CI ByteString, ByteString)]) 190 pValueWithParameters = do 191 value <- liftM trim (pSpaces *> takeWhile (/= ';')) 192 params <- many pParam 193 return (value, map (first CI.mk) params) 194 where 195 pParam = pSpaces *> char ';' *> pSpaces *> pParameter 196 197 ------------------------------------------------------------------------------ 198 pContentTypeWithParameters :: 199 Parser (ByteString, [(CI ByteString, ByteString)]) 200 pContentTypeWithParameters = do 201 value <- liftM trim (pSpaces *> takeWhile (not . isSep)) 202 params <- many (pSpaces *> satisfy isSep *> pSpaces *> pParameter) 203 return (value, map (first CI.mk) params) 204 where 205 isSep c = c == ';' || c == ',' 206 207 ------------------------------------------------------------------------------ 208 pToken :: Parser ByteString 209 pToken = takeWhile isToken 210 211 212 ------------------------------------------------------------------------------ 213 {-# INLINE isToken #-} 214 isToken :: Char -> Bool 215 isToken c = (Vec.!) tokenTable (fromEnum c) 216 where 217 tokenTable :: Vector Bool 218 tokenTable = Vec.generate 256 (f . toEnum) 219 220 f = matchAll [ isAscii 221 , not . isControl 222 , not . isSpace 223 , not . flip elem [ '(', ')', '<', '>', '@', ',', ';' 224 , ':', '\\', '\"', '/', '[', ']' 225 , '?', '=', '{', '}' ] 226 ] 227 228 229 ------------------------------------------------------------------------------ 230 -- utility functions 231 ------------------------------------------------------------------------------ 232 233 234 ------------------------------------------------------------------------------ 235 strictize :: L.ByteString -> ByteString 236 strictize = S.concat . L.toChunks