1 {-# LANGUAGE DeriveDataTypeable        #-}
    2 {-# LANGUAGE ExistentialQuantification #-}
    3 {-# LANGUAGE OverloadedStrings         #-}
    4 {-# LANGUAGE PackageImports            #-}
    5 {-# LANGUAGE ScopedTypeVariables       #-}
    6 
    7 module Snap.Util.GZip
    8 ( withCompression
    9 , withCompression' ) where
   10 
   11 import           Blaze.ByteString.Builder
   12 import qualified Codec.Compression.GZip as GZip
   13 import qualified Codec.Compression.Zlib as Zlib
   14 import           Control.Concurrent
   15 import           Control.Applicative hiding (many)
   16 import           Control.Exception
   17 import           Control.Monad
   18 import           Control.Monad.Trans
   19 import           Data.Attoparsec.Char8 hiding (Done)
   20 import qualified Data.ByteString.Lazy.Char8 as L
   21 import           Data.ByteString.Char8 (ByteString)
   22 import           Data.Maybe
   23 import qualified Data.Set as Set
   24 import           Data.Set (Set)
   25 import           Data.Typeable
   26 import           Prelude hiding (catch, takeWhile)
   27 
   28 ----------------------------------------------------------------------------
   29 import           Snap.Internal.Debug
   30 import           Snap.Internal.Parsing
   31 import           Snap.Iteratee
   32 import qualified Snap.Iteratee as I
   33 import           Snap.Types
   34 
   35 
   36 ------------------------------------------------------------------------------
   37 -- | Runs a 'Snap' web handler with compression if available.
   38 --
   39 -- If the client has indicated support for @gzip@ or @compress@ in its
   40 -- @Accept-Encoding@ header, and the @Content-Type@ in the response is one of
   41 -- the following types:
   42 --
   43 --   * @application/x-javascript@
   44 --
   45 --   * @text/css@
   46 --
   47 --   * @text/html@
   48 --
   49 --   * @text/javascript@
   50 --
   51 --   * @text/plain@
   52 --
   53 --   * @text/xml@
   54 --
   55 --   * @application/x-font-truetype@
   56 --
   57 -- Then the given handler's output stream will be compressed,
   58 -- @Content-Encoding@ will be set in the output headers, and the
   59 -- @Content-Length@ will be cleared if it was set. (We can't process the
   60 -- stream in O(1) space if the length is known beforehand.)
   61 --
   62 -- The wrapped handler will be run to completion, and then the 'Response'
   63 -- that's contained within the 'Snap' monad state will be passed to
   64 -- 'finishWith' to prevent further processing.
   65 --
   66 withCompression :: MonadSnap m
   67                 => m a   -- ^ the web handler to run
   68                 -> m ()
   69 withCompression = withCompression' compressibleMimeTypes
   70 
   71 
   72 ------------------------------------------------------------------------------
   73 -- | The same as 'withCompression', with control over which MIME types to
   74 -- compress.
   75 withCompression' :: MonadSnap m
   76                  => Set ByteString
   77                     -- ^ set of compressible MIME types
   78                  -> m a
   79                     -- ^ the web handler to run
   80                  -> m ()
   81 withCompression' mimeTable action = do
   82     _    <- action
   83     resp <- getResponse
   84 
   85     -- If a content-encoding is already set, do nothing. This prevents
   86     -- "withCompression $ withCompression m" from ruining your day.
   87     when (not $ isJust $ getHeader "Content-Encoding" resp) $ do
   88        let mbCt = getHeader "Content-Type" resp
   89 
   90        debug $ "withCompression', content-type is " ++ show mbCt
   91 
   92        case mbCt of
   93          (Just ct) -> when (Set.member ct mimeTable) chkAcceptEncoding
   94          _         -> return $! ()
   95 
   96 
   97     getResponse >>= finishWith
   98 
   99   where
  100     chkAcceptEncoding = do
  101         req <- getRequest
  102         debug $ "checking accept-encoding"
  103         let mbAcc = getHeader "Accept-Encoding" req
  104         debug $ "accept-encoding is " ++ show mbAcc
  105         let s = fromMaybe "" mbAcc
  106 
  107         types <- liftIO $ parseAcceptEncoding s
  108 
  109         chooseType types
  110 
  111 
  112     chooseType []               = return $! ()
  113     chooseType ("gzip":_)       = gzipCompression "gzip"
  114     chooseType ("compress":_)   = compressCompression "compress"
  115     chooseType ("x-gzip":_)     = gzipCompression "x-gzip"
  116     chooseType ("x-compress":_) = compressCompression "x-compress"
  117     chooseType (_:xs)           = chooseType xs
  118 
  119 
  120 ------------------------------------------------------------------------------
  121 -- private following
  122 ------------------------------------------------------------------------------
  123 
  124 
  125 ------------------------------------------------------------------------------
  126 compressibleMimeTypes :: Set ByteString
  127 compressibleMimeTypes = Set.fromList [ "application/x-font-truetype"
  128                                      , "application/x-javascript"
  129                                      , "text/css"
  130                                      , "text/html"
  131                                      , "text/javascript"
  132                                      , "text/plain"
  133                                      , "text/xml" ]
  134 
  135 
  136 
  137 
  138 ------------------------------------------------------------------------------
  139 gzipCompression :: MonadSnap m => ByteString -> m ()
  140 gzipCompression ce = modifyResponse f
  141   where
  142     f = setHeader "Content-Encoding" ce .
  143         setHeader "Vary" "Accept-Encoding" .
  144         clearContentLength .
  145         modifyResponseBody gcompress
  146 
  147 
  148 ------------------------------------------------------------------------------
  149 compressCompression :: MonadSnap m => ByteString -> m ()
  150 compressCompression ce = modifyResponse f
  151   where
  152     f = setHeader "Content-Encoding" ce .
  153         setHeader "Vary" "Accept-Encoding" .
  154         clearContentLength .
  155         modifyResponseBody ccompress
  156 
  157 
  158 ------------------------------------------------------------------------------
  159 -- FIXME: use zlib-bindings
  160 gcompress :: forall a . Enumerator Builder IO a
  161           -> Enumerator Builder IO  a
  162 gcompress = compressEnumerator GZip.compress
  163 
  164 
  165 ------------------------------------------------------------------------------
  166 ccompress :: forall a . Enumerator Builder IO a
  167           -> Enumerator Builder IO a
  168 ccompress = compressEnumerator Zlib.compress
  169 
  170 
  171 ------------------------------------------------------------------------------
  172 compressEnumerator :: forall a .
  173                       (L.ByteString -> L.ByteString)
  174                    -> Enumerator Builder IO a
  175                    -> Enumerator Builder IO a
  176 compressEnumerator compFunc enum' origStep = do
  177     let iter = joinI $ I.map fromByteString origStep
  178     step <- lift $ runIteratee iter
  179     writeEnd <- liftIO $ newChan
  180     readEnd  <- liftIO $ newChan
  181     tid      <- liftIO $ forkIO $ threadProc readEnd writeEnd
  182 
  183     let enum = mapEnum fromByteString toByteString enum'
  184     let outEnum = enum (f readEnd writeEnd tid step)
  185     mapIter toByteString fromByteString outEnum
  186 
  187   where
  188     --------------------------------------------------------------------------
  189     streamFinished :: Stream ByteString -> Bool
  190     streamFinished EOF        = True
  191     streamFinished (Chunks _) = False
  192 
  193 
  194     --------------------------------------------------------------------------
  195     consumeSomeOutput :: Chan (Either SomeException (Stream ByteString))
  196                       -> Step ByteString IO a
  197                       -> Iteratee ByteString IO (Step ByteString IO a)
  198     consumeSomeOutput writeEnd step = do
  199         e <- lift $ isEmptyChan writeEnd
  200         if e
  201           then return step
  202           else do
  203             ech <- lift $ readChan writeEnd
  204             either throwError
  205                    (\ch -> do
  206                         step' <- checkDone (\k -> lift $ runIteratee $ k ch)
  207                                            step
  208                         consumeSomeOutput writeEnd step')
  209                    ech
  210 
  211     --------------------------------------------------------------------------
  212     consumeRest :: Chan (Either SomeException (Stream ByteString))
  213                 -> Step ByteString IO a
  214                 -> Iteratee ByteString IO a
  215     consumeRest writeEnd step = do
  216         ech <- lift $ readChan writeEnd
  217         either throwError
  218                (\ch -> do
  219                    step' <- checkDone (\k -> lift $ runIteratee $ k ch) step
  220                    if (streamFinished ch)
  221                       then returnI step'
  222                       else consumeRest writeEnd step')
  223                ech
  224 
  225     --------------------------------------------------------------------------
  226     f _ _ _ (Error e) = Error e
  227     f _ _ _ (Yield x _) = Yield x EOF
  228     f readEnd writeEnd tid st@(Continue k) = Continue $ \ch ->
  229         case ch of
  230           EOF -> do
  231             lift $ writeChan readEnd Nothing
  232             x <- consumeRest writeEnd st
  233             lift $ killThread tid
  234             return x
  235 
  236           (Chunks xs) -> do
  237             mapM_ (lift . writeChan readEnd . Just) xs
  238             step' <- consumeSomeOutput writeEnd (Continue k)
  239             returnI $ f readEnd writeEnd tid step'
  240 
  241 
  242     --------------------------------------------------------------------------
  243     threadProc :: Chan (Maybe ByteString)
  244                -> Chan (Either SomeException (Stream ByteString))
  245                -> IO ()
  246     threadProc readEnd writeEnd = do
  247         stream <- getChanContents readEnd
  248 
  249         let bs = L.fromChunks $ streamToChunks stream
  250         let output = L.toChunks $ compFunc bs
  251 
  252         runIt output `catch` \(e::SomeException) ->
  253             writeChan writeEnd $ Left e
  254 
  255       where
  256         runIt (x:xs) = do
  257             writeChan writeEnd (toChunk x) >> runIt xs
  258 
  259         runIt []     = do
  260             writeChan writeEnd $ Right EOF
  261 
  262     --------------------------------------------------------------------------
  263     streamToChunks []            = []
  264     streamToChunks (Nothing:_)   = []
  265     streamToChunks ((Just x):xs) = x:(streamToChunks xs)
  266 
  267 
  268     --------------------------------------------------------------------------
  269     toChunk = Right . Chunks . (:[])
  270 
  271 
  272 ------------------------------------------------------------------------------
  273 -- We're not gonna bother with quality values; we'll do gzip or compress in
  274 -- that order.
  275 acceptParser :: Parser [ByteString]
  276 acceptParser = do
  277     xs <- option [] $ (:[]) <$> encoding
  278     ys <- many (char ',' *> encoding)
  279     endOfInput
  280     return $ xs ++ ys
  281   where
  282     encoding = skipSpace *> c <* skipSpace
  283 
  284     c = do
  285         x <- coding
  286         option () qvalue
  287         return x
  288 
  289     qvalue = do
  290         skipSpace
  291         char ';'
  292         skipSpace
  293         char 'q'
  294         skipSpace
  295         char '='
  296         float
  297         return ()
  298 
  299     coding = string "*" <|> takeWhile isCodingChar
  300 
  301     isCodingChar ch = isDigit ch || isAlpha_ascii ch || ch == '-' || ch == '_'
  302 
  303     float = takeWhile isDigit >>
  304             option () (char '.' >> takeWhile isDigit >> pure ())
  305 
  306 
  307 ------------------------------------------------------------------------------
  308 data BadAcceptEncodingException = BadAcceptEncodingException
  309    deriving (Typeable)
  310 
  311 
  312 ------------------------------------------------------------------------------
  313 instance Show BadAcceptEncodingException where
  314     show BadAcceptEncodingException = "bad 'accept-encoding' header"
  315 
  316 
  317 ------------------------------------------------------------------------------
  318 instance Exception BadAcceptEncodingException
  319 
  320 
  321 ------------------------------------------------------------------------------
  322 parseAcceptEncoding :: ByteString -> IO [ByteString]
  323 parseAcceptEncoding s =
  324     case r of
  325       Left _ -> throwIO BadAcceptEncodingException
  326       Right x -> return x
  327   where
  328     r = fullyParse s acceptParser
  329