1 {-# LANGUAGE BangPatterns      #-}
    2 {-# LANGUAGE OverloadedStrings #-}
    3 {-# LANGUAGE RankNTypes        #-}
    4 
    5 module Snap.Internal.Iteratee.BoyerMooreHorspool
    6   ( bmhEnumeratee
    7   , MatchInfo(..) )
    8   where
    9 
   10 import           Control.Monad.State
   11 import qualified Data.ByteString as S
   12 import           Data.ByteString (ByteString)
   13 import           Data.ByteString.Unsafe as S
   14 import           Data.Enumerator hiding (head, filter, last, map)
   15 import qualified Data.Enumerator.List as EL
   16 import           Data.Int
   17 import qualified Data.Vector.Unboxed as V
   18 import qualified Data.Vector.Unboxed.Mutable  as MV
   19 import           Prelude               hiding (head, last)
   20 
   21 
   22 --{-# INLINE debug #-}
   23 --debug :: MonadIO m => String -> m ()
   24 --debug s = liftIO $ putStrLn s
   25 --debug _ = return ()
   26 
   27 ------------------------------------------------------------------------------
   28 data MatchInfo = Match !ByteString
   29                | NoMatch !ByteString
   30   deriving (Show)
   31 
   32 
   33 -- We return strict bytestring because we always expect a chunk to be bigger
   34 -- than the needle
   35 lookahead :: (MonadIO m) =>
   36              Int
   37           -> Iteratee ByteString m (Either ByteString ByteString)
   38 lookahead n = go id n
   39   where
   40     go !dlist !k = do
   41         EL.head >>= maybe
   42                         (do
   43                             let !ls = S.concat $ dlist []
   44                             -- debug $ "lookahead " ++ show n
   45                             --  ++ " failing, returning " ++ show ls
   46 
   47                             return $ Left ls)
   48                         (\x -> do
   49                              let !l  = S.length x
   50                              let !r  = k - l
   51                              let !d' = dlist . (x:)
   52 
   53                              if r <= 0
   54                                then do
   55                                    let !ls = S.concat $ d' []
   56                                    -- debug $ "lookahead " ++ show n
   57                                    --  ++ " successfully returning "
   58                                    --  ++ show ls
   59                                    return $ Right $ ls
   60                                else go d' r)
   61 {-# INLINE lookahead #-}
   62 
   63 matches :: ByteString     -- ^ needle
   64         -> Int            -- ^ needle start
   65         -> Int            -- ^ needle end (inclusive)
   66         -> ByteString     -- ^ haystack
   67         -> Int            -- ^ haystack start
   68         -> Int            -- ^ haystack end (inclusive)
   69         -> Bool
   70 matches !needle !nstart !nend' !haystack !hstart !hend' =
   71     go nend' hend'
   72   where
   73     go !nend !hend =
   74         if nend < nstart || hend < hstart
   75           then True
   76           else let !nc = S.unsafeIndex needle nend
   77                    !hc = S.unsafeIndex haystack hend
   78                in if nc /= hc
   79                     then False
   80                     else go (nend-1) (hend-1)
   81 {-# INLINE matches #-}
   82 
   83 
   84 bmhEnumeratee :: (MonadIO m) =>
   85                  ByteString
   86               -> Step MatchInfo m a
   87               -> Iteratee ByteString m (Step MatchInfo m a)
   88 bmhEnumeratee needle _step = do
   89     -- debug $ "boyermoore: needle=" ++ show needle
   90     cDone _step iter
   91   where
   92     {-# INLINE cDone #-}
   93     cDone (Continue k) f = f k
   94     cDone step _ = yield step (Chunks [])
   95 
   96 
   97     iter !k = {-# SCC "bmh/iter" #-} do
   98         lookahead nlen >>= either (finishAndEOF k . (:[]))
   99                                   (startSearch k)
  100 
  101     finishAndEOF k xs = {-# SCC "finishAndEOF" #-} do
  102         -- debug $ "finishAndEOF, returning NoMatch for " ++ show xs
  103         step <- lift $ runIteratee $ k $
  104                 Chunks (map NoMatch $ filter (not . S.null) xs)
  105         cDone step (\k' -> lift $ runIteratee $ k' EOF)
  106 
  107 
  108     startSearch !k !haystack = {-# SCC "startSearch" #-} do
  109         -- debug $ "startsearch: " ++ show haystack
  110         if S.null haystack
  111            then lookahead nlen >>=
  112                 either (\s -> finishAndEOF k [s])
  113                        (startSearch k)
  114            else go 0
  115       where
  116         !hlen = S.length haystack
  117 
  118         go !hidx
  119           | hend >= hlen = crossBound hidx
  120           | otherwise = {-# SCC "go" #-} do
  121               let match = matches needle 0 last haystack hidx hend
  122               -- debug $ "go " ++ show hidx ++ ", hend=" ++ show hend
  123               --           ++ ", match was " ++ show match
  124               if match
  125                 then {-# SCC "go/match" #-} do
  126                   let !nomatch = S.take hidx haystack
  127                   let !aftermatch = S.drop (hend+1) haystack
  128 
  129                   step <- if not $ S.null nomatch
  130                             then lift $ runIteratee $ k $ Chunks [NoMatch nomatch]
  131                             else return $ Continue k
  132 
  133                   cDone step $ \k' -> do
  134                       step' <- lift $ runIteratee $ k' $ Chunks [Match needle]
  135                       cDone step' $ \k'' -> startSearch k'' aftermatch
  136                 else {-# SCC "go/nomatch" #-} do
  137                   -- skip ahead
  138                   let c = S.unsafeIndex haystack hend
  139                   let !skip = V.unsafeIndex table $ fromEnum c
  140                   go (hidx + skip)
  141           where
  142             !hend = hidx + nlen - 1
  143 
  144         mkCoeff hidx = let !ll = hlen - hidx
  145                            !nm = nlen - ll
  146                        in (ll,nm)
  147                                         
  148         crossBound !hidx0 = {-# SCC "crossBound" #-} do
  149             let (!leftLen, needMore) = mkCoeff hidx0
  150 
  151             lookahead needMore >>=
  152              either (\s -> finishAndEOF k [haystack, s])
  153                     (runNext hidx0 leftLen needMore)
  154           where
  155             runNext !hidx !leftLen !needMore !nextHaystack = do
  156                let match1 = matches needle leftLen last
  157                                     nextHaystack 0 (needMore-1)
  158                let match2 = matches needle 0 (leftLen-1)
  159                                     haystack hidx (hlen-1)
  160 
  161                -- debug $ "crossbound match1=" ++ show match1
  162                --           ++ " match2=" ++ show match2
  163 
  164                if match1 && match2
  165                  then {-# SCC "crossBound/match" #-} do
  166                    let !nomatch = S.take hidx haystack
  167                    let !aftermatch = S.drop needMore nextHaystack
  168 
  169                    -- FIXME: merge this code w/ above
  170                    step <- if not $ S.null nomatch
  171                              then lift $ runIteratee $ k $
  172                                   Chunks [NoMatch nomatch]
  173                              else return $ Continue k
  174 
  175                    -- debug $ "matching"
  176                    cDone step $ \k' -> do
  177                        step' <- lift $ runIteratee $ k' $
  178                                 Chunks [Match needle]
  179                        cDone step' $ \k'' ->
  180                            startSearch k'' aftermatch
  181 
  182                  else {-# SCC "crossBound/nomatch" #-} do
  183                    let c = S.unsafeIndex nextHaystack $ needMore-1
  184                    let p = V.unsafeIndex table (fromEnum c)
  185 
  186                    -- debug $ "p was " ++ show p ++ ", ll=" ++ show leftLen
  187                    if p < leftLen
  188                      then do
  189                        let !hidx' = hidx+p
  190                        let (!leftLen', needMore') = mkCoeff hidx'
  191                        let !nextlen = S.length nextHaystack
  192                        if (nextlen < needMore')
  193                          then do
  194                            -- this should be impossibly rare
  195                            lookahead (needMore' - nextlen) >>=
  196                              either (\s -> finishAndEOF k [ haystack
  197                                                           , nextHaystack
  198                                                           , s ])
  199                                     (\s -> runNext hidx' leftLen' needMore' $
  200                                            S.append nextHaystack s)
  201                          else runNext hidx' leftLen' needMore' nextHaystack
  202                      else do
  203                        let sidx = p - leftLen
  204                        let (!crumb, !rest) = S.splitAt sidx nextHaystack
  205                        step <- lift $ runIteratee $ k $
  206                                Chunks $ map NoMatch $
  207                                filter (not . S.null) [haystack, crumb]
  208 
  209                        cDone step $ flip startSearch rest
  210 
  211 
  212     !nlen = S.length needle
  213 
  214     !last = nlen - 1
  215 
  216     !table = V.create $ do
  217         t <- MV.replicate 256 nlen
  218         go t
  219 
  220       where
  221         go !t = go' 0
  222           where
  223             go' !i | i >= last  = return t
  224                    | otherwise = do
  225                 let c = fromEnum $ S.unsafeIndex needle i
  226                 MV.unsafeWrite t c (last - i)
  227                 go' $! i+1
  228 
  229 {-
  230 testIt :: ByteString -> [ByteString] -> IO [MatchInfo]
  231 testIt needle haystack = do
  232     consumeStep <- runIteratee EL.consume
  233     eteeStep    <- runIteratee $ etee consumeStep
  234     -- iter :: Iteratee ByteString m (Step MatchInfo m [MatchInfo])
  235     let iter = enumList 1 haystack eteeStep
  236     finalInnerStep <- run_ iter
  237     run_ $ returnI finalInnerStep
  238 
  239   where
  240     etee = bmhEnumeratee needle
  241 -}