1 {-# LANGUAGE BangPatterns #-} 2 {-# LANGUAGE OverloadedStrings #-} 3 {-# LANGUAGE RankNTypes #-} 4 5 module Snap.Internal.Iteratee.BoyerMooreHorspool 6 ( bmhEnumeratee 7 , MatchInfo(..) ) 8 where 9 10 import Control.Monad.State 11 import qualified Data.ByteString as S 12 import Data.ByteString (ByteString) 13 import Data.ByteString.Unsafe as S 14 import Data.Enumerator hiding (head, filter, last, map) 15 import qualified Data.Enumerator.List as EL 16 import Data.Int 17 import qualified Data.Vector.Unboxed as V 18 import qualified Data.Vector.Unboxed.Mutable as MV 19 import Prelude hiding (head, last) 20 21 22 --{-# INLINE debug #-} 23 --debug :: MonadIO m => String -> m () 24 --debug s = liftIO $ putStrLn s 25 --debug _ = return () 26 27 ------------------------------------------------------------------------------ 28 data MatchInfo = Match !ByteString 29 | NoMatch !ByteString 30 deriving (Show) 31 32 33 -- We return strict bytestring because we always expect a chunk to be bigger 34 -- than the needle 35 lookahead :: (MonadIO m) => 36 Int 37 -> Iteratee ByteString m (Either ByteString ByteString) 38 lookahead n = go id n 39 where 40 go !dlist !k = do 41 EL.head >>= maybe 42 (do 43 let !ls = S.concat $ dlist [] 44 -- debug $ "lookahead " ++ show n 45 -- ++ " failing, returning " ++ show ls 46 47 return $ Left ls) 48 (\x -> do 49 let !l = S.length x 50 let !r = k - l 51 let !d' = dlist . (x:) 52 53 if r <= 0 54 then do 55 let !ls = S.concat $ d' [] 56 -- debug $ "lookahead " ++ show n 57 -- ++ " successfully returning " 58 -- ++ show ls 59 return $ Right $ ls 60 else go d' r) 61 {-# INLINE lookahead #-} 62 63 matches :: ByteString -- ^ needle 64 -> Int -- ^ needle start 65 -> Int -- ^ needle end (inclusive) 66 -> ByteString -- ^ haystack 67 -> Int -- ^ haystack start 68 -> Int -- ^ haystack end (inclusive) 69 -> Bool 70 matches !needle !nstart !nend' !haystack !hstart !hend' = 71 go nend' hend' 72 where 73 go !nend !hend = 74 if nend < nstart || hend < hstart 75 then True 76 else let !nc = S.unsafeIndex needle nend 77 !hc = S.unsafeIndex haystack hend 78 in if nc /= hc 79 then False 80 else go (nend-1) (hend-1) 81 {-# INLINE matches #-} 82 83 84 bmhEnumeratee :: (MonadIO m) => 85 ByteString 86 -> Step MatchInfo m a 87 -> Iteratee ByteString m (Step MatchInfo m a) 88 bmhEnumeratee needle _step = do 89 -- debug $ "boyermoore: needle=" ++ show needle 90 cDone _step iter 91 where 92 {-# INLINE cDone #-} 93 cDone (Continue k) f = f k 94 cDone step _ = yield step (Chunks []) 95 96 97 iter !k = {-# SCC "bmh/iter" #-} do 98 lookahead nlen >>= either (finishAndEOF k . (:[])) 99 (startSearch k) 100 101 finishAndEOF k xs = {-# SCC "finishAndEOF" #-} do 102 -- debug $ "finishAndEOF, returning NoMatch for " ++ show xs 103 step <- lift $ runIteratee $ k $ 104 Chunks (map NoMatch $ filter (not . S.null) xs) 105 cDone step (\k' -> lift $ runIteratee $ k' EOF) 106 107 108 startSearch !k !haystack = {-# SCC "startSearch" #-} do 109 -- debug $ "startsearch: " ++ show haystack 110 if S.null haystack 111 then lookahead nlen >>= 112 either (\s -> finishAndEOF k [s]) 113 (startSearch k) 114 else go 0 115 where 116 !hlen = S.length haystack 117 118 go !hidx 119 | hend >= hlen = crossBound hidx 120 | otherwise = {-# SCC "go" #-} do 121 let match = matches needle 0 last haystack hidx hend 122 -- debug $ "go " ++ show hidx ++ ", hend=" ++ show hend 123 -- ++ ", match was " ++ show match 124 if match 125 then {-# SCC "go/match" #-} do 126 let !nomatch = S.take hidx haystack 127 let !aftermatch = S.drop (hend+1) haystack 128 129 step <- if not $ S.null nomatch 130 then lift $ runIteratee $ k $ Chunks [NoMatch nomatch] 131 else return $ Continue k 132 133 cDone step $ \k' -> do 134 step' <- lift $ runIteratee $ k' $ Chunks [Match needle] 135 cDone step' $ \k'' -> startSearch k'' aftermatch 136 else {-# SCC "go/nomatch" #-} do 137 -- skip ahead 138 let c = S.unsafeIndex haystack hend 139 let !skip = V.unsafeIndex table $ fromEnum c 140 go (hidx + skip) 141 where 142 !hend = hidx + nlen - 1 143 144 mkCoeff hidx = let !ll = hlen - hidx 145 !nm = nlen - ll 146 in (ll,nm) 147 148 crossBound !hidx0 = {-# SCC "crossBound" #-} do 149 let (!leftLen, needMore) = mkCoeff hidx0 150 151 lookahead needMore >>= 152 either (\s -> finishAndEOF k [haystack, s]) 153 (runNext hidx0 leftLen needMore) 154 where 155 runNext !hidx !leftLen !needMore !nextHaystack = do 156 let match1 = matches needle leftLen last 157 nextHaystack 0 (needMore-1) 158 let match2 = matches needle 0 (leftLen-1) 159 haystack hidx (hlen-1) 160 161 -- debug $ "crossbound match1=" ++ show match1 162 -- ++ " match2=" ++ show match2 163 164 if match1 && match2 165 then {-# SCC "crossBound/match" #-} do 166 let !nomatch = S.take hidx haystack 167 let !aftermatch = S.drop needMore nextHaystack 168 169 -- FIXME: merge this code w/ above 170 step <- if not $ S.null nomatch 171 then lift $ runIteratee $ k $ 172 Chunks [NoMatch nomatch] 173 else return $ Continue k 174 175 -- debug $ "matching" 176 cDone step $ \k' -> do 177 step' <- lift $ runIteratee $ k' $ 178 Chunks [Match needle] 179 cDone step' $ \k'' -> 180 startSearch k'' aftermatch 181 182 else {-# SCC "crossBound/nomatch" #-} do 183 let c = S.unsafeIndex nextHaystack $ needMore-1 184 let p = V.unsafeIndex table (fromEnum c) 185 186 -- debug $ "p was " ++ show p ++ ", ll=" ++ show leftLen 187 if p < leftLen 188 then do 189 let !hidx' = hidx+p 190 let (!leftLen', needMore') = mkCoeff hidx' 191 let !nextlen = S.length nextHaystack 192 if (nextlen < needMore') 193 then do 194 -- this should be impossibly rare 195 lookahead (needMore' - nextlen) >>= 196 either (\s -> finishAndEOF k [ haystack 197 , nextHaystack 198 , s ]) 199 (\s -> runNext hidx' leftLen' needMore' $ 200 S.append nextHaystack s) 201 else runNext hidx' leftLen' needMore' nextHaystack 202 else do 203 let sidx = p - leftLen 204 let (!crumb, !rest) = S.splitAt sidx nextHaystack 205 step <- lift $ runIteratee $ k $ 206 Chunks $ map NoMatch $ 207 filter (not . S.null) [haystack, crumb] 208 209 cDone step $ flip startSearch rest 210 211 212 !nlen = S.length needle 213 214 !last = nlen - 1 215 216 !table = V.create $ do 217 t <- MV.replicate 256 nlen 218 go t 219 220 where 221 go !t = go' 0 222 where 223 go' !i | i >= last = return t 224 | otherwise = do 225 let c = fromEnum $ S.unsafeIndex needle i 226 MV.unsafeWrite t c (last - i) 227 go' $! i+1 228 229 {- 230 testIt :: ByteString -> [ByteString] -> IO [MatchInfo] 231 testIt needle haystack = do 232 consumeStep <- runIteratee EL.consume 233 eteeStep <- runIteratee $ etee consumeStep 234 -- iter :: Iteratee ByteString m (Step MatchInfo m [MatchInfo]) 235 let iter = enumList 1 haystack eteeStep 236 finalInnerStep <- run_ iter 237 run_ $ returnI finalInnerStep 238 239 where 240 etee = bmhEnumeratee needle 241 -}