Some kit for radix tries.
parent
0ec419feca
commit
0bbc68493f
|
@ -625,6 +625,8 @@ library
|
|||
Data.HTTPSEverywhere.Rules.Internal.Parser,
|
||||
Data.HTTPSEverywhere.Rules.Internal.Raw,
|
||||
Data.HTTPSEverywhere.Rules.Internal.Types,
|
||||
Data.ByteString.RadixTrie,
|
||||
Data.Text.RadixTrie,
|
||||
Data.Text.ICU.Extras,
|
||||
Paths_https_everywhere_rules
|
||||
build-depends:
|
||||
|
@ -642,7 +644,8 @@ library
|
|||
string-conversions,
|
||||
taggy-lens,
|
||||
text,
|
||||
text-icu
|
||||
text-icu,
|
||||
vector
|
||||
hs-source-dirs:
|
||||
src
|
||||
default-language:
|
||||
|
|
|
@ -0,0 +1,143 @@
|
|||
{-# LANGUAGE DeriveFunctor #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE RecordWildCards #-}
|
||||
{-# LANGUAGE TupleSections #-}
|
||||
{-# LANGUAGE ViewPatterns #-}
|
||||
{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
|
||||
|
||||
module Data.ByteString.RadixTrie (
|
||||
RadixTrie,
|
||||
empty,
|
||||
lookup,
|
||||
lookupDefault,
|
||||
elem,
|
||||
enumerate,
|
||||
singleton,
|
||||
insert,
|
||||
) where
|
||||
|
||||
import Prelude hiding
|
||||
(null, lookup, drop, length, head, tail, take, length, last, elem)
|
||||
import Data.ByteString
|
||||
(ByteString, uncons, isPrefixOf, null, drop, length, head, tail,
|
||||
take, length, last, append, cons)
|
||||
import Data.Vector (Vector, (!), generate, modify, imap)
|
||||
import qualified Data.Vector as Vector (toList)
|
||||
import qualified Data.Vector.Mutable as Mutable (modify)
|
||||
import Data.Maybe (fromMaybe, isJust, maybeToList)
|
||||
|
||||
data Leaf a =
|
||||
Prefix ByteString (RadixTrie a) |
|
||||
Split (Vector {- 2 ^ 8 -} (Maybe (RadixTrie a))) |
|
||||
Vacuum deriving (Show, Functor)
|
||||
|
||||
data RadixTrie a =
|
||||
RadixTrie { label :: Maybe a, children :: Leaf a }
|
||||
deriving (Show, Functor)
|
||||
|
||||
empty :: RadixTrie a
|
||||
empty = RadixTrie Nothing Vacuum
|
||||
|
||||
lookup :: ByteString -> (RadixTrie a -> Maybe a)
|
||||
lookup str RadixTrie{..}
|
||||
| null str = label
|
||||
| otherwise = case children of
|
||||
Prefix prefix child ->
|
||||
if prefix `isPrefixOf` str
|
||||
then drop (length prefix) str `lookup` child
|
||||
else Nothing
|
||||
Split vector -> do
|
||||
(c, cs) <- uncons str
|
||||
trie <- vector ! fromIntegral c
|
||||
lookup cs trie
|
||||
Vacuum -> Nothing
|
||||
|
||||
lookupDefault :: a -> ByteString -> (RadixTrie a -> a)
|
||||
lookupDefault def = fmap (fmap (fromMaybe def)) lookup
|
||||
|
||||
elem :: ByteString -> (RadixTrie a -> Bool)
|
||||
elem = fmap (fmap isJust) lookup
|
||||
|
||||
enumerate :: RadixTrie a -> [(ByteString, a)]
|
||||
enumerate (RadixTrie label (Prefix prefix trie)) =
|
||||
maybe [] (\x -> [("", x)]) label ++
|
||||
map (\(x, y) -> (prefix `append` x, y)) (enumerate trie)
|
||||
enumerate (RadixTrie label (Split vector)) =
|
||||
maybe [] (\x -> [("", x)]) label ++
|
||||
concat (Vector.toList
|
||||
(imap (\i t -> maybe []
|
||||
(map (\(x, y) ->
|
||||
(cons (fromIntegral i) x, y)) . enumerate) t)
|
||||
vector))
|
||||
enumerate (RadixTrie a Vacuum) = map ("",) (maybeToList a)
|
||||
|
||||
(?=) :: Eq a => a -> a -> Maybe ()
|
||||
m ?= n = if m == n then return () else Nothing
|
||||
|
||||
singleton' :: ByteString -> (RadixTrie a -> RadixTrie a)
|
||||
singleton' str end
|
||||
| null str = end
|
||||
| length str == 1 = empty { children =
|
||||
Split . generate (2 ^ (8 :: Int)) $ \i -> do
|
||||
_ <- fromIntegral i ?= head str
|
||||
return end }
|
||||
| otherwise = empty { children = Prefix str end }
|
||||
|
||||
singleton :: ByteString -> a -> RadixTrie a
|
||||
singleton str value = singleton' str (empty { label = return value })
|
||||
|
||||
data PrefixResult =
|
||||
Equal |
|
||||
Disjoint |
|
||||
Extends ByteString |
|
||||
Prefixes ByteString
|
||||
|
||||
prefixResult :: ByteString -> ByteString -> PrefixResult
|
||||
prefixResult source target =
|
||||
case (source `isPrefixOf` target, target `isPrefixOf` source) of
|
||||
(True, True) -> Equal
|
||||
(False, False) -> Disjoint
|
||||
(True, False) -> Extends $ drop (length source) target
|
||||
(False, True) -> Prefixes $ drop (length target) source
|
||||
|
||||
update :: Int -> (Maybe a -> a) -> (Vector (Maybe a) -> Vector (Maybe a))
|
||||
update n f v = modify (\v' -> Mutable.modify v' (Just . f) n) v
|
||||
|
||||
insert :: ByteString -> a -> (RadixTrie a -> RadixTrie a)
|
||||
insert (uncons -> Just (fromIntegral -> c, cs)) val
|
||||
r@(RadixTrie _ (Split split)) = r { children = Split $
|
||||
update c (maybe (singleton cs val) (insert cs val)) split }
|
||||
insert _ val r@(RadixTrie _ (Split _)) = r { label = return val }
|
||||
insert text val r@(RadixTrie label (Prefix prefix trie)) =
|
||||
case prefixResult prefix text of
|
||||
Equal -> r { children = Prefix text trie }
|
||||
Extends e -> r { children = Prefix prefix $ insert e val trie }
|
||||
Disjoint ->
|
||||
r { children = Split . generate (2 ^ (8 :: Int)) $ \i ->
|
||||
if i == fromIntegral (head text)
|
||||
then return $ singleton (tail text) val
|
||||
else if i == fromIntegral (head prefix)
|
||||
then return $ singleton' (tail prefix) trie
|
||||
else Nothing }
|
||||
Prefixes p -> case length text of
|
||||
0 -> r { label = return val }
|
||||
1 -> r { children = Split . generate (2 ^ (8 :: Int)) $ \i ->
|
||||
if i == fromIntegral (head text) then
|
||||
return $ singleton (tail text) val
|
||||
else if i == fromIntegral (head prefix) then
|
||||
return $ singleton' (tail prefix) trie
|
||||
else Nothing }
|
||||
_ -> RadixTrie label . Prefix (take (length text) prefix) $
|
||||
case length prefix - length text of
|
||||
0 -> error "The impossible happened!"
|
||||
1 -> RadixTrie (Just val)
|
||||
(Split . generate (2 ^ (8 :: Int)) $ \i ->
|
||||
fmap (const trie) (i ?= fromIntegral (last prefix)))
|
||||
_ -> RadixTrie (Just val) (Prefix p trie)
|
||||
insert text val (RadixTrie value Vacuum) =
|
||||
(singleton' text (empty { label = return val })) { label = value }
|
||||
|
||||
example :: RadixTrie ByteString
|
||||
example = foldr (.) id insertions empty where
|
||||
insertions =
|
||||
map (\x -> insert x x) ["test", "slow", "slower", "water"]
|
|
@ -0,0 +1,30 @@
|
|||
module Data.Text.RadixTrie (
|
||||
RadixTrie,
|
||||
empty,
|
||||
lookup,
|
||||
lookupDefault,
|
||||
elem,
|
||||
insert,
|
||||
enumerate
|
||||
) where
|
||||
|
||||
import Prelude hiding (lookup, elem)
|
||||
import Data.ByteString.RadixTrie (RadixTrie, empty)
|
||||
import qualified Data.ByteString.RadixTrie as RT
|
||||
import qualified Data.Text as Strict (Text)
|
||||
import Data.Text.Encoding (encodeUtf8, decodeUtf8)
|
||||
|
||||
lookup :: Strict.Text -> (RadixTrie a -> Maybe a)
|
||||
lookup t r = RT.lookup (encodeUtf8 t) r
|
||||
|
||||
lookupDefault :: a -> Strict.Text -> (RadixTrie a -> a)
|
||||
lookupDefault x t r = RT.lookupDefault x (encodeUtf8 t) r
|
||||
|
||||
elem :: Strict.Text -> (RadixTrie a -> Bool)
|
||||
elem t r = RT.elem (encodeUtf8 t) r
|
||||
|
||||
insert :: Strict.Text -> a -> (RadixTrie a -> RadixTrie a)
|
||||
insert t x r = RT.insert (encodeUtf8 t) x r
|
||||
|
||||
enumerate :: RadixTrie a -> [(Strict.Text, a)]
|
||||
enumerate t = map (\(x, y) -> (decodeUtf8 x, y)) (RT.enumerate t)
|
Loading…
Reference in New Issue