Some kit for radix tries.

trie
vi 2018-06-28 21:35:56 -06:00
parent 0ec419feca
commit 0bbc68493f
3 changed files with 177 additions and 1 deletions

View File

@ -625,6 +625,8 @@ library
Data.HTTPSEverywhere.Rules.Internal.Parser,
Data.HTTPSEverywhere.Rules.Internal.Raw,
Data.HTTPSEverywhere.Rules.Internal.Types,
Data.ByteString.RadixTrie,
Data.Text.RadixTrie,
Data.Text.ICU.Extras,
Paths_https_everywhere_rules
build-depends:
@ -642,7 +644,8 @@ library
string-conversions,
taggy-lens,
text,
text-icu
text-icu,
vector
hs-source-dirs:
src
default-language:

View File

@ -0,0 +1,143 @@
{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE ViewPatterns #-}
{-# OPTIONS_GHC -fno-warn-unused-top-binds #-}
module Data.ByteString.RadixTrie (
RadixTrie,
empty,
lookup,
lookupDefault,
elem,
enumerate,
singleton,
insert,
) where
import Prelude hiding
(null, lookup, drop, length, head, tail, take, length, last, elem)
import Data.ByteString
(ByteString, uncons, isPrefixOf, null, drop, length, head, tail,
take, length, last, append, cons)
import Data.Vector (Vector, (!), generate, modify, imap)
import qualified Data.Vector as Vector (toList)
import qualified Data.Vector.Mutable as Mutable (modify)
import Data.Maybe (fromMaybe, isJust, maybeToList)
data Leaf a =
Prefix ByteString (RadixTrie a) |
Split (Vector {- 2 ^ 8 -} (Maybe (RadixTrie a))) |
Vacuum deriving (Show, Functor)
data RadixTrie a =
RadixTrie { label :: Maybe a, children :: Leaf a }
deriving (Show, Functor)
empty :: RadixTrie a
empty = RadixTrie Nothing Vacuum
lookup :: ByteString -> (RadixTrie a -> Maybe a)
lookup str RadixTrie{..}
| null str = label
| otherwise = case children of
Prefix prefix child ->
if prefix `isPrefixOf` str
then drop (length prefix) str `lookup` child
else Nothing
Split vector -> do
(c, cs) <- uncons str
trie <- vector ! fromIntegral c
lookup cs trie
Vacuum -> Nothing
lookupDefault :: a -> ByteString -> (RadixTrie a -> a)
lookupDefault def = fmap (fmap (fromMaybe def)) lookup
elem :: ByteString -> (RadixTrie a -> Bool)
elem = fmap (fmap isJust) lookup
enumerate :: RadixTrie a -> [(ByteString, a)]
enumerate (RadixTrie label (Prefix prefix trie)) =
maybe [] (\x -> [("", x)]) label ++
map (\(x, y) -> (prefix `append` x, y)) (enumerate trie)
enumerate (RadixTrie label (Split vector)) =
maybe [] (\x -> [("", x)]) label ++
concat (Vector.toList
(imap (\i t -> maybe []
(map (\(x, y) ->
(cons (fromIntegral i) x, y)) . enumerate) t)
vector))
enumerate (RadixTrie a Vacuum) = map ("",) (maybeToList a)
(?=) :: Eq a => a -> a -> Maybe ()
m ?= n = if m == n then return () else Nothing
singleton' :: ByteString -> (RadixTrie a -> RadixTrie a)
singleton' str end
| null str = end
| length str == 1 = empty { children =
Split . generate (2 ^ (8 :: Int)) $ \i -> do
_ <- fromIntegral i ?= head str
return end }
| otherwise = empty { children = Prefix str end }
singleton :: ByteString -> a -> RadixTrie a
singleton str value = singleton' str (empty { label = return value })
data PrefixResult =
Equal |
Disjoint |
Extends ByteString |
Prefixes ByteString
prefixResult :: ByteString -> ByteString -> PrefixResult
prefixResult source target =
case (source `isPrefixOf` target, target `isPrefixOf` source) of
(True, True) -> Equal
(False, False) -> Disjoint
(True, False) -> Extends $ drop (length source) target
(False, True) -> Prefixes $ drop (length target) source
update :: Int -> (Maybe a -> a) -> (Vector (Maybe a) -> Vector (Maybe a))
update n f v = modify (\v' -> Mutable.modify v' (Just . f) n) v
insert :: ByteString -> a -> (RadixTrie a -> RadixTrie a)
insert (uncons -> Just (fromIntegral -> c, cs)) val
r@(RadixTrie _ (Split split)) = r { children = Split $
update c (maybe (singleton cs val) (insert cs val)) split }
insert _ val r@(RadixTrie _ (Split _)) = r { label = return val }
insert text val r@(RadixTrie label (Prefix prefix trie)) =
case prefixResult prefix text of
Equal -> r { children = Prefix text trie }
Extends e -> r { children = Prefix prefix $ insert e val trie }
Disjoint ->
r { children = Split . generate (2 ^ (8 :: Int)) $ \i ->
if i == fromIntegral (head text)
then return $ singleton (tail text) val
else if i == fromIntegral (head prefix)
then return $ singleton' (tail prefix) trie
else Nothing }
Prefixes p -> case length text of
0 -> r { label = return val }
1 -> r { children = Split . generate (2 ^ (8 :: Int)) $ \i ->
if i == fromIntegral (head text) then
return $ singleton (tail text) val
else if i == fromIntegral (head prefix) then
return $ singleton' (tail prefix) trie
else Nothing }
_ -> RadixTrie label . Prefix (take (length text) prefix) $
case length prefix - length text of
0 -> error "The impossible happened!"
1 -> RadixTrie (Just val)
(Split . generate (2 ^ (8 :: Int)) $ \i ->
fmap (const trie) (i ?= fromIntegral (last prefix)))
_ -> RadixTrie (Just val) (Prefix p trie)
insert text val (RadixTrie value Vacuum) =
(singleton' text (empty { label = return val })) { label = value }
example :: RadixTrie ByteString
example = foldr (.) id insertions empty where
insertions =
map (\x -> insert x x) ["test", "slow", "slower", "water"]

View File

@ -0,0 +1,30 @@
module Data.Text.RadixTrie (
RadixTrie,
empty,
lookup,
lookupDefault,
elem,
insert,
enumerate
) where
import Prelude hiding (lookup, elem)
import Data.ByteString.RadixTrie (RadixTrie, empty)
import qualified Data.ByteString.RadixTrie as RT
import qualified Data.Text as Strict (Text)
import Data.Text.Encoding (encodeUtf8, decodeUtf8)
lookup :: Strict.Text -> (RadixTrie a -> Maybe a)
lookup t r = RT.lookup (encodeUtf8 t) r
lookupDefault :: a -> Strict.Text -> (RadixTrie a -> a)
lookupDefault x t r = RT.lookupDefault x (encodeUtf8 t) r
elem :: Strict.Text -> (RadixTrie a -> Bool)
elem t r = RT.elem (encodeUtf8 t) r
insert :: Strict.Text -> a -> (RadixTrie a -> RadixTrie a)
insert t x r = RT.insert (encodeUtf8 t) x r
enumerate :: RadixTrie a -> [(Strict.Text, a)]
enumerate t = map (\(x, y) -> (decodeUtf8 x, y)) (RT.enumerate t)