diff --git a/https-everywhere-rules.cabal b/https-everywhere-rules.cabal index d996717..1416570 100644 --- a/https-everywhere-rules.cabal +++ b/https-everywhere-rules.cabal @@ -625,6 +625,8 @@ library Data.HTTPSEverywhere.Rules.Internal.Parser, Data.HTTPSEverywhere.Rules.Internal.Raw, Data.HTTPSEverywhere.Rules.Internal.Types, + Data.ByteString.RadixTrie, + Data.Text.RadixTrie, Data.Text.ICU.Extras, Paths_https_everywhere_rules build-depends: @@ -642,7 +644,8 @@ library string-conversions, taggy-lens, text, - text-icu + text-icu, + vector hs-source-dirs: src default-language: diff --git a/src/Data/ByteString/RadixTrie.hs b/src/Data/ByteString/RadixTrie.hs new file mode 100644 index 0000000..db8a792 --- /dev/null +++ b/src/Data/ByteString/RadixTrie.hs @@ -0,0 +1,143 @@ +{-# LANGUAGE DeriveFunctor #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE ViewPatterns #-} +{-# OPTIONS_GHC -fno-warn-unused-top-binds #-} + +module Data.ByteString.RadixTrie ( + RadixTrie, + empty, + lookup, + lookupDefault, + elem, + enumerate, + singleton, + insert, +) where + +import Prelude hiding + (null, lookup, drop, length, head, tail, take, length, last, elem) +import Data.ByteString + (ByteString, uncons, isPrefixOf, null, drop, length, head, tail, + take, length, last, append, cons) +import Data.Vector (Vector, (!), generate, modify, imap) +import qualified Data.Vector as Vector (toList) +import qualified Data.Vector.Mutable as Mutable (modify) +import Data.Maybe (fromMaybe, isJust, maybeToList) + +data Leaf a = + Prefix ByteString (RadixTrie a) | + Split (Vector {- 2 ^ 8 -} (Maybe (RadixTrie a))) | + Vacuum deriving (Show, Functor) + +data RadixTrie a = + RadixTrie { label :: Maybe a, children :: Leaf a } + deriving (Show, Functor) + +empty :: RadixTrie a +empty = RadixTrie Nothing Vacuum + +lookup :: ByteString -> (RadixTrie a -> Maybe a) +lookup str RadixTrie{..} + | null str = label + | otherwise = case children of + Prefix prefix child -> + if prefix `isPrefixOf` str + then drop (length prefix) str `lookup` child + else Nothing + Split vector -> do + (c, cs) <- uncons str + trie <- vector ! fromIntegral c + lookup cs trie + Vacuum -> Nothing + +lookupDefault :: a -> ByteString -> (RadixTrie a -> a) +lookupDefault def = fmap (fmap (fromMaybe def)) lookup + +elem :: ByteString -> (RadixTrie a -> Bool) +elem = fmap (fmap isJust) lookup + +enumerate :: RadixTrie a -> [(ByteString, a)] +enumerate (RadixTrie label (Prefix prefix trie)) = + maybe [] (\x -> [("", x)]) label ++ + map (\(x, y) -> (prefix `append` x, y)) (enumerate trie) +enumerate (RadixTrie label (Split vector)) = + maybe [] (\x -> [("", x)]) label ++ + concat (Vector.toList + (imap (\i t -> maybe [] + (map (\(x, y) -> + (cons (fromIntegral i) x, y)) . enumerate) t) + vector)) +enumerate (RadixTrie a Vacuum) = map ("",) (maybeToList a) + +(?=) :: Eq a => a -> a -> Maybe () +m ?= n = if m == n then return () else Nothing + +singleton' :: ByteString -> (RadixTrie a -> RadixTrie a) +singleton' str end + | null str = end + | length str == 1 = empty { children = + Split . generate (2 ^ (8 :: Int)) $ \i -> do + _ <- fromIntegral i ?= head str + return end } + | otherwise = empty { children = Prefix str end } + +singleton :: ByteString -> a -> RadixTrie a +singleton str value = singleton' str (empty { label = return value }) + +data PrefixResult = + Equal | + Disjoint | + Extends ByteString | + Prefixes ByteString + +prefixResult :: ByteString -> ByteString -> PrefixResult +prefixResult source target = + case (source `isPrefixOf` target, target `isPrefixOf` source) of + (True, True) -> Equal + (False, False) -> Disjoint + (True, False) -> Extends $ drop (length source) target + (False, True) -> Prefixes $ drop (length target) source + +update :: Int -> (Maybe a -> a) -> (Vector (Maybe a) -> Vector (Maybe a)) +update n f v = modify (\v' -> Mutable.modify v' (Just . f) n) v + +insert :: ByteString -> a -> (RadixTrie a -> RadixTrie a) +insert (uncons -> Just (fromIntegral -> c, cs)) val + r@(RadixTrie _ (Split split)) = r { children = Split $ + update c (maybe (singleton cs val) (insert cs val)) split } +insert _ val r@(RadixTrie _ (Split _)) = r { label = return val } +insert text val r@(RadixTrie label (Prefix prefix trie)) = + case prefixResult prefix text of + Equal -> r { children = Prefix text trie } + Extends e -> r { children = Prefix prefix $ insert e val trie } + Disjoint -> + r { children = Split . generate (2 ^ (8 :: Int)) $ \i -> + if i == fromIntegral (head text) + then return $ singleton (tail text) val + else if i == fromIntegral (head prefix) + then return $ singleton' (tail prefix) trie + else Nothing } + Prefixes p -> case length text of + 0 -> r { label = return val } + 1 -> r { children = Split . generate (2 ^ (8 :: Int)) $ \i -> + if i == fromIntegral (head text) then + return $ singleton (tail text) val + else if i == fromIntegral (head prefix) then + return $ singleton' (tail prefix) trie + else Nothing } + _ -> RadixTrie label . Prefix (take (length text) prefix) $ + case length prefix - length text of + 0 -> error "The impossible happened!" + 1 -> RadixTrie (Just val) + (Split . generate (2 ^ (8 :: Int)) $ \i -> + fmap (const trie) (i ?= fromIntegral (last prefix))) + _ -> RadixTrie (Just val) (Prefix p trie) +insert text val (RadixTrie value Vacuum) = + (singleton' text (empty { label = return val })) { label = value } + +example :: RadixTrie ByteString +example = foldr (.) id insertions empty where + insertions = + map (\x -> insert x x) ["test", "slow", "slower", "water"] diff --git a/src/Data/Text/RadixTrie.hs b/src/Data/Text/RadixTrie.hs new file mode 100644 index 0000000..7934f42 --- /dev/null +++ b/src/Data/Text/RadixTrie.hs @@ -0,0 +1,30 @@ +module Data.Text.RadixTrie ( + RadixTrie, + empty, + lookup, + lookupDefault, + elem, + insert, + enumerate +) where + +import Prelude hiding (lookup, elem) +import Data.ByteString.RadixTrie (RadixTrie, empty) +import qualified Data.ByteString.RadixTrie as RT +import qualified Data.Text as Strict (Text) +import Data.Text.Encoding (encodeUtf8, decodeUtf8) + +lookup :: Strict.Text -> (RadixTrie a -> Maybe a) +lookup t r = RT.lookup (encodeUtf8 t) r + +lookupDefault :: a -> Strict.Text -> (RadixTrie a -> a) +lookupDefault x t r = RT.lookupDefault x (encodeUtf8 t) r + +elem :: Strict.Text -> (RadixTrie a -> Bool) +elem t r = RT.elem (encodeUtf8 t) r + +insert :: Strict.Text -> a -> (RadixTrie a -> RadixTrie a) +insert t x r = RT.insert (encodeUtf8 t) x r + +enumerate :: RadixTrie a -> [(Strict.Text, a)] +enumerate t = map (\(x, y) -> (decodeUtf8 x, y)) (RT.enumerate t)