Remove the IO bottleneck.

Don't redundantly readIO/parse the rulesets; do this once, lazily
carrying out the involved operations. Rulesets are invariant over
executions.

This improves performance by a few orders of magnitude. Though at some
point we should replace linear search with lookup on a generalised
suffix tree of rooted domains.

Breaking interface change, though I'll likely restore the old form
soon with IO TH.
master
vi 2017-12-06 13:21:57 +01:00
parent e2d9556c6f
commit ed1933f2c5
5 changed files with 37 additions and 25 deletions

View File

@ -9,11 +9,12 @@ connections when possible."
```haskell
λ: :m + Data.HTTPSEverywhere.Rules Network.URI
λ: rulesets <- getRulesets
λ: let Just eff = parseURI "http://www.eff.org/document/eff-and-aclu-amicus-brief-klayman"
λ: rewriteURL eff
λ: rewriteURL rulesets eff
https://www.eff.org/document/eff-and-aclu-amicus-brief-klayman
λ: :m + Network.HTTP.Client Network.HTTP.Client.TLS Control.Applicative Control.Lens
λ: (req, hub) <- (,) <*> getUri <$> parseUrl "https://github.com/fmap/https-everywhere-rules"
λ: newManager tlsManagerSettings >>= httpNoBody req <&> destroyCookieJar . responseCookieJar >>= mapM (rewriteCookie hub)
λ: newManager tlsManagerSettings >>= httpNoBody req <&> destroyCookieJar . responseCookieJar >>= mapM (rewriteCookie rulesets hub)
[Cookie {cookie_name = "_gh_sess", cookie_value = "eyJzZXNzaW9uX2lkIjoiNjBlM2FiOTIxNTdhZTNhNDE5YWQ0ZTk4ZWQzNDRjMjEiLCJzcHlfcmVwbyI6ImZtYXAvaHR0cHMtZXZlcnl3aGVyZS1ydWxlcyIsInNweV9yZXBvX2F0IjoxNDA4ODk2OTM2LCJfY3NyZl90b2tlbiI6IktkbTlwN2JqNGptVmhrYjFIUm9BbkV0a1JTQXRDUXJid2g4VWo4N1g0Q1U9In0%3D--d378daa262b8c12bb82246d5de6b3adc353a3db7", cookie_expiry_time = 3013-12-25 00:00:00 UTC, cookie_domain = "github.com", cookie_path = "/", cookie_creation_time = 2014-08-24 16:15:37.815144 UTC, cookie_last_access_time = 2014-08-24 16:15:37.815144 UTC, cookie_persistent = False, cookie_host_only = True, cookie_secure_only = True, cookie_http_only = True},Cookie {cookie_name = "logged_in", cookie_value = "no", cookie_expiry_time = 2034-08-24 16:15:36 UTC, cookie_domain = "github.com", cookie_path = "/", cookie_creation_time = 2014-08-24 16:15:37.815144 UTC, cookie_last_access_time = 2014-08-24 16:15:37.815144 UTC, cookie_persistent = True, cookie_host_only = False, cookie_secure_only = True, cookie_http_only = True}]
```

View File

@ -6,7 +6,7 @@ import "foldl" Control.Foldl (Fold(..))
import qualified "foldl" Control.Foldl as Foldl
import "base" Control.Monad (forever)
import "base" Data.Bool (bool)
import "https-everywhere-rules" Data.HTTPSEverywhere.Rules (rewriteURL)
import "https-everywhere-rules" Data.HTTPSEverywhere.Rules (rewriteURL, RuleSet, getRulesets)
import "base" Data.Monoid (Sum(..))
import "network-uri" Network.URI (URI, parseURI)
import "pipes" Pipes (Pipe, Producer, lift, yield, await, (>->))
@ -16,10 +16,10 @@ import qualified "pipes" Pipes.Prelude as Pipes (fold)
parse :: Monad m => Pipe String URI m ()
parse = forever $ parseURI <$> await >>= maybe (return ()) yield
check :: Pipe URI Bool IO ()
check = forever $ do
check :: [RuleSet] -> Pipe URI Bool IO ()
check rs = forever $ do
src <- await
tgt <- lift $ rewriteURL src
tgt <- lift $ rewriteURL rs src
yield $ src /= tgt
fold :: Monad m => Fold a b -> Producer a m () -> m b
@ -29,4 +29,6 @@ proportion :: Fold Bool (Int, Int)
proportion = (,) <$> Foldl.foldMap (Sum . bool 0 1) getSum <*> Foldl.length
main :: IO ()
main = fold proportion (stdinLn >-> parse >-> check) >>= print
main = do
rulesets <- getRulesets
fold proportion (stdinLn >-> parse >-> check rulesets) >>= print

View File

@ -8,7 +8,7 @@ import "base" Data.Monoid ((<>))
import "bytestring" Data.ByteString.Char8 (pack, unpack)
import "http-proxy" Network.HTTP.Proxy (runProxySettings, defaultProxySettings, Settings(..), Request(..))
import "http-types" Network.HTTP.Types (status302)
import "https-everywhere-rules" Data.HTTPSEverywhere.Rules (rewriteURL)
import "https-everywhere-rules" Data.HTTPSEverywhere.Rules (getRulesets, rewriteURL, RuleSet)
import "network-uri" Network.URI (URI, parseURI)
import "wai" Network.Wai (Response, responseLBS)
@ -18,12 +18,14 @@ import "wai" Network.Wai (Response, responseLBS)
httpRedirect :: URI -> Response
httpRedirect to = responseLBS status302 [("Location", pack . show $ to)] ""
tryHTTPS :: Request -> IO (Either Response Request)
tryHTTPS Request{..} = case parseURI . unpack $ requestPath <> queryString of
Just uri -> rewriteURL uri <&> \rewrite -> if rewrite == uri
tryHTTPS :: [RuleSet] -> Request -> IO (Either Response Request)
tryHTTPS rs Request{..} = case parseURI . unpack $ requestPath <> queryString of
Just uri -> rewriteURL rs uri <&> \rewrite -> if rewrite == uri
then return Request{..}
else Left $ httpRedirect rewrite
Nothing -> return $ Right Request{..}
main :: IO ()
main = runProxySettings $ defaultProxySettings { proxyRequestModifier = tryHTTPS }
main = do
rulesets <- getRulesets
runProxySettings $ defaultProxySettings { proxyRequestModifier = tryHTTPS rulesets }

View File

@ -1,4 +1,6 @@
module Data.HTTPSEverywhere.Rules (
RuleSet,
getRulesets,
rewriteURL,
rewriteCookie
) where
@ -12,11 +14,11 @@ import Network.URI (URI)
import Pipes ((>->))
import Pipes.Prelude (head, null)
import Control.Monad (join)
import Data.HTTPSEverywhere.Rules.Internal (getRulesetsMatching, havingRulesThatTrigger, havingCookieRulesThatTrigger, setSecureFlag)
import Data.HTTPSEverywhere.Rules.Internal (getRulesets, getRulesetsMatching, havingRulesThatTrigger, havingCookieRulesThatTrigger, setSecureFlag, RuleSet)
rewriteURL :: URI -> IO URI
rewriteURL url = getRulesetsMatching url >-> havingRulesThatTrigger url & head <&> fromMaybe url . join
rewriteURL :: [RuleSet] -> URI -> IO URI
rewriteURL rs url = getRulesetsMatching rs url >-> havingRulesThatTrigger url & head <&> fromMaybe url . join
rewriteCookie :: URI -> Cookie -> IO Cookie
rewriteCookie url cookie = null producer <&> setSecureFlag cookie `bool` cookie
where producer = getRulesetsMatching url >-> havingCookieRulesThatTrigger cookie
rewriteCookie :: [RuleSet] -> URI -> Cookie -> IO Cookie
rewriteCookie rs url cookie = null producer <&> setSecureFlag cookie `bool` cookie
where producer = getRulesetsMatching rs url >-> havingCookieRulesThatTrigger cookie

View File

@ -1,10 +1,12 @@
{-# LANGUAGE CPP #-}
module Data.HTTPSEverywhere.Rules.Internal (
getRulesets,
getRulesetsMatching,
havingRulesThatTrigger,
havingCookieRulesThatTrigger,
setSecureFlag,
RuleSet,
#ifdef TEST
hasTargetMatching,
hasTriggeringRuleOn,
@ -23,20 +25,23 @@ import Data.Maybe (isJust)
import Network.HTTP.Client (Cookie(..))
import Network.URI (URI)
import Pipes (Pipe, Producer, for, each, await, yield, lift, (>->))
import Pipes.Prelude (filter)
import Pipes.Prelude (filter, toListM)
import Data.HTTPSEverywhere.Rules.Internal.Parser (parseRuleSets)
import qualified Data.HTTPSEverywhere.Rules.Internal.Raw as Raw (getRule, getRules)
import Data.HTTPSEverywhere.Rules.Internal.Types (RuleSet(..), Target(..), Exclusion(..), Rule(..), CookieRule(..))
getRulesets :: Producer RuleSet IO ()
getRulesets = lift Raw.getRules
getRulesets' :: Producer RuleSet IO ()
getRulesets' = lift Raw.getRules
>>= flip (for . each) (flip (for . each) yield <=< lift . (parseRuleSets <$$> Raw.getRule))
getRulesetsMatching :: URI -> Producer RuleSet IO ()
getRulesetsMatching url = getRulesets
>-> filter (flip hasTargetMatching url)
>-> filter (not . flip hasExclusionMatching url)
getRulesets :: IO [RuleSet]
getRulesets = toListM getRulesets'
getRulesetsMatching :: [RuleSet] -> URI -> Producer RuleSet IO ()
getRulesetsMatching rs url = each rs
>-> filter (flip hasTargetMatching url)
>-> filter (not . flip hasExclusionMatching url)
havingRulesThatTrigger :: URI -> Pipe RuleSet (Maybe URI) IO ()
havingRulesThatTrigger url = flip hasTriggeringRuleOn url <$> await