Remove the IO bottleneck.

Don't redundantly readIO/parse the rulesets; do this once, lazily
carrying out the involved operations. Rulesets are invariant over
executions.

This improves performance by a few orders of magnitude. Though at some
point we should substitute linear search for lookup on a generalised
suffix tree of rooted domains.

Breaking interface change, though I'll likely restore the old form
soon with IO TH.
master
vi 2017-12-06 13:21:57 +01:00
parent e2d9556c6f
commit ed1933f2c5
5 changed files with 37 additions and 25 deletions

View File

@ -9,11 +9,12 @@ connections when possible."
```haskell ```haskell
λ: :m + Data.HTTPSEverywhere.Rules Network.URI λ: :m + Data.HTTPSEverywhere.Rules Network.URI
λ: rulesets <- getRulesets
λ: let Just eff = parseURI "http://www.eff.org/document/eff-and-aclu-amicus-brief-klayman" λ: let Just eff = parseURI "http://www.eff.org/document/eff-and-aclu-amicus-brief-klayman"
λ: rewriteURL eff λ: rewriteURL rulesets eff
https://www.eff.org/document/eff-and-aclu-amicus-brief-klayman https://www.eff.org/document/eff-and-aclu-amicus-brief-klayman
λ: :m + Network.HTTP.Client Network.HTTP.Client.TLS Control.Applicative Control.Lens λ: :m + Network.HTTP.Client Network.HTTP.Client.TLS Control.Applicative Control.Lens
λ: (req, hub) <- (,) <*> getUri <$> parseUrl "https://github.com/fmap/https-everywhere-rules" λ: (req, hub) <- (,) <*> getUri <$> parseUrl "https://github.com/fmap/https-everywhere-rules"
λ: newManager tlsManagerSettings >>= httpNoBody req <&> destroyCookieJar . responseCookieJar >>= mapM (rewriteCookie hub) λ: newManager tlsManagerSettings >>= httpNoBody req <&> destroyCookieJar . responseCookieJar >>= mapM (rewriteCookie rulesets hub)
[Cookie {cookie_name = "_gh_sess", cookie_value = "eyJzZXNzaW9uX2lkIjoiNjBlM2FiOTIxNTdhZTNhNDE5YWQ0ZTk4ZWQzNDRjMjEiLCJzcHlfcmVwbyI6ImZtYXAvaHR0cHMtZXZlcnl3aGVyZS1ydWxlcyIsInNweV9yZXBvX2F0IjoxNDA4ODk2OTM2LCJfY3NyZl90b2tlbiI6IktkbTlwN2JqNGptVmhrYjFIUm9BbkV0a1JTQXRDUXJid2g4VWo4N1g0Q1U9In0%3D--d378daa262b8c12bb82246d5de6b3adc353a3db7", cookie_expiry_time = 3013-12-25 00:00:00 UTC, cookie_domain = "github.com", cookie_path = "/", cookie_creation_time = 2014-08-24 16:15:37.815144 UTC, cookie_last_access_time = 2014-08-24 16:15:37.815144 UTC, cookie_persistent = False, cookie_host_only = True, cookie_secure_only = True, cookie_http_only = True},Cookie {cookie_name = "logged_in", cookie_value = "no", cookie_expiry_time = 2034-08-24 16:15:36 UTC, cookie_domain = "github.com", cookie_path = "/", cookie_creation_time = 2014-08-24 16:15:37.815144 UTC, cookie_last_access_time = 2014-08-24 16:15:37.815144 UTC, cookie_persistent = True, cookie_host_only = False, cookie_secure_only = True, cookie_http_only = True}] [Cookie {cookie_name = "_gh_sess", cookie_value = "eyJzZXNzaW9uX2lkIjoiNjBlM2FiOTIxNTdhZTNhNDE5YWQ0ZTk4ZWQzNDRjMjEiLCJzcHlfcmVwbyI6ImZtYXAvaHR0cHMtZXZlcnl3aGVyZS1ydWxlcyIsInNweV9yZXBvX2F0IjoxNDA4ODk2OTM2LCJfY3NyZl90b2tlbiI6IktkbTlwN2JqNGptVmhrYjFIUm9BbkV0a1JTQXRDUXJid2g4VWo4N1g0Q1U9In0%3D--d378daa262b8c12bb82246d5de6b3adc353a3db7", cookie_expiry_time = 3013-12-25 00:00:00 UTC, cookie_domain = "github.com", cookie_path = "/", cookie_creation_time = 2014-08-24 16:15:37.815144 UTC, cookie_last_access_time = 2014-08-24 16:15:37.815144 UTC, cookie_persistent = False, cookie_host_only = True, cookie_secure_only = True, cookie_http_only = True},Cookie {cookie_name = "logged_in", cookie_value = "no", cookie_expiry_time = 2034-08-24 16:15:36 UTC, cookie_domain = "github.com", cookie_path = "/", cookie_creation_time = 2014-08-24 16:15:37.815144 UTC, cookie_last_access_time = 2014-08-24 16:15:37.815144 UTC, cookie_persistent = True, cookie_host_only = False, cookie_secure_only = True, cookie_http_only = True}]
``` ```

View File

@ -6,7 +6,7 @@ import "foldl" Control.Foldl (Fold(..))
import qualified "foldl" Control.Foldl as Foldl import qualified "foldl" Control.Foldl as Foldl
import "base" Control.Monad (forever) import "base" Control.Monad (forever)
import "base" Data.Bool (bool) import "base" Data.Bool (bool)
import "https-everywhere-rules" Data.HTTPSEverywhere.Rules (rewriteURL) import "https-everywhere-rules" Data.HTTPSEverywhere.Rules (rewriteURL, RuleSet, getRulesets)
import "base" Data.Monoid (Sum(..)) import "base" Data.Monoid (Sum(..))
import "network-uri" Network.URI (URI, parseURI) import "network-uri" Network.URI (URI, parseURI)
import "pipes" Pipes (Pipe, Producer, lift, yield, await, (>->)) import "pipes" Pipes (Pipe, Producer, lift, yield, await, (>->))
@ -16,10 +16,10 @@ import qualified "pipes" Pipes.Prelude as Pipes (fold)
parse :: Monad m => Pipe String URI m () parse :: Monad m => Pipe String URI m ()
parse = forever $ parseURI <$> await >>= maybe (return ()) yield parse = forever $ parseURI <$> await >>= maybe (return ()) yield
check :: Pipe URI Bool IO () check :: [RuleSet] -> Pipe URI Bool IO ()
check = forever $ do check rs = forever $ do
src <- await src <- await
tgt <- lift $ rewriteURL src tgt <- lift $ rewriteURL rs src
yield $ src /= tgt yield $ src /= tgt
fold :: Monad m => Fold a b -> Producer a m () -> m b fold :: Monad m => Fold a b -> Producer a m () -> m b
@ -29,4 +29,6 @@ proportion :: Fold Bool (Int, Int)
proportion = (,) <$> Foldl.foldMap (Sum . bool 0 1) getSum <*> Foldl.length proportion = (,) <$> Foldl.foldMap (Sum . bool 0 1) getSum <*> Foldl.length
main :: IO () main :: IO ()
main = fold proportion (stdinLn >-> parse >-> check) >>= print main = do
rulesets <- getRulesets
fold proportion (stdinLn >-> parse >-> check rulesets) >>= print

View File

@ -8,7 +8,7 @@ import "base" Data.Monoid ((<>))
import "bytestring" Data.ByteString.Char8 (pack, unpack) import "bytestring" Data.ByteString.Char8 (pack, unpack)
import "http-proxy" Network.HTTP.Proxy (runProxySettings, defaultProxySettings, Settings(..), Request(..)) import "http-proxy" Network.HTTP.Proxy (runProxySettings, defaultProxySettings, Settings(..), Request(..))
import "http-types" Network.HTTP.Types (status302) import "http-types" Network.HTTP.Types (status302)
import "https-everywhere-rules" Data.HTTPSEverywhere.Rules (rewriteURL) import "https-everywhere-rules" Data.HTTPSEverywhere.Rules (getRulesets, rewriteURL, RuleSet)
import "network-uri" Network.URI (URI, parseURI) import "network-uri" Network.URI (URI, parseURI)
import "wai" Network.Wai (Response, responseLBS) import "wai" Network.Wai (Response, responseLBS)
@ -18,12 +18,14 @@ import "wai" Network.Wai (Response, responseLBS)
httpRedirect :: URI -> Response httpRedirect :: URI -> Response
httpRedirect to = responseLBS status302 [("Location", pack . show $ to)] "" httpRedirect to = responseLBS status302 [("Location", pack . show $ to)] ""
tryHTTPS :: Request -> IO (Either Response Request) tryHTTPS :: [RuleSet] -> Request -> IO (Either Response Request)
tryHTTPS Request{..} = case parseURI . unpack $ requestPath <> queryString of tryHTTPS rs Request{..} = case parseURI . unpack $ requestPath <> queryString of
Just uri -> rewriteURL uri <&> \rewrite -> if rewrite == uri Just uri -> rewriteURL rs uri <&> \rewrite -> if rewrite == uri
then return Request{..} then return Request{..}
else Left $ httpRedirect rewrite else Left $ httpRedirect rewrite
Nothing -> return $ Right Request{..} Nothing -> return $ Right Request{..}
main :: IO () main :: IO ()
main = runProxySettings $ defaultProxySettings { proxyRequestModifier = tryHTTPS } main = do
rulesets <- getRulesets
runProxySettings $ defaultProxySettings { proxyRequestModifier = tryHTTPS rulesets }

View File

@ -1,4 +1,6 @@
module Data.HTTPSEverywhere.Rules ( module Data.HTTPSEverywhere.Rules (
RuleSet,
getRulesets,
rewriteURL, rewriteURL,
rewriteCookie rewriteCookie
) where ) where
@ -12,11 +14,11 @@ import Network.URI (URI)
import Pipes ((>->)) import Pipes ((>->))
import Pipes.Prelude (head, null) import Pipes.Prelude (head, null)
import Control.Monad (join) import Control.Monad (join)
import Data.HTTPSEverywhere.Rules.Internal (getRulesetsMatching, havingRulesThatTrigger, havingCookieRulesThatTrigger, setSecureFlag) import Data.HTTPSEverywhere.Rules.Internal (getRulesets, getRulesetsMatching, havingRulesThatTrigger, havingCookieRulesThatTrigger, setSecureFlag, RuleSet)
rewriteURL :: URI -> IO URI rewriteURL :: [RuleSet] -> URI -> IO URI
rewriteURL url = getRulesetsMatching url >-> havingRulesThatTrigger url & head <&> fromMaybe url . join rewriteURL rs url = getRulesetsMatching rs url >-> havingRulesThatTrigger url & head <&> fromMaybe url . join
rewriteCookie :: URI -> Cookie -> IO Cookie rewriteCookie :: [RuleSet] -> URI -> Cookie -> IO Cookie
rewriteCookie url cookie = null producer <&> setSecureFlag cookie `bool` cookie rewriteCookie rs url cookie = null producer <&> setSecureFlag cookie `bool` cookie
where producer = getRulesetsMatching url >-> havingCookieRulesThatTrigger cookie where producer = getRulesetsMatching rs url >-> havingCookieRulesThatTrigger cookie

View File

@ -1,10 +1,12 @@
{-# LANGUAGE CPP #-} {-# LANGUAGE CPP #-}
module Data.HTTPSEverywhere.Rules.Internal ( module Data.HTTPSEverywhere.Rules.Internal (
getRulesets,
getRulesetsMatching, getRulesetsMatching,
havingRulesThatTrigger, havingRulesThatTrigger,
havingCookieRulesThatTrigger, havingCookieRulesThatTrigger,
setSecureFlag, setSecureFlag,
RuleSet,
#ifdef TEST #ifdef TEST
hasTargetMatching, hasTargetMatching,
hasTriggeringRuleOn, hasTriggeringRuleOn,
@ -23,20 +25,23 @@ import Data.Maybe (isJust)
import Network.HTTP.Client (Cookie(..)) import Network.HTTP.Client (Cookie(..))
import Network.URI (URI) import Network.URI (URI)
import Pipes (Pipe, Producer, for, each, await, yield, lift, (>->)) import Pipes (Pipe, Producer, for, each, await, yield, lift, (>->))
import Pipes.Prelude (filter) import Pipes.Prelude (filter, toListM)
import Data.HTTPSEverywhere.Rules.Internal.Parser (parseRuleSets) import Data.HTTPSEverywhere.Rules.Internal.Parser (parseRuleSets)
import qualified Data.HTTPSEverywhere.Rules.Internal.Raw as Raw (getRule, getRules) import qualified Data.HTTPSEverywhere.Rules.Internal.Raw as Raw (getRule, getRules)
import Data.HTTPSEverywhere.Rules.Internal.Types (RuleSet(..), Target(..), Exclusion(..), Rule(..), CookieRule(..)) import Data.HTTPSEverywhere.Rules.Internal.Types (RuleSet(..), Target(..), Exclusion(..), Rule(..), CookieRule(..))
getRulesets :: Producer RuleSet IO () getRulesets' :: Producer RuleSet IO ()
getRulesets = lift Raw.getRules getRulesets' = lift Raw.getRules
>>= flip (for . each) (flip (for . each) yield <=< lift . (parseRuleSets <$$> Raw.getRule)) >>= flip (for . each) (flip (for . each) yield <=< lift . (parseRuleSets <$$> Raw.getRule))
getRulesetsMatching :: URI -> Producer RuleSet IO () getRulesets :: IO [RuleSet]
getRulesetsMatching url = getRulesets getRulesets = toListM getRulesets'
>-> filter (flip hasTargetMatching url)
>-> filter (not . flip hasExclusionMatching url) getRulesetsMatching :: [RuleSet] -> URI -> Producer RuleSet IO ()
getRulesetsMatching rs url = each rs
>-> filter (flip hasTargetMatching url)
>-> filter (not . flip hasExclusionMatching url)
havingRulesThatTrigger :: URI -> Pipe RuleSet (Maybe URI) IO () havingRulesThatTrigger :: URI -> Pipe RuleSet (Maybe URI) IO ()
havingRulesThatTrigger url = flip hasTriggeringRuleOn url <$> await havingRulesThatTrigger url = flip hasTriggeringRuleOn url <$> await