From 4708e7fc8cb69c39c8559a1d8fb321b3e404d899 Mon Sep 17 00:00:00 2001 From: vi Date: Tue, 5 Dec 2017 20:54:40 +0100 Subject: [PATCH] Avoid the regular expression engine in parsing rule targets. I read in the bible (https://www.eff.org/https-everywhere/rulesets) that: """ To cover all of a domain's subdomains, you may want to specify a wildcard target like *.twitter.com. Specifying this type of left-side wildcard matches any host name with .twitter.com as a suffix, e.g. www.twitter.com or urls.api.twitter.com. You can also specify a right-side wildcard like www.google.*. Right-side wildcards, unlike left-side wildcards, apply only one level deep. So if you want to cover all countries you'll generally need to specify www.google.*, www.google.co.*, and www.google.com.* to cover domains like www.google.co.uk or www.google.com.au. """ The previous interpretation is both incorrect (because right wildcards only apply one level deep) and potentially expensive (regular expression matching is exponential in the worst-case.) --- https-everywhere-rules.cabal | 2 ++ .../HTTPSEverywhere/Rules/Internal/Parser.hs | 32 ++++++++++++------- .../Rules/Internal/ParserSpec.hs | 11 +++++-- 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/https-everywhere-rules.cabal b/https-everywhere-rules.cabal index f0cabd1..586e22e 100644 --- a/https-everywhere-rules.cabal +++ b/https-everywhere-rules.cabal @@ -46,6 +46,7 @@ library functor-infix >= 0.0 && < 0.1, http-client >= 0.3 && < 0.6, lens >= 4.3 && < 4.16, + mtl >= 2.2 && < 2.3, network >= 2.6 && < 2.7, network-uri >= 2.6 && < 2.7, pipes >= 4.1 && < 4.4, @@ -73,6 +74,7 @@ test-suite spec hspec >= 2.0 && < 2.5, http-client >= 0.3 && < 0.6, lens >= 4.3 && < 4.16, + mtl >= 2.2 && < 2.3, network >= 2.6 && < 2.7, network-uri >= 2.6 && < 2.7, pipes >= 4.1 && < 4.4, diff --git a/src/Data/HTTPSEverywhere/Rules/Internal/Parser.hs b/src/Data/HTTPSEverywhere/Rules/Internal/Parser.hs index 225a3ce..f2fb1c6 100644 --- a/src/Data/HTTPSEverywhere/Rules/Internal/Parser.hs +++ b/src/Data/HTTPSEverywhere/Rules/Internal/Parser.hs @@ -1,6 +1,7 @@ {-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE LambdaCase #-} module Data.HTTPSEverywhere.Rules.Internal.Parser ( parseRuleSets, @@ -9,15 +10,18 @@ module Data.HTTPSEverywhere.Rules.Internal.Parser ( #endif ) where -import Prelude hiding (head, last, tail, init, length) +import Prelude hiding (take, drop) import Control.Applicative ((<$>)) import Control.Lens (toListOf, only, to, (^..), (^.), (&), (<&>), _Just) import Control.Monad (join) +import Control.Monad.State (State, evalState, modify', gets) +import Data.Bifunctor (bimap) +import Data.Bool (bool) import Data.Functor.Infix ((<$$>)) -import Data.Maybe (catMaybes, fromJust, fromMaybe) +import Data.Maybe (catMaybes) import Data.String.Conversions (cs) import qualified Data.Text as Strict (Text) -import Data.Text (append, head, last, tail, init, replace, length) +import Data.Text (dropEnd, dropWhileEnd, drop, isSuffixOf, takeEnd, take) import qualified Data.Text.Lazy as Lazy (Text) import Network.HTTP.Client (Cookie(..)) import Network.URI (URI(uriAuthority), URIAuth(uriRegName), parseURI) @@ -37,15 +41,21 @@ parseRuleSet xml = xml ^. attr "name" <&> \ruleSetName -> do ruleSetCookieRules = xml ^.. allNamed (only "securecookie") . to parseCookieRule & catMaybes RuleSet ruleSetName ruleSetTargets ruleSetRules ruleSetExclusions ruleSetCookieRules +parseTarget' :: State (Strict.Text, Strict.Text) Bool +parseTarget' = do + gets ((== ".*") . takeEnd 2 . fst) >>= bool (return ()) + (modify' (bimap (dropEnd 2) (dropEnd 1 . dropWhileEnd (/= '.')))) + gets (take 2 . fst) >>= \case + "*." -> do + suffix <- gets (drop 1 . fst) -- Don't drop the dot, we expect some prefix. + isSuffixOf suffix <$> gets snd + _ -> gets (uncurry (==)) + parseTarget :: Strict.Text -> Target -parseTarget = Target . checkRegName . fromJust . match . fromWildcard . replace "." "\\." - where fromWildcard str - | length str == 0 = str - | head str == '*' = flip append ".*" $ tail str - | last str == '*' = append ".*" $ init str - | otherwise = str - checkRegName predicate = fromMaybe False . (predicate <$$> getRegName) - getRegName = cs . uriRegName <$$> uriAuthority +parseTarget target = Target $ \uri -> do + case uriRegName <$> uriAuthority uri of + Just domain -> evalState parseTarget' (target, cs domain) + Nothing -> False parseRule :: Element -> Maybe Rule parseRule element = do diff --git a/test/Data/HTTPSEverywhere/Rules/Internal/ParserSpec.hs b/test/Data/HTTPSEverywhere/Rules/Internal/ParserSpec.hs index 1798715..ac48b41 100644 --- a/test/Data/HTTPSEverywhere/Rules/Internal/ParserSpec.hs +++ b/test/Data/HTTPSEverywhere/Rules/Internal/ParserSpec.hs @@ -22,11 +22,16 @@ spec = do it "*.facebook.com should match s-static.ak.facebook.com" $ do let url = unsafeParseURL "http://s-static.ak.facebook.com" checkTarget "*.facebook.com" url `shouldBe` True - it "google.* should match google.com.id" $ do - let url = unsafeParseURL "http://google.com.id" + it "google.* should match google.nl" $ do + let url = unsafeParseURL "http://google.nl" checkTarget "google.*" url `shouldBe` True - it "*.google.* should not match google.com.id" $ do + -- Right-side wildcards apply only one-level deep. + it "google.* should not match google.com.id" $ do let url = unsafeParseURL "http://google.com.id" + checkTarget "google.*" url `shouldBe` False + -- Wildcards expect there exist some prefix. + it "*.google.* should not match google.nl" $ do + let url = unsafeParseURL "http://google.nl" checkTarget "*.google.*" url `shouldBe` False describe "parseRuleSets" $ do let rulesets = parseRuleSets fixture