Avoid the regular expression engine in parsing rule targets.

I read in the bible (https://www.eff.org/https-everywhere/rulesets) that:

"""
To cover all of a domain's subdomains, you may want to specify a
wildcard target like *.twitter.com. Specifying this type of left-side
wildcard matches any host name with .twitter.com as a suffix, e.g.
www.twitter.com or urls.api.twitter.com. You can also specify a
right-side wildcard like www.google.*. Right-side wildcards, unlike
left-side wildcards, apply only one level deep. So if you want to
cover all countries you'll generally need to specify www.google.*,
www.google.co.*, and www.google.com.* to cover domains like
www.google.co.uk or www.google.com.au.
"""

The previous interpretation is both incorrect (because right wildcards
only apply one level deep) and potentially expensive (regular
expression matching is exponential in the worst-case.)
master
vi 2017-12-05 20:54:40 +01:00
parent bb8b765ba0
commit 4708e7fc8c
3 changed files with 31 additions and 14 deletions

View File

@ -46,6 +46,7 @@ library
functor-infix >= 0.0 && < 0.1, functor-infix >= 0.0 && < 0.1,
http-client >= 0.3 && < 0.6, http-client >= 0.3 && < 0.6,
lens >= 4.3 && < 4.16, lens >= 4.3 && < 4.16,
mtl >= 2.2 && < 2.3,
network >= 2.6 && < 2.7, network >= 2.6 && < 2.7,
network-uri >= 2.6 && < 2.7, network-uri >= 2.6 && < 2.7,
pipes >= 4.1 && < 4.4, pipes >= 4.1 && < 4.4,
@ -73,6 +74,7 @@ test-suite spec
hspec >= 2.0 && < 2.5, hspec >= 2.0 && < 2.5,
http-client >= 0.3 && < 0.6, http-client >= 0.3 && < 0.6,
lens >= 4.3 && < 4.16, lens >= 4.3 && < 4.16,
mtl >= 2.2 && < 2.3,
network >= 2.6 && < 2.7, network >= 2.6 && < 2.7,
network-uri >= 2.6 && < 2.7, network-uri >= 2.6 && < 2.7,
pipes >= 4.1 && < 4.4, pipes >= 4.1 && < 4.4,

View File

@ -1,6 +1,7 @@
{-# LANGUAGE CPP #-} {-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-} {-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE LambdaCase #-}
module Data.HTTPSEverywhere.Rules.Internal.Parser ( module Data.HTTPSEverywhere.Rules.Internal.Parser (
parseRuleSets, parseRuleSets,
@ -9,15 +10,18 @@ module Data.HTTPSEverywhere.Rules.Internal.Parser (
#endif #endif
) where ) where
import Prelude hiding (head, last, tail, init, length) import Prelude hiding (take, drop)
import Control.Applicative ((<$>)) import Control.Applicative ((<$>))
import Control.Lens (toListOf, only, to, (^..), (^.), (&), (<&>), _Just) import Control.Lens (toListOf, only, to, (^..), (^.), (&), (<&>), _Just)
import Control.Monad (join) import Control.Monad (join)
import Control.Monad.State (State, evalState, modify', gets)
import Data.Bifunctor (bimap)
import Data.Bool (bool)
import Data.Functor.Infix ((<$$>)) import Data.Functor.Infix ((<$$>))
import Data.Maybe (catMaybes, fromJust, fromMaybe) import Data.Maybe (catMaybes)
import Data.String.Conversions (cs) import Data.String.Conversions (cs)
import qualified Data.Text as Strict (Text) import qualified Data.Text as Strict (Text)
import Data.Text (append, head, last, tail, init, replace, length) import Data.Text (dropEnd, dropWhileEnd, drop, isSuffixOf, takeEnd, take)
import qualified Data.Text.Lazy as Lazy (Text) import qualified Data.Text.Lazy as Lazy (Text)
import Network.HTTP.Client (Cookie(..)) import Network.HTTP.Client (Cookie(..))
import Network.URI (URI(uriAuthority), URIAuth(uriRegName), parseURI) import Network.URI (URI(uriAuthority), URIAuth(uriRegName), parseURI)
@ -37,15 +41,21 @@ parseRuleSet xml = xml ^. attr "name" <&> \ruleSetName -> do
ruleSetCookieRules = xml ^.. allNamed (only "securecookie") . to parseCookieRule & catMaybes ruleSetCookieRules = xml ^.. allNamed (only "securecookie") . to parseCookieRule & catMaybes
RuleSet ruleSetName ruleSetTargets ruleSetRules ruleSetExclusions ruleSetCookieRules RuleSet ruleSetName ruleSetTargets ruleSetRules ruleSetExclusions ruleSetCookieRules
parseTarget' :: State (Strict.Text, Strict.Text) Bool
parseTarget' = do
gets ((== ".*") . takeEnd 2 . fst) >>= bool (return ())
(modify' (bimap (dropEnd 2) (dropEnd 1 . dropWhileEnd (/= '.'))))
gets (take 2 . fst) >>= \case
"*." -> do
suffix <- gets (drop 1 . fst) -- Don't drop the dot, we expect some prefix.
isSuffixOf suffix <$> gets snd
_ -> gets (uncurry (==))
parseTarget :: Strict.Text -> Target parseTarget :: Strict.Text -> Target
parseTarget = Target . checkRegName . fromJust . match . fromWildcard . replace "." "\\." parseTarget target = Target $ \uri -> do
where fromWildcard str case uriRegName <$> uriAuthority uri of
| length str == 0 = str Just domain -> evalState parseTarget' (target, cs domain)
| head str == '*' = flip append ".*" $ tail str Nothing -> False
| last str == '*' = append ".*" $ init str
| otherwise = str
checkRegName predicate = fromMaybe False . (predicate <$$> getRegName)
getRegName = cs . uriRegName <$$> uriAuthority
parseRule :: Element -> Maybe Rule parseRule :: Element -> Maybe Rule
parseRule element = do parseRule element = do

View File

@ -22,11 +22,16 @@ spec = do
it "*.facebook.com should match s-static.ak.facebook.com" $ do it "*.facebook.com should match s-static.ak.facebook.com" $ do
let url = unsafeParseURL "http://s-static.ak.facebook.com" let url = unsafeParseURL "http://s-static.ak.facebook.com"
checkTarget "*.facebook.com" url `shouldBe` True checkTarget "*.facebook.com" url `shouldBe` True
it "google.* should match google.com.id" $ do it "google.* should match google.nl" $ do
let url = unsafeParseURL "http://google.com.id" let url = unsafeParseURL "http://google.nl"
checkTarget "google.*" url `shouldBe` True checkTarget "google.*" url `shouldBe` True
it "*.google.* should not match google.com.id" $ do -- Right-side wildcards apply only one-level deep.
it "google.* should not match google.com.id" $ do
let url = unsafeParseURL "http://google.com.id" let url = unsafeParseURL "http://google.com.id"
checkTarget "google.*" url `shouldBe` False
-- Wildcards expect there exist some prefix.
it "*.google.* should not match google.nl" $ do
let url = unsafeParseURL "http://google.nl"
checkTarget "*.google.*" url `shouldBe` False checkTarget "*.google.*" url `shouldBe` False
describe "parseRuleSets" $ do describe "parseRuleSets" $ do
let rulesets = parseRuleSets fixture let rulesets = parseRuleSets fixture