From f78421e09f8a6b3c636d3d321294cc0056b3a72c Mon Sep 17 00:00:00 2001 From: vi Date: Sun, 10 Aug 2014 04:23:41 +0800 Subject: [PATCH] Incomplete parser for HTTPS Everywhere rulesets. --- .gitignore | 1 + DEBT.md | 2 + LICENSE | 20 +++++++ README.md | 8 +++ Setup.hs | 2 + https-everywhere-rules.cabal | 51 +++++++++++++++++ src/Data/HTTPSEverywhere/Rules.hs | 15 +++++ src/Data/HTTPSEverywhere/Rules/Internal.hs | 15 +++++ .../HTTPSEverywhere/Rules/Internal/Parser.hs | 55 ++++++++++++++++++ .../HTTPSEverywhere/Rules/Internal/Types.hs | 23 ++++++++ src/Data/Text/ICU/Extras.hs | 56 +++++++++++++++++++ 11 files changed, 248 insertions(+) create mode 100644 .gitignore create mode 100644 DEBT.md create mode 100644 LICENSE create mode 100644 README.md create mode 100644 Setup.hs create mode 100644 https-everywhere-rules.cabal create mode 100644 src/Data/HTTPSEverywhere/Rules.hs create mode 100644 src/Data/HTTPSEverywhere/Rules/Internal.hs create mode 100644 src/Data/HTTPSEverywhere/Rules/Internal/Parser.hs create mode 100644 src/Data/HTTPSEverywhere/Rules/Internal/Types.hs create mode 100644 src/Data/Text/ICU/Extras.hs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..849ddff --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +dist/ diff --git a/DEBT.md b/DEBT.md new file mode 100644 index 0000000..213d847 --- /dev/null +++ b/DEBT.md @@ -0,0 +1,2 @@ +* HTTPSEverywhere rewrites are expressed using Javascript regular expressions, + our parser uses ICU. What are the differences between these two languages? diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..8daf44f --- /dev/null +++ b/LICENSE @@ -0,0 +1,20 @@ +Copyright (c) 2014 vi + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..0fe0f4f --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +[https-everywhere-rules](https://github.com/fmap/https-everywhere-rules) +======================================================================== + +![](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTqn7bFPbRaxk-XU6PWLnbTvo7MtjGMFQw4RA4SZ0u23vzUS7AD5Q) + +Haskell package providing high-level access to [HTTPS Everywhere][1] rulesets. + + [1]: https://www.eff.org/https-everywhere diff --git a/Setup.hs b/Setup.hs new file mode 100644 index 0000000..9a994af --- /dev/null +++ b/Setup.hs @@ -0,0 +1,2 @@ +import Distribution.Simple +main = defaultMain diff --git a/https-everywhere-rules.cabal b/https-everywhere-rules.cabal new file mode 100644 index 0000000..22a4184 --- /dev/null +++ b/https-everywhere-rules.cabal @@ -0,0 +1,51 @@ +name: + https-everywhere-rules +version: + 0.0.1 +synopsis: + High-level access to HTTPS Everywhere rulesets. +homepage: + https://github.com/fmap/https-everywhere-rules +license: + MIT +license-file: + LICENSE +author: + vi +maintainer: + vi@zalora.com +category: + Data +build-type: + Simple +extra-source-files: + README.md +cabal-version: + >=1.10 + +library + exposed-modules: + Data.HTTPSEverywhere.Rules + other-modules: + Data.HTTPSEverywhere.Rules.Internal, + Data.HTTPSEverywhere.Rules.Internal.Parser, + Data.HTTPSEverywhere.Rules.Internal.Types + build-depends: + base >= 4.7 && < 4.8, + attoparsec >= 0.12 && < 0.13, + http-client >= 0.3 && < 0.4, + https-everywhere-rules-raw >= 4.0 && < 4.1, + text >= 1.1 && < 1.2, + pipes >= 4.1 && < 4.2, + errors >= 1.4 && < 1.5, + lens >= 4.3 && < 4.4, + functor-infix >= 0.0 && < 0.1, + string-conversions >= 0.3 && < 0.4, + text-icu >= 0.6 && < 0.7, + taggy-lens >= 0.1 && < 0.2 + hs-source-dirs: + src + default-language: + Haskell2010 + ghc-options: + -Wall diff --git a/src/Data/HTTPSEverywhere/Rules.hs b/src/Data/HTTPSEverywhere/Rules.hs new file mode 100644 index 0000000..c57fa2e --- /dev/null +++ b/src/Data/HTTPSEverywhere/Rules.hs @@ -0,0 +1,15 @@ +module Data.HTTPSEverywhere.Rules ( + rewriteURL, + rewriteCookie +) where + +import Data.Text (Text) +import Network.HTTP.Client (Cookie) + +import Data.HTTPSEverywhere.Rules.Internal () + +rewriteURL :: Text -> IO Text +rewriteURL = undefined + +rewriteCookie :: Cookie -> IO Cookie +rewriteCookie = undefined diff --git a/src/Data/HTTPSEverywhere/Rules/Internal.hs b/src/Data/HTTPSEverywhere/Rules/Internal.hs new file mode 100644 index 0000000..c1a5bf4 --- /dev/null +++ b/src/Data/HTTPSEverywhere/Rules/Internal.hs @@ -0,0 +1,15 @@ +module Data.HTTPSEverywhere.Rules.Internal ( + getRulesets +) where + +import Prelude hiding (readFile) +import Control.Monad ((<=<)) +import Data.Functor.Infix ((<$$>)) +import Data.HTTPSEverywhere.Rules.Raw (getRule, getRules) +import Pipes (Producer, for, each, yield, lift) + +import Data.HTTPSEverywhere.Rules.Internal.Parser (parseRuleSets) +import Data.HTTPSEverywhere.Rules.Internal.Types (RuleSet) + +getRulesets :: Producer RuleSet IO () +getRulesets = lift getRules >>= flip (for . each) (flip (for . each) yield <=< lift . (parseRuleSets <$$> getRule)) diff --git a/src/Data/HTTPSEverywhere/Rules/Internal/Parser.hs b/src/Data/HTTPSEverywhere/Rules/Internal/Parser.hs new file mode 100644 index 0000000..e4d8cb6 --- /dev/null +++ b/src/Data/HTTPSEverywhere/Rules/Internal/Parser.hs @@ -0,0 +1,55 @@ +{-# LANGUAGE MultiWayIf #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} + +module Data.HTTPSEverywhere.Rules.Internal.Parser ( + parseRuleSets +) where + +import Prelude hiding (head, last, tail, init) +import Control.Lens (toListOf, only, to, (^..), (^.), (&), (<&>), _Just) +import Data.Functor.Infix ((<$>),(<$$>)) +import Data.Maybe (catMaybes, fromJust) +import Data.String.Conversions (cs) +import qualified Data.Text as Strict (Text) +import Data.Text (append, head, last, tail, init) +import qualified Data.Text.Lazy as Lazy (Text) +import Network.HTTP.Client (Cookie(..)) +import Text.Taggy.Lens (html, allNamed, attr, Element) + +import Data.HTTPSEverywhere.Rules.Internal.Types (RuleSet(..), Target(..), Rule(..), Exclusion(..), CookieRule(..)) +import Data.Text.ICU.Extras (match, findAndReplace) + +parseRuleSets :: Lazy.Text -> [RuleSet] +parseRuleSets = catMaybes <$$> toListOf $ html . allNamed (only "ruleset") . to parseRuleSet + +parseRuleSet :: Element -> Maybe RuleSet +parseRuleSet xml = xml ^. attr "name" <&> \ruleSetName -> do + let ruleSetTargets = xml ^.. allNamed (only "target") . attr "name" . _Just . to parseTarget + ruleSetRules = xml ^.. allNamed (only "rule") . to parseRule & catMaybes + ruleSetExclusions = xml ^.. allNamed (only "exclusion") . attr "pattern" . _Just . to parseExclusion & catMaybes + ruleSetCookieRules = xml ^.. allNamed (only "securecookie") . to parseCookieRule & catMaybes + RuleSet ruleSetName ruleSetTargets ruleSetRules ruleSetExclusions ruleSetCookieRules + +parseTarget :: Strict.Text -> Target +parseTarget = Target . fromJust . match . fromWildcard + where fromWildcard str = if + | head str == '*' -> prepend ".*" $ tail str + | last str == '*' -> append ".*" $ init str + | otherwise -> str + prepend = flip append + +parseRule :: Element -> Maybe Rule +parseRule element = do + pattern <- element ^. attr "from" + replace <- element ^. attr "to" + findAndReplace pattern replace <&> Rule + +parseExclusion :: Strict.Text -> Maybe Exclusion +parseExclusion = Exclusion <$$> match + +parseCookieRule :: Element -> Maybe CookieRule +parseCookieRule element = CookieRule <$> do + hostMatches <- element ^. attr "host" . to (>>= match) + nameMatches <- element ^. attr "name" . to (>>= match) + return $ \Cookie{..} -> hostMatches (cs cookie_name) && nameMatches (cs cookie_domain) diff --git a/src/Data/HTTPSEverywhere/Rules/Internal/Types.hs b/src/Data/HTTPSEverywhere/Rules/Internal/Types.hs new file mode 100644 index 0000000..0c0b67a --- /dev/null +++ b/src/Data/HTTPSEverywhere/Rules/Internal/Types.hs @@ -0,0 +1,23 @@ +module Data.HTTPSEverywhere.Rules.Internal.Types ( + RuleSet(..), + Target(..), + Rule(..), + Exclusion(..), + CookieRule(..) +) where + +import Data.Text (Text) +import Network.HTTP.Client (Cookie) + +newtype Rule = Rule { getRule :: Text -> Maybe Text } +newtype Target = Target { getTarget :: Text -> Bool } +newtype Exclusion = Exclusion { getExclusion :: Text -> Bool } +newtype CookieRule = CookieRule { getCookieRule :: Cookie -> Bool } + +data RuleSet = RuleSet + { ruleSetName :: Text + , ruleSetTargets :: [Target] + , ruleSetRules :: [Rule] + , ruleSetExclusions :: [Exclusion] + , ruleSetCookieRules :: [CookieRule] + } diff --git a/src/Data/Text/ICU/Extras.hs b/src/Data/Text/ICU/Extras.hs new file mode 100644 index 0000000..42ab400 --- /dev/null +++ b/src/Data/Text/ICU/Extras.hs @@ -0,0 +1,56 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE OverloadedStrings #-} +{-# OPTIONS_GHC -fno-warn-name-shadowing #-} + +module Data.Text.ICU.Extras ( + match, + findAndReplace +) where + +import Control.Applicative ((<$>), (*>), (<*>), (<|>)) +import Control.Monad ((<=<)) +import Control.Error (hush) +import Data.Attoparsec.Text (Parser, char, digit, takeWhile1, string, many', parseOnly) +import Data.Functor.Infix ((<$$>), (<&>)) +import Data.Maybe (isJust, fromJust) +import Data.Monoid (Monoid(mappend,mconcat)) +import Data.Text (Text) +import Data.Text.ICU (regex', find, group, Match) + +match :: Text -> Maybe (Text -> Bool) +match = isJust <$$> find <$$> hush . regex' [] + +findAndReplace :: Text -> Text -> Maybe (Text -> Maybe Text) +findAndReplace pattern replacement = do + findAnd <- find <$$> hush $ regex' [] pattern + replace <- flip runReplacement <$> parseReplacement replacement + return $ replace <=< findAnd + +type Replacement = [Segment] + +data Segment = Reference Int | Literal Text deriving (Show) + +parseReference :: Parser Segment +parseReference = char '$' *> digit <&> Reference . read . return + +parseLiteral :: Parser Segment +parseLiteral = Literal <$> takeWhile1 (/= '$') + +parseLiteralWithPrecedingDollar :: Parser Segment +parseLiteralWithPrecedingDollar = Literal <$$> mappend <$> string "$" <*> takeWhile1 (/= '$') + +parseSegment :: Parser Segment +parseSegment = parseLiteral <|> parseReference <|> parseLiteralWithPrecedingDollar + +parseReplacement :: Text -> Maybe Replacement +parseReplacement = hush . parseOnly (many' parseSegment) + +runReplacement :: Match -> Replacement -> Maybe Text +runReplacement (flip group -> group) = mconcat <$$> invert . map dereference + where dereference = \case + Reference n -> group n + Literal str -> Just str + invert xs = if all isJust xs + then Just $ fromJust <$> xs + else Nothing