Incomplete parser for HTTPS Everywhere rulesets.

master
vi 2014-08-10 04:23:41 +08:00
commit f78421e09f
11 changed files with 248 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
dist/

2
DEBT.md Normal file
View File

@ -0,0 +1,2 @@
* HTTPSEverywhere rewrites are expressed using Javascript regular expressions,
our parser uses ICU. What are the differences between these two languages?

20
LICENSE Normal file
View File

@ -0,0 +1,20 @@
Copyright (c) 2014 vi
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

8
README.md Normal file
View File

@ -0,0 +1,8 @@
[https-everywhere-rules](https://github.com/fmap/https-everywhere-rules)
========================================================================
![](https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTqn7bFPbRaxk-XU6PWLnbTvo7MtjGMFQw4RA4SZ0u23vzUS7AD5Q)
Haskell package providing high-level access to [HTTPS Everywhere][1] rulesets.
[1]: https://www.eff.org/https-everywhere

2
Setup.hs Normal file
View File

@ -0,0 +1,2 @@
import Distribution.Simple
main = defaultMain

View File

@ -0,0 +1,51 @@
name:
https-everywhere-rules
version:
0.0.1
synopsis:
High-level access to HTTPS Everywhere rulesets.
homepage:
https://github.com/fmap/https-everywhere-rules
license:
MIT
license-file:
LICENSE
author:
vi
maintainer:
vi@zalora.com
category:
Data
build-type:
Simple
extra-source-files:
README.md
cabal-version:
>=1.10
library
exposed-modules:
Data.HTTPSEverywhere.Rules
other-modules:
Data.HTTPSEverywhere.Rules.Internal,
Data.HTTPSEverywhere.Rules.Internal.Parser,
Data.HTTPSEverywhere.Rules.Internal.Types
build-depends:
base >= 4.7 && < 4.8,
attoparsec >= 0.12 && < 0.13,
http-client >= 0.3 && < 0.4,
https-everywhere-rules-raw >= 4.0 && < 4.1,
text >= 1.1 && < 1.2,
pipes >= 4.1 && < 4.2,
errors >= 1.4 && < 1.5,
lens >= 4.3 && < 4.4,
functor-infix >= 0.0 && < 0.1,
string-conversions >= 0.3 && < 0.4,
text-icu >= 0.6 && < 0.7,
taggy-lens >= 0.1 && < 0.2
hs-source-dirs:
src
default-language:
Haskell2010
ghc-options:
-Wall

View File

@ -0,0 +1,15 @@
module Data.HTTPSEverywhere.Rules (
rewriteURL,
rewriteCookie
) where
import Data.Text (Text)
import Network.HTTP.Client (Cookie)
import Data.HTTPSEverywhere.Rules.Internal ()
rewriteURL :: Text -> IO Text
rewriteURL = undefined
rewriteCookie :: Cookie -> IO Cookie
rewriteCookie = undefined

View File

@ -0,0 +1,15 @@
module Data.HTTPSEverywhere.Rules.Internal (
getRulesets
) where
import Prelude hiding (readFile)
import Control.Monad ((<=<))
import Data.Functor.Infix ((<$$>))
import Data.HTTPSEverywhere.Rules.Raw (getRule, getRules)
import Pipes (Producer, for, each, yield, lift)
import Data.HTTPSEverywhere.Rules.Internal.Parser (parseRuleSets)
import Data.HTTPSEverywhere.Rules.Internal.Types (RuleSet)
getRulesets :: Producer RuleSet IO ()
getRulesets = lift getRules >>= flip (for . each) (flip (for . each) yield <=< lift . (parseRuleSets <$$> getRule))

View File

@ -0,0 +1,55 @@
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module Data.HTTPSEverywhere.Rules.Internal.Parser (
parseRuleSets
) where
import Prelude hiding (head, last, tail, init)
import Control.Lens (toListOf, only, to, (^..), (^.), (&), (<&>), _Just)
import Data.Functor.Infix ((<$>),(<$$>))
import Data.Maybe (catMaybes, fromJust)
import Data.String.Conversions (cs)
import qualified Data.Text as Strict (Text)
import Data.Text (append, head, last, tail, init)
import qualified Data.Text.Lazy as Lazy (Text)
import Network.HTTP.Client (Cookie(..))
import Text.Taggy.Lens (html, allNamed, attr, Element)
import Data.HTTPSEverywhere.Rules.Internal.Types (RuleSet(..), Target(..), Rule(..), Exclusion(..), CookieRule(..))
import Data.Text.ICU.Extras (match, findAndReplace)
parseRuleSets :: Lazy.Text -> [RuleSet]
parseRuleSets = catMaybes <$$> toListOf $ html . allNamed (only "ruleset") . to parseRuleSet
parseRuleSet :: Element -> Maybe RuleSet
parseRuleSet xml = xml ^. attr "name" <&> \ruleSetName -> do
let ruleSetTargets = xml ^.. allNamed (only "target") . attr "name" . _Just . to parseTarget
ruleSetRules = xml ^.. allNamed (only "rule") . to parseRule & catMaybes
ruleSetExclusions = xml ^.. allNamed (only "exclusion") . attr "pattern" . _Just . to parseExclusion & catMaybes
ruleSetCookieRules = xml ^.. allNamed (only "securecookie") . to parseCookieRule & catMaybes
RuleSet ruleSetName ruleSetTargets ruleSetRules ruleSetExclusions ruleSetCookieRules
parseTarget :: Strict.Text -> Target
parseTarget = Target . fromJust . match . fromWildcard
where fromWildcard str = if
| head str == '*' -> prepend ".*" $ tail str
| last str == '*' -> append ".*" $ init str
| otherwise -> str
prepend = flip append
parseRule :: Element -> Maybe Rule
parseRule element = do
pattern <- element ^. attr "from"
replace <- element ^. attr "to"
findAndReplace pattern replace <&> Rule
parseExclusion :: Strict.Text -> Maybe Exclusion
parseExclusion = Exclusion <$$> match
parseCookieRule :: Element -> Maybe CookieRule
parseCookieRule element = CookieRule <$> do
hostMatches <- element ^. attr "host" . to (>>= match)
nameMatches <- element ^. attr "name" . to (>>= match)
return $ \Cookie{..} -> hostMatches (cs cookie_name) && nameMatches (cs cookie_domain)

View File

@ -0,0 +1,23 @@
module Data.HTTPSEverywhere.Rules.Internal.Types (
RuleSet(..),
Target(..),
Rule(..),
Exclusion(..),
CookieRule(..)
) where
import Data.Text (Text)
import Network.HTTP.Client (Cookie)
newtype Rule = Rule { getRule :: Text -> Maybe Text }
newtype Target = Target { getTarget :: Text -> Bool }
newtype Exclusion = Exclusion { getExclusion :: Text -> Bool }
newtype CookieRule = CookieRule { getCookieRule :: Cookie -> Bool }
data RuleSet = RuleSet
{ ruleSetName :: Text
, ruleSetTargets :: [Target]
, ruleSetRules :: [Rule]
, ruleSetExclusions :: [Exclusion]
, ruleSetCookieRules :: [CookieRule]
}

View File

@ -0,0 +1,56 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ViewPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
module Data.Text.ICU.Extras (
match,
findAndReplace
) where
import Control.Applicative ((<$>), (*>), (<*>), (<|>))
import Control.Monad ((<=<))
import Control.Error (hush)
import Data.Attoparsec.Text (Parser, char, digit, takeWhile1, string, many', parseOnly)
import Data.Functor.Infix ((<$$>), (<&>))
import Data.Maybe (isJust, fromJust)
import Data.Monoid (Monoid(mappend,mconcat))
import Data.Text (Text)
import Data.Text.ICU (regex', find, group, Match)
match :: Text -> Maybe (Text -> Bool)
match = isJust <$$> find <$$> hush . regex' []
findAndReplace :: Text -> Text -> Maybe (Text -> Maybe Text)
findAndReplace pattern replacement = do
findAnd <- find <$$> hush $ regex' [] pattern
replace <- flip runReplacement <$> parseReplacement replacement
return $ replace <=< findAnd
type Replacement = [Segment]
data Segment = Reference Int | Literal Text deriving (Show)
parseReference :: Parser Segment
parseReference = char '$' *> digit <&> Reference . read . return
parseLiteral :: Parser Segment
parseLiteral = Literal <$> takeWhile1 (/= '$')
parseLiteralWithPrecedingDollar :: Parser Segment
parseLiteralWithPrecedingDollar = Literal <$$> mappend <$> string "$" <*> takeWhile1 (/= '$')
parseSegment :: Parser Segment
parseSegment = parseLiteral <|> parseReference <|> parseLiteralWithPrecedingDollar
parseReplacement :: Text -> Maybe Replacement
parseReplacement = hush . parseOnly (many' parseSegment)
runReplacement :: Match -> Replacement -> Maybe Text
runReplacement (flip group -> group) = mconcat <$$> invert . map dereference
where dereference = \case
Reference n -> group n
Literal str -> Just str
invert xs = if all isJust xs
then Just $ fromJust <$> xs
else Nothing