Refactored ICU extras.
parent
81ec883617
commit
9fcf5bc289
|
@ -16,28 +16,32 @@ import Prelude hiding (span)
|
||||||
import Control.Applicative ((<$>), (<*>), (*>), (<|>))
|
import Control.Applicative ((<$>), (<*>), (*>), (<|>))
|
||||||
import Control.Error (hush)
|
import Control.Error (hush)
|
||||||
import Data.Attoparsec.Text (Parser, char, digit, takeWhile1, string, many', parseOnly)
|
import Data.Attoparsec.Text (Parser, char, digit, takeWhile1, string, many', parseOnly)
|
||||||
|
import Data.Char (digitToInt)
|
||||||
import Data.Functor.Infix ((<$$>), (<&>))
|
import Data.Functor.Infix ((<$$>), (<&>))
|
||||||
import Data.Maybe (isJust, fromJust)
|
import Data.Maybe (isJust)
|
||||||
import Data.Monoid (Monoid(mconcat))
|
import Data.Monoid (Monoid(mappend, mconcat))
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import Data.Text.ICU (regex', find, findAll, group, span, Match, suffix, groupCount)
|
import Data.Text.ICU (Regex, regex', find, findAll, group, span, Match, suffix, groupCount)
|
||||||
|
|
||||||
|
regex :: Text -> Maybe Regex
|
||||||
|
regex = hush . regex' []
|
||||||
|
|
||||||
match :: Text -> Maybe (Text -> Bool)
|
match :: Text -> Maybe (Text -> Bool)
|
||||||
match = isJust <$$> find <$$> hush . regex' []
|
match = (isJust <$$> find) <$$> regex
|
||||||
|
|
||||||
findAndReplace :: Text -> Text -> Maybe (Text -> Maybe Text)
|
findAndReplace :: Text -> Text -> Maybe (Text -> Maybe Text)
|
||||||
findAndReplace pattern replacement = do
|
findAndReplace pattern replacement = (.)
|
||||||
findAnd <- findAll <$$> hush $ regex' [] pattern
|
<$> runReplacement `fmap` parseReplacement replacement
|
||||||
replace <- flip runReplacement <$> parseReplacement replacement
|
<*> findAll `fmap` regex pattern
|
||||||
return $ replace . findAnd
|
|
||||||
|
|
||||||
type Replacement = [Segment]
|
type Replacement = [Segment]
|
||||||
|
|
||||||
data Segment = Reference Int | Literal Text
|
data Segment = Reference Int | Literal Text deriving (Show, Eq)
|
||||||
deriving (Show, Eq)
|
|
||||||
|
|
||||||
|
-- JavaScript admits at most nine capture groups, so this is the correct
|
||||||
|
-- (and likely intended) interpretation.
|
||||||
parseReference :: Parser Segment
|
parseReference :: Parser Segment
|
||||||
parseReference = char '$' *> digit <&> Reference . read . return
|
parseReference = char '$' *> digit <&> Reference . digitToInt
|
||||||
|
|
||||||
parseLiteral :: Parser Segment
|
parseLiteral :: Parser Segment
|
||||||
parseLiteral = Literal <$> takeWhile1 (/= '$')
|
parseLiteral = Literal <$> takeWhile1 (/= '$')
|
||||||
|
@ -51,19 +55,30 @@ parseSegment = parseLiteral <|> parseReference <|> parseLiteralDollar
|
||||||
parseReplacement :: Text -> Maybe Replacement
|
parseReplacement :: Text -> Maybe Replacement
|
||||||
parseReplacement = hush . parseOnly (many' parseSegment)
|
parseReplacement = hush . parseOnly (many' parseSegment)
|
||||||
|
|
||||||
runReplacement :: [Match] -> Replacement -> Maybe Text
|
-- In the language of ICU, given a match:
|
||||||
runReplacement matches replacement = mconcat <$$> invert . adornSuffix matches $ do
|
--
|
||||||
match <- matches
|
-- * 'span' : Text preceding the given match.
|
||||||
Just (span match) : map (dereference $ flip group match) replacement
|
-- * 'suffix n' : Suffix of the /n/th capturing group in the match.
|
||||||
|
--
|
||||||
|
-- To reconstitute strings in replacement, we 'prefix' all matches
|
||||||
|
-- with their spans, use their groups to 'dereference' replacements
|
||||||
|
-- (references are replaced with corresponding captures, literals are
|
||||||
|
-- inserted as is), cat the results in order, and finally append the
|
||||||
|
-- suffix of the terminal capturing group ('getSuffix'). When there are
|
||||||
|
-- no explicit groups, we use the zeroeth; that is, the text following
|
||||||
|
-- the final match.
|
||||||
|
|
||||||
adornSuffix :: [Match] -> ([Maybe Text] -> [Maybe Text])
|
runReplacement :: Replacement -> [Match] -> Maybe Text
|
||||||
adornSuffix = \case {[] -> id; ms -> (++ [flip suffix <*> groupCount $ last ms])}
|
runReplacement replacement matches = do
|
||||||
|
prefix <- sequence $ matches >>= \match ->
|
||||||
|
Just (span match) : dereference match replacement
|
||||||
|
mappend (mconcat prefix) <$> getSuffix matches
|
||||||
|
|
||||||
dereference :: (Int -> Maybe Text) -> Segment -> Maybe Text
|
dereference :: Match -> Replacement -> [Maybe Text]
|
||||||
dereference group = \case
|
dereference match = fmap $ \case
|
||||||
Reference n -> group n
|
Reference n -> group n match
|
||||||
Literal str -> Just str
|
Literal str -> Just str
|
||||||
|
|
||||||
invert :: [Maybe a] -> Maybe [a]
|
getSuffix :: [Match] -> Maybe Text
|
||||||
invert xs | all isJust xs = Just $ fromJust <$> xs
|
getSuffix [] = Just mempty
|
||||||
| otherwise = Nothing
|
getSuffix ms = flip suffix <*> groupCount $ last ms
|
||||||
|
|
|
@ -24,10 +24,13 @@ spec = do
|
||||||
it "Should correctly parse successive '$'s" $ do
|
it "Should correctly parse successive '$'s" $ do
|
||||||
parseReplacement "$$1" `shouldBe` Just [Literal "$", Reference 1]
|
parseReplacement "$$1" `shouldBe` Just [Literal "$", Reference 1]
|
||||||
parseReplacement "$$" `shouldBe` Just [Literal "$", Literal "$"]
|
parseReplacement "$$" `shouldBe` Just [Literal "$", Literal "$"]
|
||||||
|
it "Should identity no more than nine capture groups." $ do
|
||||||
|
parseReplacement "$10" `shouldBe` Just [Reference 1, Literal "0"]
|
||||||
describe "findAndReplace" $ do
|
describe "findAndReplace" $ do
|
||||||
it "Should find and replace based upon a regular expression and pattern." $ do
|
it "Should find and replace based upon a regular expression and pattern." $ do
|
||||||
("barqux" &) <$> findAndReplace "(bar)" "$1baz" `shouldBe` Just (Just "barbazqux")
|
("barqux" &) <$> findAndReplace "(bar)" "$1baz" `shouldBe` Just (Just "barbazqux")
|
||||||
("barqux" &) <$> findAndReplace "(qux)" "baz$1" `shouldBe` Just (Just "barbazqux")
|
("barqux" &) <$> findAndReplace "(qux)" "baz$1" `shouldBe` Just (Just "barbazqux")
|
||||||
|
("barqux" &) <$> findAndReplace "u" "uu" `shouldBe` Just (Just "barquux")
|
||||||
|
|
||||||
instance Show (Text -> Bool) where
|
instance Show (Text -> Bool) where
|
||||||
show _ = "Text -> Bool"
|
show _ = "Text -> Bool"
|
||||||
|
|
Loading…
Reference in New Issue