From 4d508f5894414d90a9a1a03535ee148e074e95ed Mon Sep 17 00:00:00 2001
From: Yorick van Pelt
Date: Fri, 11 Jul 2014 16:50:17 +0200
Subject: [PATCH] initial commit

---
 json.js   | 157 ++++++++++++++++++++++++++++++
 parsec.js | 279 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 test.js   |  12 +++
 3 files changed, 448 insertions(+)
 create mode 100644 json.js
 create mode 100644 parsec.js
 create mode 100644 test.js

diff --git a/json.js b/json.js
new file mode 100644
index 0000000..ca0d2b4
--- /dev/null
+++ b/json.js
@@ -0,0 +1,157 @@
+var p = require('./parsec')
+var c = p.combinators
+var s = p.p_string
+var f = p.funp
+
+// parser for exactly the character `ch`
+var char = function(ch) {
+  return c.token(p.p_string.char(ch)) }
+// parser for a single digit 0-9
+var digit = function() {
+  return c.token(p.p_string.digit()) }
+
+// match the characters of `x` in order, then yield the value `y`
+function literal(x, y) {
+  return c.bind(
+    c.nxtnxt.apply(null, x.split('').map(char))
+  , function() { return c.always(y) }) }
+
+// zero or more `parser` results joined into one string
+function many_str(parser) {
+  return c.bind(c.many(parser), function(x) { return c.always(x.join('')) }) }
+
+// one or more `parser` results joined into one string
+function many1_str(parser) {
+  return c.bind(c.many1(parser), function(x) { return c.always(x.join('')) }) }
+
+// "//" up to the end of the line; a comment on the last line may also be
+// ended by the end of input
+var line_comment = c.nxtnxt(
+  char('/')
+, char('/')
+, c.many(c.token(f.not(p.p_string.char('\n'))))
+, c.either(char('\n'), c.eof()))
+
+
+// one character of a "/* */" body: anything but '*', or a '*' that is not
+// followed by '/'. attempt() hands the '*' back when it turns out to start
+// the closing "*/", lookahead() keeps the character after it unconsumed.
+var inline_comment_content = c.either(
+  c.token(f.not(p.p_string.char('*')))
+, c.attempt(c.next(
+    char('*')
+  , c.lookahead(c.token(f.not(p.p_string.char('/')))))))
+
+var inline_comment = c.nxtnxt(
+  char('/')
+, char('*')
+, c.many(inline_comment_content)
+, char('*')
+, char('/'))
+
+// line_comment consumes the first '/' before it can tell the two kinds
+// apart; attempt() turns that into a plain mismatch so that inline_comment
+// still gets tried
+var comment = c.either(c.attempt(line_comment), inline_comment)
+
+// any run of whitespace and comments
+var opt_space = c.many(c.either(c.token(p.p_string.whitespace()), comment))
+// `parser` with optional whitespace/comments on both sides
+function between_whitespace(parser) {
+  return c.between(opt_space, opt_space, parser) }
+
+// try `parser`; yield `def` when it fails without consuming input
+function optional(def, parser) {
+  return c.either(parser, c.always(def)) }
+
+// zero or more `parser` separated by `sep`; a separator must be followed
+// by another item, so a trailing separator is a parse error
+function sep_by(parser, sep) {
+  return optional([], c.bind(
+    parser
+  , c.many(c.next(sep, parser))
+  , function(x, xs) { return c.always([x].concat(xs)) })) }
+
+// I was legitimately running out of stack space :-)
+// Every 16th parser started through the trampoline is deferred with
+// process.nextTick instead of being called directly.
+var trampoline = function() {
+  var i = 0
+  return function trampoline(parser) {
+    return function(state, cok, cerr, eok, eerr) {
+      // decrease this number if you get stack overflow errors :P
+      if (i++ < 15) return parser(state, cok, cerr, eok, eerr)
+      i=0
+      process.nextTick(function() { parser(state, cok, cerr, eok, eerr) })} } }()
+
+
+// JSON string; the escapes \" \\ \/ \b \f \n \r \t are decoded (there is no
+// \uXXXX support)
+var string = c.between(
+  char('"'), char('"')
+, many_str(c.either(
+    c.token(f.not(f.or(s.control(), s.char('"'), s.char('\\'))))
+    // escaped chars
+  , c.bind(
+      char('\\')
+    , c.choice(
+        char('"'), char('\\'), char('/'), char('b')
+      , char('f'), char('n'), char('r'), char('t'))
+    , function(_, n) { return c.always('"\\/\b\f\n\r\t'['"\\/bfnrt'.indexOf(n)]) }))))
+// JSON number, converted to a JS number with unary plus
+var number = c.bind(
+  // sign
+  optional('', char('-'))
+  // integer part
+, c.either(char('0'), many1_str(digit()))
+  // fractional part
+, optional('', c.next(char('.'), many1_str(digit())))
+  // 'e' part
+, optional(0, c.bind(
+    c.either(char('e'), char('E'))
+  , c.choice(char('+'), char('-'), c.always('+'))
+  , many1_str(digit())
+  , function(_, sign, x) {
+      return c.always(+(sign+x)) }))
+, function(sign, int, frac, exp) {
+    return c.always(+(sign + int + '.' + frac + 'e' + exp)) })
+
+// "[" value ("," value)* "]", or an empty "[]"
+function array() {
+  return c.between(
+    c.next(char('['), opt_space), char(']')
+  , sep_by(
+      c.wrap(value)() // avoid infinite recursion
+    , char(','))) }
+
+// "{" string ":" value ("," string ":" value)* "}", or an empty "{}"
+function object() {
+  return c.between(
+    c.next(char('{'), opt_space)
+  , char('}')
+  , c.bind(
+      sep_by(
+        c.bind(
+          between_whitespace(string)
+        , char(':')
+        , c.wrap(value)() // avoid infinite recursion
+        , function(k, _colon, v) {
+            return c.always([k, v]) })
+      , char(','))
+    , function(pairs) {
+        var obj = {}
+        for (var i = 0; i < pairs.length; i++)
+          obj[pairs[i][0]] = pairs[i][1]
+        return c.always(obj)})) }
+
+// any JSON value, optionally surrounded by whitespace/comments
+function value() {
+  return trampoline(between_whitespace(c.choice(
+    string
+  , number
+  , object()
+  , array()
+  , literal('true', true)
+  , literal('false', false)
+  , literal('null', null)))) }
+module.exports.value = value
diff --git a/parsec.js b/parsec.js
new file mode 100644
index 0000000..7bedc6a
--- /dev/null
+++ b/parsec.js
@@ -0,0 +1,279 @@
+var errors = function() {
+  // a single failure; `pos` is any object with `line` and `column` fields
+  function ParseError(pos, msgs) {
+    this.msgs = [].concat(msgs)
+    this.line = pos.line
+    this.column = pos.column }
+  ParseError.prototype.show = function() {
+    return this.msgs.join(', ')
+      + " at"
+      + " line: " + this.line
+      + " column: " + this.column }
+  ParseError.prototype.combine = function(errs) {
+    if (!Array.isArray(errs)) errs = [errs]
+    return new CombinedError([this].concat(errs)) }
+
+  // several failures collected together, e.g. from the branches of either()
+  function CombinedError(errs) {
+    this.errs = errs ? errs : [] }
+  // returns a new CombinedError, mirroring ParseError.prototype.combine
+  CombinedError.prototype.combine = function(errs) {
+    if (!Array.isArray(errs)) errs = [errs]
+    return new CombinedError(this.errs.concat(errs)) }
+
+  function unknown_error(pos) {
+    return new ParseError(pos, "Unknown error") }
+  function unexpect_error(pos, msg) {
+    return new ParseError(pos, "Unexpected " + msg) }
+  function expect_error(pos, msg) {
+    return new ParseError(pos, "Expected " + msg) }
+
+  // merge(e0, e1, ...): e0 itself when alone, otherwise e0.combine([e1, ...])
+  function merge_errors() {
+    var a0 = arguments[0]
+    var ar = Array.prototype.slice.call(arguments, 1)
+    return ar.length ? a0.combine(ar) : a0 }
+
+  return {
+    Parse: ParseError
+  , unknown: unknown_error
+  , unexpect: unexpect_error
+  , expect: expect_error
+  , merge: merge_errors }}()
+
+var p_string = function() {
+  // returns a state constructor bound to the input `str`; a state holds the
+  // offset plus a 1-based line and column, and increment() returns a new
+  // state instead of modifying the current one
+  function Parse_String_State(str) {
+    function Parse_State(pos, line, column) {
+      this.pos = pos || 0
+      this.line = line || 1
+      this.column = column || 1 }
+    Parse_State.prototype.str = str
+    Parse_State.prototype.increment = function(c) {
+      var newline = c == '\n'
+      return new Parse_State(this.pos+1, newline?this.line+1:this.line, newline?1:this.column+1) }
+    Parse_State.prototype.getToken = function() {
+      return this.str[this.pos] }
+    Parse_State.prototype.length = function() {
+      return this.str.length }
+    return Parse_State }
+
+
+  // character predicates, for use with combinators.token()
+  function char(x) {
+    return function(t) { return x == t }}
+  function digit() { return function(x){ var cx = x.charCodeAt(0); return cx < 58 && cx > 47 }}
+  function letter() {
+    return function(x) {
+      var cx = x.charCodeAt(0)
+      return (cx < 91 && cx > 64) || (cx < 123 && cx > 96) }}
+  function whitespace() {
+    return function(x) {
+      return x == ' ' || x == '\t' || x == '\r' || x == '\n' }}
+
+  function control() {
+    return function(x) {
+      return x.charCodeAt(0) < 32 }}
+
+  // run parser `p` over the string `input`; calls cb(null, result) on
+  // success and cb(err) on failure
+  function run(p, input, cb) {
+    var str_state = new (Parse_String_State(input))()
+    function pok(x) {
+      cb(null, x) }
+    function perr(err) {
+      cb(err) }
+    p(str_state, pok, perr, pok, perr) }
+
+  return {
+    Parse_State: Parse_String_State
+  , char: char
+  , digit: digit
+  , letter: letter
+  , whitespace: whitespace
+  , control: control
+  , run: run }}()
+
+
+// A parser is a function (state, cok, cerr, eok, eerr) that reports its
+// outcome through one of the four continuations:
+//   cok(value, state)  success, input was consumed
+//   cerr(error)        failure after input was consumed
+//   eok(value, state)  success, nothing was consumed
+//   eerr(error)        failure, nothing was consumed
+var combinators = function() {
+  // succeed with `x` without consuming anything
+  function always(x) {
+    return function(state, cok, cerr, eok, eerr) {
+      eok(x, state) }}
+
+  // avoiding infinite recursions
+  function wrap(f) {
+    return function(x) {
+      return function(state, cok, cerr, eok, eerr) {
+        return f(x)(state, cok, cerr, eok, eerr) }}}
+
+  // run `p`, then the parser returned by f(result of p)
+  function bind_2(p, f) {
+    return function(state, cok, cerr, eok, eerr) {
+      function pcok(item, state) {
+        var q = f(item)
+        // p consumed input, so every outcome of q counts as consumed too
+        q(state, cok, cerr, cok, cerr) }
+      function peok(item, state) {
+        wrap(f)(item)(state, cok, cerr, eok, eerr) }
+      p(state, pcok, cerr, peok, eerr) }}
+
+  // bind(p1, ..., pn, f): run p1..pn in order, then the parser returned by
+  // f(r1, ..., rn)
+  function bind() {
+    var ar = arguments, arn = ar.length - 1
+    if (arn == 1) return bind_2(arguments[0], arguments[1])
+    var last = ar[arn]
+    return function(state, cok, cerr, eok, eerr) {
+      // the bookkeeping lives inside the run, so a parser that is entered
+      // again while it is still running keeps separate results per run
+      var res = new Array(arn), ari = 0, resi = 0
+      bind(ar[ari++], function bind_arg2(x) {
+        res[resi++] = x
+        if (resi == arn) return last.apply(this, res)
+        else return bind(ar[ari++], bind_arg2) })
+      (state, cok, cerr, eok, eerr)}}
+
+  // run p, drop its result, then run q
+  function next(p, q) {
+    return bind(p, function() { return q }) }
+
+  // nxtnxt(p1, ..., pn): run all in order, keep only the result of pn
+  function nxtnxt() {
+    var ar = Array.prototype.slice.call(arguments)
+    return ar.reduceRight(function(p, c) {
+      return next(c, p) }) }
+
+  // fail without consuming; reports `err` when one is given
+  function never(err) {
+    return function(state, cok, cerr, eok, eerr) {
+      eerr(err || errors.unknown(state)) } }
+
+  // try p; when it fails without consuming, try q on the same state
+  function either(p, q) {
+    return function(state, cok, cerr, eok, eerr) {
+      function p_eerr(err_from_p) {
+        function q_eerr(err_from_q) {
+          eerr(errors.merge(err_from_p, err_from_q)) }
+        q(state, cok, cerr, eok, q_eerr) }
+      p(state, cok, cerr, eok, p_eerr) } }
+
+  // like p, but a failure after consuming is reported as an empty failure,
+  // which lets an enclosing either() move on to its alternative
+  function attempt(p) {
+    return function(state, cok, cerr, eok, eerr) {
+      p(state, cok, eerr, eok, eerr) }}
+
+  // consume one token when consume_p(token) is truthy
+  function token(consume_p) {
+    return function(state, cok, cerr, eok, eerr) {
+      var t = state.getToken()
+      if (!t) eerr(errors.unexpect(state, 'end of input'))
+      else if (consume_p(t)) cok(t, state.increment(t))
+      else eerr(errors.unexpect(state, "token " + t)) } }
+
+  // run p n times and collect the results in an array (null when n == 0)
+  function times(n, p) {
+    if (n == 0) return always(null)
+    return function (state, cok, cerr, eok, eerr) {
+      var res = new Array(n)
+      var resi = 0
+      function pcok(item, state) {
+        res[resi++] = item
+        if (resi < n) p(state, pcok, cerr, pcok, eerr)
+        else if (resi == n) cok(res, state) }
+      // first p succeeded without consuming: the remaining slots are filled
+      // with the same item instead of running p again
+      function peok(item, state) {
+        while(resi < n) res[resi++] = item
+        eok(res, state) }
+      p(state, pcok, cerr, peok, eerr) } }
+
+  // zero or more p, collected in an array; throws a TypeError when p
+  // succeeds without consuming, since that loop would never end
+  function many(p) {
+    function many_err() {
+      throw new TypeError('`many` applied to parser that accepts an empty string') }
+    function safe_p(state, cok, cerr, eok, eerr) {
+      p(state, cok, cerr, many_err, eerr) }
+    return either(
+      bind(safe_p, wrap(many)(p), function(x, xs) {
+        return always([x].concat(xs)) })
+    , always([])) }
+
+  // one or more p, collected in an array
+  function many1(p) {
+    return bind(p, many(p), function(x, xs) {
+      return always([x].concat(xs) )})}
+
+  // run p, but when it succeeds after consuming, yield its result with the
+  // original state so that nothing is consumed
+  function lookahead(p) {
+    return function(state, cok, cerr, eok, eerr) {
+      function ok(t) {
+        eok(t, state) }
+      p(state, ok, cerr, eok, eerr) } }
+
+  function choice(first) {
+    /* either + varargs */
+    var ar = Array.prototype.slice.call(arguments, 1)
+    // never is a parser factory: it has to be called to get a parser
+    if (ar.length == 0 && !first) return never()
+    if (ar.length == 0 && first) return first
+    return either(first, choice.apply(null, ar)) }
+
+  // succeed (with null) only at the end of the input
+  function eof() {
+    return function(state, cok, cerr, eok, eerr) {
+      if (state.pos == state.length()) eok(null, state)
+      else eerr(errors.expect(state, "end of input")) } }
+
+  // open, then p, then close; yields the result of p
+  function between(open, close, p) {
+    return bind(open, p, close, function(o, x, c) { return always(x) }) }
+
+  return {
+    always: always
+  , bind: bind
+  , next: next
+  , nxtnxt: nxtnxt
+  , never: never
+  , wrap: wrap
+  , either: either
+  , attempt: attempt
+  , many: many
+  , many1: many1
+  , times: times
+  , token: token
+  , lookahead: lookahead
+  , choice: choice
+  , between: between
+  , eof: eof }}()
+
+var funp = function() {
+  // functional programming helpers, especially useful with token()
+  function not(f) {
+    return function(x) { return !f(x) }}
+  function and(p, q) {
+    return function(x) { return p(x) && q(x) }}
+  function or(p, q) { /* todo more than 2 args */
+    if (arguments.length > 2) return Array.prototype.slice.call(arguments).reduce(function(p,q){return or(p,q)})
+    return function(x) { return p(x) || q(x) }}
+  return {
+    not: not
+  , and: and
+  , or: or }}()
+
+module.exports = {
+  combinators: combinators
+, errors: errors
+, p_string: p_string
+, funp: funp }
diff --git a/test.js b/test.js
new file mode 100644
index 0000000..af477dd
--- /dev/null
+++ b/test.js
@@ -0,0 +1,12 @@
+#!/usr/bin/env node
+
+var json = require('./json')
+var parsec = require('./parsec')
+var fs = require('fs')
+
+//var contents = fs.readFileSync('test.json').toString()
+//console.log(contents)
+parsec.p_string.run(json.value(), '10 /* */ \n', function(e, x) {
+  if (e) console.log(e.show ? e.show() : e.errs)
+  else console.log(x)
+})