Flash 670 lines
// json — a minimal JSON value tree: parse and serialize, in pure Flash.
//
// `parse` reads one RFC 8259 document into a tree of `Value` nodes and
// `stringify` writes a tree back out. The scope is deliberately small —
// no streaming, no comments, no NaN/Infinity — and the parse is strict:
// trailing data, trailing commas, unescaped control characters, lone
// surrogates, and leading zeros are all `error.Malformed`. String escapes
// decode per RFC 8259 including `\uXXXX` with surrogate pairs (the decoded
// text is UTF-8); bytes at or above 0x80 pass through unvalidated.
//
// Numbers keep integer fidelity: anything that scans as an integer and
// fits an `i64` becomes `.int`; everything else — fractions, exponents,
// integers past the i64 range — is kept as `.number`, the raw lexeme,
// which `stringify` writes back verbatim. No float conversion happens in
// either direction, so a parse/stringify round trip never loses digits.
//
// Allocation contract: every node, string, and slice is allocated from
// the caller's allocator, and a failed or abandoned parse does NOT free
// the partial tree — hand in an arena and release the whole document with
// its `deinit` (the in-module tests show the pattern). Nesting deeper
// than 128 levels is `error.Malformed`, so a hostile document cannot
// overrun the parser's stack.
use "base"
use "mem"
use "list"
use "fmt"
use "arena"
// Everything `parse` can report. `stringify` only ever raises
// `error.OutOfMemory`; it shares the set so the two stay one import.
pub const Error = error{ Malformed, OutOfMemory }
// One object member. Members keep document order; lookup is the linear
// scan in `Value.get` — JSON objects in protocol traffic are small.
pub const Member = struct {
key []u8,
value Value,
}
// One JSON value. `.number` is the raw lexeme of anything `.int` cannot
// hold exactly (see the header).
pub const Value = union(enum) {
nul,
bool bool,
int i64,
number []u8,
string []u8,
array []Value,
object []Member,
// The member named `key` of an object, or null — null both for a
// missing key and for a non-object receiver.
pub fn get(self Value, key []u8) ?Value {
switch self {
.object => |ms| {
for m in ms {
if mem.eql(u8, m.key, key) {
return m.value
}
}
return null
},
else => return null,
}
}
}
// Nesting deeper than this is rejected: each level costs native stack in
// the recursive descent, and no sane document comes close.
const max_depth = 128
// Parse one complete JSON document. Trailing non-whitespace is malformed.
pub fn parse(alloc base.Allocator, src []u8) Error!Value {
var p = Parser{ .alloc = alloc, .src = src, .pos = 0, .depth = 0 }
v := try p.value()
p.skipWs()
if p.pos != src.len {
return error.Malformed
}
return v
}
// Serialize a tree to a freshly allocated string the caller owns. Compact
// form — no added whitespace; strings re-escape quotes, backslashes, and
// control characters (`\u00XX` for those without a short form).
pub fn stringify(alloc base.Allocator, v Value) Error![]u8 {
var out list.List(u8) = .empty
errdefer out.deinit(alloc)
try writeValue(alloc, &out, v)
return out.toOwnedSlice(alloc)
}
// ---- parsing ----
const Parser = struct {
alloc base.Allocator,
src []u8,
pos usize,
depth u32,
fn value(self *mut Parser) Error!Value {
self.skipWs()
if self.pos >= self.src.len {
return error.Malformed
}
switch self.src[self.pos] {
'{' => return self.object(),
'[' => return self.array(),
'"' => return Value{ .string = try self.string() },
't', 'f', 'n' => return self.word(),
else => return self.number(),
}
}
fn object(self *mut Parser) Error!Value {
try self.descend()
self.pos += 1
var ms list.List(Member) = .empty
self.skipWs()
if self.peekIs('}') {
self.pos += 1
self.depth -= 1
return Value{ .object = try ms.toOwnedSlice(self.alloc) }
}
while true {
self.skipWs()
if !self.peekIs('"') {
return error.Malformed
}
k := try self.string()
self.skipWs()
try self.expect(':')
v := try self.value()
try ms.append(self.alloc, .{ .key = k, .value = v })
self.skipWs()
if self.peekIs(',') {
self.pos += 1
continue
}
try self.expect('}')
break
}
self.depth -= 1
return Value{ .object = try ms.toOwnedSlice(self.alloc) }
}
fn array(self *mut Parser) Error!Value {
try self.descend()
self.pos += 1
var xs list.List(Value) = .empty
self.skipWs()
if self.peekIs(']') {
self.pos += 1
self.depth -= 1
return Value{ .array = try xs.toOwnedSlice(self.alloc) }
}
while true {
v := try self.value()
try xs.append(self.alloc, v)
self.skipWs()
if self.peekIs(',') {
self.pos += 1
continue
}
try self.expect(']')
break
}
self.depth -= 1
return Value{ .array = try xs.toOwnedSlice(self.alloc) }
}
// Decode the string whose opening quote is at `pos` into a fresh
// allocation. Unescaped control characters are malformed per the RFC.
fn string(self *mut Parser) Error![]u8 {
self.pos += 1
var out list.List(u8) = .empty
while true {
if self.pos >= self.src.len {
return error.Malformed
}
c := self.src[self.pos]
if c == '"' {
self.pos += 1
return out.toOwnedSlice(self.alloc)
}
if c == '\\' {
try self.escape(&out)
continue
}
if c < 0x20 {
return error.Malformed
}
try out.append(self.alloc, c)
self.pos += 1
}
}
// Decode one escape sequence; `pos` sits on the backslash.
fn escape(self *mut Parser, out *mut list.List(u8)) Error!void {
if self.pos + 1 >= self.src.len {
return error.Malformed
}
e := self.src[self.pos + 1]
self.pos += 2
switch e {
'"' => try out.append(self.alloc, '"'),
'\\' => try out.append(self.alloc, '\\'),
'/' => try out.append(self.alloc, '/'),
'b' => try out.append(self.alloc, 0x08),
'f' => try out.append(self.alloc, 0x0C),
'n' => try out.append(self.alloc, '\n'),
'r' => try out.append(self.alloc, '\r'),
't' => try out.append(self.alloc, '\t'),
'u' => try self.unicodeEscape(out),
else => return error.Malformed,
}
}
// `\uXXXX`, with `pos` just past the `u`. A high surrogate must be
// followed by `\u`-escaped low surrogate (the pair combines to one
// code point); a lone surrogate of either kind is malformed.
fn unicodeEscape(self *mut Parser, out *mut list.List(u8)) Error!void {
hi := try self.hex4()
var cp u32 = hi
if hi >= 0xD800 && hi <= 0xDBFF {
if self.pos + 1 >= self.src.len || self.src[self.pos] != '\\' || self.src[self.pos + 1] != 'u' {
return error.Malformed
}
self.pos += 2
lo := try self.hex4()
if lo < 0xDC00 || lo > 0xDFFF {
return error.Malformed
}
cp = 0x10000 + ((hi - 0xD800) << 10) + (lo - 0xDC00)
} else if hi >= 0xDC00 && hi <= 0xDFFF {
return error.Malformed
}
try appendUtf8(self.alloc, out, cp)
}
fn hex4(self *mut Parser) Error!u32 {
if self.pos + 4 > self.src.len {
return error.Malformed
}
var v u32 = 0
var i usize = 0
while i < 4 {
d := hexVal(self.src[self.pos + i]) orelse return error.Malformed
v = (v << 4) | d
i += 1
}
self.pos += 4
return v
}
// `true` / `false` / `null` — anything else starting with t/f/n is
// malformed.
fn word(self *mut Parser) Error!Value {
if self.match("true") {
return Value{ .bool = true }
}
if self.match("false") {
return Value{ .bool = false }
}
if self.match("null") {
return Value.nul
}
return error.Malformed
}
// Scan one number per the RFC grammar (`-? int frac? exp?`, no leading
// zeros), then classify: an integral lexeme that fits i64 becomes
// `.int`, everything else keeps the raw lexeme as `.number`.
fn number(self *mut Parser) Error!Value {
start := self.pos
if self.peekIs('-') {
self.pos += 1
}
if self.pos >= self.src.len || !isDigit(self.src[self.pos]) {
return error.Malformed
}
if self.src[self.pos] == '0' {
self.pos += 1
} else {
self.skipDigits()
}
var integral = true
if self.peekIs('.') {
integral = false
self.pos += 1
if self.pos >= self.src.len || !isDigit(self.src[self.pos]) {
return error.Malformed
}
self.skipDigits()
}
if self.peekIs('e') || self.peekIs('E') {
integral = false
self.pos += 1
if self.peekIs('+') || self.peekIs('-') {
self.pos += 1
}
if self.pos >= self.src.len || !isDigit(self.src[self.pos]) {
return error.Malformed
}
self.skipDigits()
}
lex := self.src[start..self.pos]
if integral {
n := fmt.parseInt(i64, lex, 10) catch return self.rawNumber(lex)
return Value{ .int = n }
}
return self.rawNumber(lex)
}
fn rawNumber(self *mut Parser, lex []u8) Error!Value {
dup := try self.alloc.alloc(u8, lex.len)
mem.copy(u8, dup, lex)
return Value{ .number = dup }
}
fn match(self *mut Parser, s []u8) bool {
if self.pos + s.len > self.src.len {
return false
}
if !mem.eql(u8, self.src[self.pos .. self.pos + s.len], s) {
return false
}
self.pos += s.len
return true
}
fn expect(self *mut Parser, c u8) Error!void {
if !self.peekIs(c) {
return error.Malformed
}
self.pos += 1
}
fn peekIs(self *Parser, c u8) bool {
return self.pos < self.src.len && self.src[self.pos] == c
}
fn descend(self *mut Parser) Error!void {
if self.depth >= max_depth {
return error.Malformed
}
self.depth += 1
}
fn skipDigits(self *mut Parser) void {
while self.pos < self.src.len && isDigit(self.src[self.pos]) {
self.pos += 1
}
}
fn skipWs(self *mut Parser) void {
while self.pos < self.src.len {
c := self.src[self.pos]
if c != ' ' && c != '\t' && c != '\n' && c != '\r' {
return
}
self.pos += 1
}
}
}
fn isDigit(c u8) bool {
return c >= '0' && c <= '9'
}
fn hexVal(c u8) ?u32 {
if c >= '0' && c <= '9' {
return c - '0'
}
if c >= 'a' && c <= 'f' {
return c - 'a' + 10
}
if c >= 'A' && c <= 'F' {
return c - 'A' + 10
}
return null
}
// Append `cp` to `out` as UTF-8. The caller guarantees `cp` is a valid
// scalar value (the escape decoder rejects lone surrogates before here).
fn appendUtf8(alloc base.Allocator, out *mut list.List(u8), cp u32) Error!void {
if cp < 0x80 {
try out.append(alloc, #intCast(cp))
} else if cp < 0x800 {
try out.append(alloc, #intCast(0xC0 | (cp >> 6)))
try out.append(alloc, #intCast(0x80 | (cp & 0x3F)))
} else if cp < 0x10000 {
try out.append(alloc, #intCast(0xE0 | (cp >> 12)))
try out.append(alloc, #intCast(0x80 | ((cp >> 6) & 0x3F)))
try out.append(alloc, #intCast(0x80 | (cp & 0x3F)))
} else {
try out.append(alloc, #intCast(0xF0 | (cp >> 18)))
try out.append(alloc, #intCast(0x80 | ((cp >> 12) & 0x3F)))
try out.append(alloc, #intCast(0x80 | ((cp >> 6) & 0x3F)))
try out.append(alloc, #intCast(0x80 | (cp & 0x3F)))
}
}
// ---- serialization ----
const hex_digits = "0123456789abcdef"
fn writeValue(alloc base.Allocator, out *mut list.List(u8), v Value) Error!void {
switch v {
.nul => try out.appendSlice(alloc, "null"),
.bool => |b| {
if b {
try out.appendSlice(alloc, "true")
} else {
try out.appendSlice(alloc, "false")
}
},
.int => |n| {
var buf [32]u8 = undefined
s := fmt.bufPrint(buf[0..], "{d}", .{n}) catch unreachable
try out.appendSlice(alloc, s)
},
.number => |s| try out.appendSlice(alloc, s),
.string => |s| try writeString(alloc, out, s),
.array => |xs| {
try out.append(alloc, '[')
var first = true
for x in xs {
if !first {
try out.append(alloc, ',')
}
first = false
try writeValue(alloc, out, x)
}
try out.append(alloc, ']')
},
.object => |ms| {
try out.append(alloc, '{')
var first = true
for m in ms {
if !first {
try out.append(alloc, ',')
}
first = false
try writeString(alloc, out, m.key)
try out.append(alloc, ':')
try writeValue(alloc, out, m.value)
}
try out.append(alloc, '}')
},
}
}
fn writeString(alloc base.Allocator, out *mut list.List(u8), s []u8) Error!void {
try out.append(alloc, '"')
for c in s {
switch c {
'"' => try out.appendSlice(alloc, "\\\""),
'\\' => try out.appendSlice(alloc, "\\\\"),
'\n' => try out.appendSlice(alloc, "\\n"),
'\r' => try out.appendSlice(alloc, "\\r"),
'\t' => try out.appendSlice(alloc, "\\t"),
0x08 => try out.appendSlice(alloc, "\\b"),
0x0C => try out.appendSlice(alloc, "\\f"),
else => {
if c < 0x20 {
try out.appendSlice(alloc, "\\u00")
try out.append(alloc, hex_digits[c >> 4])
try out.append(alloc, hex_digits[c & 0xF])
} else {
try out.append(alloc, c)
}
},
}
}
try out.append(alloc, '"')
}
test "parse reads the scalar forms" {
var a = arena.ArenaAllocator.init(base.testAlloc)
defer a.deinit()
al := a.allocator()
t := try parse(al, "true")
switch t {
.bool => |b| try base.expect(b),
else => unreachable,
}
f := try parse(al, " false ")
switch f {
.bool => |b| try base.expect(!b),
else => unreachable,
}
n := try parse(al, "null")
try base.expect(n == .nul)
i := try parse(al, "-42")
switch i {
.int => |x| try base.expectEqual(-42, x),
else => unreachable,
}
s := try parse(al, "\"flash\"")
switch s {
.string => |x| try base.expect(mem.eql(u8, x, "flash")),
else => unreachable,
}
}
test "parse decodes string escapes including surrogate pairs" {
var a = arena.ArenaAllocator.init(base.testAlloc)
defer a.deinit()
al := a.allocator()
v := try parse(al, "\"a\\n\\t\\\\\\\"\\/\\u0041\\u00e9\"")
switch v {
.string => |x| try base.expect(mem.eql(u8, x, "a\n\t\\\"/A\xc3\xa9")),
else => unreachable,
}
// U+1F600 as a surrogate pair decodes to the 4-byte UTF-8 form.
p := try parse(al, "\"\\uD83D\\uDE00\"")
switch p {
.string => |x| try base.expect(mem.eql(u8, x, "\xf0\x9f\x98\x80")),
else => unreachable,
}
}
test "parse builds nested containers and get finds members" {
var a = arena.ArenaAllocator.init(base.testAlloc)
defer a.deinit()
al := a.allocator()
v := try parse(al, "{ \"id\": 1, \"params\": [true, {\"k\": \"v\"}], \"none\": null }")
id := v.get("id").?
switch id {
.int => |n| try base.expectEqual(1, n),
else => unreachable,
}
params := v.get("params").?
switch params {
.array => |xs| {
try base.expectEqual(2, xs.len)
inner := xs[1].get("k").?
switch inner {
.string => |s| try base.expect(mem.eql(u8, s, "v")),
else => unreachable,
}
},
else => unreachable,
}
try base.expect(v.get("none").? == .nul)
try base.expect(v.get("absent") == null)
try base.expect(id.get("anything") == null)
}
test "numbers keep integer fidelity and fall back to the raw lexeme" {
var a = arena.ArenaAllocator.init(base.testAlloc)
defer a.deinit()
al := a.allocator()
big := try parse(al, "99999999999999999999")
switch big {
.number => |s| try base.expect(mem.eql(u8, s, "99999999999999999999")),
else => unreachable,
}
pi := try parse(al, "3.14")
switch pi {
.number => |s| try base.expect(mem.eql(u8, s, "3.14")),
else => unreachable,
}
exp := try parse(al, "-1.5e+10")
switch exp {
.number => |s| try base.expect(mem.eql(u8, s, "-1.5e+10")),
else => unreachable,
}
zero := try parse(al, "0")
switch zero {
.int => |n| try base.expectEqual(0, n),
else => unreachable,
}
}
test "parse rejects malformed input" {
var a = arena.ArenaAllocator.init(base.testAlloc)
defer a.deinit()
al := a.allocator()
try base.expectError(error.Malformed, parse(al, ""))
try base.expectError(error.Malformed, parse(al, "{"))
try base.expectError(error.Malformed, parse(al, "[1,]"))
try base.expectError(error.Malformed, parse(al, "{\"a\":1,}"))
try base.expectError(error.Malformed, parse(al, "{\"a\" 1}"))
try base.expectError(error.Malformed, parse(al, "tru"))
try base.expectError(error.Malformed, parse(al, "nullx"))
try base.expectError(error.Malformed, parse(al, "\"open"))
try base.expectError(error.Malformed, parse(al, "\"bad \\q escape\""))
try base.expectError(error.Malformed, parse(al, "\"\\uD83D alone\""))
try base.expectError(error.Malformed, parse(al, "\"\\uDC00 alone\""))
try base.expectError(error.Malformed, parse(al, "01"))
try base.expectError(error.Malformed, parse(al, "1."))
try base.expectError(error.Malformed, parse(al, "1e"))
try base.expectError(error.Malformed, parse(al, "1 2"))
try base.expectError(error.Malformed, parse(al, "\"ctl \x01\""))
}
test "nesting past the depth limit is malformed, at the limit is fine" {
var a = arena.ArenaAllocator.init(base.testAlloc)
defer a.deinit()
al := a.allocator()
var deep [300]u8 = undefined
var i usize = 0
while i < 150 {
deep[i] = '['
deep[299 - i] = ']'
i += 1
}
try base.expectError(error.Malformed, parse(al, deep[0..]))
// 128 levels exactly — the deepest accepted document.
ok := try parse(al, deep[22 .. 150 + 128])
try base.expect(ok == .array)
}
test "stringify writes the tree back compactly" {
var a = arena.ArenaAllocator.init(base.testAlloc)
defer a.deinit()
al := a.allocator()
v := try parse(al, " { \"a\" : [ 1 , \"x\\ny\" , null , 2.5 ] , \"b\" : true } ")
s := try stringify(al, v)
try base.expect(mem.eql(u8, s, "{\"a\":[1,\"x\\ny\",null,2.5],\"b\":true}"))
}
test "a parse-stringify round trip is stable" {
var a = arena.ArenaAllocator.init(base.testAlloc)
defer a.deinit()
al := a.allocator()
src := "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"initialize\",\"params\":{\"capabilities\":{},\"n\":-9223372036854775808}}"
once := try stringify(al, try parse(al, src))
twice := try stringify(al, try parse(al, once))
try base.expect(mem.eql(u8, once, twice))
try base.expect(mem.eql(u8, once, src))
}
// The failing-allocator sweeps: every induced OOM must surface as an
// error. The arena wraps the failing allocator, so `deinit` releases
// whatever the aborted parse had acquired and nothing leaks.
fn parseSweep(alloc base.Allocator) !void {
var a = arena.ArenaAllocator.init(alloc)
defer a.deinit()
v := try parse(a.allocator(), "{\"k\":[1,\"two\",{\"x\":null}],\"f\":false}")
try base.expect(v.get("f") != null)
}
fn stringifySweep(alloc base.Allocator) !void {
var a = arena.ArenaAllocator.init(alloc)
defer a.deinit()
al := a.allocator()
s := try stringify(al, try parse(al, "[\"esc\\u0001\",2,{\"k\":null}]"))
try base.expect(mem.eql(u8, s, "[\"esc\\u0001\",2,{\"k\":null}]"))
}
test "parse survives failure at every allocation point" {
try base.checkAllAllocationFailures(base.testAlloc, parseSweep, .{})
}
test "stringify survives failure at every allocation point" {
try base.checkAllAllocationFailures(base.testAlloc, stringifySweep, .{})
}