//! fediglam/src/main/json.zig
//!
//! JSON parsing into Zig types; largely a modified copy of `std.json`.
const std = @import("std");
const mem = std.mem;
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
// This file is largely a copy of std.json
const StreamingParser = std.json.StreamingParser;
const Token = std.json.Token;
const unescapeValidString = std.json.unescapeValidString;
const UnescapeValidStringError = std.json.UnescapeValidStringError;
/// Parses the complete JSON document in `body` into a value of type `T`.
///
/// Memory needed for pointers and slices inside `T` is taken from `alloc`;
/// release the result with `parseFree`. Returns `error.UnexpectedEndOfJson`
/// when `body` yields no token at all, and propagates every error raised by
/// the tokenizer or by `parseInternal`.
pub fn parse(comptime T: type, body: []const u8, alloc: std.mem.Allocator) !T {
    const options: ParseOptions = .{ .allocator = alloc };
    var stream = TokenStream.init(body);

    const first = (try stream.next()) orelse return error.UnexpectedEndOfJson;
    const value = try parseInternal(T, first, &stream, options);
    // From here on a failure must release what was just built.
    errdefer parseFreeInternal(T, value, options);

    if (!options.allow_trailing_data) {
        // Any error for trailing input is expected to surface through `try`;
        // a further token after the top-level value is treated as impossible.
        const trailing = try stream.next();
        if (trailing != null) unreachable;
        assert(stream.i >= stream.slice.len);
    }
    return value;
}
/// Releases everything `parse` allocated for `value`.
/// `alloc` must be the allocator that was handed to `parse`.
pub fn parseFree(value: anytype, alloc: std.mem.Allocator) void {
    const options = ParseOptions{ .allocator = alloc };
    parseFreeInternal(@TypeOf(value), value, options);
}
/// Reports whether `T` opts into custom parsing, i.e. it has a `parse`
/// function and also declares `JsonParseAs`.
///
/// WARNING: the object's `parse` method must not keep a reference to the
/// value it is given.
fn hasCustomParse(comptime T: type) bool {
    const has_parse_fn = std.meta.trait.hasFn("parse")(T);
    if (has_parse_fn) {
        return @hasDecl(T, "JsonParseAs");
    }
    return false;
}
// The rest is (modified) from std.json

/// Thin adapter that drives a `StreamingParser` over a complete byte slice
/// and hands out the resulting JSON tokens one at a time.
pub const TokenStream = struct {
    /// Number of bytes fed to the parser so far (also bumped by the final
    /// synthetic space fed at end of input).
    i: usize,
    /// The full input being tokenized.
    slice: []const u8,
    parser: StreamingParser,
    /// Second token produced by a single `feed`, held back for the next call.
    token: ?Token,

    pub const Error = StreamingParser.Error || error{UnexpectedEndOfJson};

    /// Creates a stream positioned at the start of `slice`.
    pub fn init(slice: []const u8) TokenStream {
        return .{
            .i = 0,
            .slice = slice,
            .parser = StreamingParser.init(),
            .token = null,
        };
    }

    /// Parser stack length, plus one while a held-back token is pending.
    fn stackUsed(self: *TokenStream) usize {
        const pending: usize = if (self.token != null) 1 else 0;
        return self.parser.stack.len + pending;
    }

    /// Returns the next token, `null` once the parser reports the document
    /// complete, or `error.UnexpectedEndOfJson` if input ran out first.
    pub fn next(self: *TokenStream) Error!?Token {
        // Hand out a token left over from the previous feed first.
        if (self.token) |pending| {
            self.token = null;
            return pending;
        }

        var first: ?Token = undefined;
        var second: ?Token = undefined;

        while (self.i < self.slice.len) {
            const byte = self.slice[self.i];
            try self.parser.feed(byte, &first, &second);
            self.i += 1;

            if (first) |tok| {
                self.token = second;
                return tok;
            }
        }

        // Without this a bare number fails, the streaming parser doesn't know the input ended
        try self.parser.feed(' ', &first, &second);
        self.i += 1;

        if (first) |tok| return tok;
        if (!self.parser.complete) return error.UnexpectedEndOfJson;
        return null;
    }
};
/// Tests whether the raw JSON string body `encoded` (escape sequences still
/// in place) decodes to exactly the bytes of `decoded`.
/// `encoded` is assumed to be a well-formed JSON string; malformed escapes
/// hit `unreachable`.
fn encodesTo(decoded: []const u8, encoded: []const u8) bool {
    var dec_pos: usize = 0;
    var enc_pos: usize = 0;

    while (dec_pos < decoded.len) {
        if (enc_pos >= encoded.len) return false;

        // Ordinary byte: must match verbatim.
        if (encoded[enc_pos] != '\\') {
            if (decoded[dec_pos] != encoded[enc_pos]) return false;
            enc_pos += 1;
            dec_pos += 1;
            continue;
        }

        const kind = encoded[enc_pos + 1];

        // Two-character escape standing for a single byte.
        if (kind != 'u') {
            const expected: u8 = switch (kind) {
                '\\' => '\\',
                '/' => '/',
                'n' => '\n',
                'r' => '\r',
                't' => '\t',
                'f' => 12,
                'b' => 8,
                '"' => '"',
                else => unreachable,
            };
            if (decoded[dec_pos] != expected) return false;
            enc_pos += 2;
            dec_pos += 1;
            continue;
        }

        // \uXXXX escape, possibly the first half of a surrogate pair.
        var codepoint = std.fmt.parseInt(u21, encoded[enc_pos + 2 .. enc_pos + 6], 16) catch unreachable;
        enc_pos += 6;
        const is_high_surrogate = codepoint >= 0xD800 and codepoint < 0xDC00;
        if (is_high_surrogate) {
            assert(encoded[enc_pos] == '\\');
            assert(encoded[enc_pos + 1] == 'u');
            const low = std.fmt.parseInt(u21, encoded[enc_pos + 2 .. enc_pos + 6], 16) catch unreachable;
            codepoint = 0x10000 + (((codepoint & 0x03ff) << 10) | (low & 0x03ff));
            enc_pos += 6;
        }

        var utf8: [4]u8 = undefined;
        const utf8_len = std.unicode.utf8Encode(codepoint, &utf8) catch unreachable;
        if (dec_pos + utf8_len > decoded.len) return false;
        if (!mem.eql(u8, decoded[dec_pos .. dec_pos + utf8_len], utf8[0..utf8_len])) return false;
        dec_pos += utf8_len;
    }

    assert(dec_pos == decoded.len);
    assert(enc_pos == encoded.len);
    return true;
}
/// Parses the next value from `tokens` as a `T` and reports whether it is
/// deep-equal to `value`.
///
/// The value is read from the live stream (the caller is in the middle of an
/// enclosing object), so no end-of-input check is made here. The temporary is
/// released before returning. Returns `error.UnexpectedEndOfJson` if the
/// stream has no further token, and propagates any error from `parseInternal`.
fn parsesTo(comptime T: type, value: T, tokens: *TokenStream, options: ParseOptions) !bool {
    // TODO: should be able to write this function to not require an allocator
    // Go through parseInternal/parseFreeInternal: the public `parse` and
    // `parseFree` in this file take a byte slice and an allocator, not a
    // token stream and options.
    const token = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
    const tmp = try parseInternal(T, token, tokens, options);
    defer parseFreeInternal(T, tmp, options);

    return parsedEqual(tmp, value);
}
/// Deep equality for values produced by `parse`.
/// Optionals, tagged unions, arrays, structs, single-item pointers and slices
/// are compared structurally; every other type falls back to `==`.
fn parsedEqual(a: anytype, b: @TypeOf(a)) bool {
    const T = @TypeOf(a);
    switch (@typeInfo(T)) {
        .Optional => {
            const a_val = a orelse return b == null;
            const b_val = b orelse return false;
            return parsedEqual(a_val, b_val);
        },
        .Union => |info| {
            if (info.tag_type) |UnionTag| {
                const tag_a = std.meta.activeTag(a);
                const tag_b = std.meta.activeTag(b);
                if (tag_a != tag_b) return false;

                // Compare the payload of whichever field is active.
                inline for (info.fields) |f| {
                    if (@field(UnionTag, f.name) == tag_a) {
                        return parsedEqual(@field(a, f.name), @field(b, f.name));
                    }
                }
                return false;
            } else {
                unreachable;
            }
        },
        .Array => {
            for (a) |elem, idx| {
                if (!parsedEqual(elem, b[idx])) return false;
            }
            return true;
        },
        .Struct => |info| {
            inline for (info.fields) |f| {
                const same = parsedEqual(@field(a, f.name), @field(b, f.name));
                if (!same) return false;
            }
            return true;
        },
        .Pointer => |ptr_info| switch (ptr_info.size) {
            .One => return parsedEqual(a.*, b.*),
            .Slice => {
                if (a.len != b.len) return false;
                var idx: usize = 0;
                while (idx < a.len) : (idx += 1) {
                    if (!parsedEqual(a[idx], b[idx])) return false;
                }
                return true;
            },
            .Many, .C => unreachable,
        },
        else => return a == b,
    }
    unreachable;
}
/// Knobs that control how `parseInternal` and `parseFreeInternal` behave.
const ParseOptions = struct {
    /// Allocator used for pointer and slice targets; parsing such a type
    /// without one fails with `error.AllocatorRequired`.
    allocator: ?Allocator = null,

    /// Behaviour when a duplicate field is encountered.
    duplicate_field_behavior: DuplicateFieldBehavior = .Error,

    /// If false, finding an unknown field returns an error.
    ignore_unknown_fields: bool = false,

    /// When true, the top-level "no tokens after the value" check is skipped.
    allow_trailing_data: bool = false,

    /// Policy for an object key that appears more than once.
    pub const DuplicateFieldBehavior = enum {
        UseFirst,
        Error,
        UseLast,
    };
};
const SkipValueError = error{UnexpectedJsonDepth} || TokenStream.Error;

/// Consumes one complete JSON value from `tokens` and discards it.
///
/// The first token is always consumed; if it opened a container, tokens are
/// drained until the stream's stack depth is back to where it started.
/// Returns `error.UnexpectedJsonDepth` if the first token leaves the depth
/// below its starting value. Tokenizer errors propagate.
fn skipValue(tokens: *TokenStream) SkipValueError!void {
    const start_depth = tokens.stackUsed();

    _ = try tokens.next();
    const depth_after_first = tokens.stackUsed();
    if (depth_after_first < start_depth) return error.UnexpectedJsonDepth;
    // A scalar leaves the depth unchanged: nothing more to skip.
    if (depth_after_first == start_depth) return;

    while ((try tokens.next()) != null) {
        if (tokens.stackUsed() == start_depth) return;
    }
}
/// Computes the error set `parseInternal` can return for `T`.
fn ParseInternalError(comptime T: type) type {
    // Start with an empty list of already-visited types; the list is what
    // keeps recursive type definitions from recursing forever.
    return ParseInternalErrorImpl(T, &[_]type{});
}
/// Builds the error set for parsing a `T`, walking into child types.
///
/// `inferred_types` lists the types already being expanded on the current
/// path. A type found there contributes `error{}`, because its errors are
/// supplied by the outer occurrence; this is what terminates recursion for
/// self-referential types.
///
/// Types with custom parsing (see `hasCustomParse`) contribute the errors of
/// their `JsonParseAs` representation plus their own `ParseError` set.
fn ParseInternalErrorImpl(comptime T: type, comptime inferred_types: []const type) type {
    // The recursion guard must run before the custom-parse branch, otherwise
    // a custom-parse type whose `JsonParseAs` refers back to it never stops.
    for (inferred_types) |ty| {
        if (T == ty) return error{};
    }
    if (hasCustomParse(T)) {
        // Keep the visited list going (and add T) rather than starting over.
        return ParseInternalErrorImpl(T.JsonParseAs, inferred_types ++ [_]type{T}) || T.ParseError;
    }

    switch (@typeInfo(T)) {
        .Bool => return error{UnexpectedToken},
        .Float, .ComptimeFloat => return error{UnexpectedToken} || std.fmt.ParseFloatError,
        .Int, .ComptimeInt => {
            // Integers may be written as floats ("1.0"), hence ParseFloatError.
            return error{ UnexpectedToken, InvalidNumber, Overflow } ||
                std.fmt.ParseIntError || std.fmt.ParseFloatError;
        },
        .Optional => |optionalInfo| {
            return ParseInternalErrorImpl(optionalInfo.child, inferred_types ++ [_]type{T});
        },
        .Enum => return error{ UnexpectedToken, InvalidEnumTag } || std.fmt.ParseIntError ||
            std.meta.IntToEnumError,
        .Union => |unionInfo| {
            if (unionInfo.tag_type) |_| {
                var errors = error{NoUnionMembersMatched};
                for (unionInfo.fields) |u_field| {
                    errors = errors || ParseInternalErrorImpl(u_field.field_type, inferred_types ++ [_]type{T});
                }
                return errors;
            } else {
                @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
            }
        },
        .Struct => |structInfo| {
            var errors = error{
                DuplicateJSONField,
                UnexpectedEndOfJson,
                UnexpectedToken,
                UnexpectedValue,
                UnknownField,
                MissingField,
            } || SkipValueError || TokenStream.Error;
            for (structInfo.fields) |field| {
                errors = errors || ParseInternalErrorImpl(field.field_type, inferred_types ++ [_]type{T});
            }
            return errors;
        },
        .Array => |arrayInfo| {
            return error{ UnexpectedEndOfJson, UnexpectedToken } || TokenStream.Error ||
                UnescapeValidStringError ||
                ParseInternalErrorImpl(arrayInfo.child, inferred_types ++ [_]type{T});
        },
        .Pointer => |ptrInfo| {
            var errors = error{AllocatorRequired} || std.mem.Allocator.Error;
            switch (ptrInfo.size) {
                .One => {
                    return errors || ParseInternalErrorImpl(ptrInfo.child, inferred_types ++ [_]type{T});
                },
                .Slice => {
                    return errors || error{ UnexpectedEndOfJson, UnexpectedToken } ||
                        ParseInternalErrorImpl(ptrInfo.child, inferred_types ++ [_]type{T}) ||
                        UnescapeValidStringError || TokenStream.Error;
                },
                else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
            }
        },
        else => return error{},
    }
    unreachable;
}
/// Parses one JSON value into a `T`.
///
/// `token` is the first token of the value and has already been taken from
/// `tokens`; any further tokens the value needs are pulled from `tokens`.
/// `tokens.i - 1` is passed to `Token.slice` wherever the source text of the
/// token just read is needed.
///
/// Behaviour by type:
/// - custom-parse types (see `hasCustomParse`): parse `T.JsonParseAs`, pass it
///   to `T.parse`, then free the intermediate value;
/// - bool: `true` / `false` tokens only;
/// - float: a number token, or a string token whose contents parse as a float;
/// - int: an integer number, a float with no fractional part, or a string
///   holding either;
/// - optional: `null` or the child type;
/// - enum: an integer tag value or the field name as a string;
/// - tagged union: the first field (in declaration order) that parses;
/// - struct: an object, honouring `options.duplicate_field_behavior` and
///   `options.ignore_unknown_fields`, filling unseen fields from defaults;
/// - array: a JSON array of exactly `len` elements, or (for `u8` children)
///   a string of exactly `len` decoded bytes;
/// - single pointer / slice: allocated with `options.allocator`
///   (`error.AllocatorRequired` when it is null).
///
/// On error, everything allocated for the partially built value is released.
fn parseInternal(
    comptime T: type,
    token: Token,
    tokens: *TokenStream,
    options: ParseOptions,
) ParseInternalError(T)!T {
    if (comptime hasCustomParse(T)) {
        // `val` is freed as soon as `T.parse` returns, so `T.parse` must not
        // keep references into it.
        const val = try parseInternal(T.JsonParseAs, token, tokens, options);
        defer parseFreeInternal(T.JsonParseAs, val, options);
        return try T.parse(val);
    }

    switch (@typeInfo(T)) {
        .Bool => {
            return switch (token) {
                .True => true,
                .False => false,
                else => error.UnexpectedToken,
            };
        },
        .Float, .ComptimeFloat => {
            switch (token) {
                .Number => |numberToken| return try std.fmt.parseFloat(T, numberToken.slice(tokens.slice, tokens.i - 1)),
                // Numbers sent as strings are accepted too.
                .String => |stringToken| return try std.fmt.parseFloat(T, stringToken.slice(tokens.slice, tokens.i - 1)),
                else => return error.UnexpectedToken,
            }
        },
        .Int, .ComptimeInt => {
            switch (token) {
                .Number => |numberToken| {
                    if (numberToken.is_integer)
                        return try std.fmt.parseInt(T, numberToken.slice(tokens.slice, tokens.i - 1), 10);
                    // Not written as an integer: accept it only if it has no
                    // fractional part and fits in T.
                    const float = try std.fmt.parseFloat(f128, numberToken.slice(tokens.slice, tokens.i - 1));
                    if (@round(float) != float) return error.InvalidNumber;
                    if (float > std.math.maxInt(T) or float < std.math.minInt(T)) return error.Overflow;
                    return @floatToInt(T, float);
                },
                .String => |stringToken| {
                    return std.fmt.parseInt(T, stringToken.slice(tokens.slice, tokens.i - 1), 10) catch |err| {
                        switch (err) {
                            error.Overflow => return err,
                            error.InvalidCharacter => {
                                // Same float fallback as for number tokens.
                                const float = try std.fmt.parseFloat(f128, stringToken.slice(tokens.slice, tokens.i - 1));
                                if (@round(float) != float) return error.InvalidNumber;
                                if (float > std.math.maxInt(T) or float < std.math.minInt(T)) return error.Overflow;
                                return @floatToInt(T, float);
                            },
                        }
                    };
                },
                else => return error.UnexpectedToken,
            }
        },
        .Optional => |optionalInfo| {
            if (token == .Null) {
                return null;
            } else {
                return try parseInternal(optionalInfo.child, token, tokens, options);
            }
        },
        .Enum => |enumInfo| {
            switch (token) {
                .Number => |numberToken| {
                    if (!numberToken.is_integer) return error.UnexpectedToken;
                    const n = try std.fmt.parseInt(enumInfo.tag_type, numberToken.slice(tokens.slice, tokens.i - 1), 10);
                    return try std.meta.intToEnum(T, n);
                },
                .String => |stringToken| {
                    const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                    switch (stringToken.escapes) {
                        .None => return std.meta.stringToEnum(T, source_slice) orelse return error.InvalidEnumTag,
                        .Some => {
                            // Compare against each field name without
                            // allocating a decoded copy of the string.
                            inline for (enumInfo.fields) |field| {
                                if (field.name.len == stringToken.decodedLength() and encodesTo(field.name, source_slice)) {
                                    return @field(T, field.name);
                                }
                            }
                            return error.InvalidEnumTag;
                        },
                    }
                },
                else => return error.UnexpectedToken,
            }
        },
        .Union => |unionInfo| {
            if (unionInfo.tag_type) |_| {
                // try each of the union fields until we find one that matches
                inline for (unionInfo.fields) |u_field| {
                    // take a copy of tokens so we can withhold mutations until success
                    var tokens_copy = tokens.*;
                    if (parseInternal(u_field.field_type, token, &tokens_copy, options)) |value| {
                        tokens.* = tokens_copy;
                        return @unionInit(T, u_field.name, value);
                    } else |err| {
                        // Bubble up error.OutOfMemory
                        // Parsing some types won't have OutOfMemory in their
                        // error-sets, for the condition to be valid, merge it in.
                        if (@as(@TypeOf(err) || error{OutOfMemory}, err) == error.OutOfMemory) return err;
                        // Bubble up AllocatorRequired, as it indicates missing option
                        if (@as(@TypeOf(err) || error{AllocatorRequired}, err) == error.AllocatorRequired) return err;
                        // otherwise continue through the `inline for`
                    }
                }
                return error.NoUnionMembersMatched;
            } else {
                @compileError("Unable to parse into untagged union '" ++ @typeName(T) ++ "'");
            }
        },
        .Struct => |structInfo| {
            switch (token) {
                .ObjectBegin => {},
                else => return error.UnexpectedToken,
            }
            var r: T = undefined;
            var fields_seen = [_]bool{false} ** structInfo.fields.len;
            // Only fields that were actually assigned may be freed on error.
            errdefer {
                inline for (structInfo.fields) |field, i| {
                    if (fields_seen[i] and !field.is_comptime) {
                        parseFreeInternal(field.field_type, @field(r, field.name), options);
                    }
                }
            }

            while (true) {
                switch ((try tokens.next()) orelse return error.UnexpectedEndOfJson) {
                    .ObjectEnd => break,
                    .String => |stringToken| {
                        const key_source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                        // Field values are nested, so more tokens always follow them.
                        var child_options = options;
                        child_options.allow_trailing_data = true;
                        var found = false;
                        inline for (structInfo.fields) |field, i| {
                            // TODO: using switches here segfault the compiler (#2727?)
                            if ((stringToken.escapes == .None and mem.eql(u8, field.name, key_source_slice)) or (stringToken.escapes == .Some and (field.name.len == stringToken.decodedLength() and encodesTo(field.name, key_source_slice)))) {
                                if (fields_seen[i]) {
                                    if (options.duplicate_field_behavior == .UseFirst) {
                                        // unconditonally ignore value. for comptime fields, this skips check against default_value
                                        const next_token = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
                                        parseFreeInternal(field.field_type, try parseInternal(field.field_type, next_token, tokens, child_options), child_options);
                                        found = true;
                                        break;
                                    } else if (options.duplicate_field_behavior == .Error) {
                                        return error.DuplicateJSONField;
                                    } else if (options.duplicate_field_behavior == .UseLast) {
                                        // Drop the earlier value; the field is
                                        // marked unseen until the new value parses
                                        // so the errdefer cannot free it twice.
                                        if (!field.is_comptime) {
                                            parseFreeInternal(field.field_type, @field(r, field.name), child_options);
                                        }
                                        fields_seen[i] = false;
                                    }
                                }
                                if (field.is_comptime) {
                                    // A comptime field cannot be assigned; the
                                    // JSON value just has to equal its default.
                                    if (!try parsesTo(field.field_type, @ptrCast(*const field.field_type, field.default_value.?).*, tokens, child_options)) {
                                        return error.UnexpectedValue;
                                    }
                                } else {
                                    const next_token = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
                                    @field(r, field.name) = try parseInternal(field.field_type, next_token, tokens, child_options);
                                }
                                fields_seen[i] = true;
                                found = true;
                                break;
                            }
                        }
                        if (!found) {
                            if (options.ignore_unknown_fields) {
                                try skipValue(tokens);
                                continue;
                            } else {
                                return error.UnknownField;
                            }
                        }
                    },
                    else => return error.UnexpectedToken,
                }
            }

            // Fill in defaults for fields the object did not mention.
            inline for (structInfo.fields) |field, i| {
                if (!fields_seen[i]) {
                    if (field.default_value) |default_ptr| {
                        if (!field.is_comptime) {
                            const default = @ptrCast(*align(1) const field.field_type, default_ptr).*;
                            @field(r, field.name) = default;
                        }
                    } else {
                        return error.MissingField;
                    }
                }
            }
            return r;
        },
        .Array => |arrayInfo| {
            switch (token) {
                .ArrayBegin => {
                    var r: T = undefined;
                    var i: usize = 0;
                    var child_options = options;
                    child_options.allow_trailing_data = true;
                    errdefer {
                        // Only elements [0, i) have been initialized: when
                        // element `i` fails to parse, `r[i]` is still undefined,
                        // and when the closing bracket is missing `i == r.len`.
                        // Without the r.len check `r[j]` is not allowed
                        if (r.len > 0) {
                            var j: usize = i;
                            while (j > 0) {
                                j -= 1;
                                parseFreeInternal(arrayInfo.child, r[j], options);
                            }
                        }
                    }
                    while (i < r.len) : (i += 1) {
                        const next_token = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
                        r[i] = try parseInternal(arrayInfo.child, next_token, tokens, child_options);
                    }
                    const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
                    switch (tok) {
                        .ArrayEnd => {},
                        else => return error.UnexpectedToken,
                    }
                    return r;
                },
                .String => |stringToken| {
                    if (arrayInfo.child != u8) return error.UnexpectedToken;
                    var r: T = undefined;
                    const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                    // The decoded string must fill the array exactly: a longer
                    // one would write past `r`, a shorter one would leave
                    // undefined bytes in the result.
                    if (r.len != stringToken.decodedLength()) return error.UnexpectedToken;
                    switch (stringToken.escapes) {
                        .None => mem.copy(u8, &r, source_slice),
                        .Some => try unescapeValidString(&r, source_slice),
                    }
                    return r;
                },
                else => return error.UnexpectedToken,
            }
        },
        .Pointer => |ptrInfo| {
            const allocator = options.allocator orelse return error.AllocatorRequired;
            switch (ptrInfo.size) {
                .One => {
                    const r: T = try allocator.create(ptrInfo.child);
                    errdefer allocator.destroy(r);
                    r.* = try parseInternal(ptrInfo.child, token, tokens, options);
                    return r;
                },
                .Slice => {
                    switch (token) {
                        .ArrayBegin => {
                            var arraylist = std.ArrayList(ptrInfo.child).init(allocator);
                            errdefer {
                                while (arraylist.popOrNull()) |v| {
                                    parseFreeInternal(ptrInfo.child, v, options);
                                }
                                arraylist.deinit();
                            }

                            while (true) {
                                const tok = (try tokens.next()) orelse return error.UnexpectedEndOfJson;
                                switch (tok) {
                                    .ArrayEnd => break,
                                    else => {},
                                }

                                // Reserve the slot first so that the append after
                                // a successful parse cannot fail and leak `v`.
                                try arraylist.ensureUnusedCapacity(1);
                                const v = try parseInternal(ptrInfo.child, tok, tokens, options);
                                arraylist.appendAssumeCapacity(v);
                            }

                            if (ptrInfo.sentinel) |some| {
                                const sentinel_value = @ptrCast(*const ptrInfo.child, some).*;
                                try arraylist.append(sentinel_value);
                                const output = arraylist.toOwnedSlice();
                                return output[0 .. output.len - 1 :sentinel_value];
                            }

                            return arraylist.toOwnedSlice();
                        },
                        .String => |stringToken| {
                            if (ptrInfo.child != u8) return error.UnexpectedToken;
                            const source_slice = stringToken.slice(tokens.slice, tokens.i - 1);
                            const len = stringToken.decodedLength();
                            // One extra byte when the slice type carries a sentinel.
                            const output = try allocator.alloc(u8, len + @boolToInt(ptrInfo.sentinel != null));
                            errdefer allocator.free(output);
                            switch (stringToken.escapes) {
                                .None => mem.copy(u8, output, source_slice),
                                .Some => try unescapeValidString(output, source_slice),
                            }

                            if (ptrInfo.sentinel) |some| {
                                const char = @ptrCast(*const u8, some).*;
                                output[len] = char;
                                return output[0..len :char];
                            }

                            return output;
                        },
                        else => return error.UnexpectedToken,
                    }
                },
                else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
            }
        },
        else => @compileError("Unable to parse into type '" ++ @typeName(T) ++ "'"),
    }
    unreachable;
}
/// Error set for a top-level parse of `T`: the tokenizer's errors and
/// `UnexpectedEndOfJson`, merged with everything `parseInternal` can return.
fn ParseError(comptime T: type) type {
    const stream_errors = error{UnexpectedEndOfJson} || TokenStream.Error;
    return stream_errors || ParseInternalError(T);
}
/// Frees whatever `parseInternal` allocated while building `value`.
/// Call it with the same `T` and `ParseOptions` that produced the value.
/// Pointer-bearing values require `options.allocator` to be set.
fn parseFreeInternal(comptime T: type, value: T, options: ParseOptions) void {
    switch (@typeInfo(T)) {
        // Scalars own no memory.
        .Bool, .Float, .ComptimeFloat, .Int, .ComptimeInt, .Enum => {},
        .Optional => |optional_info| {
            if (value) |payload| {
                parseFreeInternal(optional_info.child, payload, options);
            }
        },
        .Union => |union_info| {
            if (union_info.tag_type) |Tag| {
                // Free only the payload of the active field.
                inline for (union_info.fields) |u_field| {
                    if (value == @field(Tag, u_field.name)) {
                        parseFreeInternal(u_field.field_type, @field(value, u_field.name), options);
                        break;
                    }
                }
            } else {
                unreachable;
            }
        },
        .Struct => |struct_info| {
            inline for (struct_info.fields) |field| {
                // Comptime fields are never assigned by the parser.
                if (field.is_comptime) continue;
                parseFreeInternal(field.field_type, @field(value, field.name), options);
            }
        },
        .Array => |array_info| {
            for (value) |elem| {
                parseFreeInternal(array_info.child, elem, options);
            }
        },
        .Pointer => |ptr_info| {
            const allocator = options.allocator.?;
            switch (ptr_info.size) {
                .One => {
                    // Release the pointee's contents before the pointee itself.
                    parseFreeInternal(ptr_info.child, value.*, options);
                    allocator.destroy(value);
                },
                .Slice => {
                    for (value) |elem| {
                        parseFreeInternal(ptr_info.child, elem, options);
                    }
                    allocator.free(value);
                },
                else => unreachable,
            }
        },
        else => unreachable,
    }
}