parseQuotedString

This commit is contained in:
jaina heartles 2022-12-19 05:41:35 -08:00
parent 4c661672c2
commit 7f689c7030

View file

@ -275,6 +275,76 @@ fn isTokenChar(ch: u8) bool {
}
}
// Reads one quoted-string (rfc 9110) from the stream and returns its contents: the
// enclosing double quotes are dropped and each backslash pair is replaced by the byte
// it escapes. If the first byte is not '"' it is put back on the stream and
// error.MissingStartQuote is returned. A byte outside the accepted ranges yields
// error.UnexpectedChar; errors from the underlying reader are passed through.
// The result is allocated with `alloc` and must be freed by the caller.
fn parseQuotedString(alloc: std.mem.Allocator, peek_stream: anytype) ![]const u8 {
    const reader = peek_stream.reader();

    var unescaped = std.ArrayList(u8).init(alloc);
    errdefer unescaped.deinit();

    const first = try reader.readByte();
    if (first != '"') {
        // Leave the stream untouched so the caller can try a different parse.
        try peek_stream.putBackByte(first);
        return error.MissingStartQuote;
    }

    while (true) {
        const byte = try reader.readByte();
        switch (byte) {
            // Closing quote: the string is complete.
            '"' => break,
            // Backslash pair: keep only the escaped byte.
            '\\' => {
                const escaped = try reader.readByte();
                switch (escaped) {
                    '\t', ' ', 0x21...0x7e, 0x80...0xff => try unescaped.append(escaped),
                    else => return error.UnexpectedChar,
                }
            },
            // Ordinary text: everything printable except '"' and '\\', plus tab, space and bytes >= 0x80.
            '\t', ' ', '!', 0x23...0x5b, 0x5d...0x7e, 0x80...0xff => try unescaped.append(byte),
            else => return error.UnexpectedChar,
        }
    }

    return unescaped.toOwnedSlice();
}
test "parseQuotedString" {
    // Runs parseQuotedString over `input` through a one-byte peek stream and checks three
    // things: the parse result (value or error), the bytes left on the stream, and that
    // the stream then fails with `stream_error` (error.EndOfStream when null).
    const expectParse = struct {
        fn run(input: []const u8, stream_error: ?anyerror, expected: anyerror![]const u8, remaining: []const u8) !void {
            const terminal_error = stream_error orelse error.EndOfStream;

            var buffer = std.io.fixedBufferStream(input);
            var failing = errorReader(terminal_error, buffer.reader());
            var peek_stream = std.io.peekStream(1, failing.reader());

            const actual = parseQuotedString(std.testing.allocator, &peek_stream);
            // Only a successful parse hands back memory that needs freeing.
            defer if (actual) |bytes| std.testing.allocator.free(bytes) else |_| {};

            if (expected) |want| {
                try std.testing.expectEqualStrings(want, try actual);
            } else |want_err| {
                try std.testing.expectError(want_err, actual);
            }

            try std.testing.expect(try peek_stream.reader().isBytes(remaining));
            try std.testing.expectError(terminal_error, peek_stream.reader().readByte());
        }
    }.run;

    // Well-formed strings, with and without trailing data and escapes.
    try expectParse("\"abcdefg\"", null, "abcdefg", "");
    try expectParse("\"abcdefg\"abcd", null, "abcdefg", "abcd");
    try expectParse("\"xyz\\\"z\"", null, "xyz\"z", "");
    try expectParse("\"xyz\\\\z\"", null, "xyz\\z", "");
    try expectParse("\"💯\"", null, "💯", "");

    // Missing opening quote: nothing is consumed.
    try expectParse("abcdefg\"abcd", null, error.MissingStartQuote, "abcdefg\"abcd");

    // Unterminated strings surface the stream's own error.
    try expectParse("\"abcdefg", null, error.EndOfStream, "");
    try expectParse("\"abcdefg", error.ClosedPipe, error.ClosedPipe, "");
}
// Attempts to parse a token (rfc 9110) off the stream. It stops at the first non-token
// char. Said char remains on the stream. If the token is empty, returns error.EmptyToken.
// The caller takes responsibility for deallocating the memory returned.
fn parseToken(alloc: std.mem.Allocator, peek_stream: anytype) ![]const u8 {
var data = std.ArrayList(u8).init(alloc);
errdefer data.deinit();
@ -288,14 +358,16 @@ fn parseToken(alloc: std.mem.Allocator, peek_stream: anytype) ![]const u8 {
try data.append(ch);
} else |err| if (err != error.EndOfStream) return err;
if (data.items.len == 0) return error.EmptyToken;
return data.toOwnedSlice();
}
test "parseToken" {
const testCase = struct {
fn func(data: []const u8, err: ?anyerror, expected: anyerror![]const u8, remaining: []const u8) !void {
fn func(data: []const u8, stream_error: ?anyerror, expected: anyerror![]const u8, remaining: []const u8) !void {
var fbs = std.io.fixedBufferStream(data);
var stream = errorReader(err orelse error.EndOfStream, fbs.reader());
var stream = errorReader(stream_error orelse error.EndOfStream, fbs.reader());
var peeker = std.io.peekStream(1, stream.reader());
const result = parseToken(std.testing.allocator, &peeker);
@ -307,7 +379,7 @@ test "parseToken" {
try std.testing.expectError(expected_err, result);
try std.testing.expect(try peeker.reader().isBytes(remaining));
try std.testing.expectError(err orelse error.EndOfStream, peeker.reader().readByte());
try std.testing.expectError(stream_error orelse error.EndOfStream, peeker.reader().readByte());
}
}.func;
@ -316,12 +388,60 @@ test "parseToken" {
try testCase("abc;defg", null, "abc", ";defg");
try testCase("abc%defg$; ", null, "abc%defg$", "; ");
try testCase(" ", null, "", " ");
try testCase(";", null, "", ";");
try testCase(" ", null, error.EmptyToken, " ");
try testCase(";", null, error.EmptyToken, ";");
try testCase("abcdefg", error.ClosedPipe, error.ClosedPipe, "");
}
// Reads either a token or a quoted-string (rfc 9110) from the stream. A token is tried
// first; only when that attempt reports error.EmptyToken is a quoted-string parsed
// instead. Every other error is returned unchanged.
// The result is allocated with `alloc` and must be freed by the caller.
fn parseTokenOrQuotedString(alloc: std.mem.Allocator, peek_stream: anytype) ![]const u8 {
    if (parseToken(alloc, peek_stream)) |token| {
        return token;
    } else |err| switch (err) {
        // No token characters were found, so fall back to the quoted form.
        error.EmptyToken => return parseQuotedString(alloc, peek_stream),
        else => |other| return other,
    }
}
test "parseTokenOrQuotedString" {
    // Runs parseTokenOrQuotedString over `input` through a one-byte peek stream and checks
    // the parse result (value or error), the bytes left on the stream, and that the
    // stream then fails with `stream_error` (error.EndOfStream when null).
    const expectParse = struct {
        fn run(input: []const u8, stream_error: ?anyerror, expected: anyerror![]const u8, remaining: []const u8) !void {
            const terminal_error = stream_error orelse error.EndOfStream;

            var buffer = std.io.fixedBufferStream(input);
            var failing = errorReader(terminal_error, buffer.reader());
            var peek_stream = std.io.peekStream(1, failing.reader());

            const actual = parseTokenOrQuotedString(std.testing.allocator, &peek_stream);
            // Only a successful parse hands back memory that needs freeing.
            defer if (actual) |bytes| std.testing.allocator.free(bytes) else |_| {};

            if (expected) |want| {
                try std.testing.expectEqualStrings(want, try actual);
            } else |want_err| {
                try std.testing.expectError(want_err, actual);
            }

            try std.testing.expect(try peek_stream.reader().isBytes(remaining));
            try std.testing.expectError(terminal_error, peek_stream.reader().readByte());
        }
    }.run;

    // Token inputs.
    try expectParse("abcdefg", null, "abcdefg", "");
    try expectParse("abc defg", null, "abc", " defg");
    try expectParse("abc;defg", null, "abc", ";defg");
    try expectParse("abc%defg$; ", null, "abc%defg$", "; ");

    // Quoted-string inputs.
    try expectParse("\"abcdefg\"", null, "abcdefg", "");
    try expectParse("\"abcdefg\"abcd", null, "abcdefg", "abcd");
    try expectParse("\"xyz\\\"z\"", null, "xyz\"z", "");
    try expectParse("\"xyz\\\\z\"", null, "xyz\\z", "");
    try expectParse("\"💯\"", null, "💯", "");

    // Neither a token nor a quoted-string: the offending byte stays on the stream.
    try expectParse(" ", null, error.MissingStartQuote, " ");
    try expectParse(";", null, error.MissingStartQuote, ";");

    // Stream failures are passed through.
    try expectParse("\"abcdefg", null, error.EndOfStream, "");
    try expectParse("abcdefg", error.ClosedPipe, error.ClosedPipe, "");
    try expectParse("\"abcdefg", error.ClosedPipe, error.ClosedPipe, "");
}
fn ErrorReader(comptime E: type, comptime ReaderType: type) type {
return struct {
inner_reader: ReaderType,