From 7f689c70303fa22bb65e8e3be60f199ddcd6e90c Mon Sep 17 00:00:00 2001 From: jaina heartles Date: Mon, 19 Dec 2022 05:41:35 -0800 Subject: [PATCH] parseQuotedString --- src/http/request/parser.zig | 130 ++++++++++++++++++++++++++++++++++-- 1 file changed, 125 insertions(+), 5 deletions(-) diff --git a/src/http/request/parser.zig b/src/http/request/parser.zig index 54ca6bd..f10b72d 100644 --- a/src/http/request/parser.zig +++ b/src/http/request/parser.zig @@ -275,6 +275,76 @@ fn isTokenChar(ch: u8) bool { } } +// Parses a quoted-string (rfc 9110) off the stream. Backslash-tokens are unescaped. +// The caller takes responsibility for deallocating the memory returned. +fn parseQuotedString(alloc: std.mem.Allocator, peek_stream: anytype) ![]const u8 { + const reader = peek_stream.reader(); + + var data = std.ArrayList(u8).init(alloc); + errdefer data.deinit(); + + { + const start = try reader.readByte(); + if (start != '"') { + try peek_stream.putBackByte(start); + return error.MissingStartQuote; + } + } + + while (true) { + const ch = switch (try reader.readByte()) { + '\t', ' ', '!', 0x23...0x5b, 0x5d...0x7e, 0x80...0xff => |c| c, + + '\\' => switch (try reader.readByte()) { + '\t', ' ', 0x21...0x7e, 0x80...0xff => |c| c, + else => return error.UnexpectedChar, + }, + + '"' => break, + else => return error.UnexpectedChar, + }; + + try data.append(ch); + } + + return data.toOwnedSlice(); +} + +test "parseQuotedString" { + const testCase = struct { + fn func(data: []const u8, stream_error: ?anyerror, expected: anyerror![]const u8, remaining: []const u8) !void { + var fbs = std.io.fixedBufferStream(data); + var stream = errorReader(stream_error orelse error.EndOfStream, fbs.reader()); + var peeker = std.io.peekStream(1, stream.reader()); + + const result = parseQuotedString(std.testing.allocator, &peeker); + defer if (result) |v| std.testing.allocator.free(v) else |_| {}; + + if (expected) |val| + try std.testing.expectEqualStrings(val, try result) + else |expected_err| 
+ try std.testing.expectError(expected_err, result); + + try std.testing.expect(try peeker.reader().isBytes(remaining)); + try std.testing.expectError(stream_error orelse error.EndOfStream, peeker.reader().readByte()); + } + }.func; + + try testCase("\"abcdefg\"", null, "abcdefg", ""); + try testCase("\"abcdefg\"abcd", null, "abcdefg", "abcd"); + try testCase("\"xyz\\\"z\"", null, "xyz\"z", ""); + try testCase("\"xyz\\\\z\"", null, "xyz\\z", ""); + try testCase("\"💯\"", null, "💯", ""); + + try testCase("abcdefg\"abcd", null, error.MissingStartQuote, "abcdefg\"abcd"); + try testCase("\"abcdefg", null, error.EndOfStream, ""); + + try testCase("\"abcdefg", error.ClosedPipe, error.ClosedPipe, ""); +} + +// Attempts to parse a token (rfc 9110) off the stream. It stops at the first non-token +// char. Said char remains on the stream. If the token is empty, returns error.EmptyToken. +// The caller takes responsibility for deallocating the memory returned. fn parseToken(alloc: std.mem.Allocator, peek_stream: anytype) ![]const u8 { var data = std.ArrayList(u8).init(alloc); errdefer data.deinit(); @@ -288,14 +358,16 @@ fn parseToken(alloc: std.mem.Allocator, peek_stream: anytype) ![]const u8 { try data.append(ch); } else |err| if (err != error.EndOfStream) return err; + if (data.items.len == 0) return error.EmptyToken; + return data.toOwnedSlice(); } test "parseToken" { const testCase = struct { - fn func(data: []const u8, err: ?anyerror, expected: anyerror![]const u8, remaining: []const u8) !void { + fn func(data: []const u8, stream_error: ?anyerror, expected: anyerror![]const u8, remaining: []const u8) !void { var fbs = std.io.fixedBufferStream(data); - var stream = errorReader(err orelse error.EndOfStream, fbs.reader()); + var stream = errorReader(stream_error orelse error.EndOfStream, fbs.reader()); var peeker = std.io.peekStream(1, stream.reader()); const result = parseToken(std.testing.allocator, &peeker); @@ -307,7 +379,7 @@ test "parseToken" { try 
std.testing.expectError(expected_err, result); try std.testing.expect(try peeker.reader().isBytes(remaining)); - try std.testing.expectError(err orelse error.EndOfStream, peeker.reader().readByte()); + try std.testing.expectError(stream_error orelse error.EndOfStream, peeker.reader().readByte()); } }.func; @@ -316,12 +388,60 @@ test "parseToken" { try testCase("abc;defg", null, "abc", ";defg"); try testCase("abc%defg$; ", null, "abc%defg$", "; "); - try testCase(" ", null, "", " "); - try testCase(";", null, "", ";"); + try testCase(" ", null, error.EmptyToken, " "); + try testCase(";", null, error.EmptyToken, ";"); try testCase("abcdefg", error.ClosedPipe, error.ClosedPipe, ""); } +// Parses a token or quoted string (rfc 9110) off the stream, as appropriate. +// The caller takes responsibility for deallocating the memory returned. +fn parseTokenOrQuotedString(alloc: std.mem.Allocator, peek_stream: anytype) ![]const u8 { + return parseToken(alloc, peek_stream) catch |err| switch (err) { + error.EmptyToken => return try parseQuotedString(alloc, peek_stream), + else => |e| return e, + }; +} + +test "parseTokenOrQuotedString" { + const testCase = struct { + fn func(data: []const u8, stream_error: ?anyerror, expected: anyerror![]const u8, remaining: []const u8) !void { + var fbs = std.io.fixedBufferStream(data); + var stream = errorReader(stream_error orelse error.EndOfStream, fbs.reader()); + var peeker = std.io.peekStream(1, stream.reader()); + + const result = parseTokenOrQuotedString(std.testing.allocator, &peeker); + defer if (result) |v| std.testing.allocator.free(v) else |_| {}; + + if (expected) |val| + try std.testing.expectEqualStrings(val, try result) + else |expected_err| + try std.testing.expectError(expected_err, result); + + try std.testing.expect(try peeker.reader().isBytes(remaining)); + try std.testing.expectError(stream_error orelse error.EndOfStream, peeker.reader().readByte()); + } + }.func; + + try testCase("abcdefg", null, "abcdefg", ""); + try 
testCase("abc defg", null, "abc", " defg"); + try testCase("abc;defg", null, "abc", ";defg"); + try testCase("abc%defg$; ", null, "abc%defg$", "; "); + + try testCase("\"abcdefg\"", null, "abcdefg", ""); + try testCase("\"abcdefg\"abcd", null, "abcdefg", "abcd"); + try testCase("\"xyz\\\"z\"", null, "xyz\"z", ""); + try testCase("\"xyz\\\\z\"", null, "xyz\\z", ""); + try testCase("\"💯\"", null, "💯", ""); + + try testCase(" ", null, error.MissingStartQuote, " "); + try testCase(";", null, error.MissingStartQuote, ";"); + try testCase("\"abcdefg", null, error.EndOfStream, ""); + + try testCase("abcdefg", error.ClosedPipe, error.ClosedPipe, ""); + try testCase("\"abcdefg", error.ClosedPipe, error.ClosedPipe, ""); +} + fn ErrorReader(comptime E: type, comptime ReaderType: type) type { return struct { inner_reader: ReaderType,