fediglam/src/http/multipart.zig

410 lines
12 KiB
Zig
Raw Normal View History

2022-11-27 13:43:06 +00:00
const std = @import("std");
2022-11-27 14:11:01 +00:00
const util = @import("util");
2022-11-27 13:43:06 +00:00
/// Longest boundary string `parseFormData` accepts; anything longer is
/// rejected with `error.BoundaryTooLarge`.
const max_boundary = 70;
2022-11-27 14:11:01 +00:00
/// Outcome of reading one part of a multipart body with `parseFormField`.
const FormFieldResult = struct {
    /// The part that was read: its body bytes and Content-Disposition parameters.
    field: FormField,
    /// True unless the delimiter that ended this part was the closing
    /// `--boundary--` form, i.e. true when another part follows.
    more: bool,
};
2022-11-27 14:24:41 +00:00
/// A single part of a multipart body as produced by `parseFormField`.
const FormField = struct {
// Bytes between the part's headers and the boundary that ends it.
value: []const u8,
// Parameters parsed from the part's Content-Disposition header.
params: FormDataParams,
};
2022-11-27 14:11:01 +00:00
/// Iterates over the `;`-separated `name=value` parameters that follow the
/// leading token of a header value, e.g. the `name` and `filename` entries of
/// `form-data; name="file"; filename="a.txt"`.
///
/// Every returned slice points into the string given to `from`; nothing is
/// allocated or copied.
const ParamIter = struct {
    str: []const u8,
    /// Offset of the `;` that introduces the next parameter, or `str.len`
    /// once the iterator is exhausted.
    index: usize = 0,

    const Param = struct {
        name: []const u8,
        value: []const u8,
    };

    /// Creates an iterator positioned after the leading token of `str`
    /// (everything before the first `;` is skipped).
    pub fn from(str: []const u8) ParamIter {
        return .{ .str = str, .index = std.mem.indexOfScalar(u8, str, ';') orelse str.len };
    }

    /// Returns the next parameter, or null when none are left.
    ///
    /// A value wrapped in double quotes is returned without the quotes, and a
    /// `;` inside a quoted value does not end the parameter. Backslash escapes
    /// inside a quoted value are recognised while scanning but are NOT
    /// removed from the returned slice, since that would require a copy.
    /// A parameter without `=` yields an empty value.
    pub fn next(self: *ParamIter) ?Param {
        if (self.index >= self.str.len) return null;

        const start = self.index + 1;
        const end = paramEnd(self.str, start);
        self.index = end;

        const param = std.mem.trim(u8, self.str[start..end], " \t");
        const eq = std.mem.indexOfScalar(u8, param, '=') orelse param.len;
        // Tolerate whitespace on either side of '='.
        const name = std.mem.trimRight(u8, param[0..eq], " \t");
        const raw_value = if (eq < param.len) param[eq + 1 ..] else param[param.len..];
        const value = unquote(std.mem.trimLeft(u8, raw_value, " \t"));

        // TODO: handle parse errors
        return Param{
            .name = name,
            .value = value,
        };
    }

    /// Returns the offset of the first `;` at or after `start` that is not
    /// inside a double-quoted string, or `str.len` if there is none.
    fn paramEnd(str: []const u8, start: usize) usize {
        var in_quotes = false;
        var i = start;
        while (i < str.len) : (i += 1) {
            switch (str[i]) {
                '"' => in_quotes = !in_quotes,
                // Inside quotes a backslash escapes the following byte, so
                // skip it; it must not toggle quoting or end the parameter.
                '\\' => if (in_quotes) {
                    i += 1;
                },
                ';' => if (!in_quotes) return i,
                else => {},
            }
        }
        return str.len;
    }

    /// Strips one pair of surrounding double quotes, if present.
    fn unquote(value: []const u8) []const u8 {
        if (value.len >= 2 and value[0] == '"' and value[value.len - 1] == '"') {
            return value[1 .. value.len - 1];
        }
        return value;
    }
};
/// Content-Disposition parameters extracted for a form part by `parseParams`.
/// A parameter that does not appear in the header stays null.
const FormDataParams = struct {
    name: ?[]const u8 = null,
    filename: ?[]const u8 = null,
    charset: ?[]const u8 = null,
};
2022-11-27 14:11:01 +00:00
/// Parses the `;`-separated parameters of the header value `str` into a `T`.
///
/// `T` must be default-initialisable (`T{}`). Each parameter whose name
/// matches a field of `T` (ASCII case-insensitively) is cloned with
/// `util.deepClone` and stored in that field; unknown parameters are ignored.
/// When a parameter is repeated the last occurrence wins and the earlier
/// clone is released.
///
/// The stored values are allocated with `alloc`. On error everything stored
/// so far is released with `util.deepFree`.
fn parseParams(alloc: std.mem.Allocator, comptime T: type, str: []const u8) !T {
    var result = T{};
    errdefer util.deepFree(alloc, result);

    var iter = ParamIter.from(str);
    while (iter.next()) |param| {
        inline for (comptime std.meta.fieldNames(T)) |f| {
            if (std.ascii.eqlIgnoreCase(param.name, f)) {
                // Clone before releasing the old value: if the clone fails the
                // errdefer above must still see a valid (unfreed) field.
                const cloned = try util.deepClone(alloc, param.value);
                util.deepFree(alloc, @field(result, f));
                @field(result, f) = cloned;
            }
        }
    }

    return result;
}
2022-11-27 13:43:06 +00:00
/// Inspects the bytes immediately following a boundary string.
///
/// Consumes a leading `--` if present (which marks the closing delimiter),
/// then consumes any run of spaces, tabs, CRs and LFs. The first other byte is
/// pushed back onto `peek_stream`.
///
/// Returns true when the `--` marker was found. Reaching the end of the
/// stream is only acceptable in that case; otherwise
/// `error.InvalidMultipartBoundary` is returned.
fn isFinalPart(peek_stream: anytype) !bool {
    const reader = peek_stream.reader();

    var marker: [2]u8 = undefined;
    const marker_len = try reader.readAll(&marker);
    const seen = marker[0..marker_len];

    const is_last = std.mem.eql(u8, seen, "--");
    // Whatever we read that was not the marker belongs to what follows.
    if (!is_last) try peek_stream.putBack(seen);

    // Skip whitespace
    while (true) {
        const byte = reader.readByte() catch |err| switch (err) {
            error.EndOfStream => if (is_last) break else return error.InvalidMultipartBoundary,
            else => return err,
        };

        switch (byte) {
            ' ', '\t', '\r', '\n' => {},
            else => {
                try peek_stream.putBackByte(byte);
                break;
            },
        }
    }

    return is_last;
}
/// Reads one part of a multipart body from `peek_stream`.
///
/// The stream must be positioned at the part's headers. Headers are parsed,
/// then body bytes are collected until `\r\n--` followed by `boundary` is
/// found. The trailing delimiter is consumed, including the check for the
/// closing `--` marker (see `isFinalPart`).
///
/// Returns the part's body and Content-Disposition parameters, both allocated
/// with `alloc`, plus whether more parts follow. Fails with
/// `error.NoDisposition` when the part has no Content-Disposition header, and
/// propagates reader and allocator errors.
fn parseFormField(boundary: []const u8, peek_stream: anytype, alloc: std.mem.Allocator) !FormFieldResult {
    const reader = peek_stream.reader();

    // TODO: refactor
    var headers = try @import("./request/parser.zig").parseHeaders(alloc, reader);
    defer headers.deinit();

    var value = std.ArrayList(u8).init(alloc);
    errdefer value.deinit();

    line_loop: while (true) {
        // parse crlf--
        var buf: [4]u8 = undefined;
        try reader.readNoEof(&buf);
        if (!std.mem.eql(u8, &buf, "\r\n--")) {
            // Not a delimiter prefix: keep the first byte, re-examine the
            // rest, and copy bytes through until the next CR.
            try value.append(buf[0]);
            try peek_stream.putBack(buf[1..]);
            var ch = try reader.readByte();
            while (ch != '\r') : (ch = try reader.readByte()) try value.append(ch);
            try peek_stream.putBackByte(ch);
            continue;
        }

        for (boundary) |ch, i| {
            const b = try reader.readByte();
            if (b != ch) {
                // Only the first `i` boundary bytes actually matched the
                // input. The mismatching byte `b` is body data too (and may
                // itself start a real delimiter), so hand it back to the
                // stream instead of dropping it.
                try value.appendSlice("\r\n--");
                try value.appendSlice(boundary[0..i]);
                try peek_stream.putBackByte(b);
                continue :line_loop;
            }
        }

        // Boundary parsed. See if its a terminal or not
        break;
    }

    const terminal = try isFinalPart(peek_stream);

    const disposition = headers.get("Content-Disposition") orelse return error.NoDisposition;
    // Parse the parameters while `value` is still guarded by its errdefer;
    // once the slice is taken out of the list nothing below can fail.
    const params = try parseParams(alloc, FormDataParams, disposition);

    return FormFieldResult{
        .field = .{
            .value = value.toOwnedSlice(),
            .params = params,
        },
        .more = !terminal,
    };
}
2022-11-28 06:33:05 +00:00
/// Parses a multipart/form-data body read from `reader` into a value of type `T`.
///
/// `boundary` is the boundary string without the leading `--`; it may be at
/// most `max_boundary` bytes long. Parts are matched to the (possibly nested)
/// fields of `T` by comparing the part's `name` parameter with the field path,
/// ignoring ASCII case. Parts that have no name or match no field are logged
/// at debug level and discarded. If a name is repeated, the last part wins.
///
/// Errors:
/// - `error.BoundaryTooLarge`: `boundary` is longer than `max_boundary`.
/// - `error.InvalidMultipartBoundary`: the body does not start with `--boundary`.
/// - `error.NoForm`: the body holds no parts, or no part matched a field of `T`.
/// - anything returned by `parseFormField` or `parse`.
///
/// Memory is allocated with `alloc`.
/// NOTE(review): nothing visible here releases the Content-Disposition
/// parameters of matched parts once `parse` has consumed them; confirm who
/// owns them.
pub fn parseFormData(comptime T: type, boundary: []const u8, reader: anytype, alloc: std.mem.Allocator) !T {
    if (boundary.len > max_boundary) return error.BoundaryTooLarge;

    var stream = std.io.peekStream(72, reader);
    {
        // The body must open with "--" ++ boundary.
        var buf: [72]u8 = undefined;
        const count = try stream.reader().readAll(buf[0 .. boundary.len + 2]);
        const line = buf[0..count];
        if (line.len != boundary.len + 2) return error.InvalidMultipartBoundary;
        if (!std.mem.startsWith(u8, line, "--")) return error.InvalidMultipartBoundary;
        if (!std.mem.endsWith(u8, line, boundary)) return error.InvalidMultipartBoundary;

        if (try isFinalPart(&stream)) return error.NoForm;
    }

    var fields = Intermediary(T){};
    {
        // Parts stored so far must not leak if a later part fails to parse.
        // The guard is scoped to the read loop only; after that, `parse`
        // takes over the stored values.
        errdefer util.deepFree(alloc, fields);

        while (true) {
            const form_field = try parseFormField(boundary, &stream, alloc);

            var stored = false;
            // A part without a `name` parameter cannot match anything; it is
            // handled like an unknown field instead of being unwrapped blindly.
            if (form_field.field.params.name) |field_name| {
                inline for (std.meta.fields(Intermediary(T))) |field| {
                    // field.name carries the ".." root prefix; skip it.
                    if (!stored and std.ascii.eqlIgnoreCase(field.name[2..], field_name)) {
                        // Release an earlier part with the same name.
                        if (@field(fields, field.name)) |previous| util.deepFree(alloc, previous);
                        @field(fields, field.name) = form_field.field;
                        stored = true;
                    }
                }
            }

            if (!stored) {
                std.log.debug("unknown form field {?s}", .{form_field.field.params.name});
                util.deepFree(alloc, form_field);
            }

            if (!form_field.more) break;
        }
    }

    // `parse` yields null when none of T's fields were supplied.
    return (try parse(alloc, T, "", "", fields)) orelse error.NoForm;
}
/// Builds a `T` from the collected form parts in `fields`.
///
/// `fields` is an `Intermediary` value whose member names are the dotted
/// paths produced by `recursiveFieldPaths`; `prefix ++ "." ++ name` is the
/// path of the value being built.
///
/// - Scalars (see `isScalar`) are converted with `parseFormValue`; an absent
///   part yields null.
/// - Unions try every variant at the same `prefix` and accept exactly one;
///   more than one yields `error.DuplicateUnionField`, none yields null.
/// - Structs build every field, falling back to a clone of the field's
///   default value. Null is returned when no field could be filled, and
///   `error.PartiallySpecifiedStruct` when only some could.
/// - Non-scalar optionals are parsed as their payload type.
///
/// On error, struct fields that were already filled are released with
/// `util.deepFree`.
fn parse(
    alloc: std.mem.Allocator,
    comptime T: type,
    comptime prefix: []const u8,
    comptime name: []const u8,
    fields: anytype,
) !?T {
    if (comptime isScalar(T)) {
        // An absent part means "not specified"; it must not reach
        // parseFormValue, which requires a value.
        const slot = @field(fields, prefix ++ "." ++ name) orelse return null;
        return try parseFormValue(alloc, T, slot);
    }

    switch (@typeInfo(T)) {
        .Union => |info| {
            var result: ?T = null;
            inline for (info.fields) |field| {
                const F = field.field_type;

                const maybe_value = try parse(alloc, F, prefix, field.name, fields);
                if (maybe_value) |value| {
                    if (result != null) return error.DuplicateUnionField;

                    result = @unionInit(T, field.name, value);
                }
            }
            std.log.debug("{any}", .{result});
            return result;
        },

        .Struct => |info| {
            var result: T = undefined;
            // Tracks exactly which fields of `result` hold a value. Fields
            // can be skipped, so a running count cannot identify them.
            var initialized = [_]bool{false} ** info.fields.len;
            var fields_specified: usize = 0;
            errdefer inline for (info.fields) |field, i| {
                if (initialized[i]) util.deepFree(alloc, @field(result, field.name));
            };

            inline for (info.fields) |field, i| {
                const F = field.field_type;

                var maybe_value: ?F = null;
                if (try parse(alloc, F, prefix ++ "." ++ name, field.name, fields)) |v| {
                    maybe_value = v;
                } else if (field.default_value) |default| {
                    if (comptime @sizeOf(F) != 0) {
                        maybe_value = try util.deepClone(alloc, @ptrCast(*const F, @alignCast(@alignOf(F), default)).*);
                    } else {
                        maybe_value = std.mem.zeroes(F);
                    }
                }

                if (maybe_value) |v| {
                    @field(result, field.name) = v;
                    initialized[i] = true;
                    fields_specified += 1;
                }
            }

            if (fields_specified == 0) {
                return null;
            } else if (fields_specified != info.fields.len) {
                std.log.debug("{} {s} {s}", .{ T, prefix, name });
                return error.PartiallySpecifiedStruct;
            } else {
                return result;
            }
        },

        // Only applies to non-scalar optionals
        .Optional => |info| return try parse(alloc, info.child, prefix, name, fields),

        else => @compileError("tmp"),
    }
}
/// Computes, at compile time, the flattened list of form field paths for `T`.
///
/// Every scalar member (see `isScalar`) contributes `prefix ++ member name`.
/// Non-scalar members are expanded recursively: a union member reuses the
/// current `prefix`, any other member extends it with its own name and a `.`.
/// An optional `T` is treated as its payload type.
fn recursiveFieldPaths(comptime T: type, comptime prefix: []const u8) []const []const u8 {
    comptime {
        if (std.meta.trait.is(.Optional)(T)) return recursiveFieldPaths(std.meta.Child(T), prefix);

        var paths: []const []const u8 = &.{};
        for (std.meta.fields(T)) |member| {
            const path = prefix ++ member.name;

            if (!isScalar(member.field_type)) {
                const child_prefix = if (@typeInfo(member.field_type) == .Union) prefix else path ++ ".";
                paths = paths ++ recursiveFieldPaths(member.field_type, child_prefix);
                continue;
            }

            paths = paths ++ @as([]const []const u8, &.{path});
        }

        return paths;
    }
}
2022-11-27 14:24:41 +00:00
2022-11-28 06:33:05 +00:00
/// Returns a struct type with one `?FormField` member, defaulting to null,
/// for every path reported by `recursiveFieldPaths(T, "..")`. It is used to
/// hold the raw parts of a form before they are converted into a `T`.
fn Intermediary(comptime T: type) type {
    const Slot = ?FormField;
    const paths = recursiveFieldPaths(T, "..");

    var members: [paths.len]std.builtin.Type.StructField = undefined;
    for (paths) |path, idx| {
        members[idx] = .{
            .name = path,
            .field_type = Slot,
            .default_value = &@as(Slot, null),
            .is_comptime = false,
            .alignment = @alignOf(Slot),
        };
    }

    return @Type(.{
        .Struct = .{
            .layout = .Auto,
            .fields = &members,
            .decls = &.{},
            .is_tuple = false,
        },
    });
}
2022-11-27 14:24:41 +00:00
/// Target type for form fields that carry a file; `isScalar` treats it as a
/// scalar and `parseFormValue` builds it directly from a single part.
const FormFile = struct {
// File name supplied with the part, if any.
filename: ?[]const u8,
// The part's body bytes (the `value` of the form field).
data: []const u8,
};
2022-11-28 06:33:05 +00:00
/// Converts the part `f` into a scalar of type `T`.
///
/// Supported targets:
/// - strings: the part's body is returned as-is (no copy);
/// - `FormFile`: the body plus the part's `filename` parameter (no copy);
/// - integers: `std.fmt.parseInt` with base 0, so `0x`/`0o`/`0b` prefixes work;
/// - floats: `std.fmt.parseFloat`;
/// - enums: the lower-cased body must equal a tag name, else `error.InvalidEnumValue`;
/// - `bool`: the lower-cased body is looked up in `bool_map`, else `error.InvalidBool`;
/// - any type with a `parse` declaration: `T.parse(body)`.
///
/// Returns `error.MissingField` when `f` is null. `alloc` is only used for
/// temporary buffers, which are released before returning.
fn parseFormValue(alloc: std.mem.Allocator, comptime T: type, f: ?FormField) !T {
    // Form data is untrusted input: report an absent part instead of
    // declaring it unreachable (undefined behaviour in release builds).
    const field = f orelse return error.MissingField;

    if (comptime std.meta.trait.isZigString(T)) return field.value;

    if (T == FormFile) {
        return FormFile{
            // The file name lives in the Content-Disposition parameters.
            .filename = field.params.filename,
            .data = field.value,
        };
    }

    const result = if (comptime std.meta.trait.isIntegral(T))
        try std.fmt.parseInt(T, field.value, 0)
    else if (comptime std.meta.trait.isFloat(T))
        try std.fmt.parseFloat(T, field.value)
    else if (comptime std.meta.trait.is(.Enum)(T)) blk: {
        // lowerStringAlloc can fail; unwrap it before using or freeing.
        const val = try std.ascii.lowerStringAlloc(alloc, field.value);
        defer alloc.free(val);
        break :blk std.meta.stringToEnum(T, val) orelse return error.InvalidEnumValue;
    } else if (T == bool) blk: {
        const val = try std.ascii.lowerStringAlloc(alloc, field.value);
        defer alloc.free(val);
        break :blk bool_map.get(val) orelse return error.InvalidBool;
    } else if (comptime std.meta.trait.hasFn("parse")(T))
        try T.parse(field.value)
    else
        @compileError("Invalid type " ++ @typeName(T));

    return result;
}
/// Spellings accepted for `bool` form values. `parseFormValue` lower-cases
/// the submitted text before looking it up here.
const bool_map = std.ComptimeStringMap(bool, .{
.{ "true", true },
.{ "t", true },
.{ "yes", true },
.{ "y", true },
.{ "1", true },
.{ "false", false },
.{ "f", false },
.{ "no", false },
.{ "n", false },
.{ "0", false },
});
/// Reports whether `T` is filled from a single form part rather than being
/// expanded member by member: strings, integers, floats, enums, enum
/// literals, `bool`, `FormFile`, any type declaring a `parse` function, and
/// optionals of any of those.
fn isScalar(comptime T: type) bool {
    const trait = std.meta.trait;
    comptime {
        // Built-in value kinds.
        if (trait.isZigString(T)) return true;
        if (trait.isIntegral(T)) return true;
        if (trait.isFloat(T)) return true;
        if (trait.is(.Enum)(T)) return true;
        if (trait.is(.EnumLiteral)(T)) return true;
        if (T == bool) return true;

        // Types this module knows how to build from one part.
        if (T == FormFile) return true;
        if (trait.hasFn("parse")(T)) return true;

        // An optional is scalar exactly when its payload is.
        if (trait.is(.Optional)(T)) {
            if (isScalar(std.meta.Child(T))) return true;
        }

        return false;
    }
}
2022-11-27 13:43:06 +00:00
/// Compile-time helper that returns `str` with a `\r` inserted in front of
/// every `\n`.
fn toCrlf(comptime str: []const u8) []const u8 {
    comptime {
        @setEvalBranchQuota(@intCast(u32, str.len * 2)); // TODO: why does this need to be *2

        // Sized for the worst case, where every input byte is a '\n'.
        var out: [str.len * 2]u8 = undefined;
        var written: usize = 0;

        for (str) |byte| {
            if (byte == '\n') {
                out[written] = '\r';
                written += 1;
            }

            out[written] = byte;
            written += 1;
        }

        return out[0..written];
    }
}
// Parses a three-part body and checks each part's decoded value, including a
// part whose body is empty.
test "parseFormData" {
    const body = toCrlf(
        \\--abcd
        \\Content-Disposition: form-data; name=first; charset=utf8
        \\
        \\content
        \\--abcd
        \\content-Disposition: form-data; name=second
        \\
        \\no content
        \\--abcd
        \\content-disposition: form-data; name=third
        \\
        \\
        \\--abcd--
        \\
    );

    const T = struct {
        first: []const u8,
        second: []const u8,
        third: []const u8,
    };

    var stream = std.io.StreamSource{ .const_buffer = std.io.fixedBufferStream(body) };
    const result = try parseFormData(T, "abcd", stream.reader(), std.testing.allocator);
    // The returned strings are allocated with the allocator passed in.
    defer util.deepFree(std.testing.allocator, result);

    try std.testing.expectEqualStrings("content", result.first);
    try std.testing.expectEqualStrings("no content", result.second);
    try std.testing.expectEqualStrings("", result.third);
}