// yet-another-pomf-clone/src/main.zig

// 451 lines
// 16 KiB
// Zig

const std = @import("std");
const http = @import("apple_pie");
const hzzp = @import("hzzp");
const mimetypes = @import("mimetypes");
const fmt = std.fmt;
// Directory, relative to the current working directory, that uploads are
// written to and served from.
const images_dir_path = "./images";
// MIME registry shared with the request handlers; null until main() sets it.
var registry: ?mimetypes.Registry = null;
// Allocator shared with the request handlers; null until main() points it at
// its own allocator.
var global_allocator: ?*std.mem.Allocator = null;
/// Program entry point: loads the MIME registry, makes sure the images
/// directory exists, publishes the allocator for the handlers and then serves
/// HTTP on 0.0.0.0:8080 with the three routes listed below.
pub fn main() anyerror!void {
std.log.info("welcome to webscale", .{});
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
// The value returned by deinit is deliberately discarded.
defer _ = gpa.deinit();
// The registry is backed by the page allocator, not by `gpa`.
registry = mimetypes.Registry.init(std.heap.page_allocator);
defer registry.?.deinit();
try registry.?.load();
// TODO: configurable addr via env var
const bind_addr = try std.net.Address.parseIp("0.0.0.0", 8080);
std.log.info("serving on {}", .{bind_addr});
// TODO: configurable path via env var
try std.fs.cwd().makePath(images_dir_path);
// uploadFile reads `global_allocator`, so it must be set before serving.
global_allocator = &gpa.allocator;
try http.listenAndServe(
&gpa.allocator,
bind_addr,
// The route table is built at compile time.
comptime http.router.router(&[_]http.router.Route{
http.router.get("/", index),
http.router.post("/api/upload", uploadFile),
http.router.get("/i/:filename", fetchFile),
}),
);
}
/// Handler for `GET /`: writes a fixed greeting into the response.
/// `request` is not inspected.
fn index(response: *http.Response, request: http.Request) !void {
    const greeting = "Hello Zig!";
    const out = response.writer();
    try out.writeAll(greeting);
}
/// Writes a 16-character id made of random lowercase ASCII letters into the
/// start of `buffer` and returns that prefix.
///
/// `buffer` must be at least 16 bytes long. The generator is seeded from the
/// current nanosecond timestamp on every call, so ids are predictable and must
/// not be treated as secrets.
fn generateImageId(buffer: []u8) []const u8 {
    const id_length = 16;
    const alphabet_size = 26;
    std.debug.assert(buffer.len >= id_length);

    const seed = @truncate(u64, @bitCast(u128, std.time.nanoTimestamp()));
    var r = std.rand.DefaultPrng.init(seed);

    for (buffer[0..id_length]) |*slot| {
        // The bound is 26 so the whole a-z range, including 'y' and 'z', is
        // reachable; u5 (0..31) is wide enough to hold it.
        const idx = @intCast(u8, r.random.uintLessThan(u5, alphabet_size));
        slot.* = @as(u8, 'a') + idx;
    }
    return buffer[0..id_length];
}
/// Fixed-buffer stream over an immutable byte slice; `Multipart` stores one of
/// these to read the request body.
const StreamT = std.io.FixedBufferStream([]const u8);
/// `name` and `filename` parameters taken from a part's Content-Disposition
/// header. Both slices are released through `allocator` by `deinit`.
const ContentDisposition = struct {
    allocator: *std.mem.Allocator,
    name: []const u8,
    filename: []const u8,

    const Self = @This();

    /// Frees `name` and `filename` with the stored allocator.
    pub fn deinit(self: *const Self) void {
        const owner = self.allocator;
        owner.free(self.name);
        owner.free(self.filename);
    }

    /// Formatter hook for `std.fmt`; only the empty format specifier is
    /// accepted, anything else is a compile error.
    pub fn format(self: Self, comptime f: []const u8, options: fmt.FormatOptions, writer: anytype) !void {
        if (f.len > 0) @compileError("Unknown format character: '" ++ f ++ "'");
        try fmt.format(
            writer,
            "Disposition{{.name='{s}', .filename='{s}'}}",
            .{ self.name, self.filename },
        );
    }
};
/// One part of a multipart body: its disposition, its content type and its
/// payload. `deinit` releases `disposition` and `content_type`; `body` is not
/// freed by `deinit`.
const Part = struct {
    allocator: *std.mem.Allocator,
    disposition: ContentDisposition,
    content_type: []const u8,
    body: []const u8,

    const Self = @This();

    /// Frees `content_type` and the strings held by `disposition`.
    pub fn deinit(self: *const Self) void {
        self.allocator.free(self.content_type);
        self.disposition.deinit();
    }

    /// Formatter hook for `std.fmt`; only the empty format specifier is
    /// accepted, anything else is a compile error.
    pub fn format(self: Self, comptime f: []const u8, options: fmt.FormatOptions, writer: anytype) !void {
        if (f.len > 0) @compileError("Unknown format character: '" ++ f ++ "'");
        try fmt.format(
            writer,
            "Part{{.content_type='{s}', .disposition={}, .body='{s}'}}",
            .{ self.content_type, self.disposition, self.body },
        );
    }
};
/// Pull parser for a `multipart/form-data` body that is fully held in memory.
///
/// Build one with `init`, then call `next` until it returns `null`; every
/// call yields one `Part`.
const Multipart = struct {
    /// Read cursor over the request body.
    stream: StreamT,
    /// Delimiter exactly as it appears in the body: "--" followed by the
    /// boundary parameter. Points into the `boundary_buffer` given to `init`.
    boundary: []const u8,
    /// Not used by `init` or `next`.
    cursor: usize = 0,

    const Self = @This();

    /// Extracts the boundary from the request's Content-Type header value and
    /// prepares a parser over `body`.
    ///
    /// `boundary_buffer` receives "--<boundary>" and must outlive the returned
    /// value. Returns `error.MissingContentType`, `error.InvalidContentType`,
    /// `error.MissingBoundary` or `error.InvalidBoundary` for a malformed
    /// header, and the `bufPrint` error when the boundary does not fit.
    // TODO: move boundary_buffer to allocator
    pub fn init(body: []const u8, content_type: []const u8, boundary_buffer: []u8) !Multipart {
        // parse content_type into what we want (the boundary)
        var it = std.mem.split(content_type, ";");

        const should_be_multipart = it.next() orelse return error.MissingContentType;
        std.log.debug("should be multipart: {s}", .{should_be_multipart});
        if (!std.mem.eql(u8, should_be_multipart, "multipart/form-data"))
            return error.InvalidContentType;

        const should_be_boundary = it.next() orelse return error.MissingBoundary;
        std.log.debug("should be boundary: {s} {d}", .{ should_be_boundary, should_be_boundary.len });

        // Accept any amount of padding around the parameter.
        const prefix = "boundary=";
        const parameter = std.mem.trim(u8, should_be_boundary, " ");
        if (!std.mem.startsWith(u8, parameter, prefix))
            return error.InvalidBoundary;

        // Everything after the first '=' is the value: '=' is itself a legal
        // boundary character, and the value may be wrapped in double quotes.
        const boundary_value = std.mem.trim(u8, parameter[prefix.len..], "\"");
        if (boundary_value.len == 0) return error.InvalidBoundary;
        std.log.debug("boundary value: {s} {d}", .{ boundary_value, boundary_value.len });

        const actual_boundary_value = try std.fmt.bufPrint(boundary_buffer, "--{s}", .{boundary_value});
        std.log.debug("actual boundary value: {s} {d}", .{ actual_boundary_value, actual_boundary_value.len });

        return Self{
            .stream = StreamT{ .buffer = body, .pos = 0 },
            .boundary = actual_boundary_value,
        };
    }

    /// Parses a Content-Disposition header value into an owned
    /// `ContentDisposition`. A missing `name` or `filename` parameter becomes
    /// an empty string. Parameters are split on ';', so a quoted value that
    /// contains ';' is not supported.
    fn parseContentDisposition(allocator: *std.mem.Allocator, value: []const u8) !ContentDisposition {
        var params = std.mem.split(value, ";");
        // The first element is the disposition type, not a parameter.
        _ = params.next();

        var name: []const u8 = "";
        var filename: []const u8 = "";
        while (params.next()) |raw_param| {
            const param = std.mem.trim(u8, raw_param, " ");
            if (param.len == 0) continue;
            if (std.mem.eql(u8, param, "form-data")) continue;

            // we have an A=B thing; B may itself contain '='
            const eq_pos = std.mem.indexOfScalar(u8, param, '=') orelse
                return error.InvalidContentDisposition;
            const key = param[0..eq_pos];
            const unquoted = std.mem.trim(u8, param[eq_pos + 1 ..], "\"");

            if (std.mem.eql(u8, key, "name")) name = unquoted;
            if (std.mem.eql(u8, key, "filename")) filename = unquoted;
        }

        const name_copy = try std.mem.dupe(allocator, u8, name);
        errdefer allocator.free(name_copy);
        const filename_copy = try std.mem.dupe(allocator, u8, filename);

        return ContentDisposition{
            .allocator = allocator,
            .name = name_copy,
            .filename = filename_copy,
        };
    }

    /// Returns the next part, or `null` once the body is exhausted.
    ///
    /// `hzzp_buffer` is handed to the hzzp parser for reading the part's
    /// headers. The returned part's disposition and content type are allocated
    /// with `allocator` and must be released with `Part.deinit`; `body` is a
    /// slice of the request body given to `init`.
    ///
    /// Malformed input is reported as an error (`error.InvalidBoundaryBody`,
    /// `error.InvalidContentDisposition`, `error.UnexpectedMultipartEvent`,
    /// `error.MissingNextPrefixOrEndSuffix`, `error.MissingContentDisposition`,
    /// `error.MissingPartContentType`) and nothing allocated here is leaked.
    pub fn next(self: *Self, hzzp_buffer: []u8, allocator: *std.mem.Allocator) !?Part {
        var reader = self.stream.reader();

        // The first line must be the boundary followed by \r\n.
        var boundary_buffer: [512]u8 = undefined;
        const maybe_boundary_raw = (try reader.readUntilDelimiterOrEof(&boundary_buffer, '\n')) orelse return null;
        const maybe_boundary_strip1 = std.mem.trimRight(u8, maybe_boundary_raw, "\n");
        const maybe_boundary_strip2 = std.mem.trimRight(u8, maybe_boundary_strip1, "\r");
        if (!std.mem.eql(u8, maybe_boundary_strip2, self.boundary)) {
            std.log.err("expected '{s}' {}, got '{s}' {}", .{ self.boundary, self.boundary.len, maybe_boundary_strip2, maybe_boundary_strip2.len });
            return error.InvalidBoundaryBody;
        }
        std.log.debug("got successful boundary {s}", .{maybe_boundary_strip2});

        // From here onwards the part header block is parsed like HTTP headers.
        var parser = hzzp.parser.request.create(hzzp_buffer, reader);
        // This is a hack so that it doesnt try to parse an http status line.
        parser.state = .header;

        // Both values are owned until handed over in the returned Part, so
        // release them on every error exit.
        var content_disposition: ?ContentDisposition = null;
        errdefer if (content_disposition) |disposition| disposition.deinit();
        var content_type: ?[]const u8 = null;
        errdefer if (content_type) |value| allocator.free(value);

        // Clamp the debug preview so short inputs cannot index out of bounds.
        const unread = self.stream.buffer[self.stream.pos..];
        const preview: []const u8 = if (unread.len > 50) unread[0..50] else unread;
        std.log.debug("next bytes: {s}", .{preview});

        while (try parser.next()) |event| {
            std.log.debug("got event: {}", .{event});
            switch (event) {
                .end => break,
                .head_done => {},
                .header => |header| {
                    // Header names are case-insensitive.
                    if (std.ascii.eqlIgnoreCase(header.name, "Content-Disposition")) {
                        const parsed = try parseContentDisposition(allocator, header.value);
                        // A repeated header replaces the earlier value.
                        if (content_disposition) |previous| previous.deinit();
                        content_disposition = parsed;
                        std.log.debug("got content disposition for part! {}", .{parsed});
                    } else if (std.ascii.eqlIgnoreCase(header.name, "Content-Type")) {
                        const copy = try std.mem.dupe(allocator, u8, header.value);
                        if (content_type) |previous| allocator.free(previous);
                        content_type = copy;
                        std.log.debug("got content type for part! {s}", .{copy});
                    }
                },
                else => {
                    // The input is untrusted: report instead of crashing.
                    std.log.err("unexpected event: {}", .{event});
                    return error.UnexpectedMultipartEvent;
                },
            }
        }

        const disposition = content_disposition orelse return error.MissingContentDisposition;
        const part_content_type = content_type orelse return error.MissingPartContentType;

        // hzzp cannot hand us the part body because there is no body encoding
        // (content-length, transfer-encoding). The stream is a
        // FixedBufferStream, so the body is everything from the current
        // position up to the next occurrence of the boundary.
        const remaining_body = self.stream.buffer[self.stream.pos..];
        const boundary_offset = std.mem.indexOf(u8, remaining_body, self.boundary) orelse
            return error.MissingNextPrefixOrEndSuffix;

        // The CRLF right before a delimiter belongs to the delimiter, not to
        // the payload. Strip exactly one, and leave any CRLF inside the
        // payload (e.g. in binary files) untouched.
        const raw_body = remaining_body[0..boundary_offset];
        const body = if (std.mem.endsWith(u8, raw_body, "\r\n"))
            raw_body[0 .. raw_body.len - 2]
        else
            raw_body;

        const after_boundary = remaining_body[boundary_offset + self.boundary.len ..];
        if (std.mem.startsWith(u8, after_boundary, "--")) {
            // Closing delimiter (--boundary--): disable further reads.
            self.stream.pos = self.stream.buffer.len;
        } else {
            // Another part follows: park the stream on its boundary marker so
            // the next call starts by reading it.
            self.stream.pos += boundary_offset;
        }

        return Part{
            .allocator = allocator,
            .disposition = disposition,
            .content_type = part_content_type,
            .body = body,
        };
    }
};
/// Handler for `POST /api/upload`.
///
/// Parses the request body as multipart/form-data, stores the first part
/// under `images_dir_path` using a random id plus the first extension
/// registered for the part's content type, and writes the stored path to the
/// response. Parts after the first are ignored; a body without any part
/// produces an empty response.
///
/// Returns `error.InvalidContentType` when the request has no Content-Type
/// header and `error.InvalidContentMimeType` when no extension is known for
/// the part's content type; parser and I/O errors are propagated.
fn uploadFile(response: *http.Response, request: http.Request) !void {
    std.log.info("upload! got {d} bytes", .{request.body.len});

    // find content-type header (header names are case-insensitive)
    var content_type: ?[]const u8 = null;
    var it = request.iterator();
    while (it.next()) |header| {
        if (std.ascii.eqlIgnoreCase(header.key, "Content-Type")) {
            content_type = header.value;
        }
    }
    const request_content_type = content_type orelse return error.InvalidContentType;

    // parse multipart data
    var boundary_buffer: [512]u8 = undefined;
    var multipart = try Multipart.init(request.body, request_content_type, &boundary_buffer);

    var hzzp_buffer: [1024]u8 = undefined;
    const part = (try multipart.next(&hzzp_buffer, global_allocator.?)) orelse return;
    defer part.deinit();

    std.log.info(
        "got part from multipart request! name='{s}' filename='{s}' content_type='{s}' length={d}",
        .{ part.disposition.name, part.disposition.filename, part.content_type, part.body.len },
    );

    const extensions = registry.?.getExtensionsByType(part.content_type) orelse
        return error.InvalidContentMimeType;
    // A registered type with an empty extension list is just as unusable.
    if (extensions.items.len == 0) return error.InvalidContentMimeType;
    const extension = extensions.items[0];

    var image_id_buffer: [256]u8 = undefined;
    const image_id = generateImageId(&image_id_buffer);

    var image_path_buffer: [512]u8 = undefined;
    const image_path = try std.fmt.bufPrint(
        &image_path_buffer,
        "{s}/{s}{s}",
        .{ images_dir_path, image_id, extension },
    );

    const image_file = try std.fs.cwd().createFile(image_path, .{});
    // Close the handle on every exit path so uploads do not leak descriptors.
    defer image_file.close();
    try image_file.writer().writeAll(part.body);

    try response.writer().writeAll(image_path);
}
/// Handler for `GET /i/:filename`: streams the file called `filename` from
/// `images_dir_path` into the response.
///
/// `filename` comes from the URL and is untrusted, so names that are empty,
/// are "." or "..", or contain a path separator or NUL byte are rejected with
/// `error.InvalidFilename`. Errors from opening or reading the file are
/// propagated.
fn fetchFile(response: *http.Response, request: http.Request, filename: []const u8) !void {
    std.log.info("got name: {s}", .{filename});

    // Keep the lookup confined to the images directory.
    if (filename.len == 0 or
        std.mem.eql(u8, filename, ".") or
        std.mem.eql(u8, filename, "..") or
        std.mem.indexOfAny(u8, filename, "/\\\x00") != null)
    {
        return error.InvalidFilename;
    }

    var images_dir = try std.fs.cwd().openDir(images_dir_path, .{});
    defer images_dir.close();

    // TODO return 404 on error
    // Default open flags request read access, which the loop below needs.
    const image_file = try images_dir.openFile(filename, .{});
    defer image_file.close();

    const out = response.writer();
    var file_write_buffer: [1024]u8 = undefined;
    while (true) {
        const bytes_read = try image_file.read(&file_write_buffer);
        if (bytes_read == 0) return;
        // Send only the bytes that were read; the rest of the buffer holds
        // stale data from an earlier iteration.
        try out.writeAll(file_write_buffer[0..bytes_read]);
    }
}
// Root-level log level, set to the most verbose setting.
// NOTE(review): presumably std.log picks this declaration up so the
// std.log.debug calls in this file are emitted; confirm for the targeted Zig
// version.
pub const log_level: std.log.Level = .debug;
// Two parts back to back: checks the headers, disposition fields and body of
// each, then that the parser reports exhaustion after the closing delimiter.
test "multipart" {
    const testing = std.testing;

    const first_body = "Hello!\n";
    const second_body = "{\"status\": \"OK\"}";
    const payload =
        "--1234\r\n" ++
        "Content-Type: text/plain\r\n" ++
        "Content-Disposition: form-data; name=file1; filename=ab.txt\r\n" ++
        "\r\n" ++
        first_body ++
        "--1234\r\n" ++
        "Content-Type: application/json\r\n" ++
        // TODO: add 'content-type' support to content-disposition as well
        "Content-Disposition: form-data; name=file2; filename=data.json\r\n" ++
        "\r\n" ++
        second_body ++
        "--1234--\r\n";

    var boundary_storage: [512]u8 = undefined;
    var parser = try Multipart.init(
        payload,
        "multipart/form-data; boundary=1234",
        &boundary_storage,
    );

    var scratch: [1024]u8 = undefined;

    const first = (try parser.next(&scratch, testing.allocator)).?;
    defer first.deinit();
    std.debug.warn("\npart={}\n", .{first});
    testing.expectEqualSlices(u8, "text/plain", first.content_type);
    testing.expectEqualSlices(u8, "file1", first.disposition.name);
    testing.expectEqualSlices(u8, "ab.txt", first.disposition.filename);
    testing.expectEqualSlices(u8, first_body, first.body);

    const second = (try parser.next(&scratch, testing.allocator)).?;
    defer second.deinit();
    std.debug.warn("\npart2={}\n", .{second});
    testing.expectEqualSlices(u8, "application/json", second.content_type);
    testing.expectEqualSlices(u8, "file2", second.disposition.name);
    testing.expectEqualSlices(u8, "data.json", second.disposition.filename);
    testing.expectEqualSlices(u8, second_body, second.body);

    // after the closing delimiter there must be nothing left to yield
    const exhausted = try parser.next(&scratch, testing.allocator);
    testing.expectEqual(@as(?Part, null), exhausted);
}
// A body with exactly one part followed by the closing delimiter: the part is
// returned once and the following call yields null.
test "multipart single file" {
    const testing = std.testing;

    const only_body = "Hello!";
    const payload =
        "--1234\r\n" ++
        "Content-Type: text/plain\r\n" ++
        "Content-Disposition: form-data; name=file1; filename=ab.txt\r\n" ++
        "\r\n" ++
        only_body ++
        "--1234--\r\n";

    var boundary_storage: [512]u8 = undefined;
    var parser = try Multipart.init(
        payload,
        "multipart/form-data; boundary=1234",
        &boundary_storage,
    );

    var scratch: [1024]u8 = undefined;

    const only = (try parser.next(&scratch, testing.allocator)).?;
    defer only.deinit();
    std.debug.warn("\npart={}\n", .{only});
    testing.expectEqualSlices(u8, "text/plain", only.content_type);
    testing.expectEqualSlices(u8, "file1", only.disposition.name);
    testing.expectEqualSlices(u8, "ab.txt", only.disposition.filename);
    testing.expectEqualSlices(u8, only_body, only.body);

    const exhausted = try parser.next(&scratch, testing.allocator);
    testing.expectEqual(@as(?Part, null), exhausted);
}