const std = @import("std");
const http = @import("apple_pie");
const hzzp = @import("hzzp");
const mimetypes = @import("mimetypes");

const fmt = std.fmt;

/// Directory (relative to the working directory) where uploaded images are stored.
const images_dir_path = "./images";

/// MIME type registry, initialized in `main` before the server starts.
var registry: ?mimetypes.Registry = null;

/// Allocator used by request handlers, set in `main` before the server starts.
var global_allocator: ?*std.mem.Allocator = null;

/// Sets up the allocator, MIME registry and images directory, then serves
/// the routes `/`, `/api/upload` and `/i/:filename` on 0.0.0.0:8080.
pub fn main() anyerror!void {
    std.log.info("welcome to webscale", .{});

    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();

    registry = mimetypes.Registry.init(std.heap.page_allocator);
    defer registry.?.deinit();
    try registry.?.load();

    // TODO: configurable addr via env var
    const bind_addr = try std.net.Address.parseIp("0.0.0.0", 8080);
    std.log.info("serving on {}", .{bind_addr});

    // TODO: configurable path via env var
    try std.fs.cwd().makePath(images_dir_path);

    global_allocator = &gpa.allocator;
    try http.listenAndServe(
        &gpa.allocator,
        bind_addr,
        comptime http.router.router(&[_]http.router.Route{
            http.router.get("/", index),
            http.router.post("/api/upload", uploadFile),
            http.router.get("/i/:filename", fetchFile),
        }),
    );
}

/// Handler for `GET /`: writes a fixed greeting.
fn index(response: *http.Response, request: http.Request) !void {
    try response.writer().writeAll("Hello Zig!");
}

/// Number of characters in a generated image id.
const image_id_len = 16;

/// Fills the start of `buffer` with `image_id_len` random lowercase ASCII
/// letters and returns that prefix. `buffer` must hold at least
/// `image_id_len` bytes. The PRNG is seeded from the current nanosecond
/// timestamp, so ids are not cryptographically unpredictable.
fn generateImageId(buffer: []u8) []const u8 {
    std.debug.assert(buffer.len >= image_id_len);

    const seed = @truncate(u64, @bitCast(u128, std.time.nanoTimestamp()));
    var r = std.rand.DefaultPrng.init(seed);

    var i: usize = 0;
    while (i < image_id_len) : (i += 1) {
        // 26 letters in a..z; the previous bound of 24 never produced 'y' or 'z'.
        const idx = @intCast(u8, r.random.uintLessThan(u5, 26));
        buffer[i] = @as(u8, 'a') + idx;
    }
    return buffer[0..i];
}

const StreamT = std.io.FixedBufferStream([]const u8);

/// The `name` and `filename` parameters of a part's Content-Disposition
/// header. Both slices are owned by this struct and released by `deinit`.
const ContentDisposition = struct {
    allocator: *std.mem.Allocator,
    name: []const u8,
    filename: []const u8,

    const Self = @This();

    /// Parses a Content-Disposition header value such as
    /// `form-data; name="file1"; filename="ab.txt"`.
    ///
    /// `name` is required (error.MissingDispositionName otherwise); a missing
    /// `filename` becomes an empty string. A parameter without `=` yields
    /// error.InvalidContentDisposition. The caller owns the result and must
    /// call `deinit`.
    ///
    /// Limitation: parameters are split on ';' without honoring quotes, so a
    /// quoted value containing ';' is not parsed correctly.
    pub fn parse(allocator: *std.mem.Allocator, header_value: []const u8) !ContentDisposition {
        var params = std.mem.split(header_value, ";");
        // The first token is the disposition type itself ("form-data").
        _ = params.next();

        var name: ?[]const u8 = null;
        var filename: ?[]const u8 = null;
        while (params.next()) |param_raw| {
            const param = std.mem.trim(u8, param_raw, " ");
            if (param.len == 0) continue;
            if (std.mem.eql(u8, param, "form-data")) continue;

            // Split on the first '=' only, so values may themselves contain '='.
            const eq_pos = std.mem.indexOfScalar(u8, param, '=') orelse
                return error.InvalidContentDisposition;
            const key = param[0..eq_pos];
            const value = std.mem.trim(u8, param[eq_pos + 1 ..], "\"");

            if (std.mem.eql(u8, key, "name")) name = value;
            if (std.mem.eql(u8, key, "filename")) filename = value;
        }

        const owned_name = try std.mem.dupe(
            allocator,
            u8,
            name orelse return error.MissingDispositionName,
        );
        errdefer allocator.free(owned_name);
        const owned_filename = try std.mem.dupe(
            allocator,
            u8,
            filename orelse @as([]const u8, ""),
        );

        return ContentDisposition{
            .allocator = allocator,
            .name = owned_name,
            .filename = owned_filename,
        };
    }

    /// Frees `name` and `filename`.
    pub fn deinit(self: *const Self) void {
        self.allocator.free(self.name);
        self.allocator.free(self.filename);
    }

    pub fn format(self: Self, comptime f: []const u8, options: fmt.FormatOptions, writer: anytype) !void {
        if (f.len != 0) {
            @compileError("Unknown format character: '" ++ f ++ "'");
        }
        return fmt.format(
            writer,
            "Disposition{{.name='{s}', .filename='{s}'}}",
            .{ self.name, self.filename },
        );
    }
};

/// One part of a multipart/form-data body.
///
/// `disposition` and `content_type` are owned by the part and released by
/// `deinit`. `body` is a view into the buffer the `Multipart` was created
/// with and is NOT freed by `deinit`.
const Part = struct {
    allocator: *std.mem.Allocator,
    disposition: ContentDisposition,
    content_type: []const u8,
    body: []const u8,

    const Self = @This();

    /// Frees the disposition and the content type string.
    pub fn deinit(self: *const Self) void {
        self.disposition.deinit();
        self.allocator.free(self.content_type);
    }

    pub fn format(self: Self, comptime f: []const u8, options: fmt.FormatOptions, writer: anytype) !void {
        if (f.len != 0) {
            @compileError("Unknown format character: '" ++ f ++ "'");
        }
        return fmt.format(
            writer,
            "Part{{.content_type='{s}', .disposition={}, .body='{s}'}}",
            .{ self.content_type, self.disposition, self.body },
        );
    }
};

/// Iterator over the parts of an in-memory multipart/form-data body.
const Multipart = struct {
    stream: StreamT,
    /// Boundary marker including the leading "--"; points into the
    /// `boundary_buffer` passed to `init`.
    boundary: []const u8,
    cursor: usize = 0,

    const Self = @This();

    /// Creates a parser for `body`, taking the boundary from `content_type`
    /// (the request's Content-Type header value, e.g.
    /// `multipart/form-data; boundary=1234`).
    ///
    /// The boundary must be the first parameter after the media type. It may
    /// be quoted and may contain '='. `boundary_buffer` receives "--" plus the
    /// boundary and must outlive the returned value.
    ///
    /// Errors: MissingContentType, InvalidContentType, MissingBoundary,
    /// InvalidBoundary, or NoSpaceLeft when `boundary_buffer` is too small.
    // TODO: move boundary_buffer to allocator
    pub fn init(body: []const u8, content_type: []const u8, boundary_buffer: []u8) !Multipart {
        // parse content_type into what we want (the boundary)
        var it = std.mem.split(content_type, ";");

        const should_be_multipart = it.next() orelse return error.MissingContentType;
        std.log.debug("should be multipart: {s}", .{should_be_multipart});
        if (!std.mem.eql(u8, should_be_multipart, "multipart/form-data"))
            return error.InvalidContentType;

        const should_be_boundary = it.next() orelse return error.MissingBoundary;
        std.log.debug("should be boundary: {s} {d}", .{ should_be_boundary, should_be_boundary.len });

        const boundary_prefix = "boundary=";
        const boundary_param = std.mem.trim(u8, should_be_boundary, " ");
        if (!std.mem.startsWith(u8, boundary_param, boundary_prefix))
            return error.InvalidBoundary;

        // Take everything after the first '=' (a boundary may contain '=')
        // and drop optional surrounding quotes.
        const boundary_value = std.mem.trim(u8, boundary_param[boundary_prefix.len..], "\"");
        if (boundary_value.len == 0) return error.InvalidBoundary;
        std.log.debug("boundary value: {s} {d}", .{ boundary_value, boundary_value.len });

        const actual_boundary_value = try std.fmt.bufPrint(boundary_buffer, "--{s}", .{boundary_value});
        std.log.debug("actual boundary value: {s} {d}", .{ actual_boundary_value, actual_boundary_value.len });

        return Self{
            .stream = StreamT{ .buffer = body, .pos = 0 },
            .boundary = actual_boundary_value,
        };
    }

    /// Returns the next part, or null once the stream is exhausted.
    ///
    /// `hzzp_buffer` is scratch space for the header parser. `allocator` is
    /// used for the part's disposition and content type; the caller must call
    /// `deinit` on the returned part. On error nothing allocated here is
    /// leaked.
    ///
    /// Malformed input produces an error rather than a panic:
    /// InvalidBoundaryBody, UnexpectedParserEvent, MissingContentDisposition,
    /// MissingPartContentType, MissingNextPrefixOrEndSuffix, plus the errors
    /// of `ContentDisposition.parse`.
    pub fn next(self: *Self, hzzp_buffer: []u8, allocator: *std.mem.Allocator) !?Part {
        var reader = self.stream.reader();

        // first self.boundary.len+2 bytes MUST be boundary + \r + \n
        var boundary_buffer: [512]u8 = undefined;
        const maybe_boundary_raw = (try reader.readUntilDelimiterOrEof(&boundary_buffer, '\n')) orelse return null;
        const maybe_boundary_strip1 = std.mem.trimRight(u8, maybe_boundary_raw, "\n");
        const maybe_boundary_strip2 = std.mem.trimRight(u8, maybe_boundary_strip1, "\r");
        if (!std.mem.eql(u8, maybe_boundary_strip2, self.boundary)) {
            std.log.err("expected '{s}' {}, got '{s}' {}", .{ self.boundary, self.boundary.len, maybe_boundary_strip2, maybe_boundary_strip2.len });
            return error.InvalidBoundaryBody;
        }
        std.log.debug("got successful boundary {s}", .{maybe_boundary_strip2});

        // from here onwards, it's just http!
        var parser = hzzp.parser.request.create(hzzp_buffer, reader);

        // This is a hack: start the parser directly in the header state so it
        // goes straight to the part's header lines.
        parser.state = .header;

        // Freed on any error return below; ownership moves to the returned Part.
        var content_disposition: ?ContentDisposition = null;
        errdefer if (content_disposition) |disposition| disposition.deinit();
        var content_type: ?[]const u8 = null;
        errdefer if (content_type) |value| allocator.free(value);

        // Clamp the preview so short bodies do not slice out of bounds.
        const preview_end = std.math.min(self.stream.pos + 50, self.stream.buffer.len);
        std.log.debug("next bytes: {s}", .{self.stream.buffer[self.stream.pos..preview_end]});

        while (try parser.next()) |event| {
            std.log.debug("got event: {}", .{event});
            switch (event) {
                // The parser was started in the header state (see above).
                .status => unreachable,
                .end => break,
                .head_done => {},
                .header => |header| {
                    if (std.ascii.eqlIgnoreCase(header.name, "Content-Disposition")) {
                        // Parse first, then release any earlier value, so a
                        // repeated header neither leaks nor double-frees.
                        const parsed = try ContentDisposition.parse(allocator, header.value);
                        if (content_disposition) |previous| previous.deinit();
                        content_disposition = parsed;
                        std.log.debug("got content disposition for part! {}", .{parsed});
                    } else if (std.ascii.eqlIgnoreCase(header.name, "Content-Type")) {
                        const duplicated = try std.mem.dupe(allocator, u8, header.value);
                        if (content_type) |previous| allocator.free(previous);
                        content_type = duplicated;
                        std.log.debug("got content type for part! {s}", .{duplicated});
                    }
                },
                else => {
                    // Request bodies are untrusted; fail the request instead
                    // of taking the whole process down.
                    std.log.err("unexpected event: {}", .{event});
                    return error.UnexpectedParserEvent;
                },
            }
        }

        // the rest of the reader until we find a matching boundary is the part body.
        // hzzp does not do it for us because it cant find a body encoding
        // (content-length, content-encoding)
        //
        // we can use the fact that we know the reader is FixedBufferStream
        // to extract the remaining body, then trim the boundary!
        //
        // when we find a marker, we also need to know if its an ending marker,
        // because the multipart files are prefixed with the boundary, not suffixed.
        //
        // bc of that we need to set the reader so that its directly on top of
        // the next boundary marker for the next file to work
        const remaining_body = self.stream.buffer[self.stream.pos..];

        // read body until we find the boundary end marker (--{s} OR --{s}--)
        const boundary_offset = std.mem.indexOf(u8, remaining_body, self.boundary) orelse
            return error.MissingNextPrefixOrEndSuffix;

        // The CRLF right before a boundary belongs to the delimiter, not the
        // payload. Strip only that one, so CRLFs inside the payload survive.
        const raw_body = remaining_body[0..boundary_offset];
        const body = if (std.mem.endsWith(u8, raw_body, "\r\n"))
            raw_body[0 .. raw_body.len - 2]
        else
            raw_body;

        const disposition = content_disposition orelse return error.MissingContentDisposition;
        const part_content_type = content_type orelse return error.MissingPartContentType;

        // check out the 2 chars following the boundary
        const after_boundary = remaining_body[boundary_offset + self.boundary.len ..];
        if (std.mem.startsWith(u8, after_boundary, "--")) {
            // we just got the ending boundary marker. the reader should be
            // disabled for future reads.
            self.stream.pos = self.stream.buffer.len;
        } else {
            // there is a next file: move the reader exactly onto its boundary
            // marker (past any delimiter CRLF).
            self.stream.pos += boundary_offset;
        }

        return Part{
            .allocator = allocator,
            .disposition = disposition,
            .content_type = part_content_type,
            .body = body,
        };
    }
};

/// Handler for `POST /api/upload`.
///
/// Parses the request body as multipart/form-data, stores the FIRST part in
/// the images directory under a random id plus an extension derived from the
/// part's content type, and writes the stored path to the response.
fn uploadFile(response: *http.Response, request: http.Request) !void {
    std.log.info("upload! got {d} bytes", .{request.body.len});

    // find content-type header (header names are case-insensitive)
    var it = request.iterator();
    var content_type: ?[]const u8 = null;
    while (it.next()) |header| {
        if (std.ascii.eqlIgnoreCase(header.key, "Content-Type")) {
            content_type = header.value;
        }
    }
    const request_content_type = content_type orelse return error.InvalidContentType;

    // parse multipart data
    var boundary_buffer: [512]u8 = undefined;
    var multipart = try Multipart.init(request.body, request_content_type, &boundary_buffer);

    var hzzp_buffer: [1024]u8 = undefined;
    // Only the first part is stored; any further parts are ignored.
    if (try multipart.next(&hzzp_buffer, global_allocator.?)) |part| {
        defer part.deinit();

        std.log.info(
            "got part from multipart request! name='{s}' filename='{s}' content_type='{s}' length={d}",
            .{ part.disposition.name, part.disposition.filename, part.content_type, part.body.len },
        );

        const extensions = registry.?.getExtensionsByType(part.content_type) orelse
            return error.InvalidContentMimeType;
        if (extensions.items.len == 0) return error.InvalidContentMimeType;
        const extension = extensions.items[0];

        var image_id_buffer: [256]u8 = undefined;
        const image_id = generateImageId(&image_id_buffer);

        var image_path_buffer: [512]u8 = undefined;
        const image_path = try std.fmt.bufPrint(
            &image_path_buffer,
            "{s}/{s}{s}",
            .{ images_dir_path, image_id, extension },
        );

        const image_file = try std.fs.cwd().createFile(image_path, .{});
        defer image_file.close();
        try image_file.writer().writeAll(part.body);

        try response.writer().writeAll(image_path);
    }
}

/// Handler for `GET /i/:filename`: streams the named file from the images
/// directory to the response.
///
/// `filename` comes from the URL, so names containing a path separator and
/// the name ".." are rejected with error.InvalidFilename.
fn fetchFile(response: *http.Response, request: http.Request, filename: []const u8) !void {
    std.log.info("got name: {s}", .{filename});

    if (filename.len == 0 or
        std.mem.eql(u8, filename, "..") or
        std.mem.indexOfAny(u8, filename, "/\\") != null)
    {
        return error.InvalidFilename;
    }

    var images_dir = try std.fs.cwd().openDir(images_dir_path, .{});
    defer images_dir.close();

    // TODO return 404 on error
    const image_file = try images_dir.openFile(filename, .{});
    defer image_file.close();

    var file_read_buffer: [1024]u8 = undefined;
    while (true) {
        const bytes_read = try image_file.read(&file_read_buffer);
        if (bytes_read == 0) return;
        // Send only what was read; the rest of the buffer is stale.
        try response.writer().writeAll(file_read_buffer[0..bytes_read]);
    }
}

pub const log_level: std.log.Level = .debug;

test "multipart" {
    const PART1_REAL_BODY = "Hello!\n";
    const PART2_REAL_BODY = "{\"status\": \"OK\"}";
    const body = "--1234\r\n" ++
        "Content-Type: text/plain\r\n" ++
        "Content-Disposition: form-data; name=file1; filename=ab.txt\r\n" ++
        "\r\n" ++
        PART1_REAL_BODY ++
        "--1234\r\n" ++
        "Content-Type: application/json\r\n" ++
        // TODO: add 'content-type' support to content-disposition as well
        "Content-Disposition: form-data; name=file2; filename=data.json\r\n" ++
        "\r\n" ++
        PART2_REAL_BODY ++
        "--1234--\r\n";

    var buf: [512]u8 = undefined;
    var multipart = try Multipart.init(
        body,
        "multipart/form-data; boundary=1234",
        &buf,
    );

    var hzzp_buffer: [1024]u8 = undefined;

    var part1 = (try multipart.next(&hzzp_buffer, std.testing.allocator)).?;
    defer part1.deinit();
    std.debug.warn(
        "\npart={}\n",
        .{part1},
    );
    std.testing.expectEqualSlices(u8, "text/plain", part1.content_type);
    std.testing.expectEqualSlices(u8, "file1", part1.disposition.name);
    std.testing.expectEqualSlices(u8, "ab.txt", part1.disposition.filename);
    std.testing.expectEqualSlices(u8, PART1_REAL_BODY, part1.body);

    var part2 = (try multipart.next(&hzzp_buffer, std.testing.allocator)).?;
    defer part2.deinit();
    std.debug.warn(
        "\npart2={}\n",
        .{part2},
    );
    std.testing.expectEqualSlices(u8, "application/json", part2.content_type);
    std.testing.expectEqualSlices(u8, "file2", part2.disposition.name);
    std.testing.expectEqualSlices(u8, "data.json", part2.disposition.filename);
    std.testing.expectEqualSlices(u8, PART2_REAL_BODY, part2.body);

    // stop the loop (if there were any) afterwards
    std.testing.expectEqual(
        @as(?Part, null),
        try multipart.next(&hzzp_buffer, std.testing.allocator),
    );
}

test "multipart single file" {
    const PART1_REAL_BODY = "Hello!";
    const body = "--1234\r\n" ++
        "Content-Type: text/plain\r\n" ++
        "Content-Disposition: form-data; name=file1; filename=ab.txt\r\n" ++
        "\r\n" ++
        PART1_REAL_BODY ++
        "--1234--\r\n";

    var buf: [512]u8 = undefined;
    var multipart = try Multipart.init(
        body,
        "multipart/form-data; boundary=1234",
        &buf,
    );

    var hzzp_buffer: [1024]u8 = undefined;

    var part1 = (try multipart.next(&hzzp_buffer, std.testing.allocator)).?;
    defer part1.deinit();
    std.debug.warn(
        "\npart={}\n",
        .{part1},
    );
    std.testing.expectEqualSlices(u8, "text/plain", part1.content_type);
    std.testing.expectEqualSlices(u8, "file1", part1.disposition.name);
    std.testing.expectEqualSlices(u8, "ab.txt", part1.disposition.filename);
    std.testing.expectEqualSlices(u8, PART1_REAL_BODY, part1.body);

    std.testing.expectEqual(
        @as(?Part, null),
        try multipart.next(&hzzp_buffer, std.testing.allocator),
    );
}

// Regression test: standards-style framing (CRLF before every boundary), a
// payload that itself contains CRLF, and quoted disposition values.
test "multipart body containing CRLF with CRLF-delimited boundaries" {
    const PART1_REAL_BODY = "line1\r\nline2";
    const PART2_REAL_BODY = "second";
    const body = "--1234\r\n" ++
        "Content-Type: text/plain\r\n" ++
        "Content-Disposition: form-data; name=\"file1\"; filename=\"ab.txt\"\r\n" ++
        "\r\n" ++
        PART1_REAL_BODY ++
        "\r\n--1234\r\n" ++
        "Content-Type: text/plain\r\n" ++
        "Content-Disposition: form-data; name=\"file2\"; filename=\"cd.txt\"\r\n" ++
        "\r\n" ++
        PART2_REAL_BODY ++
        "\r\n--1234--\r\n";

    var buf: [512]u8 = undefined;
    var multipart = try Multipart.init(
        body,
        "multipart/form-data; boundary=1234",
        &buf,
    );

    var hzzp_buffer: [1024]u8 = undefined;

    var part1 = (try multipart.next(&hzzp_buffer, std.testing.allocator)).?;
    defer part1.deinit();
    std.testing.expectEqualSlices(u8, "file1", part1.disposition.name);
    std.testing.expectEqualSlices(u8, "ab.txt", part1.disposition.filename);
    std.testing.expectEqualSlices(u8, PART1_REAL_BODY, part1.body);

    var part2 = (try multipart.next(&hzzp_buffer, std.testing.allocator)).?;
    defer part2.deinit();
    std.testing.expectEqualSlices(u8, "file2", part2.disposition.name);
    std.testing.expectEqualSlices(u8, "cd.txt", part2.disposition.filename);
    std.testing.expectEqualSlices(u8, PART2_REAL_BODY, part2.body);

    std.testing.expectEqual(
        @as(?Part, null),
        try multipart.next(&hzzp_buffer, std.testing.allocator),
    );
}