yet-another-pomf-clone/src/main.zig

const std = @import("std");
const http = @import("apple_pie");
const hzzp = @import("hzzp");
const mimetypes = @import("mimetypes");

const fmt = std.fmt;

// Directory that holds uploaded files. `main` creates it on startup,
// `uploadFile` writes into it and `fetchFile` reads from it.
const images_dir_path = "./images";

// MIME type registry. Null until `main` initializes and loads it; `uploadFile`
// uses it to look up the file extensions for a part's content type.
var registry: ?mimetypes.Registry = null;
// Allocator handed to the multipart parser by `uploadFile`. Null until `main`
// points it at the general purpose allocator.
var global_allocator: ?*std.mem.Allocator = null;

/// Program entry point: loads the MIME registry, makes sure the upload
/// directory exists, publishes the allocator for the request handlers and then
/// serves HTTP on 0.0.0.0:8080.
pub fn main() anyerror!void {
    std.log.info("welcome to webscale", .{});

    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();

    // The registry is a file-level global so that the handlers can reach it.
    registry = mimetypes.Registry.init(std.heap.page_allocator);
    defer registry.?.deinit();
    try registry.?.load();

    // TODO: configurable addr via env var
    const listen_address = try std.net.Address.parseIp("0.0.0.0", 8080);
    std.log.info("serving on {}", .{listen_address});

    // TODO: configurable path via env var
    try std.fs.cwd().makePath(images_dir_path);

    const allocator = &gpa.allocator;
    global_allocator = allocator;

    // The routing table is built at compile time.
    const handler = comptime http.router.router(&[_]http.router.Route{
        http.router.get("/", index),
        http.router.post("/api/upload", uploadFile),
        http.router.get("/i/:filename", fetchFile),
    });

    try http.listenAndServe(allocator, listen_address, handler);
}

/// Handler for `GET /`: replies with a fixed greeting.
fn index(response: *http.Response, request: http.Request) !void {
    const greeting = "Hello Zig!";
    const out = response.writer();
    try out.writeAll(greeting);
}

/// Fills the first 16 bytes of `buffer` with random lowercase ASCII letters
/// ('a'..'z') and returns that prefix as the image id.
///
/// `buffer` must be at least 16 bytes long. The returned slice points into
/// `buffer`, so it is only valid for as long as `buffer` is.
///
/// The generator is re-seeded from the current nanosecond timestamp on every
/// call, so ids are neither cryptographically secure nor guaranteed unique.
fn generateImageId(buffer: []u8) []const u8 {
    const id_length = 16;
    // 26 letters in the lowercase ASCII alphabet; the previous bound of 24
    // silently excluded 'y' and 'z'.
    const alphabet_size = 26;

    const seed = @truncate(u64, @bitCast(u128, std.time.nanoTimestamp()));
    var prng = std.rand.DefaultPrng.init(seed);

    const id: []u8 = buffer[0..id_length];
    for (id) |*char| {
        char.* = @as(u8, 'a') + prng.random.uintLessThan(u8, alphabet_size);
    }

    return id;
}

// Read-only in-memory stream type; `Multipart` wraps the request body in one.
const StreamT = std.io.FixedBufferStream([]const u8);

/// The `name` and `filename` parameters taken from a part's
/// `Content-Disposition` header.
///
/// Both strings are released through `allocator` in `deinit`, so they must
/// have been allocated with it.
const ContentDisposition = struct {
    allocator: *std.mem.Allocator,
    name: []const u8,
    filename: []const u8,

    const Self = @This();

    /// Frees `name` and `filename` with the stored allocator.
    pub fn deinit(self: *const Self) void {
        const owner = self.allocator;
        owner.free(self.name);
        owner.free(self.filename);
    }

    /// `std.fmt` hook; only the empty format specifier (`{}`) is accepted.
    pub fn format(self: Self, comptime f: []const u8, options: fmt.FormatOptions, writer: anytype) !void {
        if (f.len > 0) @compileError("Unknown format character: '" ++ f ++ "'");

        try fmt.format(
            writer,
            "Disposition{{.name='{s}', .filename='{s}'}}",
            .{ self.name, self.filename },
        );
    }
};

/// One decoded part of a multipart body.
///
/// `disposition` and `content_type` are released by `deinit`; `body` is not
/// freed here.
const Part = struct {
    allocator: *std.mem.Allocator,
    disposition: ContentDisposition,
    content_type: []const u8,
    body: []const u8,

    const Self = @This();

    /// Frees `content_type` and the disposition strings.
    pub fn deinit(self: *const Self) void {
        self.allocator.free(self.content_type);
        self.disposition.deinit();
    }

    /// `std.fmt` hook; only the empty format specifier (`{}`) is accepted.
    pub fn format(self: Self, comptime f: []const u8, options: fmt.FormatOptions, writer: anytype) !void {
        if (f.len > 0) @compileError("Unknown format character: '" ++ f ++ "'");

        try fmt.format(
            writer,
            "Part{{.content_type='{s}', .disposition={}, .body='{s}'}}",
            .{ self.content_type, self.disposition, self.body },
        );
    }
};

/// Pull-style parser for a `multipart/form-data` request body that is fully
/// held in memory.
///
/// Create it with `init`, then call `next` repeatedly; each call yields one
/// `Part` until the closing delimiter has been consumed, after which `next`
/// returns null.
const Multipart = struct {
    /// Cursor over the request body.
    stream: StreamT,
    /// Delimiter line without the line ending, i.e. "--" followed by the
    /// boundary parameter. Points into the buffer given to `init`.
    boundary: []const u8,
    cursor: usize = 0,

    const Self = @This();

    /// Builds a parser over `body` using the boundary found in the request's
    /// `content_type` header value.
    ///
    /// `boundary_buffer` receives the "--<boundary>" delimiter and must outlive
    /// the returned value.
    ///
    /// Errors: `MissingContentType`, `InvalidContentType` (not
    /// multipart/form-data), `MissingBoundary`, `InvalidBoundary`, and
    /// `NoSpaceLeft` when the delimiter does not fit in `boundary_buffer`.
    // TODO: move boundary_buffer to allocator
    pub fn init(body: []const u8, content_type: []const u8, boundary_buffer: []u8) !Multipart {
        // parse content_type into what we want (the boundary)
        var it = std.mem.split(content_type, ";");
        const should_be_multipart = it.next() orelse return error.MissingContentType;
        std.log.debug("should be multipart: {s}", .{should_be_multipart});
        if (!std.mem.eql(u8, should_be_multipart, "multipart/form-data"))
            return error.InvalidContentType;

        const should_be_boundary = it.next() orelse return error.MissingBoundary;
        std.log.debug("should be boundary: {s} {d}", .{ should_be_boundary, should_be_boundary.len });
        const boundary_prefix = " boundary=";
        if (!std.mem.startsWith(u8, should_be_boundary, boundary_prefix))
            return error.InvalidBoundary;

        // Take everything after the prefix so that boundaries containing '='
        // survive intact, and drop optional surrounding quotes.
        const boundary_value = std.mem.trim(u8, should_be_boundary[boundary_prefix.len..], "\"");
        if (boundary_value.len == 0) return error.InvalidBoundary;
        std.log.debug("boundary value: {s} {d}", .{ boundary_value, boundary_value.len });

        const actual_boundary_value = try std.fmt.bufPrint(boundary_buffer, "--{s}", .{boundary_value});
        std.log.debug("actual boundary value: {s} {d}", .{ actual_boundary_value, actual_boundary_value.len });

        return Self{
            .stream = StreamT{ .buffer = body, .pos = 0 },
            .boundary = actual_boundary_value,
        };
    }

    /// Extracts `name` and `filename` from a `Content-Disposition` header
    /// value. Parameters that are absent become empty strings. The returned
    /// strings are owned copies allocated with `allocator`.
    fn parseContentDisposition(allocator: *std.mem.Allocator, value: []const u8) !ContentDisposition {
        var params = std.mem.split(value, ";");
        // The first segment is the disposition type ("form-data"); skip it.
        _ = params.next();

        var name: []const u8 = "";
        var filename: []const u8 = "";

        while (params.next()) |param_raw| {
            const param = std.mem.trim(u8, param_raw, " ");
            if (param.len == 0) continue;
            if (std.mem.eql(u8, param, "form-data")) continue;

            // we have an A=B thing; split on the first '=' only so that values
            // containing '=' are kept whole
            const eq_pos = std.mem.indexOfScalar(u8, param, '=') orelse
                return error.InvalidContentDisposition;
            const key = param[0..eq_pos];
            const unquoted = std.mem.trim(u8, param[eq_pos + 1 ..], "\"");

            if (std.mem.eql(u8, key, "name")) name = unquoted;
            if (std.mem.eql(u8, key, "filename")) filename = unquoted;
        }

        const owned_name = try std.mem.dupe(allocator, u8, name);
        errdefer allocator.free(owned_name);
        const owned_filename = try std.mem.dupe(allocator, u8, filename);

        return ContentDisposition{
            .allocator = allocator,
            .name = owned_name,
            .filename = owned_filename,
        };
    }

    /// Parses the next part of the body.
    ///
    /// Returns null once the stream is exhausted (which is the case after the
    /// closing "--<boundary>--" delimiter has been seen). On success the
    /// caller owns the returned `Part` and must call `deinit` on it;
    /// `Part.body` is a slice of the body passed to `init`, not a copy.
    ///
    /// `hzzp_buffer` is scratch space for the header parser. `allocator` is
    /// used for the copies of the header values stored in the part.
    ///
    /// Malformed input is reported as an error rather than a panic; nothing
    /// allocated for the part is leaked on the error paths.
    pub fn next(self: *Self, hzzp_buffer: []u8, allocator: *std.mem.Allocator) !?Part {
        var reader = self.stream.reader();
        // the first line MUST be the boundary, terminated by \r\n
        var boundary_buffer: [512]u8 = undefined;
        const maybe_boundary_raw = (try reader.readUntilDelimiterOrEof(&boundary_buffer, '\n')) orelse return null;

        const maybe_boundary = std.mem.trimRight(u8, maybe_boundary_raw, "\r\n");
        if (!std.mem.eql(u8, maybe_boundary, self.boundary)) {
            std.log.err("expected '{s}' {}, got '{s}' {}", .{ self.boundary, self.boundary.len, maybe_boundary, maybe_boundary.len });
            return error.InvalidBoundaryBody;
        }
        std.log.debug("got successful boundary {s}", .{maybe_boundary});

        // from there onwards, it's just http!
        var parser = hzzp.parser.request.create(hzzp_buffer, reader);

        // This is a hack so that it doesnt try to parse an http header.
        parser.state = .header;

        var content_disposition: ?ContentDisposition = null;
        errdefer if (content_disposition) |disposition| disposition.deinit();
        var content_type: ?[]const u8 = null;
        errdefer if (content_type) |value| allocator.free(value);

        // Clamp the preview so short bodies do not slice out of bounds.
        const preview_end = std.math.min(self.stream.buffer.len, self.stream.pos + 50);
        std.log.debug("next bytes: {s}", .{self.stream.buffer[self.stream.pos..preview_end]});

        while (try parser.next()) |event| {
            std.log.debug("got event: {}", .{event});
            switch (event) {
                // cannot happen: the parser was forced past the status line
                .status => unreachable,
                .end => break,
                .head_done => {},
                .header => |header| {
                    // header names are case-insensitive
                    if (std.ascii.eqlIgnoreCase(header.name, "Content-Disposition")) {
                        const parsed = try parseContentDisposition(allocator, header.value);
                        // a repeated header replaces the earlier one
                        if (content_disposition) |previous| previous.deinit();
                        content_disposition = parsed;
                        std.log.debug("got content disposition for part! {}", .{parsed});
                    } else if (std.ascii.eqlIgnoreCase(header.name, "Content-Type")) {
                        const owned = try std.mem.dupe(allocator, u8, header.value);
                        if (content_type) |previous| allocator.free(previous);
                        content_type = owned;
                        std.log.debug("got content type for part! {s}", .{owned});
                    }
                },
                else => {
                    // the input is client-controlled: report, do not crash
                    std.log.err("unexpected event: {}", .{event});
                    return error.UnexpectedMultipartEvent;
                },
            }
        }

        // the rest of the reader until we find a matching boundary is the part body.
        // hzzp does not do it for us because it cant find a body encoding
        // (content-length, content-encoding)
        //
        // we can use the fact that we know the reader is FixedBufferStream
        // to extract the remaining body, then trim the boundary!
        // NOTE(review): this relies on the header parser leaving stream.pos
        // right after the blank line that ends the headers — confirm against hzzp.
        const remaining_body = self.stream.buffer[self.stream.pos..];

        // the body runs up to the next delimiter (--{s} OR --{s}--)
        const boundary_offset = std.mem.indexOf(u8, remaining_body, self.boundary) orelse
            return error.MissingNextPrefixOrEndSuffix;

        // The line break right before a delimiter belongs to the delimiter, not
        // to the payload. Only that one trailing CRLF is removed, so payloads
        // that contain CRLF themselves are kept whole.
        var body = remaining_body[0..boundary_offset];
        if (std.mem.endsWith(u8, body, "\r\n")) body = body[0 .. body.len - 2];

        const disposition = content_disposition orelse return error.MissingContentDisposition;
        const part_content_type = content_type orelse return error.MissingContentType;

        // when we find a marker, we also need to know if its an ending marker,
        // because the multipart files are prefixed with the boundary, not suffixed.
        const after_boundary = remaining_body[boundary_offset + self.boundary.len ..];
        if (std.mem.startsWith(u8, after_boundary, "--")) {
            // we just got the ending boundary marker. the reader should be
            // disabled for future reads.
            self.stream.pos = self.stream.buffer.len;
        } else {
            // there is a next file: park the reader directly on its boundary
            // marker so the next call starts by reading the delimiter line
            self.stream.pos += boundary_offset;
        }

        return Part{
            .allocator = allocator,
            .disposition = disposition,
            .content_type = part_content_type,
            .body = body,
        };
    }
};

/// Handler for `POST /api/upload`.
///
/// Parses the request body as multipart/form-data, stores the first part in
/// the images directory under a random id plus the extension registered for
/// the part's content type, and responds with the path of the stored file.
/// Only the first part is processed; if the body contains no part, nothing is
/// written to the response.
///
/// Errors: `InvalidContentType` when the request has no Content-Type header,
/// `InvalidContentMimeType` when no extension is known for the part's type,
/// plus any multipart parsing or file system error.
fn uploadFile(response: *http.Response, request: http.Request) !void {
    std.log.info("upload! got {d} bytes", .{request.body.len});

    // find content-type header (header names are case-insensitive)
    var it = request.iterator();
    var content_type: ?[]const u8 = null;
    while (it.next()) |header| {
        if (std.ascii.eqlIgnoreCase(header.key, "Content-Type")) {
            content_type = header.value;
        }
    }

    const request_content_type = content_type orelse return error.InvalidContentType;

    // parse multipart data
    var boundary_buffer: [512]u8 = undefined;
    var multipart = try Multipart.init(request.body, request_content_type, &boundary_buffer);
    var hzzp_buffer: [1024]u8 = undefined;

    while (try multipart.next(&hzzp_buffer, global_allocator.?)) |part| {
        defer part.deinit();
        std.log.info(
            "got part from multipart request! name='{s}' filename='{s}' content_type='{s}' length={d}",
            .{ part.disposition.name, part.disposition.filename, part.content_type, part.body.len },
        );

        const extensions = registry.?.getExtensionsByType(part.content_type) orelse
            return error.InvalidContentMimeType;
        // guard against a registered type with an empty extension list
        if (extensions.items.len == 0) return error.InvalidContentMimeType;
        const extension = extensions.items[0];

        var image_id_buffer: [256]u8 = undefined;
        const image_id = generateImageId(&image_id_buffer);

        var image_path_buffer: [512]u8 = undefined;
        const image_path = try std.fmt.bufPrint(
            &image_path_buffer,
            "{s}/{s}{s}",
            .{ images_dir_path, image_id, extension },
        );

        const image_file = try std.fs.cwd().createFile(image_path, .{});
        // release the descriptor on every exit path
        defer image_file.close();
        try image_file.writer().writeAll(part.body);

        try response.writer().writeAll(image_path);
        return;
    }
}

/// Handler for `GET /i/:filename`: streams the named file from the images
/// directory into the response.
///
/// `filename` comes straight from the URL, so names that are empty, are "."
/// or "..", or contain a path separator are rejected with `InvalidFilename`
/// to keep lookups inside the images directory. Any file system error is
/// returned to the caller.
fn fetchFile(response: *http.Response, request: http.Request, filename: []const u8) !void {
    std.log.info("got name: {s}", .{filename});

    const escapes_directory = filename.len == 0 or
        std.mem.eql(u8, filename, ".") or
        std.mem.eql(u8, filename, "..") or
        std.mem.indexOfAny(u8, filename, "/\\") != null;
    if (escapes_directory) return error.InvalidFilename;

    var image_path_buffer: [512]u8 = undefined;
    const image_path = try std.fmt.bufPrint(
        &image_path_buffer,
        "{s}/{s}",
        .{ images_dir_path, filename },
    );

    // TODO return 404 on error
    const image_file = try std.fs.cwd().openFile(image_path, .{});
    defer image_file.close();

    var file_read_buffer: [1024]u8 = undefined;
    while (true) {
        const bytes_read = try image_file.read(&file_read_buffer);
        if (bytes_read == 0) return;
        // Forward only the bytes that were actually read; the rest of the
        // buffer holds stale data from a previous iteration (or is undefined).
        try response.writer().writeAll(file_read_buffer[0..bytes_read]);
    }
}

// Log level override read by `std.log` from the root source file; `.debug`
// enables the `std.log.debug` output used throughout this file.
pub const log_level: std.log.Level = .debug;
// Two parts in one body: each part's headers and payload must come back as
// sent, and a third call must report the end of the body.
test "multipart" {
    const first_payload = "Hello!\n";
    const second_payload = "{\"status\": \"OK\"}";

    const first_section =
        "--1234\r\n" ++
        "Content-Type: text/plain\r\n" ++
        "Content-Disposition: form-data; name=file1; filename=ab.txt\r\n" ++
        "\r\n" ++
        first_payload;

    const second_section =
        "--1234\r\n" ++
        "Content-Type: application/json\r\n" ++
        // TODO: add 'content-type' support to content-disposition as well
        "Content-Disposition: form-data; name=file2; filename=data.json\r\n" ++
        "\r\n" ++
        second_payload;

    const body = first_section ++ second_section ++ "--1234--\r\n";

    var boundary_storage: [512]u8 = undefined;
    var multipart = try Multipart.init(
        body,
        "multipart/form-data; boundary=1234",
        &boundary_storage,
    );

    var scratch: [1024]u8 = undefined;
    const allocator = std.testing.allocator;

    const first = (try multipart.next(&scratch, allocator)).?;
    defer first.deinit();

    std.debug.warn("\npart={}\n", .{first});

    std.testing.expectEqualSlices(u8, "text/plain", first.content_type);
    std.testing.expectEqualSlices(u8, "file1", first.disposition.name);
    std.testing.expectEqualSlices(u8, "ab.txt", first.disposition.filename);
    std.testing.expectEqualSlices(u8, first_payload, first.body);

    const second = (try multipart.next(&scratch, allocator)).?;
    defer second.deinit();

    std.debug.warn("\npart2={}\n", .{second});

    std.testing.expectEqualSlices(u8, "application/json", second.content_type);
    std.testing.expectEqualSlices(u8, "file2", second.disposition.name);
    std.testing.expectEqualSlices(u8, "data.json", second.disposition.filename);
    std.testing.expectEqualSlices(u8, second_payload, second.body);

    // after the closing delimiter there must be nothing left to read
    const after_end = try multipart.next(&scratch, allocator);
    std.testing.expectEqual(@as(?Part, null), after_end);
}

// A body with exactly one part: the part must come back as sent and the
// following call must report the end of the body.
test "multipart single file" {
    const payload = "Hello!";

    const body =
        "--1234\r\n" ++
        "Content-Type: text/plain\r\n" ++
        "Content-Disposition: form-data; name=file1; filename=ab.txt\r\n" ++
        "\r\n" ++
        payload ++
        "--1234--\r\n";

    var boundary_storage: [512]u8 = undefined;
    var multipart = try Multipart.init(
        body,
        "multipart/form-data; boundary=1234",
        &boundary_storage,
    );

    var scratch: [1024]u8 = undefined;
    const allocator = std.testing.allocator;

    const only_part = (try multipart.next(&scratch, allocator)).?;
    defer only_part.deinit();

    std.debug.warn("\npart={}\n", .{only_part});

    std.testing.expectEqualSlices(u8, "text/plain", only_part.content_type);
    std.testing.expectEqualSlices(u8, "file1", only_part.disposition.name);
    std.testing.expectEqualSlices(u8, "ab.txt", only_part.disposition.filename);
    std.testing.expectEqualSlices(u8, payload, only_part.body);

    const after_end = try multipart.next(&scratch, allocator);
    std.testing.expectEqual(@as(?Part, null), after_end);
}