From e42a97e192459422d4c32b74761161d558bc78a0 Mon Sep 17 00:00:00 2001 From: Luna Date: Mon, 25 Nov 2019 21:42:24 -0300 Subject: [PATCH] use 'as' builtin - move src/codegen.zig to src/codegen/llvm.zig --- src/analysis.zig | 26 +- src/codegen.zig | 603 +------------------------------------------ src/codegen/llvm.zig | 599 ++++++++++++++++++++++++++++++++++++++++++ src/comp_ctx.zig | 2 +- src/main.zig | 2 +- src/parsers.zig | 5 +- 6 files changed, 622 insertions(+), 615 deletions(-) create mode 100644 src/codegen/llvm.zig diff --git a/src/analysis.zig b/src/analysis.zig index 1962762..c3363a5 100644 --- a/src/analysis.zig +++ b/src/analysis.zig @@ -88,7 +88,7 @@ pub const Analyzer = struct { self.doError( "expected struct or enum for '{}', got {}", val, - @tagName(comp.SymbolType(sym.?.value.*)), + @tagName(@as(comp.SymbolType, sym.?.value.*)), ); break :blk null; }, @@ -106,7 +106,7 @@ pub const Analyzer = struct { symbol_type: comp.SymbolUnderlyingType, wanted_type_enum: comp.SymbolUnderlyingTypeEnum, ) !void { - var actual_enum = comp.SymbolUnderlyingTypeEnum(symbol_type); + var actual_enum = @as(comp.SymbolUnderlyingTypeEnum, symbol_type); if (actual_enum != wanted_type_enum) { std.debug.warn("Expected {}, got {}\n", wanted_type_enum, actual_enum); return CompileError.TypeError; @@ -121,7 +121,7 @@ pub const Analyzer = struct { switch (symbol_type) { .Integer32, .Integer64, .Double => {}, else => { - var actual_enum = comp.SymbolUnderlyingTypeEnum(symbol_type); + var actual_enum = @as(comp.SymbolUnderlyingTypeEnum, symbol_type); std.debug.warn("Expected numeric, got {}\n", actual_enum); return CompileError.TypeError; }, @@ -154,8 +154,8 @@ pub const Analyzer = struct { symbol_type: comp.SymbolUnderlyingType, expected_type: comp.SymbolUnderlyingType, ) !void { - const symbol_enum = comp.SymbolUnderlyingTypeEnum(symbol_type); - const expected_enum = comp.SymbolUnderlyingTypeEnum(expected_type); + const symbol_enum = @as(comp.SymbolUnderlyingTypeEnum, symbol_type); + const expected_enum = @as(comp.SymbolUnderlyingTypeEnum, expected_type); if (symbol_enum != expected_enum) { std.debug.warn("Expected {}, got {}\n", expected_enum, symbol_enum); @@ -253,7 +253,7 @@ pub const Analyzer = struct { .Call => |call| { self.setErrToken(call.paren); - std.debug.assert(ast.ExprType(call.callee.*) == .Variable); + std.debug.assert(@as(ast.ExprType, call.callee.*) == .Variable); const func_name = call.callee.*.Variable.lexeme; var symbol = try ctx.fetchGlobalSymbol(func_name, .Function); @@ -264,11 +264,14 @@ pub const Analyzer = struct { var arg_type = try self.resolveExprType(ctx, &arg_expr); self.expectSymUnTypeEqual(arg_type, param_type) catch { + const param_type_val = @as(comp.SymbolUnderlyingTypeEnum, param_type); + const arg_type_val = @as(comp.SymbolUnderlyingTypeEnum, arg_type); + self.doError( "Expected parameter {} to be {}, got {}", idx, - @tagName(comp.SymbolUnderlyingTypeEnum(param_type)), - @tagName(comp.SymbolUnderlyingTypeEnum(arg_type)), + @tagName(param_type_val), + @tagName(arg_type_val), ); return CompileError.TypeError; @@ -287,8 +290,9 @@ pub const Analyzer = struct { .Get => |get| { var target = get.target.*; - if (ast.ExprType(target) != .Variable) { - std.debug.warn("Expected Variable as get target, got {}\n", ast.ExprType(target)); + const target_type = @as(ast.ExprType, target); + if (target_type != .Variable) { + std.debug.warn("Expected Variable as get target, got {}\n", target_type); return CompileError.TypeError; } @@ -332,7 +336,7 @@ pub const Analyzer = struct { else => { self.doError( "Expected Struct/Enum as get target, got {}", - comp.SymbolUnderlyingTypeEnum(global_typ), + @as(comp.SymbolUnderlyingTypeEnum, global_typ), ); return CompileError.TypeError; diff --git a/src/codegen.zig b/src/codegen.zig index bad81e6..38aa1bd 100644 --- a/src/codegen.zig +++ b/src/codegen.zig @@ -1,606 +1,9 @@ -const std = @import("std"); -const ast = @import("ast.zig"); -const llvm = @import("llvm.zig"); -const comp = @import("comp_ctx.zig"); -// const analysis = @import("analysis.zig"); - -fn sliceify(non_slice: ?[*]const u8) []const u8 { - if (non_slice == null) return ""; - return non_slice.?[0..std.mem.len(u8, non_slice.?)]; -} +pub const llvm = @import("codegen/llvm.zig"); +// pub const x86 = @import("codegen/x86.zig"); pub const CompileError = error{ - LLVMError, + BackendError, EmitError, TypeError, Invalid, }; - -fn mkLLVMBool(val: bool) llvm.LLVMValueRef { - if (val) { - return llvm.LLVMConstInt(llvm.LLVMInt1Type(), 1, 1); - } else { - return llvm.LLVMConstInt(llvm.LLVMInt1Type(), 0, 1); - } -} - -pub const LLVMTable = std.StringHashMap(llvm.LLVMValueRef); -pub const LLVMValueList = std.ArrayList(llvm.LLVMValueRef); - -pub const Codegen = struct { - allocator: *std.mem.Allocator, - ctx: *comp.CompilationContext, - llvm_table: LLVMTable, - - current_function_name: ?[]const u8 = null, - - pub fn init(allocator: *std.mem.Allocator, ctx: *comp.CompilationContext) Codegen { - return Codegen{ - .allocator = allocator, - .ctx = ctx, - .llvm_table = LLVMTable.init(allocator), - }; - } - - fn typeToLLVM(self: *@This(), typ: comp.SymbolUnderlyingType) !llvm.LLVMTypeRef { - return switch (typ) { - .Integer32 => llvm.LLVMInt32Type(), - .Integer64 => llvm.LLVMInt64Type(), - .Bool => llvm.LLVMInt1Type(), - - .OpaqueType => |val| { - std.debug.warn("Invalid return type: {}\n", val); - return CompileError.TypeError; - }, - - .Struct, .Enum => |lex| blk: { - var sym_data = self.ctx.symbol_table.get(lex).?.value; - break :blk switch (sym_data.*) { - .Struct => unreachable, - .Enum => llvm.LLVMInt32Type(), - else => { - std.debug.warn("Function {} is not a type\n", lex); - return CompileError.TypeError; - }, - }; - }, - - else => { - std.debug.warn("TODO handle {}\n", typ); - return CompileError.TypeError; - }, - }; - } - - fn emitForVariableType(self: *@This(), vari: var, get: var, kv: var) !llvm.LLVMValueRef { - var sym = kv.value; - - switch (sym.*) { - .Enum => |map| { - var val = map.get(get.name.lexeme); - if (val == null) { - std.debug.warn( - "enum {} does not have field {}\n", - vari.lexeme, - get.name.lexeme, - ); - } - return llvm.LLVMConstInt(llvm.LLVMInt32Type(), val.?.value, 1); - }, - - .Struct => @panic("TODO handle struct"), - else => { - std.debug.warn("Invalid get target: {}\n", comp.SymbolType(sym.*)); - return CompileError.EmitError; - }, - } - } - - fn emitExpr( - self: *Codegen, - builder: var, - expr: *const ast.Expr, - ) anyerror!llvm.LLVMValueRef { - return switch (expr.*) { - - // TODO handle all literals, construct llvm values for them - .Literal => |literal| blk: { - break :blk switch (literal) { - // TODO other literals - .Integer32 => |val| llvm.LLVMConstInt( - llvm.LLVMInt32Type(), - @intCast(c_ulonglong, val), - 10, - ), - .Integer64 => |val| llvm.LLVMConstInt( - llvm.LLVMInt64Type(), - @intCast(c_ulonglong, val), - 10, - ), - - .Float => |val| blk2: { - var val_cstr = try std.cstr.addNullByte(self.allocator, val); - break :blk2 llvm.LLVMConstRealOfString(llvm.LLVMDoubleType(), val_cstr.ptr); - }, - .Bool => |val| blk2: { - break :blk2 mkLLVMBool(val); - }, - else => unreachable, - }; - }, - - .Unary => |unary| { - var right = try self.emitExpr(builder, unary.right); - - return switch (unary.op) { - .Negate => llvm.LLVMBuildNeg(builder, right, c"neg_tmp"), - .Not => llvm.LLVMBuildNot(builder, right, c"neg_tmp"), - }; - }, - - .Binary => |binary| { - var left = try self.emitExpr(builder, binary.left); - var right = try self.emitExpr(builder, binary.right); - - return switch (binary.op) { - .Add => llvm.LLVMBuildAdd(builder, left, right, c"addtmp"), - .Sub => llvm.LLVMBuildSub(builder, left, right, c"subtmp"), - .Mul => llvm.LLVMBuildMul(builder, left, right, c"multmp"), - - //.Div => llvm.LLVMBuildDiv(builder, left, right, c"divtmp"), - .And => llvm.LLVMBuildAnd(builder, left, right, c"andtmp"), - .Or => llvm.LLVMBuildOr(builder, left, right, c"ortmp"), - - else => { - std.debug.warn("Unexpected binary operator: '{}'\n", binary.op); - return CompileError.EmitError; - }, - }; - }, - - .Get => |get| { - var target = get.target.*; - - switch (target) { - .Variable => |vari| { - // first, we must check if the target is a type - // and emit accordingly - var kv_sym_opt = self.ctx.symbol_table.get(vari.lexeme); - if (kv_sym_opt) |kv| { - return try self.emitForVariableType(vari, get, kv); - } - - // if not, its likely a variable, we should handle it accordingly - // as well - @panic("TODO handle variables"); - }, - - else => { - std.debug.warn("Invalid get target: {}\n", ast.ExprType(target)); - return CompileError.EmitError; - }, - } - }, - - .Call => |call| { - const name = call.callee.*.Variable.lexeme; - - var llvm_func = self.llvm_table.get(name); - if (llvm_func == null) { - std.debug.warn("Function '{}' not found\n", name); - return CompileError.EmitError; - } - - var args = LLVMValueList.init(self.allocator); - errdefer args.deinit(); - - for (call.arguments.toSlice()) |arg_expr| { - var arg_val = try self.emitExpr(builder, &arg_expr); - try args.append(arg_val); - } - - var args_slice = args.toSlice(); - - return llvm.LLVMBuildCall( - builder, - llvm_func.?.value, - args_slice.ptr, - @intCast(c_uint, args_slice.len), - c"call", - ); - }, - - .Assign => |assign| { - const name = assign.name.lexeme; - var meta = self.ctx.current_scope.?.meta_map.get(name).?.value; - var assign_expr = try self.emitExpr(builder, assign.value); - var llvm_alloca: llvm.LLVMValueRef = switch (meta.using) { - .Function => meta.from_function.?.parameters.get(name).?.value.llvm_alloca.?, - .Scope => meta.llvm_alloca.?, - }; - - return llvm.LLVMBuildStore(builder, assign_expr, llvm_alloca); - }, - - .Variable => |vari| { - var kv_opt = self.ctx.current_scope.?.meta_map.get(vari.lexeme); - - if (kv_opt == null) { - std.debug.warn("variable {} not fully analyzed\n", vari.lexeme); - return CompileError.EmitError; - } - - // we have metadata, which means we can check if the variable - // is coming from the scope or from the function - - var metadata = kv_opt.?.value; - std.debug.warn("!! LOAD FROM VAR META {}\n", @ptrToInt(metadata)); - - var buf = try self.allocator.alloc(u8, 512); - errdefer self.allocator.free(buf); - - var load_str = try std.fmt.bufPrint(buf, "{}_loaded", vari.lexeme); - - var load_cstr = try std.cstr.addNullByte(self.allocator, load_str); - errdefer self.allocator.free(load_cstr); - - return switch (metadata.using) { - .Function => blk: { - var param = metadata.from_function.?.parameters.get(vari.lexeme).?.value; - break :blk llvm.LLVMBuildLoad(builder, param.llvm_alloca.?, load_cstr.ptr); - }, - - .Scope => blk: { - var llvm_alloca = metadata.llvm_alloca.?; - //var var_typ = metadata.from_scope.?.env.get(vari.lexeme).?.value; - break :blk llvm.LLVMBuildLoad(builder, llvm_alloca, load_cstr.ptr); - }, - }; - }, - - .Grouping => |expr_ptr| blk: { - break :blk try self.emitExpr(builder, expr_ptr); - }, - - else => { - std.debug.warn("Got unexpected expr {}\n", ast.ExprType(expr.*)); - return CompileError.EmitError; - }, - }; - } - - fn emitStmt(self: *Codegen, builder: var, stmt: *ast.Stmt) anyerror!void { - std.debug.warn("cgen: emitting stmt {}\n", ast.StmtType(stmt.*)); - - switch (stmt.*) { - .Expr => |expr| _ = try self.emitExpr(builder, expr), - - .Return => |ret| { - var ret_expr = try self.emitExpr(builder, ret.value); - _ = llvm.LLVMBuildRet(builder, ret_expr); - }, - - .If => |ifstmt| { - var cond = try self.emitExpr(builder, ifstmt.condition); - var zero = mkLLVMBool(false); - var icmp = llvm.LLVMBuildICmp(builder, llvm.LLVMIntPredicate.LLVMIntNE, cond, zero, c"ifcond"); - - var insert = llvm.LLVMGetInsertBlock(builder); - var function = llvm.LLVMGetBasicBlockParent(insert); - - var then_bb = llvm.LLVMAppendBasicBlock(function, c"then"); - var else_bb = llvm.LLVMAppendBasicBlock(function, c"else"); - var merge_bb = llvm.LLVMAppendBasicBlock(function, c"ifcont"); - - var condbr = llvm.LLVMBuildCondBr(builder, icmp, then_bb, else_bb); - - llvm.LLVMPositionBuilderAtEnd(builder, then_bb); - - // roughly translating to kaleidoscope's - // 'Value *ThenV = Then->codegen();' - var then_rets = false; - var else_rets = false; - - self.ctx.setScope(self.ctx.current_scope.?.nextChild()); - - var then_branch = ifstmt.then_branch.toSlice(); - for (then_branch) |_, idx| { - // keep emitting until branch has ret - var then_stmt = &then_branch[idx]; - - if (!then_rets) - try self.emitStmt(builder, then_stmt); - - // TODO break? lol - switch (then_stmt.*) { - .Return => then_rets = true, - else => {}, - } - } - - self.ctx.dumpScope(); - - // only build the br instruction if we didn't ret, because - // there can't be any instruction after a terminator - // same applies for the else branch - if (!then_rets) - _ = llvm.LLVMBuildBr(builder, merge_bb); - - then_bb = llvm.LLVMGetInsertBlock(builder); - - llvm.LLVMPositionBuilderAtEnd(builder, else_bb); - - // roughly translating to kaleidoscope's - // 'Else *ElseV = Else->codegen();' - if (ifstmt.else_branch) |else_block| { - self.ctx.setScope(self.ctx.current_scope.?.nextChild()); - - var else_slice = else_block.toSlice(); - for (else_slice) |_, idx| { - // keep emitting until branch has ret - var else_stmt = &else_slice[idx]; - - if (!else_rets) - try self.emitStmt(builder, else_stmt); - - switch (else_stmt.*) { - .Return => else_rets = true, - else => {}, - } - } - - self.ctx.dumpScope(); - } - - if (!else_rets) - _ = llvm.LLVMBuildBr(builder, merge_bb); - - else_bb = llvm.LLVMGetInsertBlock(builder); - - llvm.LLVMPositionBuilderAtEnd(builder, merge_bb); - - // if both of the branches return, we should put - // the merge branch as unreachable. - if (then_rets and else_rets) - _ = llvm.LLVMBuildUnreachable(builder); - }, - - .VarDecl => |vardecl| { - // we alaready inferred the type of the variable in the - // analyze pass and the current scope contains the variable's - // type(hopefully), so we resolve it - const name = vardecl.name.lexeme; - var var_metadata = self.ctx.current_scope.?.meta_map.get(name).?.value; - - var name_cstr = try std.cstr.addNullByte(self.allocator, name); - errdefer self.allocator.free(name_cstr); - - var fn_symbol = self.getFnSymbol(self.current_function_name.?); - - var variable = llvm.LLVMBuildAlloca( - builder, - try self.typeToLLVM(var_metadata.typ), - name_cstr.ptr, - ); - - stmt.*.VarDecl.llvm_alloca = variable; - - var_metadata.*.llvm_alloca = variable; - - std.debug.warn("!! DECL VAR {} => {}\n", @ptrToInt(var_metadata), variable); - - var llvm_expr = try self.emitExpr(builder, vardecl.value); - _ = llvm.LLVMBuildStore(builder, llvm_expr, variable); - }, - - else => { - std.debug.warn("Got unexpected stmt {}\n", stmt.*); - return CompileError.EmitError; - }, - } - } - - fn getFnSymbol(self: *@This(), name: []const u8) *comp.FunctionSymbol { - var fn_sym_search = self.ctx.symbol_table.get(name).?.value; - std.debug.assert(comp.SymbolType(fn_sym_search.*) == .Function); - return &fn_sym_search.Function; - } - - /// Emit LLVM ir for the given node. - fn genNode( - self: *Codegen, - mod: llvm.LLVMModuleRef, - node: *ast.Node, - ) !void { - switch (node.*) { - .Root => @panic("Should not have gotten Root"), - .FnDecl => |decl| { - const name = decl.func_name.lexeme; - self.current_function_name = name; - std.debug.warn("cgen: genning function '{}'\n", name); - - var fn_sym = self.getFnSymbol(name); - - const name_cstr = try std.cstr.addNullByte(self.allocator, name); - errdefer self.allocator.free(name_cstr); - - var param_types = llvm.LLVMTypeList.init(self.allocator); - errdefer param_types.deinit(); - - for (decl.params.toSlice()) |param| { - try param_types.append(try self.typeToLLVM(fn_sym.parameters.get( - param.name.lexeme, - ).?.value.typ)); - } - - var llvm_ret_type = llvm.LLVMFunctionType( - try self.typeToLLVM(fn_sym.return_type), - param_types.toSlice().ptr, - @intCast(c_uint, param_types.len), - 0, - ); - - var func = llvm.LLVMAddFunction(mod, name_cstr.ptr, llvm_ret_type); - _ = try self.llvm_table.put(name, func); - - var buf = try self.allocator.alloc(u8, 512); - var entry_lbl = try std.fmt.bufPrint(buf, "fn_{}_entry", name); - var entry_lbl_cstr = try std.cstr.addNullByte(self.allocator, entry_lbl); - var entry = llvm.LLVMAppendBasicBlock(func, entry_lbl_cstr.ptr); - - var builder = llvm.LLVMCreateBuilder(); - llvm.LLVMPositionBuilderAtEnd(builder, entry); - - // to have the ability to mutate parameters, we must allocate them on - // the stack - var params_slice = decl.params.toSlice(); - for (params_slice) |param_node, idx| { - var param = fn_sym.parameters.get(param_node.name.lexeme).?.value; - - const param_name_cstr = try std.cstr.addNullByte(self.allocator, param_node.name.lexeme); - errdefer self.allocator.free(param_name_cstr); - - var alloca = llvm.LLVMBuildAlloca(builder, try self.typeToLLVM(param.typ), param_name_cstr.ptr); - - std.debug.warn("SET PARAM LLVM ALLOCA {} to {}\n", param_node.name.lexeme, alloca); - param.llvm_alloca = alloca; - - _ = llvm.LLVMBuildStore( - builder, - llvm.LLVMGetParam(func, @intCast(c_uint, idx)), - alloca, - ); - } - - self.ctx.setScope(fn_sym.scope); - - // TODO check if stmt is return and if we already - // returned before - var body_slice = decl.body.toSlice(); - for (body_slice) |_, idx| { - try self.emitStmt(builder, &body_slice[idx]); - } - - self.ctx.dumpScope(); - std.debug.warn("cgen: generated function '{}'\n", name); - }, - - // NOTE: enums don't have specific llvm ir code generated for them - .Enum => {}, - - .ConstDecl => |constdecls| { - for (constdecls.toSlice()) |constdecl| { - const name = constdecl.name.lexeme; - - var const_type = self.ctx.symbol_table.get(name).?.value; - var const_llvm_type = try self.typeToLLVM(const_type.Const); - - const const_name = try std.cstr.addNullByte(self.allocator, name); - errdefer self.allocator.free(const_name); - - var global = llvm.LLVMAddGlobal(mod, const_llvm_type, const_name.ptr); - - // TODO maybe put builder at main function so we can still - // call other functions inside consts? - - var builder = llvm.LLVMCreateBuilder(); - var expr_llvm_val = try self.emitExpr(builder, constdecl.expr); - - llvm.LLVMSetInitializer(global, expr_llvm_val); - } - }, - - else => { - std.debug.warn("TODO handle node type {}\n", @tagName(node.*)); - return; - }, - } - } - - pub fn gen(self: *Codegen, root: *ast.Node) !void { - std.debug.warn("cgen: start gen\n"); - _ = llvm.LLVMInitializeNativeTarget(); - - var mod = llvm.LLVMModuleCreateWithName(c"awoo").?; - defer llvm.LLVMDisposeModule(mod); - - var root_slice = root.Root.toSlice(); - for (root_slice) |_, idx| { - try self.genNode(mod, &root_slice[idx]); - } - - var err: ?[*]u8 = null; - defer llvm.LLVMDisposeMessage(err); - - if (llvm.LLVMPrintModuleToFile(mod, c"output.ll", &err) != 0) { - std.debug.warn("error printing module to file: {}\n", sliceify(err)); - return CompileError.LLVMError; - } - - //if (llvm.LLVMWriteBitcodeToFile(mod, c"awoo.bc") != 0) { - // std.debug.warn("error writing bitcode to file: {}\n", sliceify(err)); - // return CompileError.LLVMError; - //} - - std.debug.warn("cgen: verify llvm module\n"); - _ = llvm.LLVMVerifyModule( - mod, - llvm.LLVMVerifierFailureAction.LLVMAbortProcessAction, - &err, - ); - - llvm.LLVMInitializeAllTargetInfos(); - llvm.LLVMInitializeAllTargets(); - llvm.LLVMInitializeAllTargetMCs(); - llvm.LLVMInitializeAllAsmParsers(); - llvm.LLVMInitializeAllAsmPrinters(); - - var engine: llvm.LLVMExecutionEngineRef = undefined; - if (llvm.LLVMCreateExecutionEngineForModule(&engine, mod, &err) != 0) { - std.debug.warn("failed to create execution engine: {}\n", sliceify(err)); - return CompileError.LLVMError; - } - - var machine = llvm.LLVMGetExecutionEngineTargetMachine(engine); - defer llvm.LLVMDisposeTargetMachine(machine); - - var target = llvm.LLVMGetTargetMachineTarget(machine); - var target_data = llvm.LLVMCreateTargetDataLayout(machine); - var data_layout = llvm.LLVMCopyStringRepOfTargetData(target_data); - llvm.LLVMSetDataLayout(mod, data_layout); - - var outpath_cstr = try std.cstr.addNullByte(self.allocator, "outpath.o"); - - //var asmpath_cstr = try std.cstr.addNullByte(self.allocator, "output.S"); - - var desc = llvm.LLVMGetTargetDescription(target); - var features = llvm.LLVMGetTargetMachineFeatureString(machine); - var triple = llvm.LLVMGetTargetMachineTriple(machine); - - std.debug.warn("target: {}\n", sliceify(desc)); - std.debug.warn("triple: {}\n", sliceify(triple)); - std.debug.warn("features: {}\n", sliceify(features)); - - //if (llvm.LLVMTargetMachineEmitToFile( - // machine, - // mod, - // asmpath_cstr.ptr, - // llvm.LLVMCodeGenFileType.LLVMAssemblyFile, - // &err, - //) != 0) { - // std.debug.warn("failed to emit to assembly file: {}\n", sliceify(err)); - // return CompileError.LLVMError; - //} - - if (llvm.LLVMTargetMachineEmitToFile( - machine, - mod, - outpath_cstr.ptr, - llvm.LLVMCodeGenFileType.LLVMObjectFile, - &err, - ) != 0) { - std.debug.warn("failed to emit to file: {}\n", sliceify(err)); - return CompileError.LLVMError; - } - } -}; diff --git a/src/codegen/llvm.zig b/src/codegen/llvm.zig new file mode 100644 index 0000000..6da6f82 --- /dev/null +++ b/src/codegen/llvm.zig @@ -0,0 +1,599 @@ +const std = @import("std"); +const ast = @import("../ast.zig"); +const llvm = @import("../llvm.zig"); +const comp = @import("../comp_ctx.zig"); +const CompileError = @import("../codegen.zig").CompileError; + +fn sliceify(non_slice: ?[*]const u8) []const u8 { + if (non_slice == null) return ""; + return non_slice.?[0..std.mem.len(u8, non_slice.?)]; +} + +fn mkLLVMBool(val: bool) llvm.LLVMValueRef { + if (val) { + return llvm.LLVMConstInt(llvm.LLVMInt1Type(), 1, 1); + } else { + return llvm.LLVMConstInt(llvm.LLVMInt1Type(), 0, 1); + } +} + +pub const LLVMTable = std.StringHashMap(llvm.LLVMValueRef); +pub const LLVMValueList = std.ArrayList(llvm.LLVMValueRef); + +pub const Codegen = struct { + allocator: *std.mem.Allocator, + ctx: *comp.CompilationContext, + llvm_table: LLVMTable, + + current_function_name: ?[]const u8 = null, + + pub fn init(allocator: *std.mem.Allocator, ctx: *comp.CompilationContext) Codegen { + return Codegen{ + .allocator = allocator, + .ctx = ctx, + .llvm_table = LLVMTable.init(allocator), + }; + } + + fn typeToLLVM(self: *@This(), typ: comp.SymbolUnderlyingType) !llvm.LLVMTypeRef { + return switch (typ) { + .Integer32 => llvm.LLVMInt32Type(), + .Integer64 => llvm.LLVMInt64Type(), + .Bool => llvm.LLVMInt1Type(), + + .OpaqueType => |val| { + std.debug.warn("Invalid return type: {}\n", val); + return CompileError.TypeError; + }, + + .Struct, .Enum => |lex| blk: { + var sym_data = self.ctx.symbol_table.get(lex).?.value; + break :blk switch (sym_data.*) { + .Struct => unreachable, + .Enum => llvm.LLVMInt32Type(), + else => { + std.debug.warn("Function {} is not a type\n", lex); + return CompileError.TypeError; + }, + }; + }, + + else => { + std.debug.warn("TODO handle {}\n", typ); + return CompileError.TypeError; + }, + }; + } + + fn emitForVariableType(self: *@This(), vari: var, get: var, kv: var) !llvm.LLVMValueRef { + var sym = kv.value; + + switch (sym.*) { + .Enum => |map| { + var val = map.get(get.name.lexeme); + if (val == null) { + std.debug.warn( + "enum {} does not have field {}\n", + vari.lexeme, + get.name.lexeme, + ); + } + return llvm.LLVMConstInt(llvm.LLVMInt32Type(), val.?.value, 1); + }, + + .Struct => @panic("TODO handle struct"), + else => { + std.debug.warn("Invalid get target: {}\n", @as(comp.SymbolType, sym.*)); + return CompileError.EmitError; + }, + } + } + + fn emitExpr( + self: *Codegen, + builder: var, + expr: *const ast.Expr, + ) anyerror!llvm.LLVMValueRef { + return switch (expr.*) { + + // TODO handle all literals, construct llvm values for them + .Literal => |literal| blk: { + break :blk switch (literal) { + // TODO other literals + .Integer32 => |val| llvm.LLVMConstInt( + llvm.LLVMInt32Type(), + @intCast(c_ulonglong, val), + 10, + ), + .Integer64 => |val| llvm.LLVMConstInt( + llvm.LLVMInt64Type(), + @intCast(c_ulonglong, val), + 10, + ), + + .Float => |val| blk2: { + var val_cstr = try std.cstr.addNullByte(self.allocator, val); + break :blk2 llvm.LLVMConstRealOfString(llvm.LLVMDoubleType(), val_cstr.ptr); + }, + .Bool => |val| blk2: { + break :blk2 mkLLVMBool(val); + }, + else => unreachable, + }; + }, + + .Unary => |unary| { + var right = try self.emitExpr(builder, unary.right); + + return switch (unary.op) { + .Negate => llvm.LLVMBuildNeg(builder, right, c"neg_tmp"), + .Not => llvm.LLVMBuildNot(builder, right, c"neg_tmp"), + }; + }, + + .Binary => |binary| { + var left = try self.emitExpr(builder, binary.left); + var right = try self.emitExpr(builder, binary.right); + + return switch (binary.op) { + .Add => llvm.LLVMBuildAdd(builder, left, right, c"addtmp"), + .Sub => llvm.LLVMBuildSub(builder, left, right, c"subtmp"), + .Mul => llvm.LLVMBuildMul(builder, left, right, c"multmp"), + + //.Div => llvm.LLVMBuildDiv(builder, left, right, c"divtmp"), + .And => llvm.LLVMBuildAnd(builder, left, right, c"andtmp"), + .Or => llvm.LLVMBuildOr(builder, left, right, c"ortmp"), + + else => { + std.debug.warn("Unexpected binary operator: '{}'\n", binary.op); + return CompileError.EmitError; + }, + }; + }, + + .Get => |get| { + var target = get.target.*; + + switch (target) { + .Variable => |vari| { + // first, we must check if the target is a type + // and emit accordingly + var kv_sym_opt = self.ctx.symbol_table.get(vari.lexeme); + if (kv_sym_opt) |kv| { + return try self.emitForVariableType(vari, get, kv); + } + + // if not, its likely a variable, we should handle it accordingly + // as well + @panic("TODO handle variables"); + }, + + else => { + std.debug.warn("Invalid get target: {}\n", @as(ast.ExprType, target)); + return CompileError.EmitError; + }, + } + }, + + .Call => |call| { + const name = call.callee.*.Variable.lexeme; + + var llvm_func = self.llvm_table.get(name); + if (llvm_func == null) { + std.debug.warn("Function '{}' not found\n", name); + return CompileError.EmitError; + } + + var args = LLVMValueList.init(self.allocator); + errdefer args.deinit(); + + for (call.arguments.toSlice()) |arg_expr| { + var arg_val = try self.emitExpr(builder, &arg_expr); + try args.append(arg_val); + } + + var args_slice = args.toSlice(); + + return llvm.LLVMBuildCall( + builder, + llvm_func.?.value, + args_slice.ptr, + @intCast(c_uint, args_slice.len), + c"call", + ); + }, + + .Assign => |assign| { + const name = assign.name.lexeme; + var meta = self.ctx.current_scope.?.meta_map.get(name).?.value; + var assign_expr = try self.emitExpr(builder, assign.value); + var llvm_alloca: llvm.LLVMValueRef = switch (meta.using) { + .Function => meta.from_function.?.parameters.get(name).?.value.llvm_alloca.?, + .Scope => meta.llvm_alloca.?, + }; + + return llvm.LLVMBuildStore(builder, assign_expr, llvm_alloca); + }, + + .Variable => |vari| { + var kv_opt = self.ctx.current_scope.?.meta_map.get(vari.lexeme); + + if (kv_opt == null) { + std.debug.warn("variable {} not fully analyzed\n", vari.lexeme); + return CompileError.EmitError; + } + + // we have metadata, which means we can check if the variable + // is coming from the scope or from the function + + var metadata = kv_opt.?.value; + std.debug.warn("!! LOAD FROM VAR META {}\n", @ptrToInt(metadata)); + + var buf = try self.allocator.alloc(u8, 512); + errdefer self.allocator.free(buf); + + var load_str = try std.fmt.bufPrint(buf, "{}_loaded", vari.lexeme); + + var load_cstr = try std.cstr.addNullByte(self.allocator, load_str); + errdefer self.allocator.free(load_cstr); + + return switch (metadata.using) { + .Function => blk: { + var param = metadata.from_function.?.parameters.get(vari.lexeme).?.value; + break :blk llvm.LLVMBuildLoad(builder, param.llvm_alloca.?, load_cstr.ptr); + }, + + .Scope => blk: { + var llvm_alloca = metadata.llvm_alloca.?; + //var var_typ = metadata.from_scope.?.env.get(vari.lexeme).?.value; + break :blk llvm.LLVMBuildLoad(builder, llvm_alloca, load_cstr.ptr); + }, + }; + }, + + .Grouping => |expr_ptr| blk: { + break :blk try self.emitExpr(builder, expr_ptr); + }, + + else => { + std.debug.warn("Got unexpected expr {}\n", @as(ast.ExprType, expr.*)); + return CompileError.EmitError; + }, + }; + } + + fn emitStmt(self: *Codegen, builder: var, stmt: *ast.Stmt) anyerror!void { + std.debug.warn("cgen: emitting stmt {}\n", @as(ast.StmtType, stmt.*)); + + switch (stmt.*) { + .Expr => |expr| _ = try self.emitExpr(builder, expr), + + .Return => |ret| { + var ret_expr = try self.emitExpr(builder, ret.value); + _ = llvm.LLVMBuildRet(builder, ret_expr); + }, + + .If => |ifstmt| { + var cond = try self.emitExpr(builder, ifstmt.condition); + var zero = mkLLVMBool(false); + var icmp = llvm.LLVMBuildICmp(builder, llvm.LLVMIntPredicate.LLVMIntNE, cond, zero, c"ifcond"); + + var insert = llvm.LLVMGetInsertBlock(builder); + var function = llvm.LLVMGetBasicBlockParent(insert); + + var then_bb = llvm.LLVMAppendBasicBlock(function, c"then"); + var else_bb = llvm.LLVMAppendBasicBlock(function, c"else"); + var merge_bb = llvm.LLVMAppendBasicBlock(function, c"ifcont"); + + var condbr = llvm.LLVMBuildCondBr(builder, icmp, then_bb, else_bb); + + llvm.LLVMPositionBuilderAtEnd(builder, then_bb); + + // roughly translating to kaleidoscope's + // 'Value *ThenV = Then->codegen();' + var then_rets = false; + var else_rets = false; + + self.ctx.setScope(self.ctx.current_scope.?.nextChild()); + + var then_branch = ifstmt.then_branch.toSlice(); + for (then_branch) |_, idx| { + // keep emitting until branch has ret + var then_stmt = &then_branch[idx]; + + if (!then_rets) + try self.emitStmt(builder, then_stmt); + + // TODO break? lol + switch (then_stmt.*) { + .Return => then_rets = true, + else => {}, + } + } + + self.ctx.dumpScope(); + + // only build the br instruction if we didn't ret, because + // there can't be any instruction after a terminator + // same applies for the else branch + if (!then_rets) + _ = llvm.LLVMBuildBr(builder, merge_bb); + + then_bb = llvm.LLVMGetInsertBlock(builder); + + llvm.LLVMPositionBuilderAtEnd(builder, else_bb); + + // roughly translating to kaleidoscope's + // 'Else *ElseV = Else->codegen();' + if (ifstmt.else_branch) |else_block| { + self.ctx.setScope(self.ctx.current_scope.?.nextChild()); + + var else_slice = else_block.toSlice(); + for (else_slice) |_, idx| { + // keep emitting until branch has ret + var else_stmt = &else_slice[idx]; + + if (!else_rets) + try self.emitStmt(builder, else_stmt); + + switch (else_stmt.*) { + .Return => else_rets = true, + else => {}, + } + } + + self.ctx.dumpScope(); + } + + if (!else_rets) + _ = llvm.LLVMBuildBr(builder, merge_bb); + + else_bb = llvm.LLVMGetInsertBlock(builder); + + llvm.LLVMPositionBuilderAtEnd(builder, merge_bb); + + // if both of the branches return, we should put + // the merge branch as unreachable. + if (then_rets and else_rets) + _ = llvm.LLVMBuildUnreachable(builder); + }, + + .VarDecl => |vardecl| { + // we alaready inferred the type of the variable in the + // analyze pass and the current scope contains the variable's + // type(hopefully), so we resolve it + const name = vardecl.name.lexeme; + var var_metadata = self.ctx.current_scope.?.meta_map.get(name).?.value; + + var name_cstr = try std.cstr.addNullByte(self.allocator, name); + errdefer self.allocator.free(name_cstr); + + var fn_symbol = self.getFnSymbol(self.current_function_name.?); + + var variable = llvm.LLVMBuildAlloca( + builder, + try self.typeToLLVM(var_metadata.typ), + name_cstr.ptr, + ); + + stmt.*.VarDecl.llvm_alloca = variable; + + var_metadata.*.llvm_alloca = variable; + + std.debug.warn("!! DECL VAR {} => {}\n", @ptrToInt(var_metadata), variable); + + var llvm_expr = try self.emitExpr(builder, vardecl.value); + _ = llvm.LLVMBuildStore(builder, llvm_expr, variable); + }, + + else => { + std.debug.warn("Got unexpected stmt {}\n", stmt.*); + return CompileError.EmitError; + }, + } + } + + fn getFnSymbol(self: *@This(), name: []const u8) *comp.FunctionSymbol { + var fn_sym_search = self.ctx.symbol_table.get(name).?.value; + std.debug.assert(@as(comp.SymbolType, fn_sym_search.*) == .Function); + return &fn_sym_search.Function; + } + + /// Emit LLVM ir for the given node. + fn genNode( + self: *Codegen, + mod: llvm.LLVMModuleRef, + node: *ast.Node, + ) !void { + switch (node.*) { + .Root => @panic("Should not have gotten Root"), + .FnDecl => |decl| { + const name = decl.func_name.lexeme; + self.current_function_name = name; + std.debug.warn("cgen: genning function '{}'\n", name); + + var fn_sym = self.getFnSymbol(name); + + const name_cstr = try std.cstr.addNullByte(self.allocator, name); + errdefer self.allocator.free(name_cstr); + + var param_types = llvm.LLVMTypeList.init(self.allocator); + errdefer param_types.deinit(); + + for (decl.params.toSlice()) |param| { + try param_types.append(try self.typeToLLVM(fn_sym.parameters.get( + param.name.lexeme, + ).?.value.typ)); + } + + var llvm_ret_type = llvm.LLVMFunctionType( + try self.typeToLLVM(fn_sym.return_type), + param_types.toSlice().ptr, + @intCast(c_uint, param_types.len), + 0, + ); + + var func = llvm.LLVMAddFunction(mod, name_cstr.ptr, llvm_ret_type); + _ = try self.llvm_table.put(name, func); + + var buf = try self.allocator.alloc(u8, 512); + var entry_lbl = try std.fmt.bufPrint(buf, "fn_{}_entry", name); + var entry_lbl_cstr = try std.cstr.addNullByte(self.allocator, entry_lbl); + var entry = llvm.LLVMAppendBasicBlock(func, entry_lbl_cstr.ptr); + + var builder = llvm.LLVMCreateBuilder(); + llvm.LLVMPositionBuilderAtEnd(builder, entry); + + // to have the ability to mutate parameters, we must allocate them on + // the stack + var params_slice = decl.params.toSlice(); + for (params_slice) |param_node, idx| { + var param = fn_sym.parameters.get(param_node.name.lexeme).?.value; + + const param_name_cstr = try std.cstr.addNullByte(self.allocator, param_node.name.lexeme); + errdefer self.allocator.free(param_name_cstr); + + var alloca = llvm.LLVMBuildAlloca(builder, try self.typeToLLVM(param.typ), param_name_cstr.ptr); + + std.debug.warn("SET PARAM LLVM ALLOCA {} to {}\n", param_node.name.lexeme, alloca); + param.llvm_alloca = alloca; + + _ = llvm.LLVMBuildStore( + builder, + llvm.LLVMGetParam(func, @intCast(c_uint, idx)), + alloca, + ); + } + + self.ctx.setScope(fn_sym.scope); + + // TODO check if stmt is return and if we already + // returned before + var body_slice = decl.body.toSlice(); + for (body_slice) |_, idx| { + try self.emitStmt(builder, &body_slice[idx]); + } + + self.ctx.dumpScope(); + std.debug.warn("cgen: generated function '{}'\n", name); + }, + + // NOTE: enums don't have specific llvm ir code generated for them + .Enum => {}, + + .ConstDecl => |constdecls| { + for (constdecls.toSlice()) |constdecl| { + const name = constdecl.name.lexeme; + + var const_type = self.ctx.symbol_table.get(name).?.value; + var const_llvm_type = try self.typeToLLVM(const_type.Const); + + const const_name = try std.cstr.addNullByte(self.allocator, name); + errdefer self.allocator.free(const_name); + + var global = llvm.LLVMAddGlobal(mod, const_llvm_type, const_name.ptr); + + // TODO maybe put builder at main function so we can still + // call other functions inside consts? + + var builder = llvm.LLVMCreateBuilder(); + var expr_llvm_val = try self.emitExpr(builder, constdecl.expr); + + llvm.LLVMSetInitializer(global, expr_llvm_val); + } + }, + + else => { + std.debug.warn("TODO handle node type {}\n", @tagName(node.*)); + return; + }, + } + } + + pub fn gen(self: *Codegen, root: *ast.Node) !void { + std.debug.warn("cgen: start gen\n"); + _ = llvm.LLVMInitializeNativeTarget(); + + var mod = llvm.LLVMModuleCreateWithName(c"awoo").?; + defer llvm.LLVMDisposeModule(mod); + + var root_slice = root.Root.toSlice(); + for (root_slice) |_, idx| { + try self.genNode(mod, &root_slice[idx]); + } + + var err: ?[*]u8 = null; + defer llvm.LLVMDisposeMessage(err); + + if (llvm.LLVMPrintModuleToFile(mod, c"output.ll", &err) != 0) { + std.debug.warn("error printing module to file: {}\n", sliceify(err)); + return CompileError.BackendError; + } + + //if (llvm.LLVMWriteBitcodeToFile(mod, c"awoo.bc") != 0) { + // std.debug.warn("error writing bitcode to file: {}\n", sliceify(err)); + // return CompileError.BackendError; + //} + + std.debug.warn("cgen: verify llvm module\n"); + _ = llvm.LLVMVerifyModule( + mod, + llvm.LLVMVerifierFailureAction.LLVMAbortProcessAction, + &err, + ); + + llvm.LLVMInitializeAllTargetInfos(); + llvm.LLVMInitializeAllTargets(); + llvm.LLVMInitializeAllTargetMCs(); + llvm.LLVMInitializeAllAsmParsers(); + llvm.LLVMInitializeAllAsmPrinters(); + + var engine: llvm.LLVMExecutionEngineRef = undefined; + if (llvm.LLVMCreateExecutionEngineForModule(&engine, mod, &err) != 0) { + std.debug.warn("failed to create execution engine: {}\n", sliceify(err)); + return CompileError.BackendError; + } + + var machine = llvm.LLVMGetExecutionEngineTargetMachine(engine); + defer llvm.LLVMDisposeTargetMachine(machine); + + var target = llvm.LLVMGetTargetMachineTarget(machine); + var target_data = llvm.LLVMCreateTargetDataLayout(machine); + var data_layout = llvm.LLVMCopyStringRepOfTargetData(target_data); + llvm.LLVMSetDataLayout(mod, data_layout); + + var outpath_cstr = try std.cstr.addNullByte(self.allocator, "outpath.o"); + + //var asmpath_cstr = try std.cstr.addNullByte(self.allocator, "output.S"); + + var desc = llvm.LLVMGetTargetDescription(target); + var features = llvm.LLVMGetTargetMachineFeatureString(machine); + var triple = llvm.LLVMGetTargetMachineTriple(machine); + + std.debug.warn("target: {}\n", sliceify(desc)); + std.debug.warn("triple: {}\n", sliceify(triple)); + std.debug.warn("features: {}\n", sliceify(features)); + + //if (llvm.LLVMTargetMachineEmitToFile( + // machine, + // mod, + // asmpath_cstr.ptr, + // llvm.LLVMCodeGenFileType.LLVMAssemblyFile, + // &err, + //) != 0) { + // std.debug.warn("failed to emit to assembly file: {}\n", sliceify(err)); + // return CompileError.BackendError; + //} + + if (llvm.LLVMTargetMachineEmitToFile( + machine, + mod, + outpath_cstr.ptr, + llvm.LLVMCodeGenFileType.LLVMObjectFile, + &err, + ) != 0) { + std.debug.warn("failed to emit to file: {}\n", sliceify(err)); + return CompileError.BackendError; + } + } +}; diff --git a/src/comp_ctx.zig b/src/comp_ctx.zig index 0d4246c..fdbcaeb 100644 --- a/src/comp_ctx.zig +++ b/src/comp_ctx.zig @@ -355,7 +355,7 @@ pub const CompilationContext = struct { var value = sym_kv.?.value; - var sym_typ = SymbolType(value.*); + var sym_typ = @as(SymbolType, value.*); if (sym_typ != typ) { std.debug.warn("Expected {}, got {}\n", sym_typ, typ); return CompilationError.TypeError; diff --git a/src/main.zig b/src/main.zig index 17e004e..4dc9918 100644 --- a/src/main.zig +++ b/src/main.zig @@ -56,7 +56,7 @@ pub fn run(allocator: *std.mem.Allocator, slice: []const u8) !Result { std.debug.warn("symbol table\n"); printer.printContext(ctx); - var cgen = codegen.Codegen.init(allocator, &ctx); + var cgen = codegen.llvm.Codegen.init(allocator, &ctx); try cgen.gen(root); var child = try std.ChildProcess.init( diff --git a/src/parsers.zig b/src/parsers.zig index 75bedfc..dbf3722 100644 --- a/src/parsers.zig +++ b/src/parsers.zig @@ -1026,8 +1026,9 @@ pub const Parser = struct { fn finishStructVal(self: *@This(), expr: *Expr) !*Expr { // {a: 10 b: 10} // for this to work properly, must be Variable, since its a type. - if (ast.ExprType(expr.*) != .Variable) { - return self.doError("Expected variable for struct type, got {}", ast.ExprType(expr.*)); + const expr_type = @as(ast.ExprType, expr.*); + if (expr_type != .Variable) { + return self.doError("Expected variable for struct type, got {}", expr_type); } var inits = ast.StructInitList.init(self.allocator);