const std = @import("std");
const ast = @import("../ast.zig");
const llvm = @import("../llvm.zig");
const comp = @import("../comp_ctx.zig");
const CompileError = @import("../codegen.zig").CompileError;

/// Turn a null-terminated C string into a Zig slice (terminator excluded).
/// Unwraps the optional with `.?`, so a null pointer is a safety-checked
/// crash rather than an empty slice.
fn sliceify(non_slice: ?[*:0]const u8) []const u8 {
    return non_slice.?[0..std.mem.len(u8, non_slice.?)];
}

/// Build the LLVM `i1` constant corresponding to `val`.
fn mkLLVMBool(val: bool) llvm.LLVMValueRef {
    if (val) {
        return llvm.LLVMConstInt(llvm.LLVMInt1Type(), 1, 1);
    } else {
        return llvm.LLVMConstInt(llvm.LLVMInt1Type(), 0, 1);
    }
}

/// Maps a function name to the LLVM value created for it.
pub const LLVMTable = std.StringHashMap(llvm.LLVMValueRef);

/// Growable list of LLVM values (used to collect call arguments).
pub const LLVMValueList = std.ArrayList(llvm.LLVMValueRef);

/// Walks an analyzed AST and emits LLVM IR / an object file for it.
pub const Codegen = struct {
    allocator: *std.mem.Allocator,
    ctx: *comp.CompilationContext,

    /// Functions emitted so far, keyed by source name. Looked up by `.Call`.
    llvm_table: LLVMTable,

    /// Name of the function whose body is currently being emitted.
    current_function_name: ?[]const u8 = null,

    /// Create a code generator that allocates from `allocator` and reads
    /// symbol/scope information from `ctx`.
    pub fn init(allocator: *std.mem.Allocator, ctx: *comp.CompilationContext) Codegen {
        return Codegen{
            .allocator = allocator,
            .ctx = ctx,
            .llvm_table = LLVMTable.init(allocator),
        };
    }

    /// Translate a language-level type into its LLVM type.
    ///
    /// Enums lower to `i32`. Struct symbols hit `unreachable`. Anything that
    /// cannot be lowered is reported on stderr and yields
    /// `CompileError.TypeError`.
    fn typeToLLVM(self: *@This(), typ: comp.SymbolUnderlyingType) !llvm.LLVMTypeRef {
        return switch (typ) {
            .Integer32 => llvm.LLVMInt32Type(),
            .Integer64 => llvm.LLVMInt64Type(),
            .Bool => llvm.LLVMInt1Type(),

            .OpaqueType => |val| {
                std.debug.warn("Invalid return type: {}\n", val);
                return CompileError.TypeError;
            },

            .Struct, .Enum => |lex| blk: {
                // The payload is the type's name; the symbol table tells us
                // what kind of symbol that name actually refers to.
                var sym_data = self.ctx.symbol_table.get(lex).?.value;
                break :blk switch (sym_data.*) {
                    .Struct => unreachable,
                    .Enum => llvm.LLVMInt32Type(),

                    else => {
                        std.debug.warn("Function {} is not a type\n", lex);
                        return CompileError.TypeError;
                    },
                };
            },

            else => {
                std.debug.warn("TODO handle {}\n", typ);
                return CompileError.TypeError;
            },
        };
    }

    /// Emit the value of `Type.field` where `vari` names a symbol found in
    /// the symbol table (`kv` is that table entry) and `get` is the
    /// field-access expression.
    ///
    /// For enums the result is the field's value as an `i32` constant.
    /// A field the enum does not have, or a symbol that is neither an enum
    /// nor a struct, yields `CompileError.EmitError`. Structs still panic
    /// with a TODO.
    fn emitForVariableType(self: *@This(), vari: var, get: var, kv: var) !llvm.LLVMValueRef {
        var sym = kv.value;

        switch (sym.*) {
            .Enum => |map| {
                var val = map.get(get.name.lexeme);
                if (val == null) {
                    std.debug.warn(
                        "enum {} does not have field {}\n",
                        vari.lexeme,
                        get.name.lexeme,
                    );
                    // Bail out here: without this return the unwrap below
                    // would crash right after printing the diagnostic.
                    return CompileError.EmitError;
                }

                return llvm.LLVMConstInt(llvm.LLVMInt32Type(), val.?.value, 1);
            },

            .Struct => @panic("TODO handle struct"),

            else => {
                std.debug.warn("Invalid get target: {}\n", @as(comp.SymbolType, sym.*));
                return CompileError.EmitError;
            },
        }
    }

    /// Emit LLVM IR for `expr` at the builder's current position and return
    /// the resulting LLVM value.
    ///
    /// Unsupported expressions and operators are reported on stderr and
    /// yield `CompileError.EmitError`. Allocation failures propagate.
    fn emitExpr(
        self: *Codegen,
        builder: var,
        expr: *const ast.Expr,
    ) anyerror!llvm.LLVMValueRef {
        return switch (expr.*) {
            // TODO handle all literals, construct llvm values for them
            .Literal => |literal| blk: {
                break :blk switch (literal) {
                    // TODO other literals
                    // NOTE(review): the last argument of LLVMConstInt is the
                    // SignExtend flag, not a radix; 10 is simply "true".
                    .Integer32 => |val| llvm.LLVMConstInt(
                        llvm.LLVMInt32Type(),
                        @intCast(c_ulonglong, val),
                        10,
                    ),
                    .Integer64 => |val| llvm.LLVMConstInt(
                        llvm.LLVMInt64Type(),
                        @intCast(c_ulonglong, val),
                        10,
                    ),

                    .Float => |val| blk2: {
                        // The text is parsed during the call, so the
                        // temporary C string can go as soon as we leave.
                        var val_cstr = try std.cstr.addNullByte(self.allocator, val);
                        defer self.allocator.free(val_cstr);
                        break :blk2 llvm.LLVMConstRealOfString(llvm.LLVMDoubleType(), val_cstr.ptr);
                    },

                    .Bool => |val| blk2: {
                        break :blk2 mkLLVMBool(val);
                    },

                    else => unreachable,
                };
            },

            .Unary => |unary| {
                var right = try self.emitExpr(builder, unary.right);

                return switch (unary.op) {
                    .Negate => llvm.LLVMBuildNeg(builder, right, "neg_tmp"),
                    // NOTE(review): reuses the "neg_tmp" value name; left as
                    // is so the emitted IR text does not change.
                    .Not => llvm.LLVMBuildNot(builder, right, "neg_tmp"),
                };
            },

            .Binary => |binary| {
                var left = try self.emitExpr(builder, binary.left);
                var right = try self.emitExpr(builder, binary.right);

                return switch (binary.op) {
                    .Add => llvm.LLVMBuildAdd(builder, left, right, "addtmp"),
                    .Sub => llvm.LLVMBuildSub(builder, left, right, "subtmp"),
                    .Mul => llvm.LLVMBuildMul(builder, left, right, "multmp"),

                    //.Div => llvm.LLVMBuildDiv(builder, left, right, "divtmp"),
                    .And => llvm.LLVMBuildAnd(builder, left, right, "andtmp"),
                    .Or => llvm.LLVMBuildOr(builder, left, right, "ortmp"),

                    else => {
                        std.debug.warn("Unexpected binary operator: '{}'\n", binary.op);
                        return CompileError.EmitError;
                    },
                };
            },

            .Get => |get| {
                var target = get.target.*;

                switch (target) {
                    .Variable => |vari| {
                        // first, we must check if the target is a type
                        // and emit accordingly
                        var kv_sym_opt = self.ctx.symbol_table.get(vari.lexeme);
                        if (kv_sym_opt) |kv| {
                            return try self.emitForVariableType(vari, get, kv);
                        }

                        // if not, its likely a variable, we should handle it accordingly
                        // as well
                        @panic("TODO handle variables");
                    },

                    else => {
                        std.debug.warn("Invalid get target: {}\n", @as(ast.ExprType, target));
                        return CompileError.EmitError;
                    },
                }
            },

            .Call => |call| {
                // NOTE(review): assumes the callee is a plain `.Variable`
                // expression; any other callee kind is not handled here.
                const name = call.callee.*.Variable.lexeme;

                var llvm_func = self.llvm_table.get(name);
                if (llvm_func == null) {
                    std.debug.warn("Function '{}' not found\n", name);
                    return CompileError.EmitError;
                }

                // The list is only scratch space for building the argument
                // array, so release it on every exit path, not just on error.
                var args = LLVMValueList.init(self.allocator);
                defer args.deinit();

                for (call.arguments.toSlice()) |arg_expr| {
                    var arg_val = try self.emitExpr(builder, &arg_expr);
                    try args.append(arg_val);
                }

                var args_slice = args.toSlice();

                return llvm.LLVMBuildCall(
                    builder,
                    llvm_func.?.value,
                    args_slice.ptr,
                    @intCast(c_uint, args_slice.len),
                    "call",
                );
            },

            .Assign => |assign| {
                const name = assign.name.lexeme;
                var meta = self.ctx.current_scope.?.meta_map.get(name).?.value;
                var assign_expr = try self.emitExpr(builder, assign.value);

                // Parameters keep their stack slot on the function symbol,
                // locals keep it on their own metadata.
                var llvm_alloca: llvm.LLVMValueRef = switch (meta.using) {
                    .Function => meta.from_function.?.parameters.get(name).?.value.llvm_alloca.?,
                    .Scope => meta.llvm_alloca.?,
                };

                return llvm.LLVMBuildStore(builder, assign_expr, llvm_alloca);
            },

            .Variable => |vari| {
                var kv_opt = self.ctx.current_scope.?.meta_map.get(vari.lexeme);

                if (kv_opt == null) {
                    std.debug.warn("variable {} not fully analyzed\n", vari.lexeme);
                    return CompileError.EmitError;
                }

                // we have metadata, which means we can check if the variable
                // is coming from the scope or from the function
                var metadata = kv_opt.?.value;
                std.debug.warn("!! LOAD FROM VAR META {}\n", @ptrToInt(metadata));

                // Scratch buffer for formatting the load's name; the
                // null-terminated copy below is what LLVM actually sees.
                var buf = try self.allocator.alloc(u8, 512);
                defer self.allocator.free(buf);

                var load_str = try std.fmt.bufPrint(buf, "{}_loaded", vari.lexeme);

                // LLVM keeps its own copy of value names, so the C string
                // only has to outlive the LLVMBuildLoad call.
                var load_cstr = try std.cstr.addNullByte(self.allocator, load_str);
                defer self.allocator.free(load_cstr);

                return switch (metadata.using) {
                    .Function => blk: {
                        var param = metadata.from_function.?.parameters.get(vari.lexeme).?.value;
                        break :blk llvm.LLVMBuildLoad(builder, param.llvm_alloca.?, load_cstr.ptr);
                    },

                    .Scope => blk: {
                        var llvm_alloca = metadata.llvm_alloca.?;
                        //var var_typ = metadata.from_scope.?.env.get(vari.lexeme).?.value;
                        break :blk llvm.LLVMBuildLoad(builder, llvm_alloca, load_cstr.ptr);
                    },
                };
            },

            .Grouping => |expr_ptr| blk: {
                break :blk try self.emitExpr(builder, expr_ptr);
            },

            else => {
                std.debug.warn("Got unexpected expr {}\n", @as(ast.ExprType, expr.*));
                return CompileError.EmitError;
            },
        };
    }

    /// Emit LLVM IR for a single statement at the builder's current
    /// position.
    ///
    /// `.VarDecl` records the created stack slot both on the statement and
    /// on the variable's scope metadata so later loads/stores can find it.
    /// Unsupported statements yield `CompileError.EmitError`.
    fn emitStmt(self: *Codegen, builder: var, stmt: *ast.Stmt) anyerror!void {
        std.debug.warn("cgen: emitting stmt {}\n", @as(ast.StmtType, stmt.*));

        switch (stmt.*) {
            .Expr => |expr| _ = try self.emitExpr(builder, expr),

            .Return => |ret| {
                var ret_expr = try self.emitExpr(builder, ret.value);
                _ = llvm.LLVMBuildRet(builder, ret_expr);
            },

            .If => |ifstmt| {
                var cond = try self.emitExpr(builder, ifstmt.condition);
                var zero = mkLLVMBool(false);
                var icmp = llvm.LLVMBuildICmp(builder, llvm.LLVMIntPredicate.LLVMIntNE, cond, zero, "ifcond");

                var insert = llvm.LLVMGetInsertBlock(builder);
                var function = llvm.LLVMGetBasicBlockParent(insert);

                var then_bb = llvm.LLVMAppendBasicBlock(function, "then");
                var else_bb = llvm.LLVMAppendBasicBlock(function, "else");
                var merge_bb = llvm.LLVMAppendBasicBlock(function, "ifcont");

                var condbr = llvm.LLVMBuildCondBr(builder, icmp, then_bb, else_bb);

                llvm.LLVMPositionBuilderAtEnd(builder, then_bb);

                // roughly translating to kaleidoscope's
                // 'Value *ThenV = Then->codegen();'
                var then_rets = false;
                var else_rets = false;

                self.ctx.setScope(self.ctx.current_scope.?.nextChild());

                var then_branch = ifstmt.then_branch.toSlice();
                for (then_branch) |_, idx| {
                    // keep emitting until branch has ret
                    var then_stmt = &then_branch[idx];

                    if (!then_rets)
                        try self.emitStmt(builder, then_stmt);

                    // TODO break? lol
                    switch (then_stmt.*) {
                        .Return => then_rets = true,
                        else => {},
                    }
                }

                self.ctx.dumpScope();

                // only build the br instruction if we didn't ret, because
                // there can't be any instruction after a terminator
                // same applies for the else branch
                if (!then_rets)
                    _ = llvm.LLVMBuildBr(builder, merge_bb);

                then_bb = llvm.LLVMGetInsertBlock(builder);

                llvm.LLVMPositionBuilderAtEnd(builder, else_bb);

                // roughly translating to kaleidoscope's
                // 'Else *ElseV = Else->codegen();'
                if (ifstmt.else_branch) |else_block| {
                    self.ctx.setScope(self.ctx.current_scope.?.nextChild());

                    var else_slice = else_block.toSlice();
                    for (else_slice) |_, idx| {
                        // keep emitting until branch has ret
                        var else_stmt = &else_slice[idx];

                        if (!else_rets)
                            try self.emitStmt(builder, else_stmt);

                        switch (else_stmt.*) {
                            .Return => else_rets = true,
                            else => {},
                        }
                    }

                    self.ctx.dumpScope();
                }

                if (!else_rets)
                    _ = llvm.LLVMBuildBr(builder, merge_bb);

                else_bb = llvm.LLVMGetInsertBlock(builder);

                llvm.LLVMPositionBuilderAtEnd(builder, merge_bb);

                // if both of the branches return, we should put
                // the merge branch as unreachable.
                if (then_rets and else_rets)
                    _ = llvm.LLVMBuildUnreachable(builder);
            },

            .VarDecl => |vardecl| {
                // we already inferred the type of the variable in the
                // analyze pass and the current scope contains the variable's
                // type(hopefully), so we resolve it
                const name = vardecl.name.lexeme;
                var var_metadata = self.ctx.current_scope.?.meta_map.get(name).?.value;

                // Only needed while LLVMBuildAlloca runs; LLVM keeps its own
                // copy of the value name.
                var name_cstr = try std.cstr.addNullByte(self.allocator, name);
                defer self.allocator.free(name_cstr);

                // Result is unused, but the lookup asserts that the current
                // function name resolves to a function symbol.
                var fn_symbol = self.getFnSymbol(self.current_function_name.?);

                var variable = llvm.LLVMBuildAlloca(
                    builder,
                    try self.typeToLLVM(var_metadata.typ),
                    name_cstr.ptr,
                );

                stmt.*.VarDecl.llvm_alloca = variable;
                var_metadata.*.llvm_alloca = variable;

                std.debug.warn("!! DECL VAR {} => {}\n", @ptrToInt(var_metadata), variable);

                var llvm_expr = try self.emitExpr(builder, vardecl.value);
                _ = llvm.LLVMBuildStore(builder, llvm_expr, variable);
            },

            else => {
                std.debug.warn("Got unexpected stmt {}\n", stmt.*);
                return CompileError.EmitError;
            },
        }
    }

    /// Look up `name` in the symbol table and return its function symbol.
    /// The name must exist (`.?`) and must refer to a function (asserted).
    fn getFnSymbol(self: *@This(), name: []const u8) *comp.FunctionSymbol {
        var fn_sym_search = self.ctx.symbol_table.get(name).?.value;
        std.debug.assert(@as(comp.SymbolType, fn_sym_search.*) == .Function);
        return &fn_sym_search.Function;
    }

    /// Emit LLVM ir for the given node.
    ///
    /// Functions are added to `mod` and registered in `llvm_table`;
    /// constant declarations become globals with an initializer; enums emit
    /// nothing. Other node kinds are reported on stderr and skipped.
    fn genNode(
        self: *Codegen,
        mod: llvm.LLVMModuleRef,
        node: *ast.Node,
    ) !void {
        switch (node.*) {
            .Root => @panic("Should not have gotten Root"),

            .FnDecl => |decl| {
                const name = decl.func_name.lexeme;
                self.current_function_name = name;
                std.debug.warn("cgen: genning function '{}'\n", name);

                var fn_sym = self.getFnSymbol(name);

                // The C-string copy is only handed to LLVMAddFunction; the
                // table below is keyed by `name` itself, so the copy can be
                // released when this branch finishes.
                const name_cstr = try std.cstr.addNullByte(self.allocator, name);
                defer self.allocator.free(name_cstr);

                // Scratch list for the parameter type array.
                var param_types = llvm.LLVMTypeList.init(self.allocator);
                defer param_types.deinit();

                for (decl.params.toSlice()) |param| {
                    try param_types.append(try self.typeToLLVM(fn_sym.parameters.get(
                        param.name.lexeme,
                    ).?.value.typ));
                }

                var llvm_ret_type = llvm.LLVMFunctionType(
                    try self.typeToLLVM(fn_sym.return_type),
                    param_types.toSlice().ptr,
                    @intCast(c_uint, param_types.len),
                    0,
                );

                var func = llvm.LLVMAddFunction(mod, name_cstr.ptr, llvm_ret_type);
                _ = try self.llvm_table.put(name, func);

                // Scratch buffer for the entry block label.
                var buf = try self.allocator.alloc(u8, 512);
                defer self.allocator.free(buf);

                var entry_lbl = try std.fmt.bufPrint(buf, "fn_{}_entry", name);

                var entry_lbl_cstr = try std.cstr.addNullByte(self.allocator, entry_lbl);
                defer self.allocator.free(entry_lbl_cstr);

                var entry = llvm.LLVMAppendBasicBlock(func, entry_lbl_cstr.ptr);

                // NOTE(review): the builder is never disposed; consider
                // LLVMDisposeBuilder if the llvm bindings expose it.
                var builder = llvm.LLVMCreateBuilder();
                llvm.LLVMPositionBuilderAtEnd(builder, entry);

                // to have the ability to mutate parameters, we must allocate them on
                // the stack
                var params_slice = decl.params.toSlice();
                for (params_slice) |param_node, idx| {
                    // NOTE(review): the store to `param.llvm_alloca` below
                    // only sticks if the table value is a pointer to the
                    // parameter symbol -- confirm against comp_ctx.
                    var param = fn_sym.parameters.get(param_node.name.lexeme).?.value;

                    const param_name_cstr = try std.cstr.addNullByte(self.allocator, param_node.name.lexeme);
                    defer self.allocator.free(param_name_cstr);

                    var alloca = llvm.LLVMBuildAlloca(builder, try self.typeToLLVM(param.typ), param_name_cstr.ptr);

                    std.debug.warn("SET PARAM LLVM ALLOCA {} to {}\n", param_node.name.lexeme, alloca);
                    param.llvm_alloca = alloca;

                    _ = llvm.LLVMBuildStore(
                        builder,
                        llvm.LLVMGetParam(func, @intCast(c_uint, idx)),
                        alloca,
                    );
                }

                self.ctx.setScope(fn_sym.scope);

                // TODO check if stmt is return and if we already
                // returned before
                var body_slice = decl.body.toSlice();
                for (body_slice) |_, idx| {
                    try self.emitStmt(builder, &body_slice[idx]);
                }

                self.ctx.dumpScope();
                std.debug.warn("cgen: generated function '{}'\n", name);
            },

            // NOTE: enums don't have specific llvm ir code generated for them
            .Enum => {},

            .ConstDecl => |constdecls| {
                for (constdecls.toSlice()) |constdecl| {
                    const name = constdecl.name.lexeme;

                    var const_type = self.ctx.symbol_table.get(name).?.value;
                    var const_llvm_type = try self.typeToLLVM(const_type.Const);

                    const const_name = try std.cstr.addNullByte(self.allocator, name);
                    defer self.allocator.free(const_name);

                    var global = llvm.LLVMAddGlobal(mod, const_llvm_type, const_name.ptr);

                    // TODO maybe put builder at main function so we can still
                    // call other functions inside consts?
                    // NOTE(review): this builder is never positioned nor
                    // disposed; only constant expressions are safe here.
                    var builder = llvm.LLVMCreateBuilder();
                    var expr_llvm_val = try self.emitExpr(builder, constdecl.expr);

                    llvm.LLVMSetInitializer(global, expr_llvm_val);
                }
            },

            else => {
                std.debug.warn("TODO handle node type {}\n", @tagName(node.*));
                return;
            },
        }
    }

    /// Generate code for the whole program rooted at `root`.
    ///
    /// Creates an LLVM module named "awoo", emits every top-level node into
    /// it, prints the IR to "output.ll", verifies the module (aborting the
    /// process on failure) and writes an object file to "outpath.o".
    /// Backend failures are reported on stderr and yield
    /// `CompileError.BackendError`.
    pub fn gen(self: *Codegen, root: *ast.Node) !void {
        std.debug.warn("cgen: start gen\n");
        _ = llvm.LLVMInitializeNativeTarget();

        var mod = llvm.LLVMModuleCreateWithName("awoo").?;
        // NOTE(review): the execution engine created further down is given
        // this module; confirm that disposing it here is still correct.
        defer llvm.LLVMDisposeModule(mod);

        var root_slice = root.Root.toSlice();
        for (root_slice) |_, idx| {
            try self.genNode(mod, &root_slice[idx]);
        }

        // Shared out-parameter for LLVM error messages.
        var err: ?[*:0]u8 = null;
        defer llvm.LLVMDisposeMessage(err);

        if (llvm.LLVMPrintModuleToFile(mod, "output.ll", &err) != 0) {
            std.debug.warn("error printing module to file: {}\n", sliceify(err));
            return CompileError.BackendError;
        }

        //if (llvm.LLVMWriteBitcodeToFile(mod, "awoo.bc") != 0) {
        //    std.debug.warn("error writing bitcode to file: {}\n", sliceify(err));
        //    return CompileError.BackendError;
        //}

        std.debug.warn("cgen: verify llvm module\n");
        _ = llvm.LLVMVerifyModule(
            mod,
            llvm.LLVMVerifierFailureAction.LLVMAbortProcessAction,
            &err,
        );

        llvm.LLVMInitializeAllTargetInfos();
        llvm.LLVMInitializeAllTargets();
        llvm.LLVMInitializeAllTargetMCs();
        llvm.LLVMInitializeAllAsmParsers();
        llvm.LLVMInitializeAllAsmPrinters();

        var engine: llvm.LLVMExecutionEngineRef = undefined;
        if (llvm.LLVMCreateExecutionEngineForModule(&engine, mod, &err) != 0) {
            std.debug.warn("failed to create execution engine: {}\n", sliceify(err));
            return CompileError.BackendError;
        }

        var machine = llvm.LLVMGetExecutionEngineTargetMachine(engine);
        // NOTE(review): this machine was obtained from the engine rather
        // than created here; confirm we are really the ones to dispose it.
        defer llvm.LLVMDisposeTargetMachine(machine);

        var target = llvm.LLVMGetTargetMachineTarget(machine);

        // NOTE(review): `target_data` is never disposed.
        var target_data = llvm.LLVMCreateTargetDataLayout(machine);

        // The string forms below are LLVM-allocated copies that the caller
        // must hand back through LLVMDisposeMessage.
        var data_layout = llvm.LLVMCopyStringRepOfTargetData(target_data);
        defer llvm.LLVMDisposeMessage(data_layout);
        llvm.LLVMSetDataLayout(mod, data_layout);

        var outpath_cstr = try std.cstr.addNullByte(self.allocator, "outpath.o");
        defer self.allocator.free(outpath_cstr);

        //var asmpath_cstr = try std.cstr.addNullByte(self.allocator, "output.S");

        var desc = llvm.LLVMGetTargetDescription(target);

        var features = llvm.LLVMGetTargetMachineFeatureString(machine);
        defer llvm.LLVMDisposeMessage(features);

        var triple = llvm.LLVMGetTargetMachineTriple(machine);
        defer llvm.LLVMDisposeMessage(triple);

        std.debug.warn("target: {}\n", sliceify(desc));
        std.debug.warn("triple: {}\n", sliceify(triple));
        std.debug.warn("features: {}\n", sliceify(features));

        //if (llvm.LLVMTargetMachineEmitToFile(
        //    machine,
        //    mod,
        //    asmpath_cstr.ptr,
        //    llvm.LLVMCodeGenFileType.LLVMAssemblyFile,
        //    &err,
        //) != 0) {
        //    std.debug.warn("failed to emit to assembly file: {}\n", sliceify(err));
        //    return CompileError.BackendError;
        //}

        if (llvm.LLVMTargetMachineEmitToFile(
            machine,
            mod,
            outpath_cstr.ptr,
            llvm.LLVMCodeGenFileType.LLVMObjectFile,
            &err,
        ) != 0) {
            std.debug.warn("failed to emit to file: {}\n", sliceify(err));
            return CompileError.BackendError;
        }
    }
};