From d9b08f569f29907e9c635fb3c0b4c76c2d90a92e Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sat, 26 Jan 2013 21:51:31 -0800 Subject: [PATCH] Adding a bunch of instructions. --- TODO.md | 28 +- .../xenia/cpu/codegen/function_generator.h | 7 + include/xenia/cpu/ppc/instr.h | 11 + private/runtest.sh | 5 + src/cpu/codegen/emit_alu.cc | 433 +++++++++++++----- src/cpu/codegen/emit_control.cc | 151 +++++- src/cpu/codegen/emit_memory.cc | 10 +- src/cpu/codegen/function_generator.cc | 123 ++++- src/cpu/codegen/module_generator.cc | 1 + src/cpu/exec_module.cc | 15 + 10 files changed, 616 insertions(+), 168 deletions(-) diff --git a/TODO.md b/TODO.md index 59e896005..5e0feec96 100644 --- a/TODO.md +++ b/TODO.md @@ -1,33 +1,19 @@ ## Instructions -``` -need cr0: -andix -orx -mullwx -divwux +### XER CA bit (carry) -addx -addix -addic -addzex -subfx -subfex -subficx +Not sure the way I'm doing this is right. addic/subficx/etc set it to the value +of the overflow bit from the LLVM *_with_overflow intrinsic. + +``` +MISDECODING: andix rlwinmx rlwimix rldiclx -extsbx + slwx srawix - -# can be no-op, or @llvm.prefetch -dcbt -dcbtst - -twi # @llvm.debugtrap ? - ``` Overflow bits can be set via the intrinsics: diff --git a/include/xenia/cpu/codegen/function_generator.h b/include/xenia/cpu/codegen/function_generator.h index d81d598e4..8889c0d99 100644 --- a/include/xenia/cpu/codegen/function_generator.h +++ b/include/xenia/cpu/codegen/function_generator.h @@ -66,13 +66,20 @@ public: llvm::Value* xer_value(); void update_xer_value(llvm::Value* value); + void update_xer_with_overflow(llvm::Value* value); + void update_xer_with_carry(llvm::Value* value); + void update_xer_with_overflow_and_carry(llvm::Value* value); + llvm::Value* lr_value(); void update_lr_value(llvm::Value* value); + llvm::Value* ctr_value(); void update_ctr_value(llvm::Value* value); llvm::Value* cr_value(uint32_t n); void update_cr_value(uint32_t n, llvm::Value* value); + void update_cr_with_cond(uint32_t n, llvm::Value* lhs, llvm::Value* rhs, + bool is_signed); llvm::Value* gpr_value(uint32_t n); void update_gpr_value(uint32_t n, llvm::Value* value); diff --git a/include/xenia/cpu/ppc/instr.h b/include/xenia/cpu/ppc/instr.h index fab692e2c..c4b48d0d4 100644 --- a/include/xenia/cpu/ppc/instr.h +++ b/include/xenia/cpu/ppc/instr.h @@ -166,6 +166,17 @@ typedef struct { uint32_t : 6; } M; // kXEPPCInstrFormatMD + struct { + uint32_t Rc : 1; + uint32_t SH5 : 1; + uint32_t : 3; + uint32_t MB5 : 1; + uint32_t MB : 5; + uint32_t SH : 5; + uint32_t RA : 5; + uint32_t RS : 5; + uint32_t : 6; + } MD; // kXEPPCInstrFormatMDS // kXEPPCInstrFormatVA // kXEPPCInstrFormatVX diff --git a/private/runtest.sh b/private/runtest.sh index 95b9dbaea..b05bbb88e 100755 --- a/private/runtest.sh +++ b/private/runtest.sh @@ -1,6 +1,11 @@ python xenia-build.py xethunk python xenia-build.py build +if [ "$?" -ne 0 ]; then + echo "Build failed!" + exit $? +fi + ./build/xenia/release/xenia-run \ private/$1 \ --optimize_ir_modules=true \ diff --git a/src/cpu/codegen/emit_alu.cc b/src/cpu/codegen/emit_alu.cc index b17f43f01..eb26e9237 100644 --- a/src/cpu/codegen/emit_alu.cc +++ b/src/cpu/codegen/emit_alu.cc @@ -29,21 +29,35 @@ namespace codegen { XEEMITTER(addx, 0x7C000214, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { // RD <- (RA) + (RB) - if (i.XO.Rc) { - // With cr0 update. - XEINSTRNOTIMPLEMENTED(); - return 1; - } if (i.XO.OE) { // With XER update. - XEINSTRNOTIMPLEMENTED(); - return 1; + // This is a different codepath as we need to use llvm.sadd.with.overflow. + + Function* sadd_with_overflow = Intrinsic::getDeclaration( + g.gen_module(), Intrinsic::sadd_with_overflow, b.getInt64Ty()); + Value* v = b.CreateCall2(sadd_with_overflow, + g.gpr_value(i.XO.RA), g.gpr_value(i.XO.RB)); + g.update_gpr_value(i.XO.RT, b.CreateExtractValue(v, 0)); + g.update_xer_with_overflow(b.CreateExtractValue(v, 1)); + + if (i.XO.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } + + return 0; + } else { + // No OE bit setting. + Value* v = b.CreateAdd(g.gpr_value(i.XO.RA), g.gpr_value(i.XO.RB)); + g.update_gpr_value(i.XO.RT, v); + + if (i.XO.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } + + return 0; } - - Value* v = b.CreateAdd(g.gpr_value(i.XO.RA), g.gpr_value(i.XO.RB)); - g.update_gpr_value(i.XO.RT, v); - - return 0; } XEEMITTER(addcx, 0X7C000014, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { @@ -57,8 +71,10 @@ XEEMITTER(addex, 0x7C000114, XO )(FunctionGenerator& g, IRBuilder<>& b, I } XEEMITTER(addi, 0x38000000, D )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { - // if RA = 0 then RT <- EXTS(SI) - // else RT <- (RA) + EXTS(SI) + // if RA = 0 then + // RT <- EXTS(SI) + // else + // RT <- (RA) + EXTS(SI) Value* v = b.getInt64(XEEXTS16(i.D.DS)); if (i.D.RA) { @@ -70,8 +86,16 @@ XEEMITTER(addi, 0x38000000, D )(FunctionGenerator& g, IRBuilder<>& b, I } XEEMITTER(addic, 0x30000000, D )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // RT <- (RA) + EXTS(SI) + + Function* sadd_with_overflow = Intrinsic::getDeclaration( + g.gen_module(), Intrinsic::sadd_with_overflow, b.getInt64Ty()); + Value* v = b.CreateCall2(sadd_with_overflow, + g.gpr_value(i.D.RA), b.getInt64(XEEXTS16(i.D.DS))); + g.update_gpr_value(i.D.RT, b.CreateExtractValue(v, 0)); + g.update_xer_with_carry(b.CreateExtractValue(v, 1)); + + return 0; } XEEMITTER(addicx, 0x34000000, D )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { @@ -80,8 +104,10 @@ XEEMITTER(addicx, 0x34000000, D )(FunctionGenerator& g, IRBuilder<>& b, I } XEEMITTER(addis, 0x3C000000, D )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { - // if RA = 0 then RT <- EXTS(SI) || i16.0 - // else RT <- (RA) + EXTS(SI) || i16.0 + // if RA = 0 then + // RT <- EXTS(SI) || i16.0 + // else + // RT <- (RA) + EXTS(SI) || i16.0 Value* v = b.getInt64(XEEXTS16(i.D.DS) << 16); if (i.D.RA) { @@ -98,8 +124,29 @@ XEEMITTER(addmex, 0x7C0001D4, XO )(FunctionGenerator& g, IRBuilder<>& b, I } XEEMITTER(addzex, 0x7C000194, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // RT <- (RA) + CA + + Function* sadd_with_overflow = Intrinsic::getDeclaration( + g.gen_module(), Intrinsic::sadd_with_overflow, b.getInt64Ty()); + Value* ca = b.CreateAnd(b.CreateLShr(g.xer_value(), 29), 0x1); + Value* v = b.CreateCall2(sadd_with_overflow, + g.gpr_value(i.XO.RA), ca); + Value* add_value = b.CreateExtractValue(v, 0); + g.update_gpr_value(i.XO.RT, add_value); + if (i.XO.OE) { + // With XER[SO] update too. + g.update_xer_with_overflow_and_carry(b.CreateExtractValue(v, 1)); + } else { + // Just CA update. + g.update_xer_with_carry(b.CreateExtractValue(v, 1)); + } + + if (i.XO.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, add_value, b.getInt64(0), true); + } + + return 0; } XEEMITTER(divdx, 0x7C0003D2, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { @@ -120,24 +167,50 @@ XEEMITTER(divwx, 0x7C0003D6, XO )(FunctionGenerator& g, IRBuilder<>& b, I XEEMITTER(divwux, 0x7C000396, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { // dividend[0:31] <- (RA)[32:63] // divisor[0:31] <- (RB)[32:63] + // if divisor = 0 then + // if OE = 1 then + // XER[OV] <- 1 + // return // RT[32:63] <- dividend ÷ divisor // RT[0:31] <- undefined - if (i.XO.Rc) { - // With cr0 update. - XEINSTRNOTIMPLEMENTED(); - return 1; - } - if (i.XO.OE) { - // With XER update. - XEINSTRNOTIMPLEMENTED(); - return 1; - } - Value* dividend = b.CreateTrunc(g.gpr_value(i.XO.RA), b.getInt32Ty()); Value* divisor = b.CreateTrunc(g.gpr_value(i.XO.RB), b.getInt32Ty()); + + // Note that we skip the zero handling block and just avoid the divide if + // we are OE=0. + BasicBlock* zero_bb = i.XO.OE ? + BasicBlock::Create(*g.context(), "", g.gen_fn()) : NULL; + BasicBlock* nonzero_bb = BasicBlock::Create(*g.context(), "", g.gen_fn()); + BasicBlock* after_bb = BasicBlock::Create(*g.context(), "", g.gen_fn()); + b.CreateCondBr(b.CreateICmpEQ(divisor, b.getInt32(0)), + i.XO.OE ? zero_bb : after_bb, nonzero_bb); + + if (zero_bb) { + // Divisor was zero - do XER update. + b.SetInsertPoint(zero_bb); + g.update_xer_with_overflow(b.getInt1(1)); + b.CreateBr(after_bb); + } + + // Divide. + b.SetInsertPoint(nonzero_bb); Value* v = b.CreateUDiv(dividend, divisor); v = b.CreateZExt(v, b.getInt64Ty()); + g.update_gpr_value(i.XO.RT, v); + + // If we are OE=1 we need to clear the overflow bit. + g.update_xer_with_overflow(b.getInt1(0)); + + if (i.XO.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } + + b.CreateBr(after_bb); + + // Resume. + b.SetInsertPoint(after_bb); return 0; } @@ -184,11 +257,6 @@ XEEMITTER(mulli, 0x1C000000, D )(FunctionGenerator& g, IRBuilder<>& b, I XEEMITTER(mullwx, 0x7C0001D6, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { // RT <- (RA)[32:63] × (RB)[32:63] - if (i.XO.Rc) { - // With cr0 update. - XEINSTRNOTIMPLEMENTED(); - return 1; - } if (i.XO.OE) { // With XER update. XEINSTRNOTIMPLEMENTED(); @@ -199,6 +267,11 @@ XEEMITTER(mullwx, 0x7C0001D6, XO )(FunctionGenerator& g, IRBuilder<>& b, I b.CreateSExt(g.gpr_value(i.XO.RB), b.getInt64Ty())); g.update_gpr_value(i.XO.RT, v); + if (i.XO.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } + return 0; } @@ -208,8 +281,37 @@ XEEMITTER(negx, 0x7C0000D0, XO )(FunctionGenerator& g, IRBuilder<>& b, I } XEEMITTER(subfx, 0x7C000050, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // RT <- ¬(RA) + (RB) + 1 + + if (i.XO.OE) { + // With XER update. + // This is a different codepath as we need to use llvm.ssub.with.overflow. + + Function* ssub_with_overflow = Intrinsic::getDeclaration( + g.gen_module(), Intrinsic::ssub_with_overflow, b.getInt64Ty()); + Value* v = b.CreateCall2(ssub_with_overflow, + g.gpr_value(i.XO.RB), g.gpr_value(i.XO.RA)); + g.update_gpr_value(i.XO.RT, b.CreateExtractValue(v, 0)); + g.update_xer_with_overflow(b.CreateExtractValue(v, 1)); + + if (i.XO.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } + + return 0; + } else { + // No OE bit setting. + Value* v = b.CreateSub(g.gpr_value(i.XO.RB), g.gpr_value(i.XO.RA)); + g.update_gpr_value(i.XO.RT, v); + + if (i.XO.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } + + return 0; + } } XEEMITTER(subfcx, 0x7C000010, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { @@ -218,13 +320,45 @@ XEEMITTER(subfcx, 0x7C000010, XO )(FunctionGenerator& g, IRBuilder<>& b, I } XEEMITTER(subficx, 0x20000000, D )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // RT <- ¬(RA) + EXTS(SI) + 1 + + Function* ssub_with_overflow = Intrinsic::getDeclaration( + g.gen_module(), Intrinsic::ssub_with_overflow, b.getInt64Ty()); + Value* v = b.CreateCall2(ssub_with_overflow, + b.getInt64(XEEXTS16(i.D.DS)), g.gpr_value(i.D.RA)); + g.update_gpr_value(i.D.RT, b.CreateExtractValue(v, 0)); + g.update_xer_with_carry(b.CreateExtractValue(v, 1)); + + return 0; } XEEMITTER(subfex, 0x7C000110, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // RT <- ¬(RA) + (RB) + CA + + // TODO(benvanik): possible that the add of rb+ca needs to also check for + // overflow! + + Value* ca = b.CreateAnd(b.CreateLShr(g.xer_value(), 29), 0x1); + Function* uadd_with_overflow = Intrinsic::getDeclaration( + g.gen_module(), Intrinsic::uadd_with_overflow, b.getInt64Ty()); + Value* v = b.CreateCall2(uadd_with_overflow, + b.CreateNot(g.gpr_value(i.XO.RA)), + b.CreateAdd(g.gpr_value(i.XO.RB), ca)); + g.update_gpr_value(i.XO.RT, b.CreateExtractValue(v, 0)); + + if (i.XO.OE) { + // With XER update. + g.update_xer_with_overflow_and_carry(b.CreateExtractValue(v, 1)); + } else { + g.update_xer_with_carry(b.CreateExtractValue(v, 1)); + } + + if (i.XO.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } + + return 0; } XEEMITTER(subfmex, 0x7C0001D0, XO )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { @@ -240,29 +374,6 @@ XEEMITTER(subfzex, 0x7C000190, XO )(FunctionGenerator& g, IRBuilder<>& b, I // Integer compare (A-4) -void XeEmitCompareCore(FunctionGenerator& g, IRBuilder<>& b, - Value* lhs, Value* rhs, uint32_t BF, bool is_signed) { - // bit0 = RA < RB - // bit1 = RA > RB - // bit2 = RA = RB - // bit3 = XER[SO] - // Bits are reversed: - // 0123 - // 3210 - - Value* is_lt = is_signed ? - b.CreateICmpSLT(lhs, rhs) : b.CreateICmpULT(lhs, rhs); - Value* is_gt = is_signed ? - b.CreateICmpSGT(lhs, rhs) : b.CreateICmpUGT(lhs, rhs); - Value* cp = b.CreateSelect(is_gt, b.getInt8(1 << 2), b.getInt8(1 << 1)); - Value* c = b.CreateSelect(is_lt, b.getInt8(1 << 3), cp); - - // TODO(benvanik): set bit 4 to XER[SO] - - // Insert the 4 bits into their location in the CR. - g.update_cr_value(BF, c); -} - XEEMITTER(cmp, 0x7C000000, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { // if L = 0 then // a <- EXTS((RA)[32:63]) @@ -291,7 +402,7 @@ XEEMITTER(cmp, 0x7C000000, X )(FunctionGenerator& g, IRBuilder<>& b, I rhs = b.CreateSExt(rhs, b.getInt64Ty()); } - XeEmitCompareCore(g, b, lhs, rhs, BF, true); + g.update_cr_with_cond(BF, lhs, rhs, true); return 0; } @@ -320,7 +431,7 @@ XEEMITTER(cmpi, 0x2C000000, D )(FunctionGenerator& g, IRBuilder<>& b, I } Value* rhs = b.getInt64(XEEXTS16(i.D.DS)); - XeEmitCompareCore(g, b, lhs, rhs, BF, true); + g.update_cr_with_cond(BF, lhs, rhs, true); return 0; } @@ -353,7 +464,7 @@ XEEMITTER(cmpl, 0x7C000040, X )(FunctionGenerator& g, IRBuilder<>& b, I rhs = b.CreateZExt(rhs, b.getInt64Ty()); } - XeEmitCompareCore(g, b, lhs, rhs, BF, false); + g.update_cr_with_cond(BF, lhs, rhs, false); return 0; } @@ -382,7 +493,7 @@ XEEMITTER(cmpli, 0x28000000, D )(FunctionGenerator& g, IRBuilder<>& b, I } Value* rhs = b.getInt64(i.D.DS); - XeEmitCompareCore(g, b, lhs, rhs, BF, false); + g.update_cr_with_cond(BF, lhs, rhs, false); return 0; } @@ -393,31 +504,29 @@ XEEMITTER(cmpli, 0x28000000, D )(FunctionGenerator& g, IRBuilder<>& b, I XEEMITTER(andx, 0x7C000038, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { // RA <- (RS) & (RB) - if (i.X.Rc) { - // With cr0 update. - XEINSTRNOTIMPLEMENTED(); - return 1; - } - Value* v = b.CreateAnd(g.gpr_value(i.X.RT), g.gpr_value(i.X.RB)); g.update_gpr_value(i.X.RA, v); + if (i.X.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } + return 0; } XEEMITTER(andcx, 0x7C000078, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { // RA <- (RS) & ¬(RB) - if (i.X.Rc) { - // With cr0 update. - XEINSTRNOTIMPLEMENTED(); - return 1; - } - Value* v = b.CreateXor(g.gpr_value(i.X.RB), -1); v = b.CreateAnd(g.gpr_value(i.X.RT), v); g.update_gpr_value(i.X.RA, v); + if (i.X.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } + return 0; } @@ -427,8 +536,8 @@ XEEMITTER(andix, 0x70000000, D )(FunctionGenerator& g, IRBuilder<>& b, I Value* v = b.CreateAnd(g.gpr_value(i.D.RT), (uint64_t)i.D.DS); g.update_gpr_value(i.D.RA, v); - // TODO(benvanik): update cr0 - XEINSTRNOTIMPLEMENTED(); + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); return 1; } @@ -439,8 +548,8 @@ XEEMITTER(andisx, 0x74000000, D )(FunctionGenerator& g, IRBuilder<>& b, I Value* v = b.CreateAnd(g.gpr_value(i.D.RT), ((uint64_t)i.D.DS) << 16); g.update_gpr_value(i.D.RA, v); - // TODO(benvanik): update cr0 - XEINSTRNOTIMPLEMENTED(); + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); return 1; } @@ -457,11 +566,6 @@ XEEMITTER(cntlzwx, 0x7C000034, X )(FunctionGenerator& g, IRBuilder<>& b, I // n <- n + 1 // RA <- n - 32 - if (i.X.Rc) { - XEINSTRNOTIMPLEMENTED(); - return 1; - } - Value* v = g.gpr_value(i.X.RT); v = b.CreateTrunc(v, b.getInt32Ty()); @@ -474,6 +578,11 @@ XEEMITTER(cntlzwx, 0x7C000034, X )(FunctionGenerator& g, IRBuilder<>& b, I count = b.CreateZExt(count, b.getInt64Ty()); g.update_gpr_value(i.X.RA, count); + if (i.X.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, count, b.getInt64(0), true); + } + return 0; } @@ -487,16 +596,16 @@ XEEMITTER(extsbx, 0x7C000774, X )(FunctionGenerator& g, IRBuilder<>& b, I // RA[56:63] <- (RS)[56:63] // RA[0:55] <- i56.s - if (i.X.Rc) { - XEINSTRNOTIMPLEMENTED(); - return 1; - } - Value* v = g.gpr_value(i.X.RT); v = b.CreateTrunc(v, b.getInt8Ty()); v = b.CreateSExt(v, b.getInt64Ty()); g.update_gpr_value(i.X.RA, v); + if (i.X.Rc) { + // Update cr0. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } + return 0; } @@ -518,31 +627,29 @@ XEEMITTER(nandx, 0x7C0003B8, X )(FunctionGenerator& g, IRBuilder<>& b, I XEEMITTER(norx, 0x7C0000F8, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { // RA <- ¬((RS) | (RB)) - if (i.X.Rc) { - // With cr0 update. - XEINSTRNOTIMPLEMENTED(); - return 1; - } - Value* v = b.CreateOr(g.gpr_value(i.X.RT), g.gpr_value(i.X.RB)); v = b.CreateXor(v, -1); g.update_gpr_value(i.X.RA, v); + if (i.X.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } + return 0; } XEEMITTER(orx, 0x7C000378, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { // RA <- (RS) | (RB) - if (i.X.Rc) { - // With cr0 update. - XEINSTRNOTIMPLEMENTED(); - return 1; - } - Value* v = b.CreateOr(g.gpr_value(i.X.RT), g.gpr_value(i.X.RB)); g.update_gpr_value(i.X.RA, v); + if (i.X.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } + return 0; } @@ -572,15 +679,14 @@ XEEMITTER(oris, 0x64000000, D )(FunctionGenerator& g, IRBuilder<>& b, I XEEMITTER(xorx, 0x7C000278, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { // RA <- (RS) XOR (RB) - if (i.X.Rc) { - // With cr0 update. - XEINSTRNOTIMPLEMENTED(); - return 1; - } - Value* v = b.CreateXor(g.gpr_value(i.X.RT), g.gpr_value(i.X.RB)); g.update_gpr_value(i.X.RA, v); + if (i.X.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } + return 0; } @@ -621,6 +727,29 @@ XEEMITTER(rldicx, 0x78000008, MD )(FunctionGenerator& g, IRBuilder<>& b, I } XEEMITTER(rldiclx, 0x78000000, MD )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { + // n <- sh[5] || sh[0:4] + // r <- ROTL64((RS), n) + // b <- mb[5] || mb[0:4] + // m <- MASK(b, 63) + // RA <- r & m + + // uint32_t sh = (i.MD.SH5 << 5) | i.MD.SH; + // uint32_t mb = (i.MD.MB5 << 5) | i.MD.MB; + + // Value* v = g.gpr_value(i.MD.RS); + // if (sh) { + // v = // rotate by sh + // } + // if (mb) { + // v = // mask b mb->63 + // } + // g.update_gpr_value(i.MD.RA, v); + + // if (i.MD.Rc) { + // // With cr0 update. + // g.update_cr_with_cond(0, v, b.getInt64(0), true); + // } + XEINSTRNOTIMPLEMENTED(); return 1; } @@ -636,6 +765,10 @@ XEEMITTER(rldimix, 0x7800000C, MD )(FunctionGenerator& g, IRBuilder<>& b, I } XEEMITTER(rlwimix, 0x50000000, M )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { + // n <- SH + // r <- ROTL32((RS)[32:63], n) + // m <- MASK(MB+32, ME+32) + // RA <- r&m | (RA)&¬m XEINSTRNOTIMPLEMENTED(); return 1; } @@ -646,19 +779,17 @@ XEEMITTER(rlwinmx, 0x54000000, M )(FunctionGenerator& g, IRBuilder<>& b, I // m <- MASK(MB+32, ME+32) // RA <- r & m - if (i.M.Rc) { - // With cr0 update. - XEINSTRNOTIMPLEMENTED(); - return 1; - } - // The compiler will generate a bunch of these for the special case of // SH=0, MB=ME // Which seems to just select a single bit and set cr0 for use with a branch. // We can detect this and do less work. if (!i.M.SH && i.M.MB == i.M.ME) { Value* v = b.CreateAnd(g.gpr_value(i.M.RS), 1 << i.M.MB); - g.update_gpr_value(i.M.RS, v); + g.update_gpr_value(i.M.RA, v); + if (i.M.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } return 0; } @@ -669,6 +800,12 @@ XEEMITTER(rlwinmx, 0x54000000, M )(FunctionGenerator& g, IRBuilder<>& b, I // v = b.CreateOr(b.CreateShl(v, i.M.SH), b.CreateLShr(v, 32 - i.M.SH)); // v = b.CreateAnd(v, XEMASK(i.M.MB + 32, i.M.ME + 32)); + // if (i.M.Rc) { + // // With cr0 update. + // g.update_cr_with_cond(0, v, b.getInt64(0), true); + // } + + printf("rlwinmx %d %d %d\n", i.M.SH, i.M.MB, i.M.ME); XEINSTRNOTIMPLEMENTED(); return 1; } @@ -687,8 +824,24 @@ XEEMITTER(sldx, 0x7C000036, X )(FunctionGenerator& g, IRBuilder<>& b, I } XEEMITTER(slwx, 0x7C000030, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // n <- (RB)[59:63] + // r <- ROTL32((RS)[32:63], n) + // if (RB)[58] = 0 then + // m <- MASK(32, 63-n) + // else + // m <- i64.0 + // RA <- r & m + + Value* v = b.CreateShl(g.gpr_value(i.X.RT), g.gpr_value(i.X.RB)); + v = b.CreateAnd(v, UINT32_MAX); + g.update_gpr_value(i.X.RA, v); + + if (i.X.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } + + return 0; } XEEMITTER(sradx, 0x7C000634, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { @@ -707,8 +860,40 @@ XEEMITTER(srawx, 0x7C000630, X )(FunctionGenerator& g, IRBuilder<>& b, I } XEEMITTER(srawix, 0x7C000670, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // n <- SH + // r <- ROTL32((RS)[32:63], 64-n) + // m <- MASK(n+32, 63) + // s <- (RS)[32] + // RA <- r&m | (i64.s)&¬m + // CA <- s & ((r&¬m)[32:63]≠0) + + Value* rs64 = g.gpr_value(i.X.RT); + Value* rs32 = b.CreateTrunc(rs64, b.getInt32Ty()); + + Value* v; + Value* ca; + if (!i.X.RB) { + // No shift, just a fancy sign extend and CA clearer. + v = rs32; + ca = b.getInt64(0); + } else { + v = b.CreateAShr(rs32, i.X.RB); + + // CA is set to 1 if the low-order 32 bits of (RS) contain a negative number + // and any 1-bits are shifted out of position 63; otherwise CA is set to 0. + ca = b.CreateAnd(b.CreateICmpSLT(v, b.getInt32(0)), + b.CreateICmpSLT(rs64, b.getInt64(0))); + } + v = b.CreateSExt(v, b.getInt64Ty()); + g.update_gpr_value(i.X.RA, v); + g.update_xer_with_carry(ca); + + if (i.X.Rc) { + // With cr0 update. + g.update_cr_with_cond(0, v, b.getInt64(0), true); + } + + return 0; } XEEMITTER(srdx, 0x7C000436, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { diff --git a/src/cpu/codegen/emit_control.cc b/src/cpu/codegen/emit_control.cc index 669273f86..1bfdbd10c 100644 --- a/src/cpu/codegen/emit_control.cc +++ b/src/cpu/codegen/emit_control.cc @@ -436,24 +436,159 @@ XEEMITTER(sc, 0x44000002, SC )(FunctionGenerator& g, IRBuilder<>& b, I // Trap (A-25) +int XeEmitTrap(FunctionGenerator& g, IRBuilder<>& b, InstrData& i, + Value* va, Value* vb, uint32_t TO) { + // if (a < b) & TO[0] then TRAP + // if (a > b) & TO[1] then TRAP + // if (a = b) & TO[2] then TRAP + // if (a u b) & TO[4] then TRAP + // Bits swapped: + // 01234 + // 43210 + + if (!TO) { + return 0; + } + + BasicBlock* after_bb = BasicBlock::Create(*g.context(), "", g.gen_fn(), + g.GetNextBasicBlock()); + BasicBlock* trap_bb = BasicBlock::Create(*g.context(), "", g.gen_fn(), + after_bb); + + // Create the basic blocks (so we can chain). + std::vector bbs; + if (TO & (1 << 4)) { + bbs.push_back(BasicBlock::Create(*g.context(), "", g.gen_fn(), trap_bb)); + } + if (TO & (1 << 3)) { + bbs.push_back(BasicBlock::Create(*g.context(), "", g.gen_fn(), trap_bb)); + } + if (TO & (1 << 2)) { + bbs.push_back(BasicBlock::Create(*g.context(), "", g.gen_fn(), trap_bb)); + } + if (TO & (1 << 1)) { + bbs.push_back(BasicBlock::Create(*g.context(), "", g.gen_fn(), trap_bb)); + } + if (TO & (1 << 0)) { + bbs.push_back(BasicBlock::Create(*g.context(), "", g.gen_fn(), trap_bb)); + } + bbs.push_back(after_bb); + + // Jump to the first bb. + b.CreateBr(bbs.front()); + + // Setup each basic block. + std::vector::iterator it = bbs.begin(); + if (TO & (1 << 4)) { + // a < b + BasicBlock* bb = *(it++); + b.SetInsertPoint(bb); + Value* cmp = b.CreateICmpSLT(va, vb); + b.CreateCondBr(cmp, trap_bb, *it); + } + if (TO & (1 << 3)) { + // a > b + BasicBlock* bb = *(it++); + b.SetInsertPoint(bb); + Value* cmp = b.CreateICmpSGT(va, vb); + b.CreateCondBr(cmp, trap_bb, *it); + } + if (TO & (1 << 2)) { + // a = b + BasicBlock* bb = *(it++); + b.SetInsertPoint(bb); + Value* cmp = b.CreateICmpEQ(va, vb); + b.CreateCondBr(cmp, trap_bb, *it); + } + if (TO & (1 << 1)) { + // a u b + BasicBlock* bb = *(it++); + b.SetInsertPoint(bb); + Value* cmp = b.CreateICmpUGT(va, vb); + b.CreateCondBr(cmp, trap_bb, *it); + } + + // Create trap BB. + b.SetInsertPoint(trap_bb); + g.SpillRegisters(); + // TODO(benvanik): use @llvm.debugtrap? could make debugging better + b.CreateCall2(g.gen_module()->getGlobalVariable("XeTrap"), + g.gen_fn()->arg_begin(), + b.getInt32(i.address)); + b.CreateBr(after_bb); + + // Resume. + b.SetInsertPoint(after_bb); + + return 0; +} + XEEMITTER(td, 0x7C000088, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // a <- (RA) + // b <- (RB) + // if (a < b) & TO[0] then TRAP + // if (a > b) & TO[1] then TRAP + // if (a = b) & TO[2] then TRAP + // if (a u b) & TO[4] then TRAP + return XeEmitTrap(g, b, i, + g.gpr_value(i.X.RA), + g.gpr_value(i.X.RB), + i.X.RT); } XEEMITTER(tdi, 0x08000000, D )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // a <- (RA) + // if (a < EXTS(SI)) & TO[0] then TRAP + // if (a > EXTS(SI)) & TO[1] then TRAP + // if (a = EXTS(SI)) & TO[2] then TRAP + // if (a u EXTS(SI)) & TO[4] then TRAP + return XeEmitTrap(g, b, i, + g.gpr_value(i.D.RA), + b.getInt64(XEEXTS16(i.D.DS)), + i.D.RT); } XEEMITTER(tw, 0x7C000008, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // a <- EXTS((RA)[32:63]) + // b <- EXTS((RB)[32:63]) + // if (a < b) & TO[0] then TRAP + // if (a > b) & TO[1] then TRAP + // if (a = b) & TO[2] then TRAP + // if (a u b) & TO[4] then TRAP + return XeEmitTrap(g, b, i, + b.CreateSExt(b.CreateTrunc(g.gpr_value(i.X.RA), + b.getInt32Ty()), + b.getInt64Ty()), + b.CreateSExt(b.CreateTrunc(g.gpr_value(i.X.RB), + b.getInt32Ty()), + b.getInt64Ty()), + i.X.RT); } XEEMITTER(twi, 0x0C000000, D )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // a <- EXTS((RA)[32:63]) + // if (a < EXTS(SI)) & TO[0] then TRAP + // if (a > EXTS(SI)) & TO[1] then TRAP + // if (a = EXTS(SI)) & TO[2] then TRAP + // if (a u EXTS(SI)) & TO[4] then TRAP + return XeEmitTrap(g, b, i, + b.CreateSExt(b.CreateTrunc(g.gpr_value(i.D.RA), + b.getInt32Ty()), + b.getInt64Ty()), + b.getInt64(XEEXTS16(i.D.DS)), + i.D.RT); } diff --git a/src/cpu/codegen/emit_memory.cc b/src/cpu/codegen/emit_memory.cc index adba42036..d329ae42e 100644 --- a/src/cpu/codegen/emit_memory.cc +++ b/src/cpu/codegen/emit_memory.cc @@ -658,13 +658,15 @@ XEEMITTER(dcbst, 0x7C00006C, X )(FunctionGenerator& g, IRBuilder<>& b, I } XEEMITTER(dcbt, 0x7C00022C, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // No-op for now. + // TODO(benvanik): use @llvm.prefetch + return 0; } XEEMITTER(dcbtst, 0x7C0001EC, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { - XEINSTRNOTIMPLEMENTED(); - return 1; + // No-op for now. + // TODO(benvanik): use @llvm.prefetch + return 0; } XEEMITTER(dcbz, 0x7C0007EC, X )(FunctionGenerator& g, IRBuilder<>& b, InstrData& i) { diff --git a/src/cpu/codegen/function_generator.cc b/src/cpu/codegen/function_generator.cc index 40e286e83..4863c3a26 100644 --- a/src/cpu/codegen/function_generator.cc +++ b/src/cpu/codegen/function_generator.cc @@ -489,7 +489,7 @@ void FunctionGenerator::FillRegisters() { } // Note that we skip zero. - for (uint32_t n = 1; n < XECOUNT(locals_.gpr); n++) { + for (size_t n = 0; n < XECOUNT(locals_.gpr); n++) { if (locals_.gpr[n]) { b.CreateStore(LoadStateValue( offsetof(xe_ppc_state_t, r) + 8 * n, @@ -550,7 +550,7 @@ void FunctionGenerator::SpillRegisters() { } // Note that we skip zero. - for (uint32_t n = 1; n < XECOUNT(locals_.gpr); n++) { + for (uint32_t n = 0; n < XECOUNT(locals_.gpr); n++) { Value* v = locals_.gpr[n]; if (v) { StoreStateValue( @@ -591,6 +591,62 @@ void FunctionGenerator::update_xer_value(Value* value) { b.CreateStore(value, locals_.xer); } +void FunctionGenerator::update_xer_with_overflow(Value* value) { + IRBuilder<>& b = *builder_; + + setup_xer(); + + // Expects a i1 indicating overflow. + // Trust the caller that if it's larger than that it's already truncated. + if (!value->getType()->isIntegerTy(64)) { + value = b.CreateZExt(value, b.getInt64Ty()); + } + + Value* xer = xer_value(); + xer = b.CreateAnd(xer, 0xFFFFFFFFBFFFFFFF); // clear bit 30 + xer = b.CreateOr(xer, b.CreateShl(value, 31)); + xer = b.CreateOr(xer, b.CreateShl(value, 30)); + b.CreateStore(xer, locals_.xer); +} + +void FunctionGenerator::update_xer_with_carry(Value* value) { + IRBuilder<>& b = *builder_; + + setup_xer(); + + // Expects a i1 indicating carry. + // Trust the caller that if it's larger than that it's already truncated. + if (!value->getType()->isIntegerTy(64)) { + value = b.CreateZExt(value, b.getInt64Ty()); + } + + Value* xer = xer_value(); + xer = b.CreateAnd(xer, 0xFFFFFFFFDFFFFFFF); // clear bit 29 + xer = b.CreateOr(xer, b.CreateShl(value, 29)); + b.CreateStore(xer, locals_.xer); +} + +void FunctionGenerator::update_xer_with_overflow_and_carry(Value* value) { + IRBuilder<>& b = *builder_; + + setup_xer(); + + // Expects a i1 indicating overflow. + // Trust the caller that if it's larger than that it's already truncated. + if (!value->getType()->isIntegerTy(64)) { + value = b.CreateZExt(value, b.getInt64Ty()); + } + + // This is effectively an update_xer_with_overflow followed by an + // update_xer_with_carry, but since the logic is largely the same share it. + Value* xer = xer_value(); + xer = b.CreateAnd(xer, 0xFFFFFFFF9FFFFFFF); // clear bit 30 & 29 + xer = b.CreateOr(xer, b.CreateShl(value, 31)); + xer = b.CreateOr(xer, b.CreateShl(value, 30)); + xer = b.CreateOr(xer, b.CreateShl(value, 29)); + b.CreateStore(xer, locals_.xer); +} + void FunctionGenerator::setup_lr() { IRBuilder<>& b = *builder_; @@ -679,10 +735,50 @@ void FunctionGenerator::update_cr_value(uint32_t n, Value* value) { setup_cr(n); - value = b.CreateTrunc(value, b.getInt8Ty()); + // Truncate to 8 bits if needed. + // TODO(benvanik): also widen? + if (!value->getType()->isIntegerTy(8)) { + value = b.CreateTrunc(value, b.getInt8Ty()); + } + b.CreateStore(value, locals_.cr[n]); } +void FunctionGenerator::update_cr_with_cond( + uint32_t n, Value* lhs, Value* rhs, bool is_signed) { + IRBuilder<>& b = *builder_; + + // bit0 = RA < RB + // bit1 = RA > RB + // bit2 = RA = RB + // bit3 = XER[SO] + // Bits are reversed: + // 0123 + // 3210 + + // TODO(benvanik): inline this using the x86 cmp instruction - this prevents + // the need for a lot of the compares and ensures we lower to the best + // possible x86. + // Value* cmp = InlineAsm::get( + // FunctionType::get(), + // "cmp $0, $1 \n" + // "mov from compare registers \n", + // "r,r", ?? + // true); + + Value* is_lt = is_signed ? + b.CreateICmpSLT(lhs, rhs) : b.CreateICmpULT(lhs, rhs); + Value* is_gt = is_signed ? + b.CreateICmpSGT(lhs, rhs) : b.CreateICmpUGT(lhs, rhs); + Value* cp = b.CreateSelect(is_gt, b.getInt8(1 << 2), b.getInt8(1 << 1)); + Value* c = b.CreateSelect(is_lt, b.getInt8(1 << 3), cp); + + // TODO(benvanik): set bit 4 to XER[SO] + + // Insert the 4 bits into their location in the CR. + update_cr_value(n, c); +} + void FunctionGenerator::setup_gpr(uint32_t n) { IRBuilder<>& b = *builder_; @@ -699,10 +795,13 @@ Value* FunctionGenerator::gpr_value(uint32_t n) { IRBuilder<>& b = *builder_; XEASSERT(n >= 0 && n < 32); - if (n == 0) { - // Always force zero to a constant - this should help LLVM. - return b.getInt64(0); - } + + // Actually r0 is writable, even though nobody should ever do that. + // Perhaps we can check usage and enable this if safe? + // if (n == 0) { + // // Always force zero to a constant - this should help LLVM. + // return b.getInt64(0); + // } setup_gpr(n); @@ -713,10 +812,12 @@ void FunctionGenerator::update_gpr_value(uint32_t n, Value* value) { IRBuilder<>& b = *builder_; XEASSERT(n >= 0 && n < 32); - if (n == 0) { - // Ignore writes to zero. - return; - } + + // See above - r0 can be written. + // if (n == 0) { + // // Ignore writes to zero. + // return; + // } setup_gpr(n); diff --git a/src/cpu/codegen/module_generator.cc b/src/cpu/codegen/module_generator.cc index 8f2244c17..6d7d30d80 100644 --- a/src/cpu/codegen/module_generator.cc +++ b/src/cpu/codegen/module_generator.cc @@ -250,6 +250,7 @@ void ModuleGenerator::BuildFunction(CodegenFunction* cgf) { void ModuleGenerator::OptimizeFunction(Module* m, Function* fn) { FunctionPassManager pm(m); + //fn->dump(); if (FLAGS_optimize_ir_functions) { PassManagerBuilder pmb; pmb.OptLevel = 3; diff --git a/src/cpu/exec_module.cc b/src/cpu/exec_module.cc index 7de1fda40..cdc936352 100644 --- a/src/cpu/exec_module.cc +++ b/src/cpu/exec_module.cc @@ -202,6 +202,11 @@ XECLEANUP: return result_code; } +void XeTrap(xe_ppc_state_t* state, uint32_t cia) { + printf("TRAP"); + XEASSERTALWAYS(); +} + void XeIndirectBranch(xe_ppc_state_t* state, uint64_t target, uint64_t br_ia) { printf("INDIRECT BRANCH %.8X -> %.8X\n", (uint32_t)br_ia, (uint32_t)target); XEASSERTALWAYS(); @@ -255,6 +260,16 @@ int ExecModule::InjectGlobals() { int8PtrTy)); // Control methods: + std::vector trapArgs; + trapArgs.push_back(int8PtrTy); + trapArgs.push_back(Type::getInt32Ty(context)); + FunctionType* trapTy = FunctionType::get( + Type::getVoidTy(context), trapArgs, false); + gv = new GlobalVariable(*gen_module_, trapTy, true, + GlobalVariable::ExternalLinkage, 0, + "XeTrap"); + engine_->addGlobalMapping(gv, (void*)&XeTrap); + std::vector indirectBranchArgs; indirectBranchArgs.push_back(int8PtrTy); indirectBranchArgs.push_back(Type::getInt64Ty(context));