From 415ad94dbe0b673aaa7206e51184f2a90a452d5b Mon Sep 17 00:00:00 2001
From: Tillmann Karras
Date: Tue, 18 Aug 2015 15:05:27 +0200
Subject: [PATCH 1/2] Interpreter: fix undefined bits of mffs

Lioncash tested this on hardware.
---
 .../Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
index 7258959467..08f165bcde 100644
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp
@@ -448,11 +448,10 @@ void Interpreter::mcrfs(UGeckoInstruction _inst)
 void Interpreter::mffsx(UGeckoInstruction _inst)
 {
 	// load from FPSCR
-	// This may or may not be accurate - but better than nothing, I guess
 	// TODO(ector): grab all overflow flags etc and set them in FPSCR
 
 	UpdateFPSCR();
-	riPS0(_inst.FD) = (u64)FPSCR.Hex;
+	riPS0(_inst.FD) = 0xFFF8000000000000 | FPSCR.Hex;
 
 	if (_inst.Rc)
 		PanicAlert("mffsx: inst_.Rc");

From 0f2c65668746c739ad116fb8daa3c027ea429f41 Mon Sep 17 00:00:00 2001
From: Tillmann Karras
Date: Tue, 18 Aug 2015 18:12:32 +0200
Subject: [PATCH 2/2] Jit64: implement FPSCR related instructions

---
 Source/Core/Core/PowerPC/Jit64/Jit.h          |   7 +
 .../Core/Core/PowerPC/Jit64/Jit64_Tables.cpp  |  12 +-
 .../PowerPC/Jit64/Jit_SystemRegisters.cpp     | 229 ++++++++++++++++++
 3 files changed, 242 insertions(+), 6 deletions(-)

diff --git a/Source/Core/Core/PowerPC/Jit64/Jit.h b/Source/Core/Core/PowerPC/Jit64/Jit.h
index 333a5b2d8f..06f8a6e524 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit.h
+++ b/Source/Core/Core/PowerPC/Jit64/Jit.h
@@ -148,6 +148,7 @@ public:
 	                 void (Gen::XEmitter::*sseOp)(Gen::X64Reg, const Gen::OpArg&),
 	                 bool packed, bool preserve_inputs, bool roundRHS = false);
 	void FloatCompare(UGeckoInstruction inst, bool upper = false);
+	void UpdateRoundingMode();
 
 	// OPCODES
 	void FallBackToInterpreter(UGeckoInstruction _inst);
@@ -190,6 +191,12 @@ public:
 	void mfcr(UGeckoInstruction inst);
 	void mcrf(UGeckoInstruction inst);
 	void mcrxr(UGeckoInstruction inst);
+	void mcrfs(UGeckoInstruction inst);
+	void mffsx(UGeckoInstruction inst);
+	void mtfsb0x(UGeckoInstruction inst);
+	void mtfsb1x(UGeckoInstruction inst);
+	void mtfsfix(UGeckoInstruction inst);
+	void mtfsfx(UGeckoInstruction inst);
 
 	void boolX(UGeckoInstruction inst);
 	void crXXX(UGeckoInstruction inst);
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp
index b8af4f19cc..4f2046f5c8 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp
@@ -334,12 +334,12 @@ static GekkoOPTemplate table63[] =
 	{40,  &Jit64::fsign},                 // fnegx
 	{12,  &Jit64::frspx},                 // frspx
 
-	{64,  &Jit64::FallBackToInterpreter}, // mcrfs
-	{583, &Jit64::FallBackToInterpreter}, // mffsx
-	{70,  &Jit64::FallBackToInterpreter}, // mtfsb0x
-	{38,  &Jit64::FallBackToInterpreter}, // mtfsb1x
-	{134, &Jit64::FallBackToInterpreter}, // mtfsfix
-	{711, &Jit64::FallBackToInterpreter}, // mtfsfx
+	{64,  &Jit64::mcrfs},                 // mcrfs
+	{583, &Jit64::mffsx},                 // mffsx
+	{70,  &Jit64::mtfsb0x},               // mtfsb0x
+	{38,  &Jit64::mtfsb1x},               // mtfsb1x
+	{134, &Jit64::mtfsfix},               // mtfsfix
+	{711, &Jit64::mtfsfx},                // mtfsfx
 };
 
 static GekkoOPTemplate table63_2[] =
diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
index d781a17a3a..a8835c777e 100644
--- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
+++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp
@@ -556,3 +556,232 @@ void Jit64::crXXX(UGeckoInstruction inst)
 	// Store result bit in CRBD
 	SetCRFieldBit(inst.CRBD >> 2, 3 - (inst.CRBD & 3), RSCRATCH);
 }
+
+// Emits code for mcrfs: copies FPSCR field CRFS into CR field CRFD and clears
+// the exception bits of that FPSCR field (FEX and VX are left untouched).
+void Jit64::mcrfs(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITSystemRegistersOff);
+
+	u8 shift = 4 * (7 - inst.CRFS);
+	u32 mask = 0xFu << shift;
+
+	// Only clear exception bits (but not FEX/VX):
+	// FX, OX, UX, ZX, XX, VXSNAN..VXVC, VXSOFT, VXSQRT and VXCVI.
+	mask &= 0x9FF80700;
+
+	MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
+	if (cpu_info.bBMI1)
+	{
+		// BEXTR control word: field length (4) in bits 8-15, start bit in bits 0-7.
+		MOV(32, R(RSCRATCH2), Imm32((4 << 8) | shift));
+		BEXTR(32, RSCRATCH2, R(RSCRATCH), RSCRATCH2);
+	}
+	else
+	{
+		MOV(32, R(RSCRATCH2), R(RSCRATCH));
+		SHR(32, R(RSCRATCH2), Imm8(shift));
+		AND(32, R(RSCRATCH2), Imm32(0xF));
+	}
+	// Clear the selected exception bits and keep every other FPSCR bit.
+	AND(32, R(RSCRATCH), Imm32(~mask));
+	MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
+	// Index m_crTable (8-byte entries) with the extracted 4-bit field.
+	LEA(64, RSCRATCH, M(&m_crTable));
+	MOV(64, R(RSCRATCH), MComplex(RSCRATCH, RSCRATCH2, SCALE_8, 0));
+	MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH));
+}
+
+// Emits code for mffs: clears FPSCR.FEX, recomputes FPSCR.VX, stores the
+// result back and writes it to the low 32 bits of FD. The upper 32 bits are
+// set to 0xFFF80000, matching the interpreter.
+void Jit64::mffsx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITSystemRegistersOff);
+	FALLBACK_IF(inst.Rc);
+
+	MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
+
+	// FPSCR.FEX = 0 (and VX for below)
+	AND(32, R(RSCRATCH), Imm32(~0x60000000));
+
+	// FPSCR.VX = (FPSCR.Hex & FPSCR_VX_ANY) != 0;
+	XOR(32, R(RSCRATCH2), R(RSCRATCH2));
+	TEST(32, R(RSCRATCH), Imm32(FPSCR_VX_ANY));
+	SETcc(CC_NZ, R(RSCRATCH2));
+	SHL(32, R(RSCRATCH2), Imm8(31 - 2));
+	OR(32, R(RSCRATCH), R(RSCRATCH2));
+
+	MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
+
+	int d = inst.FD;
+	// NOTE(review): FD is presumably bound without loading its old value, and
+	// the MOVSD below only writes the low 64 bits - confirm the upper half is OK.
+	fpr.BindToRegister(d, false, true);
+	MOV(64, R(RSCRATCH2), Imm64(0xFFF8000000000000));
+	OR(64, R(RSCRATCH), R(RSCRATCH2));
+	MOVQ_xmm(XMM0, R(RSCRATCH));
+	MOVSD(fpr.RX(d), R(XMM0));
+}
+
+// Maps FPSCR.RN (0 nearest, 1 zero, 2 +inf, 3 -inf) to the MXCSR rounding
+// control encoding (0 nearest, 1 -inf, 2 +inf, 3 zero), shifted to bits 13-14.
+static const u32 s_rn_to_rc[] = { 0 << 13, 3 << 13, 2 << 13, 1 << 13 };
+
+// Emits code that copies the rounding mode in FPSCR.RN to the host MXCSR.
+void Jit64::UpdateRoundingMode()
+{
+	static u32 csr;
+	STMXCSR(M(&csr));
+	MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
+	AND(32, R(RSCRATCH), Imm32(3));
+	LEA(64, RSCRATCH2, M(&s_rn_to_rc));
+	MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_4, 0));
+	// Clear the old rounding control bits first; OR alone can only add bits
+	// and would leave a stale mode behind.
+	AND(32, M(&csr), Imm32(~(3 << 13)));
+	OR(32, M(&csr), R(RSCRATCH));
+	LDMXCSR(M(&csr));
+}
+
+// Emits code for mtfsb0: clears FPSCR bit CRBD (bit 0 is the MSB) and mirrors
+// a change of NI (bit 29) or RN (bits 30-31) into the host MXCSR.
+void Jit64::mtfsb0x(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITSystemRegistersOff);
+	FALLBACK_IF(inst.Rc);
+
+	AND(32, PPCSTATE(fpscr), Imm32(~(0x80000000 >> inst.CRBD)));
+
+	switch (inst.CRBD)
+	{
+	case 29:
+		// NI
+		static u32 csr;
+		STMXCSR(M(&csr));
+		AND(32, M(&csr), Imm32(~(1 << 15)));
+		LDMXCSR(M(&csr));
+		break;
+	case 30:
+	case 31:
+		// RN
+		UpdateRoundingMode();
+		break;
+	default:
+		break;
+	}
+}
+
+// Emits code for mtfsb1: sets FPSCR bit CRBD (bit 0 is the MSB) and mirrors
+// a change of NI (bit 29) or RN (bits 30-31) into the host MXCSR.
+void Jit64::mtfsb1x(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITSystemRegistersOff);
+	FALLBACK_IF(inst.Rc);
+
+	OR(32, PPCSTATE(fpscr), Imm32(0x80000000 >> inst.CRBD));
+
+	switch (inst.CRBD)
+	{
+	case 29:
+		// NI
+		static u32 csr;
+		STMXCSR(M(&csr));
+		OR(32, M(&csr), Imm32(1 << 15));
+		LDMXCSR(M(&csr));
+		break;
+	case 30:
+	case 31:
+		// RN
+		UpdateRoundingMode();
+		break;
+	default:
+		break;
+	}
+}
+
+// Emits code for mtfsfi: replaces FPSCR field CRFD with a 4-bit immediate.
+void Jit64::mtfsfix(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITSystemRegistersOff);
+	FALLBACK_IF(inst.Rc);
+
+	// The immediate is only 4 bits wide; mask off the neighbouring bits.
+	u8 imm = (inst.hex >> (31 - 19)) & 0xF;
+	u8 field_shift = 28 - 4 * inst.CRFD;
+
+	MOV(32, R(RSCRATCH), PPCSTATE(fpscr));
+	// Clear the destination field itself rather than always the lowest one.
+	AND(32, R(RSCRATCH), Imm32(~(0xFu << field_shift)));
+	OR(32, R(RSCRATCH), Imm32((u32)imm << field_shift));
+	MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
+
+	// XE, NI, RN
+	if (inst.CRFD == 7)
+	{
+		u32 ftz_bit = (imm & 4) << 13;
+		u32 rc_mask = s_rn_to_rc[imm & 3];
+		u32 or_mask = ftz_bit | rc_mask;
+		u32 all_bits = 7 << 13;
+		u32 and_mask = all_bits & ~or_mask;
+
+		static u32 csr;
+		STMXCSR(M(&csr));
+		// Clear the FTZ/RC bits that must end up 0, then set the others.
+		if (and_mask)
+			AND(32, M(&csr), Imm32(~and_mask));
+		if (or_mask)
+			OR(32, M(&csr), Imm32(or_mask));
+		LDMXCSR(M(&csr));
+	}
+}
+
+// Emits code for mtfsf: copies the FPSCR fields selected by FM from the low
+// 32 bits of FB, then mirrors NI/RN into the host MXCSR if they were written.
+void Jit64::mtfsfx(UGeckoInstruction inst)
+{
+	INSTRUCTION_START
+	JITDISABLE(bJITSystemRegistersOff);
+	FALLBACK_IF(inst.Rc);
+
+	u32 mask = 0;
+	for (int i = 0; i < 8; i++)
+	{
+		if (inst.FM & (1 << i))
+			mask |= 0xFu << (4 * i);
+	}
+
+	int b = inst.FB;
+	X64Reg xmm = XMM0;
+	if (fpr.R(b).IsSimpleReg())
+		xmm = fpr.RX(b);
+	else
+		MOVSD(XMM0, fpr.R(b));
+	MOVQ_xmm(R(RSCRATCH), xmm);
+	AND(32, R(RSCRATCH), Imm32(mask));
+	MOV(32, R(RSCRATCH2), PPCSTATE(fpscr));
+	AND(32, R(RSCRATCH2), Imm32(~mask));
+	OR(32, R(RSCRATCH), R(RSCRATCH2));
+	MOV(32, PPCSTATE(fpscr), R(RSCRATCH));
+
+	// The lowest field holds NI and RN, which the host MXCSR must mirror
+	// (as mtfsb0x/mtfsb1x/mtfsfix already do).
+	if (inst.FM & 1)
+	{
+		// RSCRATCH still holds the new FPSCR: NI (0x4) -> MXCSR FTZ (bit 15).
+		static u32 csr;
+		STMXCSR(M(&csr));
+		AND(32, M(&csr), Imm32(~(1 << 15)));
+		AND(32, R(RSCRATCH), Imm32(4));
+		SHL(32, R(RSCRATCH), Imm8(13));
+		OR(32, M(&csr), R(RSCRATCH));
+		LDMXCSR(M(&csr));
+
+		UpdateRoundingMode();
+	}
+}