From 5fce109ce114a1376c6a9dcaa60660d6bc451f0f Mon Sep 17 00:00:00 2001 From: Fiora Date: Fri, 12 Sep 2014 13:19:50 -0700 Subject: [PATCH 1/2] Reorganize carry to store flags separately instead of as part of XER Also correct behavior with regard to which bits in XER are treated as zero, based on a hwtest (probably doesn't affect any real games, but might as well be correct). --- Source/Core/Core/PowerPC/Gekko.h | 9 ++-- .../Interpreter/Interpreter_LoadStore.cpp | 4 +- .../Interpreter_SystemRegisters.cpp | 13 ++++-- .../Core/Core/PowerPC/Jit64/Jit_Integer.cpp | 39 ++++++---------- .../PowerPC/Jit64/Jit_SystemRegisters.cpp | 46 ++++++++++++++++--- Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp | 6 +-- .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 19 +++----- Source/Core/Core/PowerPC/JitCommon/Jit_Util.h | 2 +- Source/Core/Core/PowerPC/PowerPC.h | 25 +++++++--- Source/Core/Core/State.cpp | 2 +- 10 files changed, 97 insertions(+), 68 deletions(-) diff --git a/Source/Core/Core/PowerPC/Gekko.h b/Source/Core/Core/PowerPC/Gekko.h index cde09114ee..d776e53a5c 100644 --- a/Source/Core/Core/PowerPC/Gekko.h +++ b/Source/Core/Core/PowerPC/Gekko.h @@ -335,16 +335,17 @@ union UFPR #define XER_CA_SHIFT 29 #define XER_OV_SHIFT 30 #define XER_SO_SHIFT 31 -#define XER_CA_MASK (1U << XER_CA_SHIFT) -#define XER_OV_MASK (1U << XER_OV_SHIFT) -#define XER_SO_MASK (1U << XER_SO_SHIFT) +#define XER_OV_MASK 1 +#define XER_SO_MASK 2 // XER union UReg_XER { struct { u32 BYTE_COUNT : 7; - u32 : 22; + u32 : 1; + u32 BYTE_CMP : 8; + u32 : 13; u32 CA : 1; u32 OV : 1; u32 SO : 1; diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp index 332e5e5632..5b8e6351ff 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStore.cpp @@ -499,7 +499,7 @@ void Interpreter::lhzx(UGeckoInstruction _inst) void Interpreter::lswx(UGeckoInstruction _inst) { 
u32 EA = Helper_Get_EA_X(_inst); - u32 n = rSPR(SPR_XER) & 0x7F; + u32 n = (u8)PowerPC::ppcState.xer_stringctrl; int r = _inst.RD; int i = 0; @@ -727,7 +727,7 @@ void Interpreter::stswi(UGeckoInstruction _inst) void Interpreter::stswx(UGeckoInstruction _inst) { u32 EA = Helper_Get_EA_X(_inst); - u32 n = rSPR(SPR_XER) & 0x7F; + u32 n = (u8)PowerPC::ppcState.xer_stringctrl; int r = _inst.RS; int i = 0; diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp index 466871399a..1e8c98f1a9 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_SystemRegisters.cpp @@ -112,9 +112,9 @@ void Interpreter::mtfsfx(UGeckoInstruction _inst) void Interpreter::mcrxr(UGeckoInstruction _inst) { - // USES_XER - SetCRField(_inst.CRFD, PowerPC::ppcState.spr[SPR_XER] >> 28); - PowerPC::ppcState.spr[SPR_XER] &= ~0xF0000000; // clear 0-3 + SetCRField(_inst.CRFD, GetXER().Hex >> 28); + PowerPC::ppcState.xer_ca = 0; + PowerPC::ppcState.xer_so_ov = 0; } void Interpreter::mfcr(UGeckoInstruction _inst) @@ -235,6 +235,9 @@ void Interpreter::mfspr(UGeckoInstruction _inst) rSPR(iIndex) &= ~1; } break; + case SPR_XER: + rSPR(iIndex) = GetXER().Hex; + break; } m_GPR[_inst.RD] = rSPR(iIndex); } @@ -350,6 +353,10 @@ void Interpreter::mtspr(UGeckoInstruction _inst) case SPR_SDR: Memory::SDRUpdated(); break; + + case SPR_XER: + SetXER(rSPR(iIndex)); + break; } } diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp index 10efad48c5..7b490c15f1 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp @@ -22,12 +22,12 @@ void Jit64::GenerateConstantOverflow(bool overflow) if (overflow) { //XER[OV/SO] = 1 - OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); + MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_OV_MASK | 
XER_SO_MASK)); } else { //XER[OV] = 0 - AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK)); + AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK)); } } @@ -36,11 +36,11 @@ void Jit64::GenerateOverflow() { FixupBranch jno = J_CC(CC_NO); //XER[OV/SO] = 1 - OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); + MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_OV_MASK | XER_SO_MASK)); FixupBranch exit = J(); SetJumpTarget(jno); //XER[OV] = 0 - AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK)); + AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK)); SetJumpTarget(exit); } @@ -60,7 +60,7 @@ void Jit64::FinalizeCarry(CCFlags cond) { // convert the condition to a carry flag (is there a better way?) SETcc(cond, R(RSCRATCH)); - BT(8, R(RSCRATCH), Imm8(0)); + SHR(8, R(RSCRATCH), Imm8(1)); } js.carryFlagSet = true; } @@ -92,23 +92,22 @@ void Jit64::FinalizeCarry(bool ca) } else { - JitClearCAOV(true, false); + JitClearCA(); } } } -// Assumes CA,OV are clear void Jit64::FinalizeCarryOverflow(bool oe, bool inv) { - // USES_XER if (oe) { // Make sure not to lose the carry flags (not a big deal, this path is rare). PUSHF(); - AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~(XER_SO_MASK | XER_OV_MASK))); + //XER[OV] = 0 + AND(8, PPCSTATE(xer_so_ov), Imm8(~XER_OV_MASK)); FixupBranch jno = J_CC(CC_NO); //XER[OV/SO] = 1 - OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_SO_MASK | XER_OV_MASK)); + MOV(8, PPCSTATE(xer_so_ov), Imm8(XER_SO_MASK | XER_OV_MASK)); SetJumpTarget(jno); POPF(); } @@ -1792,27 +1791,15 @@ void Jit64::srawix(UGeckoInstruction inst) if (a != s) MOV(32, gpr.R(a), R(RSCRATCH)); // some optimized common cases that can be done in slightly fewer ops - if (amount == 31) + if (amount == 1) { - JitSetCA(); - SAR(32, gpr.R(a), Imm8(31)); - NEG(32, R(RSCRATCH)); // RSCRATCH = input == INT_MIN ? INT_MIN : -input; - AND(32, R(RSCRATCH), Imm32(0x80000000)); // RSCRATCH = input < 0 && input != INT_MIN ? 
0 : 0x80000000 - SHR(32, R(RSCRATCH), Imm8(31 - XER_CA_SHIFT)); - XOR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = (input < 0 && input != INT_MIN) - } - else if (amount == 1) - { - JitClearCAOV(true, false); - SHR(32, R(RSCRATCH), Imm8(31)); // sign - AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry) + SHR(32, R(RSCRATCH), Imm8(31)); // sign + AND(32, R(RSCRATCH), gpr.R(a)); // (sign && carry) SAR(32, gpr.R(a), Imm8(1)); - SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT)); - OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001 + MOV(8, PPCSTATE(xer_ca), R(RSCRATCH)); // XER.CA = sign && carry, aka (input&0x80000001) == 0x80000001 } else { - JitClearCAOV(true, false); SAR(32, gpr.R(a), Imm8(amount)); SHL(32, R(RSCRATCH), Imm8(32 - amount)); TEST(32, R(RSCRATCH), gpr.R(a)); diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp index 560004816e..2aedc09c8b 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_SystemRegisters.cpp @@ -140,7 +140,6 @@ void Jit64::mtspr(UGeckoInstruction inst) case SPR_LR: case SPR_CTR: - case SPR_XER: // These are safe to do the easy way, see the bottom of this function. break; @@ -155,6 +154,24 @@ void Jit64::mtspr(UGeckoInstruction inst) // These are safe to do the easy way, see the bottom of this function. 
break; + case SPR_XER: + gpr.Lock(d); + gpr.BindToRegister(d, true, false); + MOV(32, R(RSCRATCH), gpr.R(d)); + AND(32, R(RSCRATCH), Imm32(0xff7f)); + MOV(16, PPCSTATE(xer_stringctrl), R(RSCRATCH)); + + MOV(32, R(RSCRATCH), gpr.R(d)); + SHR(32, R(RSCRATCH), Imm8(XER_CA_SHIFT)); + AND(8, R(RSCRATCH), Imm8(1)); + MOV(8, PPCSTATE(xer_ca), R(RSCRATCH)); + + MOV(32, R(RSCRATCH), gpr.R(d)); + SHR(32, R(RSCRATCH), Imm8(XER_OV_SHIFT)); + MOV(8, PPCSTATE(xer_so_ov), R(RSCRATCH)); + gpr.UnlockAll(); + return; + default: FALLBACK_IF(true); } @@ -238,6 +255,18 @@ void Jit64::mfspr(UGeckoInstruction inst) gpr.UnlockAllX(); break; } + case SPR_XER: + gpr.Lock(d); + gpr.BindToRegister(d, false); + MOVZX(32, 16, gpr.RX(d), PPCSTATE(xer_stringctrl)); + MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_ca)); + SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT)); + OR(32, gpr.R(d), R(RSCRATCH)); + + MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_so_ov)); + SHL(32, R(RSCRATCH), Imm8(XER_OV_SHIFT)); + OR(32, gpr.R(d), R(RSCRATCH)); + break; case SPR_WPAR: case SPR_DEC: case SPR_PMC1: @@ -422,17 +451,20 @@ void Jit64::mcrxr(UGeckoInstruction inst) INSTRUCTION_START JITDISABLE(bJITSystemRegistersOff); - // USES_CR - // Copy XER[0-3] into CR[inst.CRFD] - MOV(32, R(RSCRATCH), PPCSTATE(spr[SPR_XER])); - SHR(32, R(RSCRATCH), Imm8(28)); + MOVZX(32, 8, RSCRATCH, PPCSTATE(xer_ca)); + MOVZX(32, 8, RSCRATCH2, PPCSTATE(xer_so_ov)); + // [0 SO OV CA] + LEA(32, RSCRATCH, MComplex(RSCRATCH, RSCRATCH2, SCALE_2, 0)); + // [SO OV CA 0] << 3 + SHL(32, R(RSCRATCH), Imm8(4)); - MOV(64, R(RSCRATCH), MScaled(RSCRATCH, SCALE_8, (u32)(u64)m_crTable)); + MOV(64, R(RSCRATCH), MDisp(RSCRATCH, (u32)(u64)m_crTable)); MOV(64, PPCSTATE(cr_val[inst.CRFD]), R(RSCRATCH)); // Clear XER[0-3] - AND(32, PPCSTATE(spr[SPR_XER]), Imm32(0x0FFFFFFF)); + MOV(8, PPCSTATE(xer_ca), Imm8(0)); + MOV(8, PPCSTATE(xer_so_ov), Imm8(0)); } void Jit64::crXXX(UGeckoInstruction inst) diff --git a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp 
b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp index 0491854b42..88c96c327d 100644 --- a/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp +++ b/Source/Core/Core/PowerPC/Jit64IL/IR_X86.cpp @@ -1027,9 +1027,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) break; X64Reg reg = regFindFreeReg(RI); - Jit->MOV(32, R(reg), PPCSTATE(spr[SPR_XER])); - Jit->SHR(32, R(reg), Imm8(29)); - Jit->AND(32, R(reg), Imm8(1)); + Jit->MOVZX(32, 8, reg, PPCSTATE(xer_ca)); RI.regs[reg] = I; break; } @@ -1107,7 +1105,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) Jit->JitSetCA(); FixupBranch cont = Jit->J(); Jit->SetJumpTarget(nocarry); - Jit->JitClearCAOV(true, false); + Jit->JitClearCA(); Jit->SetJumpTarget(cont); regNormalRegClear(RI, I); break; diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index 30bbfee806..283ea6ea2e 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -896,30 +896,23 @@ void EmuCodeBlock::SetFPRF(Gen::X64Reg xmm) void EmuCodeBlock::JitGetAndClearCAOV(bool oe) { if (oe) - AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_OV_MASK)); //XER.OV = 0 - BTR(32, PPCSTATE(spr[SPR_XER]), Imm8(29)); //carry = XER.CA, XER.CA = 0 + AND(8, PPCSTATE(xer_so_ov), Imm32(~XER_OV_MASK)); //XER.OV = 0 + SHR(8, PPCSTATE(xer_ca), Imm8(1)); //carry = XER.CA, XER.CA = 0 } void EmuCodeBlock::JitSetCA() { - OR(32, PPCSTATE(spr[SPR_XER]), Imm32(XER_CA_MASK)); //XER.CA = 1 + MOV(8, PPCSTATE(xer_ca), Imm8(1)); //XER.CA = 1 } // Some testing shows CA is set roughly ~1/3 of the time (relative to clears), so // branchless calculation of CA is probably faster in general. 
void EmuCodeBlock::JitSetCAIf(CCFlags conditionCode) { - SETcc(conditionCode, R(RSCRATCH)); - MOVZX(32, 8, RSCRATCH, R(RSCRATCH)); - SHL(32, R(RSCRATCH), Imm8(XER_CA_SHIFT)); - AND(32, PPCSTATE(spr[SPR_XER]), Imm32(~XER_CA_MASK)); - OR(32, PPCSTATE(spr[SPR_XER]), R(RSCRATCH)); //XER.CA = 1 + SETcc(conditionCode, PPCSTATE(xer_ca)); } -void EmuCodeBlock::JitClearCAOV(bool ca, bool oe) +void EmuCodeBlock::JitClearCA() { - u32 mask = (ca ? ~XER_CA_MASK : 0xFFFFFFFF) & (oe ? ~XER_OV_MASK : 0xFFFFFFFF); - if (mask == 0xFFFFFFFF) - return; - AND(32, PPCSTATE(spr[SPR_XER]), Imm32(mask)); + MOV(8, PPCSTATE(xer_ca), Imm8(0)); } diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h index 527d7c1491..e46621067a 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.h @@ -121,7 +121,7 @@ public: void JitGetAndClearCAOV(bool oe); void JitSetCA(); void JitSetCAIf(Gen::CCFlags conditionCode); - void JitClearCAOV(bool ca, bool oe); + void JitClearCA(); void ForceSinglePrecisionS(Gen::X64Reg xmm); void ForceSinglePrecisionP(Gen::X64Reg xmm); diff --git a/Source/Core/Core/PowerPC/PowerPC.h b/Source/Core/Core/PowerPC/PowerPC.h index a43c667f8c..a4c2fc4af1 100644 --- a/Source/Core/Core/PowerPC/PowerPC.h +++ b/Source/Core/Core/PowerPC/PowerPC.h @@ -61,6 +61,12 @@ struct GC_ALIGNED64(PowerPCState) // This variable should be inside of the CoreTiming namespace if we wanted to be correct. int downcount; + // XER, reformatted into byte fields for easier access. + u8 xer_ca; + u8 xer_so_ov; // format: (SO << 1) | OV + // The Broadway CPU implements bits 16-23 of the XER register... even though it doesn't support lscbx + u16 xer_stringctrl; + #if _M_X86_64 // This member exists for the purpose of an assertion in x86 JitBase.cpp // that its offset <= 0x100. 
To minimize code size on x86, we want as much @@ -252,35 +258,40 @@ inline u32 GetCR() return PowerPC::CompactCR(); } -// SetCarry/GetCarry may speed up soon. inline void SetCarry(int ca) { - ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA = ca; + PowerPC::ppcState.xer_ca = ca; } inline int GetCarry() { - return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).CA; + return PowerPC::ppcState.xer_ca; } inline UReg_XER GetXER() { - return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]); + u32 xer = 0; + xer |= PowerPC::ppcState.xer_stringctrl; + xer |= PowerPC::ppcState.xer_ca << XER_CA_SHIFT; + xer |= PowerPC::ppcState.xer_so_ov << XER_OV_SHIFT; + return xer; } inline void SetXER(UReg_XER new_xer) { - ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]) = new_xer; + PowerPC::ppcState.xer_stringctrl = new_xer.BYTE_COUNT + (new_xer.BYTE_CMP << 8); + PowerPC::ppcState.xer_ca = new_xer.CA; + PowerPC::ppcState.xer_so_ov = (new_xer.SO << 1) + new_xer.OV; } inline int GetXER_SO() { - return ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO; + return PowerPC::ppcState.xer_so_ov >> 1; } inline void SetXER_SO(int value) { - ((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO = value; + PowerPC::ppcState.xer_so_ov |= value << 1; } void UpdateFPRF(double dvalue); diff --git a/Source/Core/Core/State.cpp b/Source/Core/Core/State.cpp index 9a1e15f74f..737a3eb50f 100644 --- a/Source/Core/Core/State.cpp +++ b/Source/Core/Core/State.cpp @@ -63,7 +63,7 @@ static Common::Event g_compressAndDumpStateSyncEvent; static std::thread g_save_thread; // Don't forget to increase this after doing changes on the savestate system -static const u32 STATE_VERSION = 33; +static const u32 STATE_VERSION = 34; enum { From 76697922b4f3f525df9429451ce636cc52bde6c6 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Mon, 15 Sep 2014 02:21:03 -0500 Subject: [PATCH 2/2] Implement XER optimization on ARMv7 JIT core Not completely optimized; there's room for improvement here. 
--- .../Core/PowerPC/JitArm32/JitArm_Integer.cpp | 83 ++++++++----------- .../JitArm32/JitArm_SystemRegisters.cpp | 36 +++++++- 2 files changed, 68 insertions(+), 51 deletions(-) diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp index 7767a7c1ee..823b0a7bec 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_Integer.cpp @@ -46,51 +46,46 @@ void JitArm::ComputeRC(s32 value, int cr) void JitArm::ComputeCarry() { ARMReg tmp = gpr.GetReg(); - Operand2 mask = Operand2(2, 2); // XER_CA_MASK - LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); SetCC(CC_CS); - ORR(tmp, tmp, mask); + ORR(tmp, tmp, 1); SetCC(CC_CC); - BIC(tmp, tmp, mask); + BIC(tmp, tmp, 1); SetCC(); - STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); gpr.Unlock(tmp); } void JitArm::ComputeCarry(bool Carry) { ARMReg tmp = gpr.GetReg(); - Operand2 mask = Operand2(2, 2); // XER_CA_MASK - LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); if (Carry) - ORR(tmp, tmp, mask); + ORR(tmp, tmp, 1); else - BIC(tmp, tmp, mask); - STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + BIC(tmp, tmp, 1); + STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); gpr.Unlock(tmp); } void JitArm::GetCarryAndClear(ARMReg reg) { ARMReg tmp = gpr.GetReg(); - Operand2 mask = Operand2(2, 2); // XER_CA_MASK - LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); - AND(reg, tmp, mask); - BIC(tmp, tmp, mask); - STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); + AND(reg, tmp, 1); + BIC(tmp, tmp, 1); + STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); gpr.Unlock(tmp); } void JitArm::FinalizeCarry(ARMReg reg) { ARMReg tmp = gpr.GetReg(); - Operand2 mask = Operand2(2, 2); // XER_CA_MASK SetCC(CC_CS); - ORR(reg, reg, mask); + ORR(reg, reg, 1); SetCC(); - LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); ORR(tmp, tmp, reg); - STR(tmp, R9, 
PPCSTATE_OFF(spr[SPR_XER])); + STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); gpr.Unlock(tmp); } @@ -107,25 +102,23 @@ void JitArm::subfic(UGeckoInstruction inst) if (imm == 0) { ARMReg tmp = gpr.GetReg(); - Operand2 mask = Operand2(2, 2); // XER_CA_MASK - LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); - BIC(tmp, tmp, mask); + LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); + BIC(tmp, tmp, 1); // Flags act exactly like subtracting from 0 RSBS(gpr.R(d), gpr.R(d), 0); SetCC(CC_CS); - ORR(tmp, tmp, mask); + ORR(tmp, tmp, 1); SetCC(); - STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); gpr.Unlock(tmp); } else if (imm == -1) { // CA is always set in this case ARMReg tmp = gpr.GetReg(); - Operand2 mask = Operand2(2, 2); // XER_CA_MASK - LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); - ORR(tmp, tmp, mask); - STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); + ORR(tmp, tmp, 1); + STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); gpr.Unlock(tmp); MVN(gpr.R(d), gpr.R(d)); @@ -134,35 +127,33 @@ void JitArm::subfic(UGeckoInstruction inst) { ARMReg tmp = gpr.GetReg(); ARMReg rA = gpr.GetReg(); - Operand2 mask = Operand2(2, 2); // XER_CA_MASK MOVI2R(rA, imm + 1); - LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); - BIC(tmp, tmp, mask); + LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); + BIC(tmp, tmp, 1); // Flags act exactly like subtracting from 0 MVN(gpr.R(d), gpr.R(d)); ADDS(gpr.R(d), gpr.R(d), rA); // Output carry is inverted SetCC(CC_CS); - ORR(tmp, tmp, mask); + ORR(tmp, tmp, 1); SetCC(); - STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); gpr.Unlock(tmp, rA); } } else { ARMReg tmp = gpr.GetReg(); - Operand2 mask = Operand2(2, 2); // XER_CA_MASK MOVI2R(gpr.R(d), imm); - LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); - BIC(tmp, tmp, mask); + LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); + BIC(tmp, tmp, 1); // Flags act exactly like subtracting from 0 SUBS(gpr.R(d), gpr.R(d), gpr.R(a)); // Output carry is inverted SetCC(CC_CS); - ORR(tmp, tmp, mask); + 
ORR(tmp, tmp, 1); SetCC(); - STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); gpr.Unlock(tmp); } // This instruction has no RC flag @@ -871,7 +862,6 @@ void JitArm::srawix(UGeckoInstruction inst) ARMReg RA = gpr.R(a); ARMReg RS = gpr.R(s); ARMReg tmp = gpr.GetReg(); - Operand2 mask = Operand2(2, 2); // XER_CA_MASK MOV(tmp, RS); ASR(RA, RS, amount); @@ -880,12 +870,12 @@ void JitArm::srawix(UGeckoInstruction inst) LSL(tmp, tmp, 32 - amount); TST(tmp, RA); - LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); - BIC(tmp, tmp, mask); + LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); + BIC(tmp, tmp, 1); SetCC(CC_NEQ); - ORR(tmp, tmp, mask); + ORR(tmp, tmp, 1); SetCC(); - STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); gpr.Unlock(tmp); } else @@ -895,10 +885,9 @@ void JitArm::srawix(UGeckoInstruction inst) MOV(RA, RS); ARMReg tmp = gpr.GetReg(); - Operand2 mask = Operand2(2, 2); // XER_CA_MASK - LDR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); - BIC(tmp, tmp, mask); - STR(tmp, R9, PPCSTATE_OFF(spr[SPR_XER])); + LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); + BIC(tmp, tmp, 1); + STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); gpr.Unlock(tmp); } diff --git a/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp b/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp index b9e9c5bcd0..4946bc4d32 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitArm_SystemRegisters.cpp @@ -76,11 +76,10 @@ void JitArm::mtspr(UGeckoInstruction inst) case SPR_SRR0: case SPR_SRR1: // These are safe to do the easy way, see the bottom of this function. - break; + break; case SPR_LR: case SPR_CTR: - case SPR_XER: case SPR_GQR0: case SPR_GQR0 + 1: case SPR_GQR0 + 2: @@ -90,8 +89,23 @@ void JitArm::mtspr(UGeckoInstruction inst) case SPR_GQR0 + 6: case SPR_GQR0 + 7: // These are safe to do the easy way, see the bottom of this function. 
- break; - + break; + case SPR_XER: + { + ARMReg RD = gpr.R(inst.RD); + ARMReg tmp = gpr.GetReg(); + ARMReg mask = gpr.GetReg(); + MOVI2R(mask, 0xFF7F); + AND(tmp, RD, mask); + STRH(tmp, R9, PPCSTATE_OFF(xer_stringctrl)); + LSR(tmp, RD, XER_CA_SHIFT); + AND(tmp, tmp, 1); + STRB(tmp, R9, PPCSTATE_OFF(xer_ca)); + LSR(tmp, RD, XER_OV_SHIFT); + STRB(tmp, R9, PPCSTATE_OFF(xer_so_ov)); + gpr.Unlock(tmp, mask); + } + break; default: FALLBACK_IF(true); } @@ -116,6 +130,20 @@ void JitArm::mfspr(UGeckoInstruction inst) u32 iIndex = (inst.SPRU << 5) | (inst.SPRL & 0x1F); switch (iIndex) { + case SPR_XER: + { + ARMReg RD = gpr.R(inst.RD); + ARMReg tmp = gpr.GetReg(); + LDRH(RD, R9, PPCSTATE_OFF(xer_stringctrl)); + LDRB(tmp, R9, PPCSTATE_OFF(xer_ca)); + LSL(tmp, tmp, XER_CA_SHIFT); + ORR(RD, RD, tmp); + LDRB(tmp, R9, PPCSTATE_OFF(xer_so_ov)); + LSL(tmp, tmp, XER_OV_SHIFT); + ORR(RD, RD, tmp); + gpr.Unlock(tmp); + } + break; case SPR_WPAR: case SPR_DEC: case SPR_TL: