diff --git a/Source/Core/Common/Src/x64Emitter.cpp b/Source/Core/Common/Src/x64Emitter.cpp index 9acfac5864..7583ef0fc3 100644 --- a/Source/Core/Common/Src/x64Emitter.cpp +++ b/Source/Core/Common/Src/x64Emitter.cpp @@ -834,6 +834,39 @@ void XEmitter::SHL(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, sh void XEmitter::SHR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 5);} void XEmitter::SAR(int bits, OpArg dest, OpArg shift) {WriteShift(bits, dest, shift, 7);} +// Shared encoder behind BT/BTS/BTR/BTC (ext = 4/5/6/7 respectively). index can be either imm8 or register, don't use memory destination because it's slow. Asserts when dest is an immediate or when index is an immediate that is not 8 bits wide; writes a 0x66 prefix for 16-bit operands. Immediate index: bytes 0F BA, then WriteRest with ext in the register-operand position (presumably the ModRM reg field - confirm), then the imm8. Register index: bytes 0F (0x83 + 8*ext), i.e. A3/AB/B3/BB, then WriteRest with the index register. NOTE(review): index is not checked to be a plain register before GetSimpleReg(), and bits == 8 is not rejected - confirm callers never pass those. +void XEmitter::WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext) +{ + if (dest.IsImm()) + { + _assert_msg_(DYNA_REC, 0, "WriteBitTest - can't test imms"); + } + if ((index.IsImm() && index.GetImmBits() != 8)) + { + _assert_msg_(DYNA_REC, 0, "WriteBitTest - illegal argument"); + } + if (bits == 16) Write8(0x66); + if (index.IsImm()) + { + dest.WriteRex(this, bits, bits); + Write8(0x0F); Write8(0xBA); + dest.WriteRest(this, 1, (X64Reg)ext); + Write8((u8)index.offset); + } + else + { + X64Reg operand = index.GetSimpleReg(); + dest.WriteRex(this, bits, bits, operand); + Write8(0x0F); Write8(0x83 + 8*ext); + dest.WriteRest(this, 1, operand); + } +} + +void XEmitter::BT(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 4);} +void XEmitter::BTS(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 5);} +void XEmitter::BTR(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 6);} +void XEmitter::BTC(int bits, OpArg dest, OpArg index) {WriteBitTest(bits, dest, index, 7);} + void OpArg::WriteSingleByteOp(XEmitter *emit, u8 op, X64Reg _operandReg, int bits) { if (bits == 16) diff --git a/Source/Core/Common/Src/x64Emitter.h b/Source/Core/Common/Src/x64Emitter.h index 9ffb890f54..6bd9291eea 100644 --- a/Source/Core/Common/Src/x64Emitter.h +++ b/Source/Core/Common/Src/x64Emitter.h @@ -249,6 +249,7 @@ private: void WriteMulDivType(int bits, OpArg 
src, int ext); void WriteBitSearchType(int bits, X64Reg dest, OpArg src, u8 byte2); void WriteShift(int bits, OpArg dest, OpArg &shift, int ext); + void WriteBitTest(int bits, OpArg &dest, OpArg &index, int ext); void WriteMXCSR(OpArg arg, int ext); void WriteSSEOp(int size, u8 sseOp, bool packed, X64Reg regOp, OpArg arg, int extrabytes = 0); void WriteNormalOp(XEmitter *emit, int bits, NormalOp op, const OpArg &a1, const OpArg &a2); @@ -374,6 +375,12 @@ public: void SHR(int bits, OpArg dest, OpArg shift); void SAR(int bits, OpArg dest, OpArg shift); + // Bit Test + void BT(int bits, OpArg dest, OpArg index); + void BTS(int bits, OpArg dest, OpArg index); + void BTR(int bits, OpArg dest, OpArg index); + void BTC(int bits, OpArg dest, OpArg index); + // Extend EAX into EDX in various ways void CWD(int bits = 16); inline void CDQ() {CWD(32);} diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 9d9b57658b..8a13af741a 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -150,6 +150,8 @@ public: void GenerateConstantOverflow(bool overflow); void GenerateOverflow(); void FinalizeCarryOverflow(bool oe, bool inv = false); + void GetCarryEAXAndClear(); + void FinalizeCarryGenerateOverflowEAX(bool oe, bool inv = false); void GenerateCarry(); void GenerateRC(); void ComputeRC(const Gen::OpArg & arg); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp index 4b68b32036..7c79fce963 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_Integer.cpp @@ -73,13 +73,53 @@ void Jit64::FinalizeCarryOverflow(bool oe, bool inv) } else { - // Output carry is inverted + // Do carry FixupBranch carry1 = J_CC(inv ? 
CC_C : CC_NC); JitSetCA(); SetJumpTarget(carry1); } } +void Jit64::GetCarryEAXAndClear() +{ + MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); + BTR(32, R(EAX), Imm8(29)); // x86 BTR: the old value of EAX bit 29 goes to the host carry flag and the bit is cleared in EAX (presumably XER[CA], i.e. XER_CA_MASK - confirm) +} + +// Assumes that XER is in EAX and that the CA bit is clear. Reads the host flags as they are on entry: with oe set, the overflow flag selects between setting XER[SO|OV] and clearing XER[OV]; the carry flag decides whether XER_CA_MASK is ORed in (inv == true sets CA when the host carry is clear instead of set). Every update is made to EAX, which is stored back to ppcState XER at the end. +void Jit64::FinalizeCarryGenerateOverflowEAX(bool oe, bool inv) +{ + // USES_XER + if (oe) + { + FixupBranch jno = J_CC(CC_NO); + // Do carry + FixupBranch carry1 = J_CC(inv ? CC_C : CC_NC); + OR(32, R(EAX), Imm32(XER_CA_MASK)); + SetJumpTarget(carry1); + //XER[OV/SO] = 1 + OR(32, R(EAX), Imm32(XER_SO_MASK | XER_OV_MASK)); + FixupBranch exit = J(); + SetJumpTarget(jno); + // Do carry, in EAX like the other paths: the final store of EAX would overwrite a CA bit set anywhere else + FixupBranch carry2 = J_CC(inv ? CC_C : CC_NC); + OR(32, R(EAX), Imm32(XER_CA_MASK)); + SetJumpTarget(carry2); + //XER[OV] = 0 + AND(32, R(EAX), Imm32(~XER_OV_MASK)); + SetJumpTarget(exit); + } + else + { + // Do carry + FixupBranch carry1 = J_CC(inv ? CC_C : CC_NC); + OR(32, R(EAX), Imm32(XER_CA_MASK)); + SetJumpTarget(carry1); + } + // Dump EAX back into XER + MOV(32, M(&PowerPC::ppcState.spr[SPR_XER]), R(EAX)); +} + // Assumes that the flags were just set through an addition. 
void Jit64::GenerateCarry() { // USES_XER @@ -908,10 +948,7 @@ void Jit64::subfex(UGeckoInstruction inst) gpr.Lock(a, b, d); gpr.BindToRegister(d, (d == a || d == b), true); - // Get CA and clear it (along with OV if applicable) - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); - JitClearCAOV(inst.OE); - SHR(32, R(EAX), Imm8(30)); + GetCarryEAXAndClear(); bool invertedCarry = false; if (d == b) @@ -928,16 +965,14 @@ void Jit64::subfex(UGeckoInstruction inst) } else { - // Convert carry to borrow - CMC(); - MOV(32, gpr.R(d), gpr.R(b)); - SBB(32, gpr.R(d), gpr.R(a)); - invertedCarry = true; + MOV(32, gpr.R(d), gpr.R(a)); + NOT(32, gpr.R(d)); + ADC(32, gpr.R(d), gpr.R(b)); } if (inst.Rc) { GenerateRC(); } - FinalizeCarryOverflow(inst.OE, invertedCarry); + FinalizeCarryGenerateOverflowEAX(inst.OE, invertedCarry); gpr.UnlockAll(); } @@ -951,9 +986,7 @@ void Jit64::subfmex(UGeckoInstruction inst) gpr.Lock(a, d); gpr.BindToRegister(d, d == a); - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); - JitClearCAOV(inst.OE); - SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + GetCarryEAXAndClear(); if (d != a) { MOV(32, gpr.R(d), gpr.R(a)); @@ -964,7 +997,7 @@ void Jit64::subfmex(UGeckoInstruction inst) { GenerateRC(); } - FinalizeCarryOverflow(inst.OE); + FinalizeCarryGenerateOverflowEAX(inst.OE); gpr.UnlockAll(); } @@ -977,9 +1010,8 @@ void Jit64::subfzex(UGeckoInstruction inst) gpr.Lock(a, d); gpr.BindToRegister(d, d == a); - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); - JitClearCAOV(inst.OE); - SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + + GetCarryEAXAndClear(); if (d != a) { MOV(32, gpr.R(d), gpr.R(a)); @@ -990,7 +1022,7 @@ void Jit64::subfzex(UGeckoInstruction inst) { GenerateRC(); } - FinalizeCarryOverflow(inst.OE); + FinalizeCarryGenerateOverflowEAX(inst.OE); gpr.UnlockAll(); } @@ -1267,31 +1299,29 @@ void Jit64::addex(UGeckoInstruction inst) { gpr.Lock(a, b, d); gpr.BindToRegister(d, 
true); - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); - JitClearCAOV(inst.OE); - SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + + GetCarryEAXAndClear(); ADC(32, gpr.R(d), gpr.R((d == a) ? b : a)); if (inst.Rc) { GenerateRC(); } - FinalizeCarryOverflow(inst.OE); + FinalizeCarryGenerateOverflowEAX(inst.OE); gpr.UnlockAll(); } else { gpr.Lock(a, b, d); gpr.BindToRegister(d, false); - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); - JitClearCAOV(inst.OE); - SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + + GetCarryEAXAndClear(); MOV(32, gpr.R(d), gpr.R(a)); ADC(32, gpr.R(d), gpr.R(b)); if (inst.Rc) { GenerateRC(); } - FinalizeCarryOverflow(inst.OE); + FinalizeCarryGenerateOverflowEAX(inst.OE); gpr.UnlockAll(); } } @@ -1343,31 +1373,29 @@ void Jit64::addmex(UGeckoInstruction inst) { gpr.Lock(d); gpr.BindToRegister(d, true); - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); - JitClearCAOV(inst.OE); - SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + + GetCarryEAXAndClear(); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); if (inst.Rc) { GenerateRC(); } - FinalizeCarryOverflow(inst.OE); + FinalizeCarryGenerateOverflowEAX(inst.OE); gpr.UnlockAll(); } else { gpr.Lock(a, d); gpr.BindToRegister(d, false); - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); - JitClearCAOV(inst.OE); - SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + + GetCarryEAXAndClear(); MOV(32, gpr.R(d), gpr.R(a)); ADC(32, gpr.R(d), Imm32(0xFFFFFFFF)); if (inst.Rc) { GenerateRC(); } - FinalizeCarryOverflow(inst.OE); + FinalizeCarryGenerateOverflowEAX(inst.OE); gpr.UnlockAll(); } } @@ -1383,31 +1411,29 @@ void Jit64::addzex(UGeckoInstruction inst) { gpr.Lock(d); gpr.BindToRegister(d, true); - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); - JitClearCAOV(inst.OE); - SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + + 
GetCarryEAXAndClear(); ADC(32, gpr.R(d), Imm8(0)); if (inst.Rc) { GenerateRC(); } - FinalizeCarryOverflow(inst.OE); + FinalizeCarryGenerateOverflowEAX(inst.OE); gpr.UnlockAll(); } else { gpr.Lock(a, d); gpr.BindToRegister(d, false); - MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER])); - JitClearCAOV(inst.OE); - SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag + + GetCarryEAXAndClear(); MOV(32, gpr.R(d), gpr.R(a)); ADC(32, gpr.R(d), Imm8(0)); if (inst.Rc) { GenerateRC(); } - FinalizeCarryOverflow(inst.OE); + FinalizeCarryGenerateOverflowEAX(inst.OE); gpr.UnlockAll(); } }