diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Integer.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Integer.cpp
index a2041c35b5..a95ebd0289 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Integer.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Integer.cpp
@@ -294,36 +294,46 @@ void JitIL::mullwx(UGeckoInstruction inst)
 
 void JitIL::mulhwux(UGeckoInstruction inst)
 {
-	Default(inst); return;
-#if 0
-	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
-		{Default(inst); return;} // turn off from debugger
+	INSTRUCTION_START
+	JITDISABLE(Integer)
 
-	INSTRUCTION_START;
-	int a = inst.RA, b = inst.RB, d = inst.RD;
-	gpr.FlushLockX(EDX);
-	gpr.Lock(a, b, d);
-	if (d != a && d != b) {
-		gpr.LoadToX64(d, false, true);
-	} else {
-		gpr.LoadToX64(d, true, true);
-	}
-	if (gpr.RX(d) == EDX)
-		PanicAlert("mulhwux : WTF");
-	MOV(32, R(EAX), gpr.R(a));
-	gpr.KillImmediate(b);
-	MUL(32, gpr.R(b));
-	gpr.UnlockAll();
-	gpr.UnlockAllX();
-	if (inst.Rc) {
-		MOV(32, R(EAX), R(EDX));
-		MOV(32, gpr.R(d), R(EDX));
-		// result is already in eax
-		CALL((u8*)asm_routines.computeRc);
-	} else {
-		MOV(32, gpr.R(d), R(EDX));
-	}
-#endif
+	// mulhwux: rD = upper 32 bits of the unsigned product rA * rB.
+	// The product is assembled from 16-bit halves so that no intermediate
+	// value needs more than 32 bits (assuming the IL integer ops are
+	// 32 bits wide - TODO confirm):
+	//   a = a1 * (1 << 16) + a0,  b = b1 * (1 << 16) + b0
+	//   a * b = hh * (1 << 32) + (hl + lh) * (1 << 16) + ll
+	// where
+	//   hh = a1 * b1,  hl = a1 * b0,  lh = a0 * b1,  ll = a0 * b0
+	// Each partial product is at most 0xFFFF * 0xFFFF and fits in 32 bits.
+	// The three-multiplication Karatsuba form is avoided on purpose:
+	// (a1 + a0) * (b1 + b0) and (hl + lh) can exceed 32 bits, which would
+	// drop carries, e.g. for rA = rB = 0xFFFFFFFF.
+	IREmitter::InstLoc a = ibuild.EmitLoadGReg(inst.RA);
+	IREmitter::InstLoc a0 = ibuild.EmitAnd(a, ibuild.EmitIntConst(0xFFFF));
+	IREmitter::InstLoc a1 = ibuild.EmitShrl(a, ibuild.EmitIntConst(16));
+	IREmitter::InstLoc b = ibuild.EmitLoadGReg(inst.RB);
+	IREmitter::InstLoc b0 = ibuild.EmitAnd(b, ibuild.EmitIntConst(0xFFFF));
+	IREmitter::InstLoc b1 = ibuild.EmitShrl(b, ibuild.EmitIntConst(16));
+
+	IREmitter::InstLoc hh = ibuild.EmitMul(a1, b1);
+	IREmitter::InstLoc hl = ibuild.EmitMul(a1, b0);
+	IREmitter::InstLoc lh = ibuild.EmitMul(a0, b1);
+	IREmitter::InstLoc ll = ibuild.EmitMul(a0, b0);
+
+	// Everything that lands in bits 16..31 of the product. The sum is at
+	// most 3 * 0xFFFF, so it cannot wrap; its upper half carries into hh.
+	IREmitter::InstLoc mid = ibuild.EmitShrl(ll, ibuild.EmitIntConst(16));
+	mid = ibuild.EmitAdd(mid, ibuild.EmitAnd(hl, ibuild.EmitIntConst(0xFFFF)));
+	mid = ibuild.EmitAdd(mid, ibuild.EmitAnd(lh, ibuild.EmitIntConst(0xFFFF)));
+
+	IREmitter::InstLoc d = ibuild.EmitAdd(hh, ibuild.EmitShrl(hl, ibuild.EmitIntConst(16)));
+	d = ibuild.EmitAdd(d, ibuild.EmitShrl(lh, ibuild.EmitIntConst(16)));
+	d = ibuild.EmitAdd(d, ibuild.EmitShrl(mid, ibuild.EmitIntConst(16)));
+
+	ibuild.EmitStoreGReg(d, inst.RD);
+	if (inst.Rc)
+		ComputeRC(ibuild, d);
 }
 
 // skipped some of the special handling in here - if we get crashes, let the interpreter handle this op
@@ -375,36 +385,31 @@ void JitIL::addzex(UGeckoInstruction inst)
 	if (inst.Rc)
 		ComputeRC(ibuild, val);
 }
-// This can be optimized
+
 void JitIL::addex(UGeckoInstruction inst)
 {
-	Default(inst); return;
-#if 0
-	// USES_XER
-	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITIntegerOff)
-		{Default(inst); return;} // turn off from debugger
+	INSTRUCTION_START
+	JITDISABLE(Integer)
 
-	INSTRUCTION_START;
-	int a = inst.RA, b = inst.RB, d = inst.RD;
-	gpr.FlushLockX(ECX);
-	gpr.Lock(a, b, d);
-	if (d != a && d != b)
-		gpr.LoadToX64(d, false);
-	else
-		gpr.LoadToX64(d, true);
-	MOV(32, R(EAX), M(&PowerPC::ppcState.spr[SPR_XER]));
-	SHR(32, R(EAX), Imm8(30)); // shift the carry flag out into the x86 carry flag
-	MOV(32, R(EAX), gpr.R(a));
-	ADC(32, R(EAX), gpr.R(b));
-	MOV(32, gpr.R(d), R(EAX));
-	//GenerateCarry(ECX);
-	gpr.UnlockAll();
-	gpr.UnlockAllX();
+	// rD = rA + rB + carry-in. The carry-out is the OR of the unsigned
+	// wrap-around checks of the two partial additions.
+	IREmitter::InstLoc a = ibuild.EmitLoadGReg(inst.RA);
+	IREmitter::InstLoc b = ibuild.EmitLoadGReg(inst.RB);
+
+	IREmitter::InstLoc ab = ibuild.EmitAdd(a, b);
+	IREmitter::InstLoc new_carry = ibuild.EmitICmpUlt(ab, a);
+
+	IREmitter::InstLoc previous_carry = ibuild.EmitLoadCarry();
+	IREmitter::InstLoc abc = ibuild.EmitAdd(ab, previous_carry);
+	new_carry = ibuild.EmitOr(new_carry, ibuild.EmitICmpUlt(abc, ab));
+
+	ibuild.EmitStoreGReg(abc, inst.RD);
+	ibuild.EmitStoreCarry(new_carry);
+
+	// The OE = 1 form is only reported; no overflow flag is computed here.
+	if (inst.OE) PanicAlert("OE: addex");
 	if (inst.Rc)
-	{
-		CALL((u8*)asm_routines.computeRc);
-	}
-#endif
+		ComputeRC(ibuild, abc);
 }
 
 void JitIL::rlwinmx(UGeckoInstruction inst)
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp
index 9356729d69..4e9f765773 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_SystemRegisters.cpp
@@ -128,63 +128,35 @@ void JitIL::mftb(UGeckoInstruction inst)
 
 void JitIL::mfcr(UGeckoInstruction inst)
 {
-	Default(inst); return;
-#if 0
-	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
-		{Default(inst); return;} // turn off from debugger
 	INSTRUCTION_START;
-	// USES_CR
-	int d = inst.RD;
-	gpr.LoadToX64(d, false, true);
-	MOV(8, R(EAX), M(&PowerPC::ppcState.cr_fast[0]));
-	SHL(32, R(EAX), Imm8(4));
-	for (int i = 1; i < 7; i++) {
-		OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[i]));
-		SHL(32, R(EAX), Imm8(4));
+	JITDISABLE(SystemRegisters)
+
+	// Concatenate the eight CR fields four bits apart; field 0 ends up in
+	// the most significant nibble of rD and field 7 in the least significant.
+	IREmitter::InstLoc d = ibuild.EmitIntConst(0);
+	for (int i = 0; i < 8; ++i)
+	{
+		d = ibuild.EmitShl(d, ibuild.EmitIntConst(4));
+		d = ibuild.EmitOr(d, ibuild.EmitLoadCR(i));
 	}
-	OR(8, R(EAX), M(&PowerPC::ppcState.cr_fast[7]));
-	MOV(32, gpr.R(d), R(EAX));
-#endif
+	ibuild.EmitStoreGReg(d, inst.RD);
 }
 
 void JitIL::mtcrf(UGeckoInstruction inst)
 {
-	Default(inst); return;
-#if 0
-	if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITSystemRegistersOff)
-		{Default(inst); return;} // turn off from debugger
 	INSTRUCTION_START;
+	JITDISABLE(SystemRegisters)
 
-	// USES_CR
-	u32 mask = 0;
-	u32 crm = inst.CRM;
-	if (crm == 0xFF) {
-		gpr.FlushLockX(ECX);
-		MOV(32, R(EAX), gpr.R(inst.RS));
-		for (int i = 0; i < 8; i++) {
-			MOV(32, R(ECX), R(EAX));
-			SHR(32, R(ECX), Imm8(28 - (i * 4)));
-			AND(32, R(ECX), Imm32(0xF));
-			MOV(8, M(&PowerPC::ppcState.cr_fast[i]), R(ECX));
+	// CRM selects which CR fields are written. Its most significant bit
+	// (0x80) belongs to field 0, whose value is the top nibble of rS.
+	IREmitter::InstLoc s = ibuild.EmitLoadGReg(inst.RS);
+	for (int i = 0; i < 8; ++i)
+	{
+		if (inst.CRM & (0x80 >> i))
+		{
+			ibuild.EmitStoreCR(ibuild.EmitAnd(ibuild.EmitShrl(s, ibuild.EmitIntConst(28 - 4 * i)), ibuild.EmitIntConst(0xf)), i);
 		}
-		gpr.UnlockAllX();
-	} else {
-		Default(inst);
-		return;
-
-		// TODO: translate this to work in new CR model.
-		for (int i = 0; i < 8; i++) {
-			if (crm & (1 << i))
-				mask |= 0xF << (i*4);
-		}
-		MOV(32, R(EAX), gpr.R(inst.RS));
-		MOV(32, R(ECX), M(&PowerPC::ppcState.cr));
-		AND(32, R(EAX), Imm32(mask));
-		AND(32, R(ECX), Imm32(~mask));
-		OR(32, R(EAX), R(ECX));
-		MOV(32, M(&PowerPC::ppcState.cr), R(EAX));
 	}
-#endif
 }
 
 void JitIL::crXX(UGeckoInstruction inst)
diff --git a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Tables.cpp b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Tables.cpp
index 6830f9da1d..9024080402 100644
--- a/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Tables.cpp
+++ b/Source/Core/Core/Src/PowerPC/Jit64IL/JitIL_Tables.cpp
@@ -186,7 +186,7 @@ static GekkoOPTemplate table19[] =
 	{193, &JitIL::crXX}, //"crxor", OPTYPE_CR, FL_EVIL}},
 
 	{150, &JitIL::DoNothing}, //"isync", OPTYPE_ICACHE, FL_EVIL}},
-	{0, &JitIL::Default}, //"mcrf", OPTYPE_SYSTEM, FL_EVIL}},
+	{0, &JitIL::Default}, //"mcrf", OPTYPE_SYSTEM, FL_EVIL}}, // stays on Default: mcrf is not mfcr and has no IL implementation here
 
 	{50, &JitIL::rfi}, //"rfi", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS, 1}},
 	{18, &JitIL::Default}, //"rfid", OPTYPE_SYSTEM, FL_ENDBLOCK | FL_CHECKEXCEPTIONS}}