A couple of fixes for the current JIT.
One is generalizing the stwux implementation to all of the stXx instructions (stwx, sthx, stbx and their update forms stwux, sthux, stbux); it's not a particularly significant improvement, but I had it in my tree, and it makes my WIP JIT a drop-in replacement for the current JIT. (Not that you'd really want to use it at the moment: the WIP JIT still has at least one significant bug I haven't tracked down, and it's a lot slower for anything that uses a significant amount of floating-point, which is basically anything except FMVs. That said, it should basically work as long as you're making a 32-bit build.) The other fix is a minor (but probably visible) performance improvement for 64-bit Linux: fixing the store JIT to work properly. I proposed the patch a couple of days ago, but apparently nobody tested it, so this way I can force everyone on 64-bit Linux to test it :) Please add the "NOTE: stb and stbu are broken under 64bit Linux" comment back in if this breaks anything. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1730 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
0367e7ee4d
commit
4286b210a5
|
@ -278,7 +278,7 @@ public:
|
||||||
|
|
||||||
void lwzux(UGeckoInstruction inst);
|
void lwzux(UGeckoInstruction inst);
|
||||||
|
|
||||||
void stwux(UGeckoInstruction inst);
|
void stXx(UGeckoInstruction inst);
|
||||||
|
|
||||||
void lmw(UGeckoInstruction inst);
|
void lmw(UGeckoInstruction inst);
|
||||||
void stmw(UGeckoInstruction inst);
|
void stmw(UGeckoInstruction inst);
|
||||||
|
|
|
@ -369,43 +369,33 @@
|
||||||
#endif*/
|
#endif*/
|
||||||
|
|
||||||
//Still here? Do regular path.
|
//Still here? Do regular path.
|
||||||
|
|
||||||
// NOTE: stb and stbu are broken under 64bit Linux
|
|
||||||
#ifndef _WIN32
|
|
||||||
#ifdef _M_X64
|
|
||||||
Default(inst);
|
|
||||||
return;
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
gpr.Lock(s, a);
|
gpr.Lock(s, a);
|
||||||
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
|
gpr.FlushLockX(ECX, EDX);
|
||||||
MOV(32, R(ABI_PARAM2), gpr.R(a));
|
MOV(32, R(EDX), gpr.R(a));
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(s));
|
MOV(32, R(ECX), gpr.R(s));
|
||||||
if (offset)
|
if (offset)
|
||||||
ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
|
ADD(32, R(EDX), Imm32((u32)offset));
|
||||||
if (update && offset)
|
if (update && offset)
|
||||||
{
|
{
|
||||||
gpr.LoadToX64(a, true, true);
|
gpr.LoadToX64(a, true, true);
|
||||||
MOV(32, gpr.R(a), R(ABI_PARAM2));
|
MOV(32, gpr.R(a), R(EDX));
|
||||||
}
|
}
|
||||||
TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
|
TEST(32, R(EDX), Imm32(0x0C000000));
|
||||||
FixupBranch unsafe_addr = J_CC(CC_NZ);
|
FixupBranch unsafe_addr = J_CC(CC_NZ);
|
||||||
BSWAP(accessSize, ABI_PARAM1);
|
BSWAP(accessSize, ECX);
|
||||||
#ifdef _M_X64
|
#ifdef _M_X64
|
||||||
// FIXME: On Linux x64, when accessSize == 8, R(ABI_PARAM1)
|
MOV(accessSize, MComplex(RBX, EDX, SCALE_1, 0), R(ECX));
|
||||||
// refers to BH when we want DIL!
|
|
||||||
MOV(accessSize, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
|
|
||||||
#else
|
#else
|
||||||
AND(32, R(ABI_PARAM2), Imm32(Memory::MEMVIEW32_MASK));
|
AND(32, R(EDX), Imm32(Memory::MEMVIEW32_MASK));
|
||||||
MOV(accessSize, MDisp(ABI_PARAM2, (u32)Memory::base), R(ABI_PARAM1));
|
MOV(accessSize, MDisp(EDX, (u32)Memory::base), R(ECX));
|
||||||
#endif
|
#endif
|
||||||
FixupBranch skip_call = J();
|
FixupBranch skip_call = J();
|
||||||
SetJumpTarget(unsafe_addr);
|
SetJumpTarget(unsafe_addr);
|
||||||
switch (accessSize)
|
switch (accessSize)
|
||||||
{
|
{
|
||||||
case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); break;
|
case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ECX, EDX); break;
|
||||||
case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ABI_PARAM1, ABI_PARAM2); break;
|
case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ECX, EDX); break;
|
||||||
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ABI_PARAM1, ABI_PARAM2); break;
|
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ECX, EDX); break;
|
||||||
}
|
}
|
||||||
SetJumpTarget(skip_call);
|
SetJumpTarget(skip_call);
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
|
@ -417,7 +407,7 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::stwux(UGeckoInstruction inst)
|
void Jit64::stXx(UGeckoInstruction inst)
|
||||||
{
|
{
|
||||||
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
|
||||||
{Default(inst); return;} // turn off from debugger
|
{Default(inst); return;} // turn off from debugger
|
||||||
|
@ -430,13 +420,25 @@
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
gpr.Lock(a, b, s);
|
gpr.Lock(a, b, s);
|
||||||
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
|
gpr.FlushLockX(ECX, EDX);
|
||||||
|
|
||||||
gpr.LoadToX64(a, true, true);
|
if (inst.SUBOP10 & 32) {
|
||||||
ADD(32, gpr.R(a), gpr.R(b));
|
gpr.LoadToX64(a, true, true);
|
||||||
MOV(32, R(ABI_PARAM2), gpr.R(a));
|
ADD(32, gpr.R(a), gpr.R(b));
|
||||||
MOV(32, R(ABI_PARAM1), gpr.R(s));
|
MOV(32, R(EDX), gpr.R(a));
|
||||||
SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0);
|
} else {
|
||||||
|
MOV(32, R(EDX), gpr.R(a));
|
||||||
|
ADD(32, R(EDX), gpr.R(b));
|
||||||
|
}
|
||||||
|
unsigned accessSize;
|
||||||
|
switch (inst.SUBOP10 & ~32) {
|
||||||
|
case 151: accessSize = 32; break;
|
||||||
|
case 407: accessSize = 16; break;
|
||||||
|
case 215: accessSize = 8; break;
|
||||||
|
}
|
||||||
|
|
||||||
|
MOV(32, R(ECX), gpr.R(s));
|
||||||
|
SafeWriteRegToReg(ECX, EDX, accessSize, 0);
|
||||||
|
|
||||||
gpr.UnlockAll();
|
gpr.UnlockAll();
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAllX();
|
||||||
|
|
|
@ -338,16 +338,16 @@ static GekkoOPTemplate table31[] =
|
||||||
{597, Interpreter::lswi, &Jit64::Default, {"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}},
|
{597, Interpreter::lswi, &Jit64::Default, {"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}},
|
||||||
|
|
||||||
//store word
|
//store word
|
||||||
{151, Interpreter::stwx, &Jit64::Default, {"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
|
{151, Interpreter::stwx, &Jit64::stXx, {"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
|
||||||
{183, Interpreter::stwux, &Jit64::stwux, {"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
|
{183, Interpreter::stwux, &Jit64::stXx, {"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
|
||||||
|
|
||||||
//store halfword
|
//store halfword
|
||||||
{407, Interpreter::sthx, &Jit64::Default, {"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
|
{407, Interpreter::sthx, &Jit64::stXx, {"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
|
||||||
{439, Interpreter::sthux, &Jit64::Default, {"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
|
{439, Interpreter::sthux, &Jit64::stXx, {"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
|
||||||
|
|
||||||
//store byte
|
//store byte
|
||||||
{215, Interpreter::stbx, &Jit64::Default, {"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
|
{215, Interpreter::stbx, &Jit64::stXx, {"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
|
||||||
{247, Interpreter::stbux, &Jit64::Default, {"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
|
{247, Interpreter::stbux, &Jit64::stXx, {"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
|
||||||
|
|
||||||
//store bytereverse
|
//store bytereverse
|
||||||
{662, Interpreter::stwbrx, &Jit64::Default, {"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
|
{662, Interpreter::stwbrx, &Jit64::Default, {"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
|
||||||
|
|
Loading…
Reference in New Issue