A couple of fixes for the current JIT.

One is generalizing the stwux implementation to all of stXx (stwx, sthx,
stbx and their update forms); not a particularly significant improvement,
but I had it in my tree, and it makes my WIP JIT a drop-in replacement
for the current JIT. (Not that you'd really want to switch at the
moment... The WIP JIT still has at least one significant bug I haven't
tracked down, and it's a lot slower for anything using a significant
amount of floating-point, which is basically anything except FMVs.  That
said, it should basically work as long as you're making a 32-bit build.)

The other fix is a minor (but probably visible) performance improvement
for 64-bit Linux: fixing the store JIT to work properly, so the regular
store path no longer bails out to Default() there.  I proposed the patch
a couple of days ago, but apparently nobody tested it... this way, I can
force everyone on 64-bit Linux to test it :)  Please add the
"NOTE: stb and stbu are broken under 64bit Linux" bit back in if this
breaks anything.



git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1730 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
magumagu9 2009-01-01 13:23:08 +00:00
parent 0367e7ee4d
commit 4286b210a5
3 changed files with 39 additions and 37 deletions

View File

@ -278,7 +278,7 @@ public:
void lwzux(UGeckoInstruction inst);
void stwux(UGeckoInstruction inst);
void stXx(UGeckoInstruction inst);
void lmw(UGeckoInstruction inst);
void stmw(UGeckoInstruction inst);

View File

@ -369,43 +369,33 @@
#endif*/
//Still here? Do regular path.
// NOTE: stb and stbu are broken under 64bit Linux
#ifndef _WIN32
#ifdef _M_X64
Default(inst);
return;
#endif
#endif
gpr.Lock(s, a);
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
MOV(32, R(ABI_PARAM2), gpr.R(a));
MOV(32, R(ABI_PARAM1), gpr.R(s));
gpr.FlushLockX(ECX, EDX);
MOV(32, R(EDX), gpr.R(a));
MOV(32, R(ECX), gpr.R(s));
if (offset)
ADD(32, R(ABI_PARAM2), Imm32((u32)offset));
ADD(32, R(EDX), Imm32((u32)offset));
if (update && offset)
{
gpr.LoadToX64(a, true, true);
MOV(32, gpr.R(a), R(ABI_PARAM2));
MOV(32, gpr.R(a), R(EDX));
}
TEST(32, R(ABI_PARAM2), Imm32(0x0C000000));
TEST(32, R(EDX), Imm32(0x0C000000));
FixupBranch unsafe_addr = J_CC(CC_NZ);
BSWAP(accessSize, ABI_PARAM1);
BSWAP(accessSize, ECX);
#ifdef _M_X64
// FIXME: On Linux x64, when accessSize == 8, R(ABI_PARAM1)
// refers to BH when we want DIL!
MOV(accessSize, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1));
MOV(accessSize, MComplex(RBX, EDX, SCALE_1, 0), R(ECX));
#else
AND(32, R(ABI_PARAM2), Imm32(Memory::MEMVIEW32_MASK));
MOV(accessSize, MDisp(ABI_PARAM2, (u32)Memory::base), R(ABI_PARAM1));
AND(32, R(EDX), Imm32(Memory::MEMVIEW32_MASK));
MOV(accessSize, MDisp(EDX, (u32)Memory::base), R(ECX));
#endif
FixupBranch skip_call = J();
SetJumpTarget(unsafe_addr);
switch (accessSize)
{
case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); break;
case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ABI_PARAM1, ABI_PARAM2); break;
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ABI_PARAM1, ABI_PARAM2); break;
case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ECX, EDX); break;
case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ECX, EDX); break;
case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ECX, EDX); break;
}
SetJumpTarget(skip_call);
gpr.UnlockAll();
@ -417,7 +407,7 @@
}
}
void Jit64::stwux(UGeckoInstruction inst)
void Jit64::stXx(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff)
{Default(inst); return;} // turn off from debugger
@ -430,13 +420,25 @@
return;
}
gpr.Lock(a, b, s);
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
gpr.FlushLockX(ECX, EDX);
gpr.LoadToX64(a, true, true);
ADD(32, gpr.R(a), gpr.R(b));
MOV(32, R(ABI_PARAM2), gpr.R(a));
MOV(32, R(ABI_PARAM1), gpr.R(s));
SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0);
if (inst.SUBOP10 & 32) {
gpr.LoadToX64(a, true, true);
ADD(32, gpr.R(a), gpr.R(b));
MOV(32, R(EDX), gpr.R(a));
} else {
MOV(32, R(EDX), gpr.R(a));
ADD(32, R(EDX), gpr.R(b));
}
unsigned accessSize;
switch (inst.SUBOP10 & ~32) {
case 151: accessSize = 32; break;
case 407: accessSize = 16; break;
case 215: accessSize = 8; break;
}
MOV(32, R(ECX), gpr.R(s));
SafeWriteRegToReg(ECX, EDX, accessSize, 0);
gpr.UnlockAll();
gpr.UnlockAllX();

View File

@ -338,16 +338,16 @@ static GekkoOPTemplate table31[] =
{597, Interpreter::lswi, &Jit64::Default, {"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}},
//store word
{151, Interpreter::stwx, &Jit64::Default, {"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{183, Interpreter::stwux, &Jit64::stwux, {"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
{151, Interpreter::stwx, &Jit64::stXx, {"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{183, Interpreter::stwux, &Jit64::stXx, {"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
//store halfword
{407, Interpreter::sthx, &Jit64::Default, {"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{439, Interpreter::sthux, &Jit64::Default, {"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
{407, Interpreter::sthx, &Jit64::stXx, {"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{439, Interpreter::sthux, &Jit64::stXx, {"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
//store byte
{215, Interpreter::stbx, &Jit64::Default, {"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{247, Interpreter::stbux, &Jit64::Default, {"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
{215, Interpreter::stbx, &Jit64::stXx, {"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{247, Interpreter::stbux, &Jit64::stXx, {"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
//store bytereverse
{662, Interpreter::stwbrx, &Jit64::Default, {"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},