From 4286b210a5e74d2eca1089c531d7505f5bedbed7 Mon Sep 17 00:00:00 2001 From: magumagu9 Date: Thu, 1 Jan 2009 13:23:08 +0000 Subject: [PATCH] A couple of fixes for the current JIT. One is generalizing the stwux implementation to all of the stXx indexed stores (stwx, sthx, stbx and their update forms stwux, sthux, stbux); not a particularly significant improvement, but I had it in my tree, and it makes my WIP JIT a drop-in replacement for the current JIT. (Not that you'd really want to use the WIP JIT at the moment: it still has at least one significant bug I haven't tracked down, and it's a lot slower for anything using a significant amount of floating-point, which is basically anything except FMVs. That said, it should basically work as long as you're making a 32-bit build.) The other fix is a minor (but probably visible) performance improvement for 64-bit Linux: fixing the store JIT to work properly there (the store path now uses ECX/EDX instead of ABI_PARAM1/ABI_PARAM2, and the non-Windows x64 fallback to Default() is removed). I proposed the patch a couple of days ago, but apparently nobody tested it. This way, I can force everyone on 64-bit Linux to test it :) Please add the "NOTE: stb and stbu are broken under 64bit Linux" bit back in if this breaks anything. 
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@1730 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/Core/Src/PowerPC/Jit64/Jit.h | 2 +- .../Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp | 62 ++++++++++--------- Source/Core/Core/Src/PowerPC/PPCTables.cpp | 12 ++-- 3 files changed, 39 insertions(+), 37 deletions(-) diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h index 8e3f637904..c9a390a9f3 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit.h +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit.h @@ -278,7 +278,7 @@ public: void lwzux(UGeckoInstruction inst); - void stwux(UGeckoInstruction inst); + void stXx(UGeckoInstruction inst); void lmw(UGeckoInstruction inst); void stmw(UGeckoInstruction inst); diff --git a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp index cc74cd53fe..27e52d8bf4 100644 --- a/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/Src/PowerPC/Jit64/Jit_LoadStore.cpp @@ -369,43 +369,33 @@ #endif*/ //Still here? Do regular path. - - // NOTE: stb and stbu are broken under 64bit Linux - #ifndef _WIN32 - #ifdef _M_X64 - Default(inst); - return; - #endif - #endif gpr.Lock(s, a); - gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); - MOV(32, R(ABI_PARAM2), gpr.R(a)); - MOV(32, R(ABI_PARAM1), gpr.R(s)); + gpr.FlushLockX(ECX, EDX); + MOV(32, R(EDX), gpr.R(a)); + MOV(32, R(ECX), gpr.R(s)); if (offset) - ADD(32, R(ABI_PARAM2), Imm32((u32)offset)); + ADD(32, R(EDX), Imm32((u32)offset)); if (update && offset) { gpr.LoadToX64(a, true, true); - MOV(32, gpr.R(a), R(ABI_PARAM2)); + MOV(32, gpr.R(a), R(EDX)); } - TEST(32, R(ABI_PARAM2), Imm32(0x0C000000)); + TEST(32, R(EDX), Imm32(0x0C000000)); FixupBranch unsafe_addr = J_CC(CC_NZ); - BSWAP(accessSize, ABI_PARAM1); + BSWAP(accessSize, ECX); #ifdef _M_X64 - // FIXME: On Linux x64, when accessSize == 8, R(ABI_PARAM1) - // refers to BH when we want DIL! 
- MOV(accessSize, MComplex(RBX, ABI_PARAM2, SCALE_1, 0), R(ABI_PARAM1)); + MOV(accessSize, MComplex(RBX, EDX, SCALE_1, 0), R(ECX)); #else - AND(32, R(ABI_PARAM2), Imm32(Memory::MEMVIEW32_MASK)); - MOV(accessSize, MDisp(ABI_PARAM2, (u32)Memory::base), R(ABI_PARAM1)); + AND(32, R(EDX), Imm32(Memory::MEMVIEW32_MASK)); + MOV(accessSize, MDisp(EDX, (u32)Memory::base), R(ECX)); #endif FixupBranch skip_call = J(); SetJumpTarget(unsafe_addr); switch (accessSize) { - case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ABI_PARAM1, ABI_PARAM2); break; - case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ABI_PARAM1, ABI_PARAM2); break; - case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ABI_PARAM1, ABI_PARAM2); break; + case 32: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U32, 2), ECX, EDX); break; + case 16: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U16, 2), ECX, EDX); break; + case 8: ABI_CallFunctionRR(thunks.ProtectFunction((void *)&Memory::Write_U8, 2), ECX, EDX); break; } SetJumpTarget(skip_call); gpr.UnlockAll(); @@ -417,7 +407,7 @@ } } - void Jit64::stwux(UGeckoInstruction inst) + void Jit64::stXx(UGeckoInstruction inst) { if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITLoadStoreOff) {Default(inst); return;} // turn off from debugger @@ -430,13 +420,25 @@ return; } gpr.Lock(a, b, s); - gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2); + gpr.FlushLockX(ECX, EDX); - gpr.LoadToX64(a, true, true); - ADD(32, gpr.R(a), gpr.R(b)); - MOV(32, R(ABI_PARAM2), gpr.R(a)); - MOV(32, R(ABI_PARAM1), gpr.R(s)); - SafeWriteRegToReg(ABI_PARAM1, ABI_PARAM2, 32, 0); + if (inst.SUBOP10 & 32) { + gpr.LoadToX64(a, true, true); + ADD(32, gpr.R(a), gpr.R(b)); + MOV(32, R(EDX), gpr.R(a)); + } else { + MOV(32, R(EDX), gpr.R(a)); + ADD(32, R(EDX), gpr.R(b)); + } + unsigned accessSize; + switch (inst.SUBOP10 & ~32) { + case 151: accessSize = 32; 
break; + case 407: accessSize = 16; break; + case 215: accessSize = 8; break; + } + + MOV(32, R(ECX), gpr.R(s)); + SafeWriteRegToReg(ECX, EDX, accessSize, 0); gpr.UnlockAll(); gpr.UnlockAllX(); diff --git a/Source/Core/Core/Src/PowerPC/PPCTables.cpp b/Source/Core/Core/Src/PowerPC/PPCTables.cpp index 1e6887c191..72bcdc10d4 100644 --- a/Source/Core/Core/Src/PowerPC/PPCTables.cpp +++ b/Source/Core/Core/Src/PowerPC/PPCTables.cpp @@ -338,16 +338,16 @@ static GekkoOPTemplate table31[] = {597, Interpreter::lswi, &Jit64::Default, {"lswi", OPTYPE_LOAD, FL_EVIL | FL_IN_AB | FL_OUT_D}}, //store word - {151, Interpreter::stwx, &Jit64::Default, {"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {183, Interpreter::stwux, &Jit64::stwux, {"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + {151, Interpreter::stwx, &Jit64::stXx, {"stwx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {183, Interpreter::stwux, &Jit64::stXx, {"stwux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, //store halfword - {407, Interpreter::sthx, &Jit64::Default, {"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {439, Interpreter::sthux, &Jit64::Default, {"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + {407, Interpreter::sthx, &Jit64::stXx, {"sthx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {439, Interpreter::sthux, &Jit64::stXx, {"sthux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, //store byte - {215, Interpreter::stbx, &Jit64::Default, {"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {247, Interpreter::stbux, &Jit64::Default, {"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, + {215, Interpreter::stbx, &Jit64::stXx, {"stbx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {247, Interpreter::stbux, &Jit64::stXx, {"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, //store bytereverse {662, Interpreter::stwbrx, &Jit64::Default, {"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},