From d96016ed21f2a34793e3373a71cb377630f24cd9 Mon Sep 17 00:00:00 2001 From: Fiora Date: Thu, 18 Sep 2014 20:38:44 -0700 Subject: [PATCH] JIT: support byte-reversed stores/loads 4 more instructions down. Store ones should be pretty well-tested; load ones seem to almost never be used. I found them in Turok Evolution, so I was able to check code generation, but the relevant code didn't seem to be called. --- .../Core/Core/PowerPC/Jit64/Jit64_Tables.cpp | 8 +++---- .../Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp | 24 +++++++++++++++---- .../Core/Core/PowerPC/JitCommon/Jit_Util.cpp | 4 +++- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp index 0007915089..0c8285c46f 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit64_Tables.cpp @@ -222,8 +222,8 @@ static GekkoOPTemplate table31[] = {119, &Jit64::lXXx}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, //load byte reverse - {534, &Jit64::FallBackToInterpreter}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, - {790, &Jit64::FallBackToInterpreter}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {534, &Jit64::lXXx}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, + {790, &Jit64::lXXx}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, // Conditional load/store (Wii SMP) {150, &Jit64::FallBackToInterpreter}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}}, @@ -246,8 +246,8 @@ static GekkoOPTemplate table31[] = {247, &Jit64::stXx}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, //store bytereverse - {662, &Jit64::FallBackToInterpreter}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, - {918, &Jit64::FallBackToInterpreter}, //"sthbrx", OPTYPE_STORE, FL_IN_A | FL_IN_B}}, + {662, &Jit64::stXx}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, + {918, &Jit64::stXx}, //"sthbrx", OPTYPE_STORE, FL_IN_A | FL_IN_B}}, {661, &Jit64::FallBackToInterpreter}, //"stswx", OPTYPE_STORE, FL_EVIL}}, {725, &Jit64::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}}, diff --git a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp index 7e01afb1de..e5ea85af01 100644 --- a/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp +++ b/Source/Core/Core/PowerPC/Jit64/Jit_LoadStore.cpp @@ -28,6 +28,7 @@ void Jit64::lXXx(UGeckoInstruction inst) // Determine memory access size and sign extend int accessSize = 0; bool signExtend = false; + bool byte_reversed = false; switch (inst.OPCD) { case 32: // lwz @@ -57,6 +58,8 @@ void Jit64::lXXx(UGeckoInstruction inst) case 31: switch (inst.SUBOP10) { + case 534: // lwbrx + byte_reversed = true; case 23: // lwzx case 55: // lwzux accessSize = 32; @@ -68,6 +71,8 @@ void Jit64::lXXx(UGeckoInstruction inst) accessSize = 8; signExtend = false; break; + case 790: // lhbrx + byte_reversed = true; case 279: // lhzx case 311: // lhzux accessSize = 16; @@ -244,6 +249,14 @@ void Jit64::lXXx(UGeckoInstruction inst) MEMCHECK_END } + // TODO: support no-swap in SafeLoadToReg instead + if (byte_reversed) + { + MEMCHECK_START(false) + BSWAP(accessSize, gpr.RX(d)); + MEMCHECK_END + } + gpr.UnlockAll(); gpr.UnlockAllX(); } @@ -442,8 +455,9 @@ void Jit64::stXx(UGeckoInstruction inst) JITDISABLE(bJITLoadStoreOff); int a = inst.RA, b = inst.RB, s = inst.RS; - FALLBACK_IF(!a || a == s || a == b); bool update = !!(inst.SUBOP10 & 32); + bool byte_reverse = !!(inst.SUBOP10 & 512); + FALLBACK_IF(!a || (update && a == s) || (update && js.memcheck && a == b)); gpr.Lock(a, b, s); @@ -467,9 +481,11 @@ void Jit64::stXx(UGeckoInstruction inst) switch (inst.SUBOP10 & ~32) { case 151: + case 662: accessSize = 32; break; case 407: + case 918: accessSize = 16; break; case 215: @@ -483,12 +499,12 @@ void Jit64::stXx(UGeckoInstruction inst) if (gpr.R(s).IsImm()) { - SafeWriteRegToReg(gpr.R(s), RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse()); + SafeWriteRegToReg(gpr.R(s), RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse(), byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0); } else { X64Reg reg_value; - if (WriteClobbersRegValue(accessSize, /* swap */ true)) + if (WriteClobbersRegValue(accessSize, /* swap */ !byte_reverse)) { MOV(32, R(RSCRATCH), gpr.R(s)); reg_value = RSCRATCH; @@ -498,7 +514,7 @@ void Jit64::stXx(UGeckoInstruction inst) gpr.BindToRegister(s, true, false); reg_value = gpr.RX(s); } - SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse()); + SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse(), byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0); } if (update && js.memcheck) diff --git a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp index f2313579fe..30bbfee806 100644 --- a/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp +++ b/Source/Core/Core/PowerPC/JitCommon/Jit_Util.cpp @@ -479,9 +479,11 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces Imm8((u8)reg_value.offset); } + // TODO: support byte-swapped non-immediate fastmem stores if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU && SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem && - !(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM)) + !(flags & SAFE_LOADSTORE_NO_FASTMEM) && + (reg_value.IsImm() || !(flags & SAFE_LOADSTORE_NO_SWAP)) #ifdef ENABLE_MEM_CHECK && !SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging #endif