JIT: support byte-reversed stores/loads

Four more instructions (lwbrx, lhbrx, stwbrx, sthbrx) no longer fall back to the interpreter.

The store instructions should be pretty well-tested; the load instructions seem to
almost never be used. I found them in Turok Evolution, so I was able to check code
generation, but the relevant code didn't seem to be called, so the load path has not been exercised at runtime.
This commit is contained in:
Fiora 2014-09-18 20:38:44 -07:00
parent a8abbdae85
commit d96016ed21
3 changed files with 27 additions and 9 deletions

View File

@ -222,8 +222,8 @@ static GekkoOPTemplate table31[] =
{119, &Jit64::lXXx}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}}, {119, &Jit64::lXXx}, //"lbzux", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A | FL_IN_B}},
//load byte reverse //load byte reverse
{534, &Jit64::FallBackToInterpreter}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {534, &Jit64::lXXx}, //"lwbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
{790, &Jit64::FallBackToInterpreter}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}}, {790, &Jit64::lXXx}, //"lhbrx", OPTYPE_LOAD, FL_OUT_D | FL_IN_A0 | FL_IN_B}},
// Conditional load/store (Wii SMP) // Conditional load/store (Wii SMP)
{150, &Jit64::FallBackToInterpreter}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}}, {150, &Jit64::FallBackToInterpreter}, //"stwcxd", OPTYPE_STORE, FL_EVIL | FL_SET_CR0}},
@ -246,8 +246,8 @@ static GekkoOPTemplate table31[] =
{247, &Jit64::stXx}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}}, {247, &Jit64::stXx}, //"stbux", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_B}},
//store bytereverse //store bytereverse
{662, &Jit64::FallBackToInterpreter}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}}, {662, &Jit64::stXx}, //"stwbrx", OPTYPE_STORE, FL_IN_A0 | FL_IN_B}},
{918, &Jit64::FallBackToInterpreter}, //"sthbrx", OPTYPE_STORE, FL_IN_A | FL_IN_B}}, {918, &Jit64::stXx}, //"sthbrx", OPTYPE_STORE, FL_IN_A | FL_IN_B}},
{661, &Jit64::FallBackToInterpreter}, //"stswx", OPTYPE_STORE, FL_EVIL}}, {661, &Jit64::FallBackToInterpreter}, //"stswx", OPTYPE_STORE, FL_EVIL}},
{725, &Jit64::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}}, {725, &Jit64::FallBackToInterpreter}, //"stswi", OPTYPE_STORE, FL_EVIL}},

View File

@ -28,6 +28,7 @@ void Jit64::lXXx(UGeckoInstruction inst)
// Determine memory access size and sign extend // Determine memory access size and sign extend
int accessSize = 0; int accessSize = 0;
bool signExtend = false; bool signExtend = false;
bool byte_reversed = false;
switch (inst.OPCD) switch (inst.OPCD)
{ {
case 32: // lwz case 32: // lwz
@ -57,6 +58,8 @@ void Jit64::lXXx(UGeckoInstruction inst)
case 31: case 31:
switch (inst.SUBOP10) switch (inst.SUBOP10)
{ {
case 534: // lwbrx
byte_reversed = true;
case 23: // lwzx case 23: // lwzx
case 55: // lwzux case 55: // lwzux
accessSize = 32; accessSize = 32;
@ -68,6 +71,8 @@ void Jit64::lXXx(UGeckoInstruction inst)
accessSize = 8; accessSize = 8;
signExtend = false; signExtend = false;
break; break;
case 790: // lhbrx
byte_reversed = true;
case 279: // lhzx case 279: // lhzx
case 311: // lhzux case 311: // lhzux
accessSize = 16; accessSize = 16;
@ -244,6 +249,14 @@ void Jit64::lXXx(UGeckoInstruction inst)
MEMCHECK_END MEMCHECK_END
} }
// TODO: support no-swap in SafeLoadToReg instead
if (byte_reversed)
{
MEMCHECK_START(false)
BSWAP(accessSize, gpr.RX(d));
MEMCHECK_END
}
gpr.UnlockAll(); gpr.UnlockAll();
gpr.UnlockAllX(); gpr.UnlockAllX();
} }
@ -442,8 +455,9 @@ void Jit64::stXx(UGeckoInstruction inst)
JITDISABLE(bJITLoadStoreOff); JITDISABLE(bJITLoadStoreOff);
int a = inst.RA, b = inst.RB, s = inst.RS; int a = inst.RA, b = inst.RB, s = inst.RS;
FALLBACK_IF(!a || a == s || a == b);
bool update = !!(inst.SUBOP10 & 32); bool update = !!(inst.SUBOP10 & 32);
bool byte_reverse = !!(inst.SUBOP10 & 512);
FALLBACK_IF(!a || (update && a == s) || (update && js.memcheck && a == b));
gpr.Lock(a, b, s); gpr.Lock(a, b, s);
@ -467,9 +481,11 @@ void Jit64::stXx(UGeckoInstruction inst)
switch (inst.SUBOP10 & ~32) switch (inst.SUBOP10 & ~32)
{ {
case 151: case 151:
case 662:
accessSize = 32; accessSize = 32;
break; break;
case 407: case 407:
case 918:
accessSize = 16; accessSize = 16;
break; break;
case 215: case 215:
@ -483,12 +499,12 @@ void Jit64::stXx(UGeckoInstruction inst)
if (gpr.R(s).IsImm()) if (gpr.R(s).IsImm())
{ {
SafeWriteRegToReg(gpr.R(s), RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse()); SafeWriteRegToReg(gpr.R(s), RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse(), byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
} }
else else
{ {
X64Reg reg_value; X64Reg reg_value;
if (WriteClobbersRegValue(accessSize, /* swap */ true)) if (WriteClobbersRegValue(accessSize, /* swap */ !byte_reverse))
{ {
MOV(32, R(RSCRATCH), gpr.R(s)); MOV(32, R(RSCRATCH), gpr.R(s));
reg_value = RSCRATCH; reg_value = RSCRATCH;
@ -498,7 +514,7 @@ void Jit64::stXx(UGeckoInstruction inst)
gpr.BindToRegister(s, true, false); gpr.BindToRegister(s, true, false);
reg_value = gpr.RX(s); reg_value = gpr.RX(s);
} }
SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse()); SafeWriteRegToReg(reg_value, RSCRATCH2, accessSize, 0, CallerSavedRegistersInUse(), byte_reverse ? SAFE_LOADSTORE_NO_SWAP : 0);
} }
if (update && js.memcheck) if (update && js.memcheck)

View File

@ -479,9 +479,11 @@ void EmuCodeBlock::SafeWriteRegToReg(OpArg reg_value, X64Reg reg_addr, int acces
Imm8((u8)reg_value.offset); Imm8((u8)reg_value.offset);
} }
// TODO: support byte-swapped non-immediate fastmem stores
if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU && if (!SConfig::GetInstance().m_LocalCoreStartupParameter.bMMU &&
SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem && SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem &&
!(flags & (SAFE_LOADSTORE_NO_SWAP | SAFE_LOADSTORE_NO_FASTMEM)) !(flags & SAFE_LOADSTORE_NO_FASTMEM) &&
(reg_value.IsImm() || !(flags & SAFE_LOADSTORE_NO_SWAP))
#ifdef ENABLE_MEM_CHECK #ifdef ENABLE_MEM_CHECK
&& !SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging && !SConfig::GetInstance().m_LocalCoreStartupParameter.bEnableDebugging
#endif #endif