Merge pull request #443 from magumagu/loadstore-cleanup
Loadstore cleanup
This commit is contained in:
commit
bd377b9580
|
@ -1286,9 +1286,7 @@ void XEmitter::MOVQ_xmm(X64Reg dest, OpArg arg) {
|
|||
}
|
||||
|
||||
void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) {
|
||||
if (arg.IsSimpleReg())
|
||||
PanicAlert("Emitter: MOVQ_xmm doesn't support single registers as destination");
|
||||
if (src > 7)
|
||||
if (src > 7 || arg.IsSimpleReg())
|
||||
{
|
||||
// Alternate encoding
|
||||
// This does not display correctly in MSVC's debugger, it thinks it's a MOVD
|
||||
|
|
|
@ -88,7 +88,7 @@ static GekkoOPTemplate primarytable[] =
|
|||
{51, &Jit64::FallBackToInterpreter}, //"lfdu", OPTYPE_LOADFP, FL_OUT_A | FL_IN_A}},
|
||||
|
||||
{52, &Jit64::stfs}, //"stfs", OPTYPE_STOREFP, FL_IN_A}},
|
||||
{53, &Jit64::stfs}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
|
||||
{53, &Jit64::FallBackToInterpreter}, //"stfsu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
|
||||
{54, &Jit64::stfd}, //"stfd", OPTYPE_STOREFP, FL_IN_A}},
|
||||
{55, &Jit64::FallBackToInterpreter}, //"stfdu", OPTYPE_STOREFP, FL_OUT_A | FL_IN_A}},
|
||||
|
||||
|
|
|
@ -314,38 +314,6 @@ void Jit64::stX(UGeckoInstruction inst)
|
|||
}
|
||||
}
|
||||
|
||||
// Optimized stack access?
|
||||
if (accessSize == 32 && !gpr.R(a).IsImm() && a == 1 && js.st.isFirstBlockOfFunction && jo.optimizeStack)
|
||||
{
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
SwapAndStore(accessSize, MComplex(RBX, ABI_PARAM1, SCALE_1, (u32)offset), EAX);
|
||||
if (update && offset)
|
||||
{
|
||||
gpr.Lock(a);
|
||||
gpr.KillImmediate(a, true, true);
|
||||
ADD(32, gpr.R(a), Imm32(offset));
|
||||
gpr.UnlockAll();
|
||||
}
|
||||
gpr.UnlockAllX();
|
||||
return;
|
||||
}
|
||||
|
||||
/* // TODO - figure out why Beyond Good and Evil hates this
|
||||
#if defined(_WIN32) && _M_X86_64
|
||||
if (accessSize == 32 && !update)
|
||||
{
|
||||
// Fast and daring - requires 64-bit
|
||||
MOV(32, R(EAX), gpr.R(s));
|
||||
gpr.BindToRegister(a, true, false);
|
||||
SwapAndStore(32, MComplex(RBX, gpr.RX(a), SCALE_1, (u32)offset), EAX);
|
||||
return;
|
||||
}
|
||||
#endif*/
|
||||
|
||||
//Still here? Do regular path.
|
||||
|
||||
gpr.FlushLockX(ECX, EDX);
|
||||
gpr.Lock(s, a);
|
||||
MOV(32, R(EDX), gpr.R(a));
|
||||
|
@ -415,15 +383,16 @@ void Jit64::lmw(UGeckoInstruction inst)
|
|||
INSTRUCTION_START
|
||||
JITDISABLE(bJITLoadStoreOff);
|
||||
|
||||
// TODO: This doesn't handle rollback on DSI correctly
|
||||
gpr.FlushLockX(ECX);
|
||||
MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16));
|
||||
MOV(32, R(ECX), Imm32((u32)(s32)inst.SIMM_16));
|
||||
if (inst.RA)
|
||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||
ADD(32, R(ECX), gpr.R(inst.RA));
|
||||
for (int i = inst.RD; i < 32; i++)
|
||||
{
|
||||
LoadAndSwap(32, ECX, MComplex(EBX, EAX, SCALE_1, (i - inst.RD) * 4));
|
||||
SafeLoadToReg(EAX, R(ECX), 32, (i - inst.RD) * 4, RegistersInUse(), false);
|
||||
gpr.BindToRegister(i, false, true);
|
||||
MOV(32, gpr.R(i), R(ECX));
|
||||
MOV(32, gpr.R(i), R(EAX));
|
||||
}
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
@ -433,14 +402,16 @@ void Jit64::stmw(UGeckoInstruction inst)
|
|||
INSTRUCTION_START
|
||||
JITDISABLE(bJITLoadStoreOff);
|
||||
|
||||
// TODO: This doesn't handle rollback on DSI correctly
|
||||
gpr.FlushLockX(ECX);
|
||||
MOV(32, R(EAX), Imm32((u32)(s32)inst.SIMM_16));
|
||||
if (inst.RA)
|
||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||
for (int i = inst.RD; i < 32; i++)
|
||||
{
|
||||
if (inst.RA)
|
||||
MOV(32, R(EAX), gpr.R(inst.RA));
|
||||
else
|
||||
XOR(32, R(EAX), R(EAX));
|
||||
MOV(32, R(ECX), gpr.R(i));
|
||||
SwapAndStore(32, MComplex(EBX, EAX, SCALE_1, (i - inst.RD) * 4), ECX);
|
||||
SafeWriteRegToReg(ECX, EAX, 32, (i - inst.RD) * 4 + (u32)(s32)inst.SIMM_16, RegistersInUse());
|
||||
}
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
|
|
@ -2,9 +2,6 @@
|
|||
// Licensed under GPLv2
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
// TODO(ector): Tons of pshufb optimization of the loads/stores, for SSSE3+, possibly SSE4, only.
|
||||
// Should give a very noticeable speed boost to paired single heavy code.
|
||||
|
||||
#include "Common/Common.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
|
||||
|
@ -12,20 +9,8 @@
|
|||
#include "Core/PowerPC/Jit64/JitAsm.h"
|
||||
#include "Core/PowerPC/Jit64/JitRegCache.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// pshufb todo: MOVQ
|
||||
const u8 GC_ALIGNED16(bswapShuffle1x4[16]) = {3, 2, 1, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
const u8 GC_ALIGNED16(bswapShuffle1x8[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 8, 9, 10, 11, 12, 13, 14, 15};
|
||||
const u8 GC_ALIGNED16(bswapShuffle1x8Dupe[16]) = {7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0};
|
||||
|
||||
u64 GC_ALIGNED16(temp64);
|
||||
|
||||
}
|
||||
|
||||
// TODO: Add peephole optimizations for multiple consecutive lfd/lfs/stfd/stfs since they are so common,
|
||||
// and pshufb could help a lot.
|
||||
// Also add hacks for things like lfs/stfs the same reg consecutively, that is, simple memory moves.
|
||||
|
||||
void Jit64::lfs(UGeckoInstruction inst)
|
||||
{
|
||||
|
@ -40,12 +25,11 @@ void Jit64::lfs(UGeckoInstruction inst)
|
|||
|
||||
SafeLoadToReg(EAX, gpr.R(a), 32, offset, RegistersInUse(), false);
|
||||
|
||||
MEMCHECK_START
|
||||
|
||||
fpr.Lock(d);
|
||||
fpr.BindToRegister(d, false);
|
||||
ConvertSingleToDouble(fpr.RX(d), EAX, true);
|
||||
fpr.BindToRegister(d, js.memcheck);
|
||||
|
||||
MEMCHECK_START
|
||||
ConvertSingleToDouble(fpr.RX(d), EAX, true);
|
||||
MEMCHECK_END
|
||||
|
||||
fpr.UnlockAll();
|
||||
|
@ -56,61 +40,23 @@ void Jit64::lfd(UGeckoInstruction inst)
|
|||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITLoadStoreFloatingOff);
|
||||
FALLBACK_IF(js.memcheck || !inst.RA);
|
||||
FALLBACK_IF(!inst.RA);
|
||||
|
||||
int d = inst.RD;
|
||||
int a = inst.RA;
|
||||
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
gpr.Lock(a);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
// TODO - optimize. This has to load the previous value - upper double should stay unmodified.
|
||||
|
||||
SafeLoadToReg(RAX, gpr.R(a), 64, offset, RegistersInUse(), false);
|
||||
|
||||
fpr.Lock(d);
|
||||
fpr.BindToRegister(d, true);
|
||||
X64Reg xd = fpr.RX(d);
|
||||
|
||||
if (cpu_info.bSSSE3)
|
||||
{
|
||||
#if _M_X86_64
|
||||
MOVQ_xmm(XMM0, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
|
||||
#else
|
||||
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVQ_xmm(XMM0, MDisp(ABI_PARAM1, (u32)Memory::base + offset));
|
||||
#endif
|
||||
PSHUFB(XMM0, M((void *)bswapShuffle1x8Dupe));
|
||||
MOVSD(xd, R(XMM0));
|
||||
} else {
|
||||
#if _M_X86_64
|
||||
LoadAndSwap(64, EAX, MComplex(RBX, ABI_PARAM1, SCALE_1, offset));
|
||||
MOV(64, M(&temp64), R(EAX));
|
||||
MEMCHECK_START
|
||||
MOVQ_xmm(XMM0, R(RAX));
|
||||
MOVSD(fpr.RX(d), R(XMM0));
|
||||
MEMCHECK_END
|
||||
|
||||
MEMCHECK_START
|
||||
|
||||
MOVSD(XMM0, M(&temp64));
|
||||
MOVSD(xd, R(XMM0));
|
||||
|
||||
MEMCHECK_END
|
||||
#else
|
||||
AND(32, R(ABI_PARAM1), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset));
|
||||
BSWAP(32, EAX);
|
||||
MOV(32, M((void*)((u8 *)&temp64+4)), R(EAX));
|
||||
|
||||
MEMCHECK_START
|
||||
|
||||
MOV(32, R(EAX), MDisp(ABI_PARAM1, (u32)Memory::base + offset + 4));
|
||||
BSWAP(32, EAX);
|
||||
MOV(32, M(&temp64), R(EAX));
|
||||
MOVSD(XMM0, M(&temp64));
|
||||
MOVSD(xd, R(XMM0));
|
||||
|
||||
MEMCHECK_END
|
||||
#endif
|
||||
}
|
||||
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
|
@ -119,146 +65,49 @@ void Jit64::stfd(UGeckoInstruction inst)
|
|||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITLoadStoreFloatingOff);
|
||||
FALLBACK_IF(js.memcheck || !inst.RA);
|
||||
FALLBACK_IF(!inst.RA);
|
||||
|
||||
int s = inst.RS;
|
||||
int a = inst.RA;
|
||||
|
||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
||||
if (Core::g_CoreStartupParameter.bMMU ||
|
||||
Core::g_CoreStartupParameter.bTLBHack) {
|
||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
||||
}
|
||||
#ifdef ENABLE_MEM_CHECK
|
||||
if (Core::g_CoreStartupParameter.bEnableDebugging)
|
||||
{
|
||||
mem_mask |= Memory::EXRAM_MASK;
|
||||
}
|
||||
#endif
|
||||
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
gpr.Lock(a);
|
||||
fpr.Lock(s);
|
||||
gpr.BindToRegister(a, true, false);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
|
||||
if (fpr.R(s).IsSimpleReg())
|
||||
MOVQ_xmm(R(RAX), fpr.RX(s));
|
||||
else
|
||||
MOV(64, R(RAX), fpr.R(s));
|
||||
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
|
||||
TEST(32, R(ABI_PARAM1), Imm32(mem_mask));
|
||||
FixupBranch safe = J_CC(CC_NZ);
|
||||
SafeWriteRegToReg(RAX, ABI_PARAM1, 64, offset, RegistersInUse());
|
||||
|
||||
// Fast routine
|
||||
if (cpu_info.bSSSE3) {
|
||||
MOVAPD(XMM0, fpr.R(s));
|
||||
PSHUFB(XMM0, M((void*)bswapShuffle1x8));
|
||||
#if _M_X86_64
|
||||
MOVQ_xmm(MComplex(RBX, ABI_PARAM1, SCALE_1, 0), XMM0);
|
||||
#else
|
||||
AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVQ_xmm(MDisp(ABI_PARAM1, (u32)Memory::base), XMM0);
|
||||
#endif
|
||||
} else {
|
||||
MOVAPD(XMM0, fpr.R(s));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4);
|
||||
|
||||
PSRLQ(XMM0, 32);
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
UnsafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0);
|
||||
}
|
||||
FixupBranch exit = J(true);
|
||||
SetJumpTarget(safe);
|
||||
|
||||
// Safe but slow routine
|
||||
MOVAPD(XMM0, fpr.R(s));
|
||||
PSRLQ(XMM0, 32);
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse() | (1 << (16 + XMM0)));
|
||||
|
||||
MOVAPD(XMM0, fpr.R(s));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
LEA(32, ABI_PARAM1, MDisp(gpr.R(a).GetSimpleReg(), offset));
|
||||
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 4, RegistersInUse());
|
||||
|
||||
SetJumpTarget(exit);
|
||||
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
fpr.UnlockAll();
|
||||
}
|
||||
|
||||
// In Release on 32bit build,
|
||||
// this seemed to cause a problem with PokePark2
|
||||
// at start after talking to first pokemon,
|
||||
// you run and smash a box, then he goes on about
|
||||
// following him and then you cant do anything.
|
||||
// I have enabled interpreter for this function
|
||||
// in the mean time.
|
||||
// Parlane
|
||||
void Jit64::stfs(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITLoadStoreFloatingOff);
|
||||
FALLBACK_IF(!inst.RA);
|
||||
|
||||
bool update = inst.OPCD & 1;
|
||||
int s = inst.RS;
|
||||
int a = inst.RA;
|
||||
s32 offset = (s32)(s16)inst.SIMM_16;
|
||||
|
||||
FALLBACK_IF(!a || update);
|
||||
|
||||
fpr.BindToRegister(s, true, false);
|
||||
ConvertDoubleToSingle(XMM0, fpr.RX(s));
|
||||
|
||||
if (gpr.R(a).IsImm())
|
||||
{
|
||||
u32 addr = (u32)(gpr.R(a).offset + offset);
|
||||
if (Memory::IsRAMAddress(addr))
|
||||
{
|
||||
if (cpu_info.bSSSE3) {
|
||||
PSHUFB(XMM0, M((void *)bswapShuffle1x4));
|
||||
WriteFloatToConstRamAddress(XMM0, addr);
|
||||
return;
|
||||
}
|
||||
}
|
||||
else if (addr == 0xCC008000)
|
||||
{
|
||||
// Float directly to write gather pipe! Fun!
|
||||
CALL((void*)asm_routines.fifoDirectWriteFloat);
|
||||
// TODO
|
||||
js.fifoBytesThisBlock += 4;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
gpr.FlushLockX(ABI_PARAM1, ABI_PARAM2);
|
||||
gpr.Lock(a);
|
||||
MOV(32, R(ABI_PARAM2), gpr.R(a));
|
||||
ADD(32, R(ABI_PARAM2), Imm32(offset));
|
||||
if (update && offset)
|
||||
{
|
||||
// We must flush immediate values from the following register because
|
||||
// it may take another value at runtime if no MMU exception has been raised
|
||||
gpr.KillImmediate(a, true, true);
|
||||
|
||||
MEMCHECK_START
|
||||
|
||||
MOV(32, gpr.R(a), R(ABI_PARAM2));
|
||||
|
||||
MEMCHECK_END
|
||||
}
|
||||
SafeWriteFloatToReg(XMM0, ABI_PARAM2, RegistersInUse());
|
||||
gpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(a));
|
||||
SafeWriteF32ToReg(XMM0, ABI_PARAM1, offset, RegistersInUse());
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
|
||||
void Jit64::stfsx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
JITDISABLE(bJITLoadStoreFloatingOff);
|
||||
|
||||
// We can take a shortcut here - it's not likely that a hardware access would use this instruction.
|
||||
gpr.FlushLockX(ABI_PARAM1);
|
||||
MOV(32, R(ABI_PARAM1), gpr.R(inst.RB));
|
||||
if (inst.RA)
|
||||
|
@ -268,14 +117,11 @@ void Jit64::stfsx(UGeckoInstruction inst)
|
|||
fpr.Lock(s);
|
||||
fpr.BindToRegister(s, true, false);
|
||||
ConvertDoubleToSingle(XMM0, fpr.RX(s));
|
||||
MOVD_xmm(R(EAX), XMM0);
|
||||
SafeWriteRegToReg(EAX, ABI_PARAM1, 32, 0, RegistersInUse());
|
||||
|
||||
gpr.UnlockAllX();
|
||||
SafeWriteF32ToReg(XMM0, ABI_PARAM1, 0, RegistersInUse());
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
|
||||
void Jit64::lfsx(UGeckoInstruction inst)
|
||||
{
|
||||
INSTRUCTION_START
|
||||
|
@ -283,30 +129,17 @@ void Jit64::lfsx(UGeckoInstruction inst)
|
|||
|
||||
MOV(32, R(EAX), gpr.R(inst.RB));
|
||||
if (inst.RA)
|
||||
{
|
||||
ADD(32, R(EAX), gpr.R(inst.RA));
|
||||
}
|
||||
|
||||
SafeLoadToReg(EAX, R(EAX), 32, 0, RegistersInUse(), false);
|
||||
|
||||
fpr.Lock(inst.RS);
|
||||
fpr.BindToRegister(inst.RS, false);
|
||||
X64Reg s = fpr.RX(inst.RS);
|
||||
if (cpu_info.bSSSE3 && !js.memcheck) {
|
||||
#if _M_X86_32
|
||||
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVD_xmm(XMM0, MDisp(EAX, (u32)Memory::base));
|
||||
#else
|
||||
MOVD_xmm(XMM0, MComplex(RBX, EAX, SCALE_1, 0));
|
||||
#endif
|
||||
PSHUFB(XMM0, M((void *)bswapShuffle1x4));
|
||||
ConvertSingleToDouble(s, XMM0);
|
||||
} else {
|
||||
SafeLoadToReg(EAX, R(EAX), 32, 0, RegistersInUse(), false);
|
||||
fpr.BindToRegister(inst.RS, js.memcheck);
|
||||
|
||||
MEMCHECK_START
|
||||
MEMCHECK_START
|
||||
ConvertSingleToDouble(fpr.RX(inst.RS), EAX, true);
|
||||
MEMCHECK_END
|
||||
|
||||
ConvertSingleToDouble(s, EAX, true);
|
||||
|
||||
MEMCHECK_END
|
||||
}
|
||||
fpr.UnlockAll();
|
||||
gpr.UnlockAllX();
|
||||
}
|
||||
|
||||
|
|
|
@ -1118,7 +1118,7 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
|
|||
if (!thisUsed) break;
|
||||
X64Reg reg = fregFindFreeReg(RI);
|
||||
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp1(I)));
|
||||
RI.Jit->UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
|
||||
RI.Jit->SafeLoadToReg(ECX, R(ECX), 32, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
|
||||
Jit->MOVD_xmm(reg, R(ECX));
|
||||
RI.fregs[reg] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
|
@ -1127,30 +1127,10 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
|
|||
case LoadDouble: {
|
||||
if (!thisUsed) break;
|
||||
X64Reg reg = fregFindFreeReg(RI);
|
||||
if (cpu_info.bSSSE3) {
|
||||
static const u32 GC_ALIGNED16(maskSwapa64_1[4]) =
|
||||
{0x04050607L, 0x00010203L, 0xFFFFFFFFL, 0xFFFFFFFFL};
|
||||
#if _M_X86_64
|
||||
// TODO: Remove regEnsureInReg() and use ECX
|
||||
X64Reg address = regEnsureInReg(RI, getOp1(I));
|
||||
Jit->MOVQ_xmm(reg, MComplex(RBX, address, SCALE_1, 0));
|
||||
#else
|
||||
X64Reg address = regBinLHSReg(RI, I);
|
||||
Jit->AND(32, R(address), Imm32(Memory::MEMVIEW32_MASK));
|
||||
Jit->MOVQ_xmm(reg, MDisp(address, (u32)Memory::base));
|
||||
#endif
|
||||
Jit->PSHUFB(reg, M((void*)maskSwapa64_1));
|
||||
} else {
|
||||
const OpArg loc = regLocForInst(RI, getOp1(I));
|
||||
Jit->MOV(32, R(ECX), loc);
|
||||
Jit->ADD(32, R(ECX), Imm8(4));
|
||||
RI.Jit->UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
|
||||
Jit->MOVD_xmm(reg, R(ECX));
|
||||
Jit->MOV(32, R(ECX), loc);
|
||||
RI.Jit->UnsafeLoadRegToReg(ECX, ECX, 32, 0, false);
|
||||
Jit->MOVD_xmm(XMM0, R(ECX));
|
||||
Jit->PUNPCKLDQ(reg, R(XMM0));
|
||||
}
|
||||
const OpArg loc = regLocForInst(RI, getOp1(I));
|
||||
Jit->MOV(32, R(ECX), loc);
|
||||
RI.Jit->SafeLoadToReg(RCX, R(ECX), 64, 0, regsInUse(RI), false, EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
|
||||
Jit->MOVQ_xmm(reg, R(RCX));
|
||||
RI.fregs[reg] = I;
|
||||
regNormalRegClear(RI, I);
|
||||
break;
|
||||
|
@ -1196,67 +1176,13 @@ static void DoWriteCode(IRBuilder* ibuild, JitIL* Jit, u32 exitAddress) {
|
|||
}
|
||||
case StoreDouble: {
|
||||
regSpill(RI, EAX);
|
||||
// Please fix the following code
|
||||
// if SafeWriteRegToReg() is modified.
|
||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
||||
if (Core::g_CoreStartupParameter.bMMU ||
|
||||
Core::g_CoreStartupParameter.bTLBHack) {
|
||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
||||
}
|
||||
#ifdef ENABLE_MEM_CHECK
|
||||
if (Core::g_CoreStartupParameter.bEnableDebugging)
|
||||
{
|
||||
mem_mask |= Memory::EXRAM_MASK;
|
||||
}
|
||||
#endif
|
||||
Jit->TEST(32, regLocForInst(RI, getOp2(I)), Imm32(mem_mask));
|
||||
FixupBranch safe = Jit->J_CC(CC_NZ);
|
||||
// Fast routine
|
||||
if (cpu_info.bSSSE3) {
|
||||
static const u32 GC_ALIGNED16(maskSwapa64_1[4]) =
|
||||
{0x04050607L, 0x00010203L, 0xFFFFFFFFL, 0xFFFFFFFFL};
|
||||
|
||||
X64Reg value = fregBinLHSRegWithMov(RI, I);
|
||||
Jit->PSHUFB(value, M((void*)maskSwapa64_1));
|
||||
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
|
||||
#if _M_X86_64
|
||||
Jit->MOVQ_xmm(MComplex(RBX, ECX, SCALE_1, 0), value);
|
||||
#else
|
||||
Jit->AND(32, R(ECX), Imm32(Memory::MEMVIEW32_MASK));
|
||||
Jit->MOVQ_xmm(MDisp(ECX, (u32)Memory::base), value);
|
||||
#endif
|
||||
} else {
|
||||
regSpill(RI, EAX);
|
||||
OpArg loc = fregLocForInst(RI, getOp1(I));
|
||||
if (!loc.IsSimpleReg() || !(RI.IInfo[I - RI.FirstI] & 4)) {
|
||||
Jit->MOVAPD(XMM0, loc);
|
||||
loc = R(XMM0);
|
||||
}
|
||||
Jit->MOVD_xmm(R(EAX), loc.GetSimpleReg());
|
||||
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
|
||||
RI.Jit->UnsafeWriteRegToReg(EAX, ECX, 32, 4);
|
||||
|
||||
Jit->PSRLQ(loc.GetSimpleReg(), 32);
|
||||
Jit->MOVD_xmm(R(EAX), loc.GetSimpleReg());
|
||||
Jit->MOV(32, R(ECX), regLocForInst(RI, getOp2(I)));
|
||||
RI.Jit->UnsafeWriteRegToReg(EAX, ECX, 32, 0);
|
||||
}
|
||||
FixupBranch exit = Jit->J(true);
|
||||
Jit->SetJumpTarget(safe);
|
||||
// Safe but slow routine
|
||||
OpArg value = fregLocForInst(RI, getOp1(I));
|
||||
OpArg address = regLocForInst(RI, getOp2(I));
|
||||
Jit->MOVAPD(XMM0, value);
|
||||
Jit->PSRLQ(XMM0, 32);
|
||||
Jit->MOVD_xmm(R(EAX), XMM0);
|
||||
Jit->MOV(32, R(ECX), address);
|
||||
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
|
||||
|
||||
Jit->MOVAPD(XMM0, value);
|
||||
Jit->MOVD_xmm(R(EAX), XMM0);
|
||||
Jit->MOV(32, R(ECX), address);
|
||||
RI.Jit->SafeWriteRegToReg(EAX, ECX, 32, 4, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
|
||||
Jit->SetJumpTarget(exit);
|
||||
OpArg value = fregLocForInst(RI, getOp1(I));
|
||||
OpArg address = regLocForInst(RI, getOp2(I));
|
||||
Jit->MOVAPD(XMM0, value);
|
||||
Jit->MOVQ_xmm(R(RAX), XMM0);
|
||||
Jit->MOV(32, R(ECX), address);
|
||||
RI.Jit->SafeWriteRegToReg(RAX, ECX, 64, 0, regsInUse(RI), EmuCodeBlock::SAFE_LOADSTORE_NO_FASTMEM);
|
||||
|
||||
if (RI.IInfo[I - RI.FirstI] & 4)
|
||||
fregClearInst(RI, getOp1(I));
|
||||
|
|
|
@ -93,8 +93,6 @@ public:
|
|||
void WriteCallInterpreter(UGeckoInstruction _inst);
|
||||
void Cleanup();
|
||||
|
||||
void WriteToConstRamAddress(int accessSize, const Gen::OpArg& arg, u32 address);
|
||||
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
|
||||
void GenerateCarry(Gen::X64Reg temp_reg);
|
||||
|
||||
void tri_op(int d, int a, int b, bool reversible, void (Gen::XEmitter::*op)(Gen::X64Reg, Gen::OpArg));
|
||||
|
|
|
@ -266,7 +266,7 @@ void CommonAsmRoutines::GenQuantizedSingleStores()
|
|||
|
||||
// Easy!
|
||||
const u8* storeSingleFloat = AlignCode4();
|
||||
SafeWriteFloatToReg(XMM0, ECX, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
SafeWriteF32ToReg(XMM0, ECX, 0, QUANTIZED_REGS_TO_SAVE, SAFE_LOADSTORE_NO_PROLOG | SAFE_LOADSTORE_NO_FASTMEM);
|
||||
RET();
|
||||
/*
|
||||
if (cpu_info.bSSSE3) {
|
||||
|
|
|
@ -101,7 +101,7 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
|
|||
if (accessSize == 8 && signExtend)
|
||||
MOVSX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
|
||||
else
|
||||
MOVZX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
|
||||
MOVZX(64, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -110,7 +110,7 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
|
|||
if (accessSize == 8 && signExtend)
|
||||
MOVSX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
|
||||
else
|
||||
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
|
||||
MOVZX(64, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
|
||||
}
|
||||
#else
|
||||
if (opAddress.IsImm())
|
||||
|
@ -151,6 +151,10 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
|
|||
case 32:
|
||||
BSWAP(32, reg_value);
|
||||
break;
|
||||
|
||||
case 64:
|
||||
BSWAP(64, reg_value);
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@ -272,6 +276,8 @@ void EmuCodeBlock::MMIOLoadToReg(MMIO::Mapping* mmio, Gen::X64Reg reg_value,
|
|||
}
|
||||
}
|
||||
|
||||
// Always clobbers EAX. Preserves the address.
|
||||
// Preserves the value if the load fails and js.memcheck is enabled.
|
||||
void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags)
|
||||
{
|
||||
if (!jit->js.memcheck)
|
||||
|
@ -325,7 +331,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
|||
{
|
||||
UnsafeLoadToReg(reg_value, opAddress, accessSize, offset, signExtend);
|
||||
}
|
||||
else if (!Core::g_CoreStartupParameter.bMMU && MMIO::IsMMIOAddress(address))
|
||||
else if (!Core::g_CoreStartupParameter.bMMU && MMIO::IsMMIOAddress(address) && accessSize != 64)
|
||||
{
|
||||
MMIOLoadToReg(Memory::mmio_mapping, reg_value, registersInUse,
|
||||
address, accessSize, signExtend);
|
||||
|
@ -335,6 +341,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
|||
ABI_PushRegistersAndAdjustStack(registersInUse, false);
|
||||
switch (accessSize)
|
||||
{
|
||||
case 64: ABI_CallFunctionC((void *)&Memory::Read_U64, address); break;
|
||||
case 32: ABI_CallFunctionC((void *)&Memory::Read_U32, address); break;
|
||||
case 16: ABI_CallFunctionC((void *)&Memory::Read_U16_ZX, address); break;
|
||||
case 8: ABI_CallFunctionC((void *)&Memory::Read_U8_ZX, address); break;
|
||||
|
@ -350,7 +357,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
|||
}
|
||||
else if (reg_value != EAX)
|
||||
{
|
||||
MOVZX(32, accessSize, reg_value, R(EAX));
|
||||
MOVZX(64, accessSize, reg_value, R(EAX));
|
||||
}
|
||||
|
||||
MEMCHECK_END
|
||||
|
@ -372,6 +379,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
|||
ABI_PushRegistersAndAdjustStack(registersInUse, false);
|
||||
switch (accessSize)
|
||||
{
|
||||
case 64: ABI_CallFunctionA((void *)&Memory::Read_U64, addr_loc); break;
|
||||
case 32: ABI_CallFunctionA((void *)&Memory::Read_U32, addr_loc); break;
|
||||
case 16: ABI_CallFunctionA((void *)&Memory::Read_U16_ZX, addr_loc); break;
|
||||
case 8: ABI_CallFunctionA((void *)&Memory::Read_U8_ZX, addr_loc); break;
|
||||
|
@ -387,7 +395,7 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
|||
}
|
||||
else if (reg_value != EAX)
|
||||
{
|
||||
MOVZX(32, accessSize, reg_value, R(EAX));
|
||||
MOVZX(64, accessSize, reg_value, R(EAX));
|
||||
}
|
||||
|
||||
MEMCHECK_END
|
||||
|
@ -490,6 +498,7 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
|
|||
ABI_PushRegistersAndAdjustStack(registersInUse, noProlog);
|
||||
switch (accessSize)
|
||||
{
|
||||
case 64: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U64) : ((void *)&Memory::Write_U64_Swap), reg_value, reg_addr, false); break;
|
||||
case 32: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U32) : ((void *)&Memory::Write_U32_Swap), reg_value, reg_addr, false); break;
|
||||
case 16: ABI_CallFunctionRR(swap ? ((void *)&Memory::Write_U16) : ((void *)&Memory::Write_U16_Swap), reg_value, reg_addr, false); break;
|
||||
case 8: ABI_CallFunctionRR((void *)&Memory::Write_U8, reg_value, reg_addr, false); break;
|
||||
|
@ -501,43 +510,12 @@ void EmuCodeBlock::SafeWriteRegToReg(X64Reg reg_value, X64Reg reg_addr, int acce
|
|||
SetJumpTarget(exit);
|
||||
}
|
||||
|
||||
void EmuCodeBlock::SafeWriteFloatToReg(X64Reg xmm_value, X64Reg reg_addr, u32 registersInUse, int flags)
|
||||
// Destroys both arg registers and EAX
|
||||
void EmuCodeBlock::SafeWriteF32ToReg(X64Reg xmm_value, X64Reg reg_addr, s32 offset, u32 registersInUse, int flags)
|
||||
{
|
||||
// FIXME
|
||||
if (false && cpu_info.bSSSE3) {
|
||||
// This path should be faster but for some reason it causes errors so I've disabled it.
|
||||
u32 mem_mask = Memory::ADDR_MASK_HW_ACCESS;
|
||||
|
||||
if (Core::g_CoreStartupParameter.bMMU || Core::g_CoreStartupParameter.bTLBHack)
|
||||
mem_mask |= Memory::ADDR_MASK_MEM1;
|
||||
|
||||
#ifdef ENABLE_MEM_CHECK
|
||||
if (Core::g_CoreStartupParameter.bEnableDebugging)
|
||||
mem_mask |= Memory::EXRAM_MASK;
|
||||
#endif
|
||||
TEST(32, R(reg_addr), Imm32(mem_mask));
|
||||
FixupBranch argh = J_CC(CC_Z);
|
||||
MOVSS(M(&float_buffer), xmm_value);
|
||||
LoadAndSwap(32, EAX, M(&float_buffer));
|
||||
MOV(32, M(&PC), Imm32(jit->js.compilerPC)); // Helps external systems know which instruction triggered the write
|
||||
ABI_PushRegistersAndAdjustStack(registersInUse, false);
|
||||
ABI_CallFunctionRR((void *)&Memory::Write_U32, EAX, reg_addr);
|
||||
ABI_PopRegistersAndAdjustStack(registersInUse, false);
|
||||
FixupBranch arg2 = J();
|
||||
SetJumpTarget(argh);
|
||||
PSHUFB(xmm_value, M((void *)pbswapShuffle1x4));
|
||||
#if _M_X86_64
|
||||
MOVD_xmm(MComplex(RBX, reg_addr, SCALE_1, 0), xmm_value);
|
||||
#else
|
||||
AND(32, R(reg_addr), Imm32(Memory::MEMVIEW32_MASK));
|
||||
MOVD_xmm(MDisp(reg_addr, (u32)Memory::base), xmm_value);
|
||||
#endif
|
||||
SetJumpTarget(arg2);
|
||||
} else {
|
||||
MOVSS(M(&float_buffer), xmm_value);
|
||||
MOV(32, R(EAX), M(&float_buffer));
|
||||
SafeWriteRegToReg(EAX, reg_addr, 32, 0, registersInUse, flags);
|
||||
}
|
||||
// TODO: PSHUFB might be faster if fastmem supported MOVSS.
|
||||
MOVD_xmm(R(EAX), xmm_value);
|
||||
SafeWriteRegToReg(EAX, reg_addr, 32, offset, registersInUse, flags);
|
||||
}
|
||||
|
||||
void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap)
|
||||
|
@ -555,16 +533,6 @@ void EmuCodeBlock::WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 a
|
|||
#endif
|
||||
}
|
||||
|
||||
void EmuCodeBlock::WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address)
|
||||
{
|
||||
#if _M_X86_64
|
||||
MOV(32, R(RAX), Imm32(address));
|
||||
MOVSS(MComplex(RBX, RAX, 1, 0), xmm_reg);
|
||||
#else
|
||||
MOVSS(M((void*)((u32)Memory::base + (address & Memory::MEMVIEW32_MASK))), xmm_reg);
|
||||
#endif
|
||||
}
|
||||
|
||||
void EmuCodeBlock::ForceSinglePrecisionS(X64Reg xmm) {
|
||||
// Most games don't need these. Zelda requires it though - some platforms get stuck without them.
|
||||
if (jit->jo.accurateSinglePrecision)
|
||||
|
|
|
@ -47,11 +47,9 @@ public:
|
|||
void SafeLoadToReg(Gen::X64Reg reg_value, const Gen::OpArg & opAddress, int accessSize, s32 offset, u32 registersInUse, bool signExtend, int flags = 0);
|
||||
void SafeWriteRegToReg(Gen::X64Reg reg_value, Gen::X64Reg reg_addr, int accessSize, s32 offset, u32 registersInUse, int flags = 0);
|
||||
|
||||
// Trashes both inputs and EAX.
|
||||
void SafeWriteFloatToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, u32 registersInUse, int flags = 0);
|
||||
void SafeWriteF32ToReg(Gen::X64Reg xmm_value, Gen::X64Reg reg_addr, s32 offset, u32 registersInUse, int flags = 0);
|
||||
|
||||
void WriteToConstRamAddress(int accessSize, Gen::X64Reg arg, u32 address, bool swap = false);
|
||||
void WriteFloatToConstRamAddress(const Gen::X64Reg& xmm_reg, u32 address);
|
||||
void JitClearCA();
|
||||
void JitSetCA();
|
||||
void JitClearCAOV(bool oe);
|
||||
|
|
Loading…
Reference in New Issue