Jit64: some load/store optimizations
Avoid extra instructions during address calculation in loads: fold the offset into an LEA or an immediate displacement whenever possible instead of emitting a separate MOV and ADD.
This commit is contained in:
parent
7d05ebbc9b
commit
043256449e
|
@ -136,11 +136,12 @@ void Jit64::lXXx(UGeckoInstruction inst)
|
||||||
// Determine whether this instruction updates inst.RA
|
// Determine whether this instruction updates inst.RA
|
||||||
bool update;
|
bool update;
|
||||||
if (inst.OPCD == 31)
|
if (inst.OPCD == 31)
|
||||||
update = ((inst.SUBOP10 & 0x20) != 0);
|
update = ((inst.SUBOP10 & 0x20) != 0) && (!gpr.R(b).IsImm() || gpr.R(b).offset != 0);
|
||||||
else
|
else
|
||||||
update = ((inst.OPCD & 1) != 0);
|
update = ((inst.OPCD & 1) != 0) && inst.SIMM_16 != 0;
|
||||||
|
|
||||||
bool zeroOffset = inst.OPCD != 31 && inst.SIMM_16 == 0;
|
bool storeAddress = false;
|
||||||
|
s32 loadOffset = 0;
|
||||||
|
|
||||||
// Prepare address operand
|
// Prepare address operand
|
||||||
Gen::OpArg opAddress;
|
Gen::OpArg opAddress;
|
||||||
|
@ -178,30 +179,59 @@ void Jit64::lXXx(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if ((update && !js.memcheck) || zeroOffset)
|
// If we're using reg+reg mode and b is an immediate, pretend we're using constant offset mode
|
||||||
|
bool use_constant_offset = inst.OPCD != 31 || gpr.R(b).IsImm();
|
||||||
|
s32 offset = inst.OPCD == 31 ? (s32)gpr.R(b).offset : (s32)inst.SIMM_16;
|
||||||
|
// Depending on whether we have an immediate and/or update, find the optimum way to calculate
|
||||||
|
// the load address.
|
||||||
|
if ((update || use_constant_offset) && !js.memcheck)
|
||||||
{
|
{
|
||||||
gpr.BindToRegister(a, true, update);
|
gpr.BindToRegister(a, true, update);
|
||||||
opAddress = gpr.R(a);
|
opAddress = gpr.R(a);
|
||||||
|
if (!use_constant_offset)
|
||||||
|
ADD(32, opAddress, gpr.R(b));
|
||||||
|
else if (update)
|
||||||
|
ADD(32, opAddress, Imm32((u32)offset));
|
||||||
|
else
|
||||||
|
loadOffset = offset;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
// In this case we need an extra temporary register.
|
||||||
gpr.FlushLockX(ABI_PARAM1);
|
gpr.FlushLockX(ABI_PARAM1);
|
||||||
opAddress = R(ABI_PARAM1);
|
opAddress = R(ABI_PARAM1);
|
||||||
MOV(32, opAddress, gpr.R(a));
|
storeAddress = true;
|
||||||
|
if (use_constant_offset)
|
||||||
|
{
|
||||||
|
if (gpr.R(a).IsSimpleReg() && offset != 0)
|
||||||
|
{
|
||||||
|
LEA(32, ABI_PARAM1, MDisp(gpr.RX(a), offset));
|
||||||
}
|
}
|
||||||
|
else
|
||||||
if (inst.OPCD == 31)
|
{
|
||||||
|
MOV(32, opAddress, gpr.R(a));
|
||||||
|
if (offset != 0)
|
||||||
|
ADD(32, opAddress, Imm32((u32)offset));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
|
||||||
|
{
|
||||||
|
LEA(32, ABI_PARAM1, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
MOV(32, opAddress, gpr.R(a));
|
||||||
ADD(32, opAddress, gpr.R(b));
|
ADD(32, opAddress, gpr.R(b));
|
||||||
else if (inst.SIMM_16 != 0)
|
}
|
||||||
ADD(32, opAddress, Imm32((u32)(s32)inst.SIMM_16));
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
gpr.Lock(a, b, d);
|
gpr.Lock(a, b, d);
|
||||||
gpr.BindToRegister(d, js.memcheck, true);
|
gpr.BindToRegister(d, js.memcheck, true);
|
||||||
SafeLoadToReg(gpr.RX(d), opAddress, accessSize, 0, CallerSavedRegistersInUse(), signExtend);
|
SafeLoadToReg(gpr.RX(d), opAddress, accessSize, loadOffset, CallerSavedRegistersInUse(), signExtend);
|
||||||
|
|
||||||
if (update && js.memcheck && !zeroOffset)
|
if (update && storeAddress)
|
||||||
{
|
{
|
||||||
gpr.BindToRegister(a, true, true);
|
gpr.BindToRegister(a, true, true);
|
||||||
MEMCHECK_START
|
MEMCHECK_START
|
||||||
|
@ -385,6 +415,10 @@ void Jit64::stXx(UGeckoInstruction inst)
|
||||||
MOV(32, R(EDX), gpr.R(a));
|
MOV(32, R(EDX), gpr.R(a));
|
||||||
MEMCHECK_END
|
MEMCHECK_END
|
||||||
}
|
}
|
||||||
|
else if (gpr.R(a).IsSimpleReg() && gpr.R(b).IsSimpleReg())
|
||||||
|
{
|
||||||
|
LEA(32, EDX, MComplex(gpr.RX(a), gpr.RX(b), SCALE_1, 0));
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MOV(32, R(EDX), gpr.R(a));
|
MOV(32, R(EDX), gpr.R(a));
|
||||||
|
@ -423,17 +457,17 @@ void Jit64::lmw(UGeckoInstruction inst)
|
||||||
JITDISABLE(bJITLoadStoreOff);
|
JITDISABLE(bJITLoadStoreOff);
|
||||||
|
|
||||||
// TODO: This doesn't handle rollback on DSI correctly
|
// TODO: This doesn't handle rollback on DSI correctly
|
||||||
gpr.FlushLockX(ECX);
|
|
||||||
MOV(32, R(ECX), Imm32((u32)(s32)inst.SIMM_16));
|
|
||||||
if (inst.RA)
|
if (inst.RA)
|
||||||
ADD(32, R(ECX), gpr.R(inst.RA));
|
{
|
||||||
|
gpr.Lock(inst.RA);
|
||||||
|
gpr.BindToRegister(inst.RA, true, false);
|
||||||
|
}
|
||||||
for (int i = inst.RD; i < 32; i++)
|
for (int i = inst.RD; i < 32; i++)
|
||||||
{
|
{
|
||||||
SafeLoadToReg(EAX, R(ECX), 32, (i - inst.RD) * 4, CallerSavedRegistersInUse(), false);
|
|
||||||
gpr.BindToRegister(i, false, true);
|
gpr.BindToRegister(i, false, true);
|
||||||
MOV(32, gpr.R(i), R(EAX));
|
SafeLoadToReg(gpr.RX(i), inst.RA ? gpr.R(inst.RA) : Imm32(0), 32, (i - inst.RD) * 4 + (s32)inst.SIMM_16, CallerSavedRegistersInUse(), false);
|
||||||
}
|
}
|
||||||
gpr.UnlockAllX();
|
gpr.UnlockAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
void Jit64::stmw(UGeckoInstruction inst)
|
void Jit64::stmw(UGeckoInstruction inst)
|
||||||
|
|
|
@ -66,9 +66,10 @@ void EmuCodeBlock::UnsafeLoadRegToRegNoSwap(X64Reg reg_addr, X64Reg reg_value, i
|
||||||
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
|
MOVZX(32, accessSize, reg_value, MComplex(RBX, reg_addr, SCALE_1, offset));
|
||||||
}
|
}
|
||||||
|
|
||||||
u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int accessSize, s32 offset, bool signExtend)
|
u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, OpArg opAddress, int accessSize, s32 offset, bool signExtend)
|
||||||
{
|
{
|
||||||
u8 *result;
|
u8 *result;
|
||||||
|
OpArg memOperand;
|
||||||
if (opAddress.IsSimpleReg())
|
if (opAddress.IsSimpleReg())
|
||||||
{
|
{
|
||||||
// Deal with potential wraparound. (This is just a heuristic, and it would
|
// Deal with potential wraparound. (This is just a heuristic, and it would
|
||||||
|
@ -84,21 +85,23 @@ u8 *EmuCodeBlock::UnsafeLoadToReg(X64Reg reg_value, Gen::OpArg opAddress, int ac
|
||||||
offset = 0;
|
offset = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
result = GetWritableCodePtr();
|
memOperand = MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset);
|
||||||
if (accessSize == 8 && signExtend)
|
}
|
||||||
MOVSX(32, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
|
else if (opAddress.IsImm())
|
||||||
else
|
{
|
||||||
MOVZX(64, accessSize, reg_value, MComplex(RBX, opAddress.GetSimpleReg(), SCALE_1, offset));
|
memOperand = MDisp(RBX, (opAddress.offset + offset) & 0x3FFFFFFF);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
MOV(32, R(reg_value), opAddress);
|
MOV(32, R(reg_value), opAddress);
|
||||||
|
memOperand = MComplex(RBX, reg_value, SCALE_1, offset);
|
||||||
|
}
|
||||||
|
|
||||||
result = GetWritableCodePtr();
|
result = GetWritableCodePtr();
|
||||||
if (accessSize == 8 && signExtend)
|
if (accessSize == 8 && signExtend)
|
||||||
MOVSX(32, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
|
MOVSX(32, accessSize, reg_value, memOperand);
|
||||||
else
|
else
|
||||||
MOVZX(64, accessSize, reg_value, MComplex(RBX, reg_value, SCALE_1, offset));
|
MOVZX(64, accessSize, reg_value, memOperand);
|
||||||
}
|
|
||||||
|
|
||||||
switch (accessSize)
|
switch (accessSize)
|
||||||
{
|
{
|
||||||
|
@ -335,9 +338,16 @@ void EmuCodeBlock::SafeLoadToReg(X64Reg reg_value, const Gen::OpArg & opAddress,
|
||||||
if (offset)
|
if (offset)
|
||||||
{
|
{
|
||||||
addr_loc = R(EAX);
|
addr_loc = R(EAX);
|
||||||
|
if (opAddress.IsSimpleReg())
|
||||||
|
{
|
||||||
|
LEA(32, EAX, MDisp(opAddress.GetSimpleReg(), offset));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
MOV(32, R(EAX), opAddress);
|
MOV(32, R(EAX), opAddress);
|
||||||
ADD(32, R(EAX), Imm32(offset));
|
ADD(32, R(EAX), Imm32(offset));
|
||||||
}
|
}
|
||||||
|
}
|
||||||
TEST(32, addr_loc, Imm32(mem_mask));
|
TEST(32, addr_loc, Imm32(mem_mask));
|
||||||
|
|
||||||
FixupBranch fast = J_CC(CC_Z, true);
|
FixupBranch fast = J_CC(CC_Z, true);
|
||||||
|
|
Loading…
Reference in New Issue