Jit: Use one fewer register in dcbx
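We were using a separate "value" register to avoid clobbering physical_addr, but this is no longer needed. Once we start clobbering physical_addr, the only bits of it we still need are bits 5-9, which select the bit position within the 32-bit word of the block bitset. Address translation only replaces the upper bits of the address (the low 17 bits are carried over from the effective address unchanged), so bits 5-9 are identical in effective_addr and physical_addr, and we can read them from effective_addr instead.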
This commit is contained in:
parent
62e7b34c88
commit
90fcaf7e96
|
@ -245,9 +245,7 @@ void Jit64::dcbx(UGeckoInstruction inst)
|
||||||
|
|
||||||
RCOpArg Ra = inst.RA ? gpr.Use(inst.RA, RCMode::Read) : RCOpArg::Imm32(0);
|
RCOpArg Ra = inst.RA ? gpr.Use(inst.RA, RCMode::Read) : RCOpArg::Imm32(0);
|
||||||
RCX64Reg Rb = gpr.Bind(inst.RB, make_loop ? RCMode::ReadWrite : RCMode::Read);
|
RCX64Reg Rb = gpr.Bind(inst.RB, make_loop ? RCMode::ReadWrite : RCMode::Read);
|
||||||
RCX64Reg tmp = gpr.Scratch();
|
RegCache::Realize(Ra, Rb);
|
||||||
RCX64Reg effective_address = gpr.Scratch();
|
|
||||||
RegCache::Realize(Ra, Rb, tmp, effective_address);
|
|
||||||
|
|
||||||
RCX64Reg loop_counter;
|
RCX64Reg loop_counter;
|
||||||
if (make_loop)
|
if (make_loop)
|
||||||
|
@ -259,10 +257,10 @@ void Jit64::dcbx(UGeckoInstruction inst)
|
||||||
// bdnz afterwards! So if we invalidate a single cache line, we don't adjust the registers at
|
// bdnz afterwards! So if we invalidate a single cache line, we don't adjust the registers at
|
||||||
// all, if we invalidate 2 cachelines we adjust the registers by one step, and so on.
|
// all, if we invalidate 2 cachelines we adjust the registers by one step, and so on.
|
||||||
|
|
||||||
RCX64Reg& reg_cycle_count = tmp;
|
RCX64Reg reg_cycle_count = gpr.Scratch();
|
||||||
RCX64Reg& reg_downcount = effective_address;
|
RCX64Reg reg_downcount = gpr.Scratch();
|
||||||
loop_counter = gpr.Scratch();
|
loop_counter = gpr.Scratch();
|
||||||
RegCache::Realize(loop_counter);
|
RegCache::Realize(reg_cycle_count, reg_downcount, loop_counter);
|
||||||
|
|
||||||
// This must be true in order for us to pick up the DIV results and not trash any data.
|
// This must be true in order for us to pick up the DIV results and not trash any data.
|
||||||
static_assert(RSCRATCH == Gen::EAX && RSCRATCH2 == Gen::EDX);
|
static_assert(RSCRATCH == Gen::EAX && RSCRATCH2 == Gen::EDX);
|
||||||
|
@ -304,8 +302,8 @@ void Jit64::dcbx(UGeckoInstruction inst)
|
||||||
LEA(32, loop_counter, MDisp(RSCRATCH2, 1));
|
LEA(32, loop_counter, MDisp(RSCRATCH2, 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
X64Reg value = RSCRATCH;
|
X64Reg addr = RSCRATCH;
|
||||||
MOV_sum(32, value, Ra, Rb);
|
MOV_sum(32, addr, Ra, Rb);
|
||||||
|
|
||||||
if (make_loop)
|
if (make_loop)
|
||||||
{
|
{
|
||||||
|
@ -315,33 +313,36 @@ void Jit64::dcbx(UGeckoInstruction inst)
|
||||||
ADD(32, R(Rb), R(RSCRATCH2)); // Rb += (RSCRATCH2 * 32)
|
ADD(32, R(Rb), R(RSCRATCH2)); // Rb += (RSCRATCH2 * 32)
|
||||||
}
|
}
|
||||||
|
|
||||||
X64Reg addr = RSCRATCH2;
|
X64Reg tmp = RSCRATCH2;
|
||||||
|
RCX64Reg effective_address = gpr.Scratch();
|
||||||
|
RegCache::Realize(effective_address);
|
||||||
|
|
||||||
FixupBranch bat_lookup_failed;
|
FixupBranch bat_lookup_failed;
|
||||||
MOV(32, R(effective_address), R(value));
|
MOV(32, R(effective_address), R(addr));
|
||||||
const u8* loop_start = GetCodePtr();
|
const u8* loop_start = GetCodePtr();
|
||||||
if (MSR.IR)
|
if (MSR.IR)
|
||||||
{
|
{
|
||||||
// Translate effective address to physical address.
|
// Translate effective address to physical address.
|
||||||
bat_lookup_failed = BATAddressLookup(value, tmp, PowerPC::ibat_table.data());
|
bat_lookup_failed = BATAddressLookup(addr, tmp, PowerPC::ibat_table.data());
|
||||||
MOV(32, R(addr), R(effective_address));
|
MOV(32, R(tmp), R(effective_address));
|
||||||
AND(32, R(addr), Imm32(0x0001ffff));
|
AND(32, R(tmp), Imm32(0x0001ffff));
|
||||||
AND(32, R(value), Imm32(0xfffe0000));
|
AND(32, R(addr), Imm32(0xfffe0000));
|
||||||
OR(32, R(value), R(addr));
|
OR(32, R(addr), R(tmp));
|
||||||
}
|
}
|
||||||
MOV(32, R(addr), R(value));
|
|
||||||
|
|
||||||
// Check whether a JIT cache line needs to be invalidated.
|
// Check whether a JIT cache line needs to be invalidated.
|
||||||
SHR(32, R(value), Imm8(5 + 5)); // >> 5 for cache line size, >> 5 for width of bitset
|
SHR(32, R(addr), Imm8(5 + 5)); // >> 5 for cache line size, >> 5 for width of bitset
|
||||||
MOV(64, R(tmp), ImmPtr(GetBlockCache()->GetBlockBitSet()));
|
MOV(64, R(tmp), ImmPtr(GetBlockCache()->GetBlockBitSet()));
|
||||||
MOV(32, R(value), MComplex(tmp, value, SCALE_4, 0));
|
MOV(32, R(addr), MComplex(tmp, addr, SCALE_4, 0));
|
||||||
SHR(32, R(addr), Imm8(5));
|
MOV(32, R(tmp), R(effective_address));
|
||||||
BT(32, R(value), R(addr));
|
SHR(32, R(tmp), Imm8(5));
|
||||||
|
BT(32, R(addr), R(tmp));
|
||||||
FixupBranch invalidate_needed = J_CC(CC_C, true);
|
FixupBranch invalidate_needed = J_CC(CC_C, true);
|
||||||
|
|
||||||
if (make_loop)
|
if (make_loop)
|
||||||
{
|
{
|
||||||
ADD(32, R(effective_address), Imm8(32));
|
ADD(32, R(effective_address), Imm8(32));
|
||||||
MOV(32, R(value), R(effective_address));
|
MOV(32, R(addr), R(effective_address));
|
||||||
SUB(32, R(loop_counter), Imm8(1));
|
SUB(32, R(loop_counter), Imm8(1));
|
||||||
J_CC(CC_NZ, loop_start);
|
J_CC(CC_NZ, loop_start);
|
||||||
}
|
}
|
||||||
|
|
|
@ -627,8 +627,7 @@ void JitArm64::dcbx(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
ARM64Reg effective_addr = ARM64Reg::W0;
|
ARM64Reg effective_addr = ARM64Reg::W0;
|
||||||
ARM64Reg physical_addr = MSR.IR ? gpr.GetReg() : effective_addr;
|
ARM64Reg physical_addr = gpr.GetReg();
|
||||||
ARM64Reg value = gpr.GetReg();
|
|
||||||
|
|
||||||
if (a)
|
if (a)
|
||||||
ADD(effective_addr, gpr.R(a), gpr.R(b));
|
ADD(effective_addr, gpr.R(a), gpr.R(b));
|
||||||
|
@ -653,15 +652,15 @@ void JitArm64::dcbx(UGeckoInstruction inst)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check whether a JIT cache line needs to be invalidated.
|
// Check whether a JIT cache line needs to be invalidated.
|
||||||
LSR(value, physical_addr, 5 + 5); // >> 5 for cache line size, >> 5 for width of bitset
|
LSR(physical_addr, physical_addr, 5 + 5); // >> 5 for cache line size, >> 5 for width of bitset
|
||||||
MOVP2R(EncodeRegTo64(WA), GetBlockCache()->GetBlockBitSet());
|
MOVP2R(EncodeRegTo64(WA), GetBlockCache()->GetBlockBitSet());
|
||||||
LDR(value, EncodeRegTo64(WA), ArithOption(EncodeRegTo64(value), true));
|
LDR(physical_addr, EncodeRegTo64(WA), ArithOption(EncodeRegTo64(physical_addr), true));
|
||||||
|
|
||||||
LSR(WA, physical_addr, 5); // mask sizeof cacheline, & 0x1f is the position within the bitset
|
LSR(WA, effective_addr, 5); // mask sizeof cacheline, & 0x1f is the position within the bitset
|
||||||
|
|
||||||
LSRV(value, value, WA); // move current bit to bit 0
|
LSRV(physical_addr, physical_addr, WA); // move current bit to bit 0
|
||||||
|
|
||||||
FixupBranch bit_not_set = TBZ(value, 0);
|
FixupBranch bit_not_set = TBZ(physical_addr, 0);
|
||||||
FixupBranch invalidate_needed = B();
|
FixupBranch invalidate_needed = B();
|
||||||
SetJumpTarget(bit_not_set);
|
SetJumpTarget(bit_not_set);
|
||||||
|
|
||||||
|
@ -681,7 +680,6 @@ void JitArm64::dcbx(UGeckoInstruction inst)
|
||||||
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
|
BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
|
||||||
gprs_to_push[DecodeReg(effective_addr)] = false;
|
gprs_to_push[DecodeReg(effective_addr)] = false;
|
||||||
gprs_to_push[DecodeReg(physical_addr)] = false;
|
gprs_to_push[DecodeReg(physical_addr)] = false;
|
||||||
gprs_to_push[DecodeReg(value)] = false;
|
|
||||||
gprs_to_push[DecodeReg(WA)] = false;
|
gprs_to_push[DecodeReg(WA)] = false;
|
||||||
if (make_loop)
|
if (make_loop)
|
||||||
gprs_to_push[DecodeReg(loop_counter)] = false;
|
gprs_to_push[DecodeReg(loop_counter)] = false;
|
||||||
|
@ -703,9 +701,7 @@ void JitArm64::dcbx(UGeckoInstruction inst)
|
||||||
SwitchToNearCode();
|
SwitchToNearCode();
|
||||||
SetJumpTarget(near_addr);
|
SetJumpTarget(near_addr);
|
||||||
|
|
||||||
gpr.Unlock(effective_addr, value, WA);
|
gpr.Unlock(effective_addr, physical_addr, WA);
|
||||||
if (MSR.IR)
|
|
||||||
gpr.Unlock(physical_addr);
|
|
||||||
if (make_loop)
|
if (make_loop)
|
||||||
gpr.Unlock(loop_counter);
|
gpr.Unlock(loop_counter);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue