Merge pull request #2930 from Sonicadvance1/aarch64_dcbz
[AArch64] Implement dcbz instruction
This commit is contained in:
commit
14589a3eca
|
@ -562,6 +562,10 @@ public:
|
|||
void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
|
||||
void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
|
||||
void TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
|
||||
// Register/register test: emits ANDS of Rn and Rm with the zero register
// (ZR for a 64-bit Rn, WZR otherwise) as the destination, so the AND
// result itself is not kept in any general-purpose register.
void TST(ARM64Reg Rn, ARM64Reg Rm)
{
	// Pick the zero register whose width matches the first operand.
	const ARM64Reg discard_reg = Is64Bit(Rn) ? ZR : WZR;
	ANDS(discard_reg, Rn, Rm);
}
|
||||
|
||||
// Add/subtract (immediate)
|
||||
void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
|
||||
|
|
|
@ -129,6 +129,7 @@ public:
|
|||
void lmw(UGeckoInstruction inst);
|
||||
void stmw(UGeckoInstruction inst);
|
||||
void dcbt(UGeckoInstruction inst);
|
||||
void dcbz(UGeckoInstruction inst);
|
||||
|
||||
// LoadStore floating point
|
||||
void lfXX(UGeckoInstruction inst);
|
||||
|
|
|
@ -100,6 +100,13 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
|
|||
else
|
||||
STRB(RS, X28, addr);
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_ZERO_256)
|
||||
{
|
||||
// This only stores 32 bytes (256 bits) of zeros to the target address
|
||||
ADD(addr, addr, X28);
|
||||
STP(INDEX_SIGNED, ZR, ZR, addr, 0);
|
||||
STP(INDEX_SIGNED, ZR, ZR, addr, 16);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||
|
@ -212,6 +219,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
|
|||
|
||||
BLR(X30);
|
||||
}
|
||||
else if (flags & BackPatchInfo::FLAG_ZERO_256)
|
||||
{
|
||||
MOVI2R(X30, (u64)&PowerPC::ClearCacheLine);
|
||||
BLR(X30);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (flags & BackPatchInfo::FLAG_SIZE_32)
|
||||
|
|
|
@ -663,3 +663,78 @@ void JitArm64::dcbt(UGeckoInstruction inst)
|
|||
js.skipInstructions = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Emits native code for the PowerPC dcbz instruction.
//
// The effective address is (RA|0) + RB. dcbz zeroes the whole 32-byte block
// that *contains* the effective address, so the address is rounded down to a
// 32-byte boundary before being handed to the FLAG_ZERO_256 backpatch routine.
// That routine writes exactly 32 bytes starting at the address it is given
// (either with two STP stores or by calling PowerPC::ClearCacheLine).
//
// inst: the decoded instruction; only its RA and RB fields are read here.
void JitArm64::dcbz(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreOff);

	int a = inst.RA, b = inst.RB;

	// W0 carries the address into the backpatch routine.
	gpr.Lock(W0);

	ARM64Reg addr_reg = W0;

	// Clearing the low five bits aligns an address down to its 32-byte block.
	// For addresses known at JIT time this is applied to the constant directly;
	// otherwise an AND with 0xFFFFFFE0 is emitted. As a 32-bit logical immediate
	// that value is a run of 27 ones (imms = 26) rotated right by 27 (immr = 27).
	const u32 block_mask = ~31u;
	const u32 block_mask_immr = 27;
	const u32 block_mask_imms = 26;

	if (a)
	{
		bool is_imm_a, is_imm_b;
		is_imm_a = gpr.IsImm(a);
		is_imm_b = gpr.IsImm(b);
		if (is_imm_a && is_imm_b)
		{
			// full imm_addr
			u32 imm_addr = gpr.GetImm(b) + gpr.GetImm(a);
			MOVI2R(addr_reg, imm_addr & block_mask);
		}
		else if (is_imm_a || is_imm_b)
		{
			// Only one register is an immediate
			ARM64Reg base = is_imm_a ? gpr.R(b) : gpr.R(a);
			u32 imm_offset = is_imm_a ? gpr.GetImm(a) : gpr.GetImm(b);
			if (imm_offset < 4096)
			{
				// Small enough to be encoded directly in the ADD
				ADD(addr_reg, base, imm_offset);
			}
			else
			{
				MOVI2R(addr_reg, imm_offset);
				ADD(addr_reg, addr_reg, base);
			}
			AND(addr_reg, addr_reg, block_mask_immr, block_mask_imms);
		}
		else
		{
			// Both are registers
			ADD(addr_reg, gpr.R(a), gpr.R(b));
			AND(addr_reg, addr_reg, block_mask_immr, block_mask_imms);
		}
	}
	else
	{
		// RA isn't used, only RB
		if (gpr.IsImm(b))
		{
			u32 imm_addr = gpr.GetImm(b);
			MOVI2R(addr_reg, imm_addr & block_mask);
		}
		else
		{
			// Copy and align in a single instruction
			AND(addr_reg, gpr.R(b), block_mask_immr, block_mask_imms);
		}
	}

	// We don't care about being /too/ terribly efficient here
	// As long as we aren't falling back to interpreter we're winning a lot

	BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
	BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
	// W0 only holds the scratch address, so it is left out of the push set.
	gprs_to_push[W0] = 0;

	EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, true, true, W0, EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);

	gpr.Unlock(W0);
}
|
||||
|
|
|
@ -225,7 +225,7 @@ static GekkoOPTemplate table31[] =
|
|||
{278, &JitArm64::dcbt}, // dcbt
|
||||
{470, &JitArm64::FallBackToInterpreter}, // dcbi
|
||||
{758, &JitArm64::DoNothing}, // dcba
|
||||
{1014, &JitArm64::FallBackToInterpreter}, // dcbz
|
||||
{1014, &JitArm64::dcbz}, // dcbz
|
||||
|
||||
//load word
|
||||
{23, &JitArm64::lXX}, // lwzx
|
||||
|
|
|
@ -19,6 +19,7 @@ struct BackPatchInfo
|
|||
FLAG_REVERSE = (1 << 7),
|
||||
FLAG_EXTEND = (1 << 8),
|
||||
FLAG_SIZE_F32I = (1 << 9),
|
||||
FLAG_ZERO_256 = (1 << 10),
|
||||
};
|
||||
|
||||
static u32 GetFlagSize(u32 flags)
|
||||
|
@ -33,6 +34,8 @@ struct BackPatchInfo
|
|||
return 32;
|
||||
if (flags & FLAG_SIZE_F64)
|
||||
return 64;
|
||||
if (flags & FLAG_ZERO_256)
|
||||
return 256;
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue