[AArch64] Implement dcbz instruction

This commit is contained in:
Ryan Houdek 2015-08-31 14:03:57 -05:00
parent d495ad5104
commit ae0a06a018
5 changed files with 92 additions and 1 deletions

View File

@ -129,6 +129,7 @@ public:
void lmw(UGeckoInstruction inst); void lmw(UGeckoInstruction inst);
void stmw(UGeckoInstruction inst); void stmw(UGeckoInstruction inst);
void dcbt(UGeckoInstruction inst); void dcbt(UGeckoInstruction inst);
void dcbz(UGeckoInstruction inst);
// LoadStore floating point // LoadStore floating point
void lfXX(UGeckoInstruction inst); void lfXX(UGeckoInstruction inst);

View File

@ -100,6 +100,13 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
else else
STRB(RS, X28, addr); STRB(RS, X28, addr);
} }
else if (flags & BackPatchInfo::FLAG_ZERO_256)
{
// Stores 32 bytes (256 bits) of zeros starting at the target address
ADD(addr, addr, X28);
STP(INDEX_SIGNED, ZR, ZR, addr, 0);
STP(INDEX_SIGNED, ZR, ZR, addr, 16);
}
else else
{ {
if (flags & BackPatchInfo::FLAG_SIZE_32) if (flags & BackPatchInfo::FLAG_SIZE_32)
@ -212,6 +219,11 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
BLR(X30); BLR(X30);
} }
else if (flags & BackPatchInfo::FLAG_ZERO_256)
{
MOVI2R(X30, (u64)&PowerPC::ClearCacheLine);
BLR(X30);
}
else else
{ {
if (flags & BackPatchInfo::FLAG_SIZE_32) if (flags & BackPatchInfo::FLAG_SIZE_32)

View File

@ -663,3 +663,78 @@ void JitArm64::dcbt(UGeckoInstruction inst)
js.skipInstructions = 1; js.skipInstructions = 1;
} }
} }
// Emits native code for the PowerPC dcbz instruction.
//
// The effective address is rB when the RA field of the instruction is 0 and
// rA + rB otherwise. It is materialized in W0 and handed to
// EmitBackpatchRoutine together with BackPatchInfo::FLAG_ZERO_256.
//
// NOTE(review): the address is forwarded as computed, without being rounded
// down to a 32-byte boundary. dcbz is specified to clear the whole cache block
// containing the effective address - confirm whether it has to be masked with
// ~31 here, or whether the store paths already take care of that.
void JitArm64::dcbz(UGeckoInstruction inst)
{
	INSTRUCTION_START
	JITDISABLE(bJITLoadStoreOff);

	const int a = inst.RA;
	const int b = inst.RB;

	// W0 carries the effective address into the backpatch routine.
	gpr.Lock(W0);
	const ARM64Reg addr_reg = W0;

	if (a)
	{
		const bool is_imm_a = gpr.IsImm(a);
		const bool is_imm_b = gpr.IsImm(b);

		if (is_imm_a && is_imm_b)
		{
			// Both operands are known at compile time: fold the sum into one constant.
			const u32 imm_addr = gpr.GetImm(b) + gpr.GetImm(a);
			MOVI2R(addr_reg, imm_addr);
		}
		else if (is_imm_a || is_imm_b)
		{
			// Exactly one operand is a known constant, the other lives in a register.
			ARM64Reg base = is_imm_a ? gpr.R(b) : gpr.R(a);
			const u32 imm_offset = is_imm_a ? gpr.GetImm(a) : gpr.GetImm(b);

			if (imm_offset < 4096)
			{
				// Small enough to be used directly as the immediate operand of ADD.
				ADD(addr_reg, base, imm_offset);
			}
			else
			{
				// Too large for the immediate form: load the constant first,
				// then add the base register to it.
				MOVI2R(addr_reg, imm_offset);
				ADD(addr_reg, addr_reg, base);
			}
		}
		else
		{
			// Both operands live in registers.
			ADD(addr_reg, gpr.R(a), gpr.R(b));
		}
	}
	else
	{
		// RA field is 0: the effective address is rB alone.
		if (gpr.IsImm(b))
		{
			const u32 imm_addr = gpr.GetImm(b);
			MOVI2R(addr_reg, imm_addr);
		}
		else
		{
			MOV(addr_reg, gpr.R(b));
		}
	}

	// We don't care about being /too/ terribly efficient here
	// As long as we aren't falling back to interpreter we're winning a lot

	BitSet32 gprs_to_push = gpr.GetCallerSavedUsed();
	BitSet32 fprs_to_push = fpr.GetCallerSavedUsed();
	// W0 is dropped from the set of registers to save; here it only holds the
	// temporary address computed above.
	gprs_to_push[W0] = 0;

	// Arguments after the flag: fastmem = true, do_farcode = true, W0 as the
	// value register, and the 64-bit view of the address register.
	EmitBackpatchRoutine(BackPatchInfo::FLAG_ZERO_256, true, true, W0, EncodeRegTo64(addr_reg), gprs_to_push, fprs_to_push);

	gpr.Unlock(W0);
}

View File

@ -225,7 +225,7 @@ static GekkoOPTemplate table31[] =
{278, &JitArm64::dcbt}, // dcbt {278, &JitArm64::dcbt}, // dcbt
{470, &JitArm64::FallBackToInterpreter}, // dcbi {470, &JitArm64::FallBackToInterpreter}, // dcbi
{758, &JitArm64::DoNothing}, // dcba {758, &JitArm64::DoNothing}, // dcba
{1014, &JitArm64::FallBackToInterpreter}, // dcbz {1014, &JitArm64::dcbz}, // dcbz
//load word //load word
{23, &JitArm64::lXX}, // lwzx {23, &JitArm64::lXX}, // lwzx

View File

@ -19,6 +19,7 @@ struct BackPatchInfo
FLAG_REVERSE = (1 << 7), FLAG_REVERSE = (1 << 7),
FLAG_EXTEND = (1 << 8), FLAG_EXTEND = (1 << 8),
FLAG_SIZE_F32I = (1 << 9), FLAG_SIZE_F32I = (1 << 9),
FLAG_ZERO_256 = (1 << 10),
}; };
static u32 GetFlagSize(u32 flags) static u32 GetFlagSize(u32 flags)
@ -33,6 +34,8 @@ struct BackPatchInfo
return 32; return 32;
if (flags & FLAG_SIZE_F64) if (flags & FLAG_SIZE_F64)
return 64; return 64;
if (flags & FLAG_ZERO_256)
return 256;
return 0; return 0;
} }
}; };