[AArch64] Implements loadstore instructions in the JIT recompiler.

All of these instructions are implemented with fastmem support.
Loads with update are currently disabled because of an issue that I have yet to track down.
I'm sure I'll figure that out later.
This commit is contained in:
Ryan Houdek 2014-12-19 21:20:46 -06:00
parent 17a4208fe5
commit 4247506c00
3 changed files with 429 additions and 14 deletions

View File

@ -97,6 +97,8 @@ public:
// LoadStore // LoadStore
void icbi(UGeckoInstruction inst); void icbi(UGeckoInstruction inst);
void lXX(UGeckoInstruction inst);
void stX(UGeckoInstruction inst);
private: private:
Arm64GPRCache gpr; Arm64GPRCache gpr;
@ -114,6 +116,9 @@ private:
bool DisasmLoadStore(const u8* ptr, u32* flags, Arm64Gen::ARM64Reg* reg); bool DisasmLoadStore(const u8* ptr, u32* flags, Arm64Gen::ARM64Reg* reg);
void InitBackpatch(); void InitBackpatch();
u32 EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem, bool do_padding, Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr); u32 EmitBackpatchRoutine(ARM64XEmitter* emit, u32 flags, bool fastmem, bool do_padding, Arm64Gen::ARM64Reg RS, Arm64Gen::ARM64Reg addr);
// Loadstore routines
// Emits a guest load into guest GPR `dest`.
//   addr      - guest GPR index of the base operand, or -1 when there is none
//   offsetReg - guest GPR index of the index operand, or -1 when there is none
//   flags     - BackPatchInfo::FLAG_* bits, forwarded to EmitBackpatchRoutine
//   offset    - displacement; only used when offsetReg is -1
//   update    - when true, the effective address is also written back to `addr`
void SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update);
// Emits a guest store of guest GPR `value`.
//   dest      - guest GPR index of the base operand, or -1 when there is none
//   regOffset - guest GPR index of the index operand, or -1 when there is none
//   flags     - BackPatchInfo::FLAG_* bits, forwarded to EmitBackpatchRoutine
//   offset    - displacement; only used when regOffset is -1
void SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset);
const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b); const u8* DoJit(u32 em_address, PPCAnalyst::CodeBuffer *code_buf, JitBlock *b);

View File

@ -17,6 +17,416 @@ using namespace Arm64Gen;
void JitArm64::icbi(UGeckoInstruction inst) void JitArm64::icbi(UGeckoInstruction inst)
{ {
gpr.Flush(FlushMode::FLUSH_ALL);
fpr.Flush(FlushMode::FLUSH_ALL);
FallBackToInterpreter(inst); FallBackToInterpreter(inst);
WriteExit(js.compilerPC + 4); WriteExit(js.compilerPC + 4);
} }
// Emits the code for a guest load into guest GPR `dest`.
//
//   dest      - guest GPR that receives the loaded value
//   addr      - guest GPR holding the base address, or -1 for "no base"
//   offsetReg - guest GPR holding the index, or -1 for "no index"
//   flags     - BackPatchInfo::FLAG_* bits, forwarded to EmitBackpatchRoutine
//   offset    - displacement; only used when offsetReg is -1
//   update    - also write the effective address back to `addr`
//
// The effective address is built in W0. When it is known at compile time and
// Memory::IsRAMAddress() accepts it, the access is emitted directly as fastmem
// without padding; otherwise the caller-saved registers in use are pushed
// around the access.
void JitArm64::SafeLoadToReg(u32 dest, s32 addr, s32 offsetReg, u32 flags, s32 offset, bool update)
{
    // We want to make sure to not get LR as a temp register
    gpr.Lock(W0, W30);

    // The old value of dest only has to be loaded if it doubles as an address operand.
    gpr.BindToRegister(dest, dest == (u32)addr || dest == (u32)offsetReg);
    ARM64Reg dest_reg = gpr.R(dest);
    ARM64Reg up_reg = INVALID_REG;
    ARM64Reg off_reg = INVALID_REG;

    // An update form writes the effective address back into `addr`, so it needs
    // a host register even when the cache currently tracks `addr` as an
    // immediate. Previously up_reg stayed INVALID_REG in that case and the
    // write-back below was emitted against an invalid register.
    if (addr != -1 && (update || !gpr.IsImm(addr)))
        up_reg = gpr.R(addr);

    if (offsetReg != -1 && !gpr.IsImm(offsetReg))
        off_reg = gpr.R(offsetReg);

    // Everything caller-saved that is live has to survive the access, except
    // the scratch registers locked above.
    BitSet32 regs_in_use = gpr.GetCallerSavedUsed();
    BitSet32 ignore_mask(0);
    regs_in_use[W0] = 0;
    regs_in_use[W30] = 0;
    // The destination must keep the freshly loaded value when registers are popped.
    ignore_mask[dest_reg] = 1;

    ARM64Reg addr_reg = W0;
    u32 imm_addr = 0;
    bool is_immediate = false;

    if (offsetReg == -1)
    {
        // base + displacement
        if (addr != -1)
        {
            if (gpr.IsImm(addr))
            {
                is_immediate = true;
                imm_addr = gpr.GetImm(addr) + offset;
            }
            else
            {
                MOVI2R(addr_reg, offset);
                ADD(addr_reg, addr_reg, up_reg);
            }
        }
        else
        {
            is_immediate = true;
            imm_addr = offset;
        }
    }
    else
    {
        // base + index register
        if (addr != -1)
        {
            if (gpr.IsImm(addr) && gpr.IsImm(offsetReg))
            {
                is_immediate = true;
                imm_addr = gpr.GetImm(addr) + gpr.GetImm(offsetReg);
            }
            else if (gpr.IsImm(addr) && !gpr.IsImm(offsetReg))
            {
                MOVI2R(addr_reg, gpr.GetImm(addr));
                ADD(addr_reg, addr_reg, off_reg);
            }
            else if (!gpr.IsImm(addr) && gpr.IsImm(offsetReg))
            {
                MOVI2R(addr_reg, gpr.GetImm(offsetReg));
                ADD(addr_reg, addr_reg, up_reg);
            }
            else
            {
                ADD(addr_reg, up_reg, off_reg);
            }
        }
        else
        {
            if (gpr.IsImm(offsetReg))
            {
                is_immediate = true;
                imm_addr = gpr.GetImm(offsetReg);
            }
            else
            {
                MOV(addr_reg, off_reg);
            }
        }
    }

    ARM64Reg XA = EncodeRegTo64(addr_reg);

    if (is_immediate)
        MOVI2R(XA, imm_addr);

    if (is_immediate && Memory::IsRAMAddress(imm_addr))
    {
        // Known RAM address: fastmem = true, do_padding = false.
        EmitBackpatchRoutine(this, flags, true, false, dest_reg, XA);

        if (update)
            MOVI2R(up_reg, imm_addr);
    }
    else
    {
        // The write-back happens before the push so that the pushed (and later
        // popped) copy of up_reg already holds the updated address.
        if (update)
            MOV(up_reg, addr_reg);

        // Has a chance of being backpatched which will destroy our state
        // push and pop everything in this instance
        ABI_PushRegisters(regs_in_use);
        EmitBackpatchRoutine(this, flags,
            SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
            SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem,
            dest_reg, XA);
        ABI_PopRegisters(regs_in_use, ignore_mask);
    }

    gpr.Unlock(W0, W30);
}
// Emits the code for a guest store of guest GPR `value`.
//
//   dest      - guest GPR holding the base address, or -1 for "no base"
//   value     - guest GPR whose contents are stored
//   regOffset - guest GPR holding the index, or -1 for "no index"
//   flags     - BackPatchInfo::FLAG_* bits, forwarded to EmitBackpatchRoutine
//   offset    - displacement; only used when regOffset is -1
//
// The effective address is built in W1. A compile-time-known address accepted
// by Memory::IsRAMAddress() is stored to directly; every other access is
// wrapped in a push/pop of the live caller-saved registers.
void JitArm64::SafeStoreFromReg(s32 dest, u32 value, s32 regOffset, u32 flags, s32 offset)
{
    // Keep LR (W30) and the W0/W1 scratch registers away from the allocator.
    gpr.Lock(W0, W1, W30);

    const ARM64Reg value_reg = gpr.R(value);

    const bool has_index = regOffset != -1;
    const bool has_base = dest != -1;

    // Host registers are only requested for operands that are not cached immediates.
    ARM64Reg index_reg = INVALID_REG;
    if (has_index && !gpr.IsImm(regOffset))
        index_reg = gpr.R(regOffset);

    ARM64Reg base_reg = INVALID_REG;
    if (has_base && !gpr.IsImm(dest))
        base_reg = gpr.R(dest);

    // Live caller-saved registers, minus the scratch registers locked above.
    BitSet32 live_regs = gpr.GetCallerSavedUsed();
    live_regs[W0] = 0;
    live_regs[W1] = 0;
    live_regs[W30] = 0;

    const ARM64Reg ea_reg = W1;
    const ARM64Reg ea_reg64 = EncodeRegTo64(ea_reg);

    const bool index_is_imm = has_index && gpr.IsImm(regOffset);
    const bool base_is_imm = has_base && gpr.IsImm(dest);

    u32 const_ea = 0;
    bool ea_is_const = false;

    if (!has_index)
    {
        // base (optional) + displacement
        if (!has_base)
        {
            ea_is_const = true;
            const_ea = offset;
        }
        else if (base_is_imm)
        {
            ea_is_const = true;
            const_ea = gpr.GetImm(dest) + offset;
        }
        else
        {
            MOVI2R(ea_reg, offset);
            ADD(ea_reg, ea_reg, base_reg);
        }
    }
    else if (!has_base)
    {
        // index register only
        if (index_is_imm)
        {
            ea_is_const = true;
            const_ea = gpr.GetImm(regOffset);
        }
        else
        {
            MOV(ea_reg, index_reg);
        }
    }
    else if (base_is_imm && index_is_imm)
    {
        // both operands known: fold at compile time
        ea_is_const = true;
        const_ea = gpr.GetImm(dest) + gpr.GetImm(regOffset);
    }
    else if (base_is_imm)
    {
        MOVI2R(ea_reg, gpr.GetImm(dest));
        ADD(ea_reg, ea_reg, index_reg);
    }
    else if (index_is_imm)
    {
        MOVI2R(ea_reg, gpr.GetImm(regOffset));
        ADD(ea_reg, ea_reg, base_reg);
    }
    else
    {
        ADD(ea_reg, base_reg, index_reg);
    }

    if (ea_is_const)
        MOVI2R(ea_reg64, const_ea);

    if (ea_is_const && Memory::IsRAMAddress(const_ea))
    {
        // Known RAM address: fastmem = true, do_padding = false.
        EmitBackpatchRoutine(this, flags, true, false, value_reg, ea_reg64);
    }
    else
    {
        // This access may get backpatched later, which would clobber live
        // state, so save and restore everything around it.
        const bool fastmem = SConfig::GetInstance().m_LocalCoreStartupParameter.bFastmem;
        ABI_PushRegisters(live_regs);
        EmitBackpatchRoutine(this, flags, fastmem, fastmem, value_reg, ea_reg64);
        ABI_PopRegisters(live_regs);
    }

    gpr.Unlock(W0, W1, W30);
}
// JIT handler for the integer load instructions (lwz/lbz/lhz/lha, their
// update and indexed variants, and the byte-reversed lwbrx/lhbrx).
//
// Decodes the opcode into BackPatchInfo flags plus an optional index register
// and hands the actual code generation to SafeLoadToReg. Update forms are
// rejected via FALLBACK_IF for now. A plain lwz that starts the known idle
// loop pattern additionally gets an idle-skip sequence appended.
void JitArm64::lXX(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITLoadStoreOff);

    u32 a = inst.RA, b = inst.RB, d = inst.RD;
    s32 offset = inst.SIMM_16;
    s32 offsetReg = -1;
    u32 flags = BackPatchInfo::FLAG_LOAD;
    bool update = false;

    switch (inst.OPCD)
    {
    case 31:
        // Indexed forms: the address comes from rA|0 + rB, never from SIMM_16.
        switch (inst.SUBOP10)
        {
        case 55: // lwzux
            update = true;
            // fallthrough
        case 23: // lwzx
            flags |= BackPatchInfo::FLAG_SIZE_32;
            offsetReg = b;
            break;
        case 119: // lbzux
            update = true;
            // fallthrough
        case 87: // lbzx
            flags |= BackPatchInfo::FLAG_SIZE_8;
            offsetReg = b;
            break;
        case 311: // lhzux
            update = true;
            // fallthrough
        case 279: // lhzx
            flags |= BackPatchInfo::FLAG_SIZE_16;
            offsetReg = b;
            break;
        case 375: // lhaux
            update = true;
            // fallthrough
        case 343: // lhax
            flags |= BackPatchInfo::FLAG_EXTEND |
                     BackPatchInfo::FLAG_SIZE_16;
            offsetReg = b;
            break;
        case 534: // lwbrx
            flags |= BackPatchInfo::FLAG_REVERSE |
                     BackPatchInfo::FLAG_SIZE_32;
            // Indexed like every other opcode-31 load; without this the
            // meaningless SIMM_16 bits would be used as a displacement.
            offsetReg = b;
            break;
        case 790: // lhbrx
            flags |= BackPatchInfo::FLAG_REVERSE |
                     BackPatchInfo::FLAG_SIZE_16;
            offsetReg = b;
            break;
        }
        break;
    case 33: // lwzu
        update = true;
        // fallthrough
    case 32: // lwz
        flags |= BackPatchInfo::FLAG_SIZE_32;
        break;
    case 35: // lbzu
        update = true;
        // fallthrough
    case 34: // lbz
        flags |= BackPatchInfo::FLAG_SIZE_8;
        break;
    case 41: // lhzu
        update = true;
        // fallthrough
    case 40: // lhz
        flags |= BackPatchInfo::FLAG_SIZE_16;
        break;
    case 43: // lhau
        update = true;
        // fallthrough
    case 42: // lha
        flags |= BackPatchInfo::FLAG_EXTEND |
                 BackPatchInfo::FLAG_SIZE_16;
        break;
    }

    // Loads with update are not handled by the JIT yet.
    FALLBACK_IF(update);

    // For non-update forms rA == 0 means "no base register".
    SafeLoadToReg(d, update ? a : (a ? a : -1), offsetReg, flags, offset, update);

    // LWZ idle skipping
    // Matches "lwz r0, X(r13)" followed by a compare of r0 against zero
    // (0x28000000, or 0x2C000000 on Wii) and the branch word 0x4182fff8.
    if (SConfig::GetInstance().m_LocalCoreStartupParameter.bSkipIdle &&
        inst.OPCD == 32 &&
        (inst.hex & 0xFFFF0000) == 0x800D0000 &&
        (Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x28000000 ||
        (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii && Memory::ReadUnchecked_U32(js.compilerPC + 4) == 0x2C000000)) &&
        Memory::ReadUnchecked_U32(js.compilerPC + 8) == 0x4182fff8)
    {
        // if it's still 0, we can wait until the next event
        FixupBranch noIdle = CBNZ(gpr.R(d));

        gpr.Flush(FLUSH_MAINTAIN_STATE);
        fpr.Flush(FLUSH_MAINTAIN_STATE);

        ARM64Reg WA = gpr.GetReg();
        ARM64Reg XA = EncodeRegTo64(WA);

        // Call PowerPC::OnIdle with the polled address as its argument in W0.
        MOVI2R(XA, (u64)&PowerPC::OnIdle);
        MOVI2R(W0, PowerPC::ppcState.gpr[a] + (s32)(s16)inst.SIMM_16);
        BLR(XA);

        gpr.Unlock(WA);
        WriteExceptionExit();

        SetJumpTarget(noIdle);

        //js.compilerPC += 8;
        return;
    }
}
// JIT handler for the integer store instructions (stw/stb/sth and their
// update and indexed variants).
//
// Decodes the opcode into BackPatchInfo flags plus an optional index register,
// emits the store through SafeStoreFromReg and, for update forms, adds the
// displacement or index register to rA afterwards.
void JitArm64::stX(UGeckoInstruction inst)
{
    INSTRUCTION_START
    JITDISABLE(bJITLoadStoreOff);

    u32 a = inst.RA, b = inst.RB, s = inst.RS;
    s32 offset = inst.SIMM_16;
    s32 regOffset = -1;
    u32 flags = BackPatchInfo::FLAG_STORE;
    bool update = false;

    switch (inst.OPCD)
    {
    case 31:
        // Indexed forms: the address comes from rA|0 + rB.
        switch (inst.SUBOP10)
        {
        case 183: // stwux
            update = true;
            // fallthrough
        case 151: // stwx
            flags |= BackPatchInfo::FLAG_SIZE_32;
            regOffset = b;
            break;
        case 247: // stbux
            update = true;
            // fallthrough
        case 215: // stbx
            flags |= BackPatchInfo::FLAG_SIZE_8;
            regOffset = b;
            break;
        case 439: // sthux
            update = true;
            // fallthrough
        case 407: // sthx
            flags |= BackPatchInfo::FLAG_SIZE_16;
            regOffset = b;
            break;
        }
        break;
    case 37: // stwu
        update = true;
        // fallthrough
    case 36: // stw
        flags |= BackPatchInfo::FLAG_SIZE_32;
        break;
    case 39: // stbu
        update = true;
        // fallthrough
    case 38: // stb
        flags |= BackPatchInfo::FLAG_SIZE_8;
        break;
    case 45: // sthu
        update = true;
        // fallthrough
    case 44: // sth
        flags |= BackPatchInfo::FLAG_SIZE_16;
        break;
    }

    // For non-update forms rA == 0 means "no base register".
    SafeStoreFromReg(update ? a : (a ? a : -1), s, regOffset, flags, offset);

    if (update)
    {
        // The write-back is emitted after the store, so a store of rA itself
        // still sees the old value.
        if (regOffset == -1)
        {
            // Only the displacement form needs a temporary; grab it before
            // resolving rA, in the same order as before.
            ARM64Reg WA = gpr.GetReg();
            ARM64Reg RA = gpr.R(a);

            MOVI2R(WA, offset);
            ADD(RA, RA, WA);

            gpr.Unlock(WA);
        }
        else
        {
            ARM64Reg RA = gpr.R(a);
            ARM64Reg RB = gpr.R(regOffset);

            ADD(RA, RA, RB);
        }
    }
}

View File

@ -65,21 +65,21 @@ static GekkoOPTemplate primarytable[] =
{28, &JitArm64::arith_imm}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, {28, &JitArm64::arith_imm}, //"andi_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},
{29, &JitArm64::arith_imm}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}}, {29, &JitArm64::arith_imm}, //"andis_rc", OPTYPE_INTEGER, FL_OUT_A | FL_IN_S | FL_SET_CR0}},
{32, &JitArm64::FallBackToInterpreter}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, {32, &JitArm64::lXX}, //"lwz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{33, &JitArm64::FallBackToInterpreter}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, {33, &JitArm64::lXX}, //"lwzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{34, &JitArm64::FallBackToInterpreter}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, {34, &JitArm64::lXX}, //"lbz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{35, &JitArm64::FallBackToInterpreter}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, {35, &JitArm64::lXX}, //"lbzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{40, &JitArm64::FallBackToInterpreter}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, {40, &JitArm64::lXX}, //"lhz", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{41, &JitArm64::FallBackToInterpreter}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, {41, &JitArm64::lXX}, //"lhzu", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{42, &JitArm64::FallBackToInterpreter}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}}, {42, &JitArm64::lXX}, //"lha", OPTYPE_LOAD, FL_OUT_D | FL_IN_A}},
{43, &JitArm64::FallBackToInterpreter}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}}, {43, &JitArm64::lXX}, //"lhau", OPTYPE_LOAD, FL_OUT_D | FL_OUT_A | FL_IN_A}},
{44, &JitArm64::FallBackToInterpreter}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, {44, &JitArm64::stX}, //"sth", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
{45, &JitArm64::FallBackToInterpreter}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, {45, &JitArm64::stX}, //"sthu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
{36, &JitArm64::FallBackToInterpreter}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, {36, &JitArm64::stX}, //"stw", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
{37, &JitArm64::FallBackToInterpreter}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, {37, &JitArm64::stX}, //"stwu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
{38, &JitArm64::FallBackToInterpreter}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}}, {38, &JitArm64::stX}, //"stb", OPTYPE_STORE, FL_IN_A | FL_IN_S}},
{39, &JitArm64::FallBackToInterpreter}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}}, {39, &JitArm64::stX}, //"stbu", OPTYPE_STORE, FL_OUT_A | FL_IN_A | FL_IN_S}},
{46, &JitArm64::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, {46, &JitArm64::FallBackToInterpreter}, //"lmw", OPTYPE_SYSTEM, FL_EVIL, 10}},
{47, &JitArm64::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}}, {47, &JitArm64::FallBackToInterpreter}, //"stmw", OPTYPE_SYSTEM, FL_EVIL, 10}},