[AArch64] Improve floating point single instructions.

Instead of emitting an "INS" instruction after every single-precision instruction to duplicate the bottom 64 bits into the top 64 bits of the register,
create a new FPR register cache type that tracks when a register's lower 64 bits are supposed to be duplicated into the high 64 bits,
without necessarily having the lower bits actually duplicated in the host-side register. This removes inefficient INS instructions from sequences of
single-precision float instructions.
In particular, a block in Animal Crossing that is very heavy on single float instructions went from 712 instructions down to 520 instructions (~27% fewer instructions; put another way, the old block was ~37% larger!)
This commit is contained in:
Ryan Houdek 2015-08-30 17:03:54 -05:00
parent 5110574c1f
commit bcde1aa8ff
8 changed files with 300 additions and 224 deletions

View File

@ -76,23 +76,13 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
if (flags & BackPatchInfo::FLAG_SIZE_F32)
{
m_float_emit.LDR(32, EncodeRegToDouble(RS), X28, addr);
m_float_emit.INS(32, RS, 1, RS, 0);
m_float_emit.REV32(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
m_float_emit.FCVTL(64, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
}
else
{
if (flags & BackPatchInfo::FLAG_ONLY_LOWER)
{
m_float_emit.LDR(64, EncodeRegToDouble(RS), X28, addr);
m_float_emit.REV64(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
}
else
{
m_float_emit.LDR(64, Q0, X28, addr);
m_float_emit.REV64(8, D0, D0);
m_float_emit.INS(64, RS, 0, Q0, 0);
}
m_float_emit.LDR(64, EncodeRegToDouble(RS), X28, addr);
m_float_emit.REV64(8, EncodeRegToDouble(RS), EncodeRegToDouble(RS));
}
}
else if (flags & BackPatchInfo::FLAG_STORE)
@ -142,7 +132,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
handler.addr_reg = addr;
handler.gprs = gprs_to_push;
handler.fprs = fprs_to_push;
handler.flags = flags & ~BackPatchInfo::FLAG_ONLY_LOWER;
handler.flags = flags;
FastmemArea* fastmem_area = &m_fault_to_handler[fastmem_start];
auto handler_loc_iter = m_handler_to_loc.find(handler);
@ -199,7 +189,7 @@ void JitArm64::EmitBackpatchRoutine(u32 flags, bool fastmem, bool do_farcode,
{
MOVI2R(X30, (u64)&PowerPC::Read_U32);
BLR(X30);
m_float_emit.DUP(32, RS, X0);
m_float_emit.INS(32, RS, 0, X0);
m_float_emit.FCVTL(64, RS, RS);
}
else

View File

@ -24,7 +24,7 @@ void JitArm64::fabsx(UGeckoInstruction inst)
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b);
ARM64Reg VB = fpr.R(b);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d);
m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB));
@ -37,14 +37,13 @@ void JitArm64::faddsx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b, false);
fpr.BindToRegister(d, d == a || d == b, REG_DUP);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d, REG_DUP);
m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB));
m_float_emit.INS(64, VD, 1, VD, 0);
}
void JitArm64::faddx(UGeckoInstruction inst)
@ -56,8 +55,8 @@ void JitArm64::faddx(UGeckoInstruction inst)
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d);
m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB));
@ -70,17 +69,17 @@ void JitArm64::fmaddsx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c, false);
fpr.BindToRegister(d, d == a || d == b || d == c, REG_DUP);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d, REG_DUP);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC));
m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB));
m_float_emit.INS(64, VD, 1, VD, 0);
fpr.Unlock(V0);
}
@ -93,9 +92,9 @@ void JitArm64::fmaddx(UGeckoInstruction inst)
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d);
m_float_emit.FMADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB));
@ -110,7 +109,7 @@ void JitArm64::fmrx(UGeckoInstruction inst)
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b);
ARM64Reg VB = fpr.R(b);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d);
m_float_emit.INS(64, VD, 0, VB, 0);
@ -123,17 +122,17 @@ void JitArm64::fmsubsx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c, false);
fpr.BindToRegister(d, d == a || d == b || d == c, REG_DUP);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d, REG_DUP);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC));
m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB));
m_float_emit.INS(64, VD, 1, VD, 0);
fpr.Unlock(V0);
}
@ -146,9 +145,9 @@ void JitArm64::fmsubx(UGeckoInstruction inst)
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d);
m_float_emit.FNMSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB));
@ -161,14 +160,13 @@ void JitArm64::fmulsx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == c, false);
fpr.BindToRegister(d, d == a || d == c, REG_DUP);
ARM64Reg VA = fpr.R(a);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d, REG_DUP);
m_float_emit.FMUL(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC));
m_float_emit.INS(64, VD, 1, VD, 0);
}
void JitArm64::fmulx(UGeckoInstruction inst)
@ -180,8 +178,8 @@ void JitArm64::fmulx(UGeckoInstruction inst)
u32 a = inst.FA, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VC = fpr.R(c);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d);
m_float_emit.FMUL(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC));
@ -196,7 +194,7 @@ void JitArm64::fnabsx(UGeckoInstruction inst)
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b);
ARM64Reg VB = fpr.R(b);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d);
m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB));
@ -212,7 +210,7 @@ void JitArm64::fnegx(UGeckoInstruction inst)
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b);
ARM64Reg VB = fpr.R(b);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d);
m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VB));
@ -225,18 +223,18 @@ void JitArm64::fnmaddsx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c, false);
fpr.BindToRegister(d, d == a || d == b || d == c, REG_DUP);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d, REG_DUP);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC));
m_float_emit.FADD(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB));
m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD));
m_float_emit.INS(64, VD, 1, VD, 0);
fpr.Unlock(V0);
}
@ -249,9 +247,9 @@ void JitArm64::fnmaddx(UGeckoInstruction inst)
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d);
m_float_emit.FNMADD(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB));
@ -264,18 +262,18 @@ void JitArm64::fnmsubsx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c, false);
fpr.BindToRegister(d, d == a || d == b || d == c, REG_DUP);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d, REG_DUP);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(EncodeRegToDouble(V0), EncodeRegToDouble(VA), EncodeRegToDouble(VC));
m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(V0), EncodeRegToDouble(VB));
m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD));
m_float_emit.INS(64, VD, 1, VD, 0);
fpr.Unlock(V0);
}
@ -288,9 +286,9 @@ void JitArm64::fnmsubx(UGeckoInstruction inst)
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VC = fpr.R(c);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VC = fpr.R(c, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d);
m_float_emit.FMSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VC), EncodeRegToDouble(VB));
@ -305,9 +303,9 @@ void JitArm64::fselx(UGeckoInstruction inst)
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c);
ARM64Reg VD = fpr.R(d);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d, REG_IS_LOADED);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VC = fpr.R(c);
m_float_emit.FCMPE(EncodeRegToDouble(VA));
@ -321,14 +319,13 @@ void JitArm64::fsubsx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b, false);
fpr.BindToRegister(d, d == a || d == b, REG_DUP);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d, REG_DUP);
m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB));
m_float_emit.INS(64, VD, 1, VD, 0);
}
void JitArm64::fsubx(UGeckoInstruction inst)
@ -340,8 +337,8 @@ void JitArm64::fsubx(UGeckoInstruction inst)
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d);
m_float_emit.FSUB(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB));
@ -353,14 +350,13 @@ void JitArm64::frspx(UGeckoInstruction inst)
JITDISABLE(bJITFloatingPointOff);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b, false);
fpr.BindToRegister(d, d == b, REG_DUP);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d, REG_DUP);
m_float_emit.FCVTN(32, EncodeRegToDouble(VD), EncodeRegToDouble(VB));
m_float_emit.FCVTL(64, EncodeRegToDouble(VD), EncodeRegToDouble(VD));
m_float_emit.INS(64, VD, 1, VD, 0);
}
void JitArm64::fcmpx(UGeckoInstruction inst)
@ -371,8 +367,8 @@ void JitArm64::fcmpx(UGeckoInstruction inst)
u32 a = inst.FA, b = inst.FB;
int crf = inst.CRFD;
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg WA = gpr.GetReg();
ARM64Reg XA = EncodeRegTo64(WA);
@ -457,7 +453,7 @@ void JitArm64::fctiwzx(UGeckoInstruction inst)
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b);
ARM64Reg VB = fpr.R(b);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d);
ARM64Reg V0 = fpr.GetReg();
@ -481,8 +477,8 @@ void JitArm64::fdivx(UGeckoInstruction inst)
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d);
m_float_emit.FDIV(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB));
@ -495,12 +491,11 @@ void JitArm64::fdivsx(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b, false);
fpr.BindToRegister(d, d == a || d == b, REG_DUP);
ARM64Reg VA = fpr.R(a);
ARM64Reg VB = fpr.R(b);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_IS_LOADED);
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
ARM64Reg VD = fpr.R(d, REG_DUP);
m_float_emit.FDIV(EncodeRegToDouble(VD), EncodeRegToDouble(VA), EncodeRegToDouble(VB));
m_float_emit.INS(64, VD, 1, VD, 0);
}

View File

@ -71,19 +71,13 @@ void JitArm64::lfXX(UGeckoInstruction inst)
u32 imm_addr = 0;
bool is_immediate = false;
bool only_lower = !!(flags & BackPatchInfo::FLAG_SIZE_F64);
RegType type = !!(flags & BackPatchInfo::FLAG_SIZE_F64) ? REG_LOWER_PAIR : REG_DUP;
fpr.BindToRegister(inst.FD, false, only_lower);
fpr.BindToRegister(inst.FD, false, type);
ARM64Reg VD = fpr.R(inst.FD, only_lower);
ARM64Reg VD = fpr.R(inst.FD, type);
ARM64Reg addr_reg = W0;
if (!fpr.IsLower(inst.FD))
only_lower = false;
if (only_lower)
flags |= BackPatchInfo::FLAG_ONLY_LOWER;
gpr.Lock(W0, W30);
fpr.Lock(Q0);
@ -270,7 +264,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
u32 imm_addr = 0;
bool is_immediate = false;
ARM64Reg V0 = fpr.R(inst.FS);
ARM64Reg V0 = fpr.R(inst.FS, REG_IS_LOADED);
ARM64Reg addr_reg = W1;
gpr.Lock(W0, W1, W30);

View File

@ -58,7 +58,7 @@ void JitArm64::psq_l(UGeckoInstruction inst)
if (update)
{
gpr.BindToRegister(inst.RA, false);
gpr.BindToRegister(inst.RA, REG_REG);
MOV(arm_addr, addr_reg);
}
@ -66,8 +66,8 @@ void JitArm64::psq_l(UGeckoInstruction inst)
LDR(X30, X30, ArithOption(EncodeRegTo64(type_reg), true));
BLR(X30);
fpr.BindToRegister(inst.RS, false, false);
ARM64Reg VS = fpr.R(inst.RS, false);
fpr.BindToRegister(inst.RS, false, REG_REG);
ARM64Reg VS = fpr.R(inst.RS, REG_REG);
m_float_emit.FCVTL(64, VS, D0);
if (inst.W)
{
@ -97,7 +97,7 @@ void JitArm64::psq_st(UGeckoInstruction inst)
fpr.Lock(Q0, Q1);
ARM64Reg arm_addr = gpr.R(inst.RA);
ARM64Reg VS = fpr.R(inst.RS, false);
ARM64Reg VS = fpr.R(inst.RS, REG_REG);
ARM64Reg scale_reg = W0;
ARM64Reg addr_reg = W1;
@ -129,7 +129,7 @@ void JitArm64::psq_st(UGeckoInstruction inst)
if (update)
{
gpr.BindToRegister(inst.RA, false);
gpr.BindToRegister(inst.RA, REG_REG);
MOV(arm_addr, addr_reg);
}

View File

@ -23,10 +23,10 @@ void JitArm64::ps_abs(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b, false);
fpr.BindToRegister(d, d == b, REG_REG);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
m_float_emit.FABS(64, VD, VB);
}
@ -38,11 +38,11 @@ void JitArm64::ps_add(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b, false);
fpr.BindToRegister(d, d == a || d == b, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
m_float_emit.FADD(64, VD, VA, VB);
}
@ -54,11 +54,11 @@ void JitArm64::ps_div(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b, false);
fpr.BindToRegister(d, d == a || d == b, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
m_float_emit.FDIV(64, VD, VA, VB);
}
@ -70,12 +70,12 @@ void JitArm64::ps_madd(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c, false);
fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VC = fpr.R(c, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VC = fpr.R(c, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
@ -91,12 +91,12 @@ void JitArm64::ps_madds0(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c, false);
fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VC = fpr.R(c, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VC = fpr.R(c, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VC, 0);
@ -113,12 +113,12 @@ void JitArm64::ps_madds1(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c, false);
fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VC = fpr.R(c, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VC = fpr.R(c, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VC, 1);
@ -135,11 +135,11 @@ void JitArm64::ps_merge00(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b, false);
fpr.BindToRegister(d, d == a || d == b, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
m_float_emit.TRN1(64, VD, VA, VB);
}
@ -151,11 +151,11 @@ void JitArm64::ps_merge01(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b, false);
fpr.BindToRegister(d, d == a || d == b, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
m_float_emit.INS(64, VD, 0, VA, 0);
m_float_emit.INS(64, VD, 1, VB, 1);
@ -168,11 +168,11 @@ void JitArm64::ps_merge10(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b, false);
fpr.BindToRegister(d, d == a || d == b, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
if (d != a && d != b)
{
@ -196,11 +196,11 @@ void JitArm64::ps_merge11(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b, false);
fpr.BindToRegister(d, d == a || d == b, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
m_float_emit.TRN2(64, VD, VA, VB);
}
@ -216,10 +216,10 @@ void JitArm64::ps_mr(UGeckoInstruction inst)
if (d == b)
return;
fpr.BindToRegister(d, false, false);
fpr.BindToRegister(d, REG_REG, REG_REG);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
m_float_emit.ORR(VD, VB, VB);
}
@ -231,11 +231,11 @@ void JitArm64::ps_mul(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == c, false);
fpr.BindToRegister(d, d == a || d == c, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VC = fpr.R(c, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VC = fpr.R(c, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
m_float_emit.FMUL(64, VD, VA, VC);
}
@ -247,11 +247,11 @@ void JitArm64::ps_muls0(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == c, false);
fpr.BindToRegister(d, d == a || d == c, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VC = fpr.R(c, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VC = fpr.R(c, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VC, 0);
@ -266,11 +266,11 @@ void JitArm64::ps_muls1(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == c, false);
fpr.BindToRegister(d, d == a || d == c, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VC = fpr.R(c, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VC = fpr.R(c, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VC, 1);
@ -285,12 +285,12 @@ void JitArm64::ps_msub(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c, false);
fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VC = fpr.R(c, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VC = fpr.R(c, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
@ -306,10 +306,10 @@ void JitArm64::ps_nabs(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b, false);
fpr.BindToRegister(d, d == b, REG_REG);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
m_float_emit.FABS(64, VD, VB);
m_float_emit.FNEG(64, VD, VD);
@ -322,10 +322,10 @@ void JitArm64::ps_neg(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b, false);
fpr.BindToRegister(d, d == b, REG_REG);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
m_float_emit.FNEG(64, VD, VB);
}
@ -337,12 +337,12 @@ void JitArm64::ps_nmadd(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c, false);
fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VC = fpr.R(c, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VC = fpr.R(c, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
@ -359,12 +359,12 @@ void JitArm64::ps_nmsub(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c, false);
fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VC = fpr.R(c, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VC = fpr.R(c, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.FMUL(64, V0, VA, VC);
@ -381,10 +381,10 @@ void JitArm64::ps_res(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == b, false);
fpr.BindToRegister(d, d == b, REG_REG);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
m_float_emit.FRSQRTE(64, VD, VB);
}
@ -396,12 +396,12 @@ void JitArm64::ps_sel(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c, false);
fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VC = fpr.R(c, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VC = fpr.R(c, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
if (d != a && d != b && d != c)
{
@ -425,11 +425,11 @@ void JitArm64::ps_sub(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b, false);
fpr.BindToRegister(d, d == a || d == b, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
m_float_emit.FSUB(64, VD, VA, VB);
}
@ -441,12 +441,12 @@ void JitArm64::ps_sum0(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c, false);
fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VC = fpr.R(c, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VC = fpr.R(c, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VB, 1);
@ -471,12 +471,12 @@ void JitArm64::ps_sum1(UGeckoInstruction inst)
FALLBACK_IF(inst.Rc);
u32 a = inst.FA, b = inst.FB, c = inst.FC, d = inst.FD;
fpr.BindToRegister(d, d == a || d == b || d == c, false);
fpr.BindToRegister(d, d == a || d == b || d == c, REG_REG);
ARM64Reg VA = fpr.R(a, false);
ARM64Reg VB = fpr.R(b, false);
ARM64Reg VC = fpr.R(c, false);
ARM64Reg VD = fpr.R(d, false);
ARM64Reg VA = fpr.R(a, REG_REG);
ARM64Reg VB = fpr.R(b, REG_REG);
ARM64Reg VC = fpr.R(c, REG_REG);
ARM64Reg VD = fpr.R(d, REG_REG);
ARM64Reg V0 = fpr.GetReg();
m_float_emit.DUP(64, V0, VA, 0);

View File

@ -274,7 +274,7 @@ void Arm64FPRCache::Flush(FlushMode mode, PPCAnalyst::CodeOp* op)
}
}
ARM64Reg Arm64FPRCache::R(u32 preg, bool only_lower)
ARM64Reg Arm64FPRCache::R(u32 preg, RegType type)
{
OpArg& reg = m_guest_registers[preg];
IncrementAllUsed();
@ -287,7 +287,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg, bool only_lower)
break;
case REG_LOWER_PAIR:
{
if (!only_lower)
if (type == REG_REG)
{
// Load the high 64bits from the file and insert them in to the high 64bits of the host register
ARM64Reg tmp_reg = GetReg();
@ -298,18 +298,52 @@ ARM64Reg Arm64FPRCache::R(u32 preg, bool only_lower)
// Change it over to a full 128bit register
reg.LoadToReg(reg.GetReg());
}
else if (type == REG_DUP)
{
// We already only have the lower 64bits
// Don't do anything
}
return reg.GetReg();
}
break;
case REG_DUP:
{
ARM64Reg host_reg = reg.GetReg();
if (type == REG_REG)
{
// We are requesting a full 128bit register
// but we are only available in the lower 64bits
// Duplicate to the top and change over
m_float_emit->INS(64, host_reg, 1, host_reg, 0);
reg.LoadToReg(host_reg);
}
else if (type == REG_LOWER_PAIR)
{
// We are only requesting the lower 64bits of a pair
// We've got to be careful in this instance
// Store our current duplicated high bits to the file
// then convert over to a lower reg
if (reg.IsDirty())
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1]));
reg.LoadLowerReg(host_reg);
}
return host_reg;
}
break;
case REG_NOTLOADED: // Register isn't loaded at /all/
{
ARM64Reg host_reg = GetReg();
u32 load_size;
if (only_lower)
if (type == REG_LOWER_PAIR)
{
load_size = 64;
reg.LoadLowerReg(host_reg);
}
else if (type == REG_DUP)
{
load_size = 64;
reg.LoadDup(host_reg);
}
else
{
load_size = 128;
@ -328,7 +362,7 @@ ARM64Reg Arm64FPRCache::R(u32 preg, bool only_lower)
return INVALID_REG;
}
void Arm64FPRCache::BindToRegister(u32 preg, bool do_load, bool only_lower)
void Arm64FPRCache::BindToRegister(u32 preg, bool do_load, RegType type)
{
OpArg& reg = m_guest_registers[preg];
@ -343,12 +377,17 @@ void Arm64FPRCache::BindToRegister(u32 preg, bool do_load, bool only_lower)
{
ARM64Reg host_reg = GetReg();
u32 load_size;
if (only_lower)
if (type == REG_LOWER_PAIR)
{
// We only want the lower 64bits
load_size = 64;
reg.LoadLowerReg(host_reg);
}
else if (type == REG_DUP)
{
load_size = 64;
reg.LoadDup(host_reg);
}
else
{
// We want the full 128bit register
@ -361,7 +400,8 @@ void Arm64FPRCache::BindToRegister(u32 preg, bool do_load, bool only_lower)
break;
case REG_LOWER_PAIR:
{
if (!only_lower)
ARM64Reg host_reg = reg.GetReg();
if (type == REG_REG)
{
// Okay, we've got the lower reg loaded and we really wanted the full register
if (do_load)
@ -369,27 +409,63 @@ void Arm64FPRCache::BindToRegister(u32 preg, bool do_load, bool only_lower)
// Load the high 64bits from the file and insert them in to the high 64bits of the host register
ARM64Reg tmp_reg = GetReg();
m_float_emit->LDR(64, INDEX_UNSIGNED, tmp_reg, X29, PPCSTATE_OFF(ps[preg][1]));
m_float_emit->INS(64, reg.GetReg(), 1, tmp_reg, 0);
m_float_emit->INS(64, host_reg, 1, tmp_reg, 0);
UnlockRegister(tmp_reg);
}
// Change it over to a full 128bit register
reg.LoadToReg(reg.GetReg());
reg.LoadToReg(host_reg);
}
else if (type == REG_DUP)
{
// Register is already the lower pair
// Just convert it over to a dup
reg.LoadDup(host_reg);
}
}
break;
case REG_REG:
{
if (only_lower)
ARM64Reg host_reg = reg.GetReg();
if (type == REG_LOWER_PAIR)
{
// If we only want the lower bits, let's store away the high bits and drop to a lower only register
// We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit store.
// It would take longer to do an insert to a temporary and a 64bit store than to just do this.
ARM64Reg host_reg = reg.GetReg();
if (was_dirty)
m_float_emit->STR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
reg.LoadLowerReg(host_reg);
}
else if (type == REG_DUP)
{
// If we are going from a full 128bit register to a duplicate
// then we can just change over
reg.LoadDup(host_reg);
}
}
break;
case REG_DUP:
{
ARM64Reg host_reg = reg.GetReg();
if (type == REG_REG)
{
// We are a duplicated register going to a full 128bit register
// Do an insert of our lower 64bits to the higher 64bits
m_float_emit->INS(64, host_reg, 1, host_reg, 0);
// Change over to the full 128bit register
reg.LoadToReg(host_reg);
}
else if (type == REG_LOWER_PAIR)
{
// We are duplicated changing over to a lower register
// We've got to be careful in this instance and do a store of our lower 64bits
// to the upper 64bits in the PowerPC state
// That way incase if we hit the path of DUP->LOWER->REG we get the correct bits back
if (was_dirty)
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1]));
reg.LoadLowerReg(host_reg);
}
}
break;
default:
@ -454,6 +530,24 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state)
if (reg.IsDirty())
m_float_emit->STR(store_size, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
if (!maintain_state)
{
UnlockRegister(host_reg);
reg.Flush();
}
}
else if (reg.GetType() == REG_DUP)
{
ARM64Reg host_reg = reg.GetReg();
if (reg.IsDirty())
{
// If the paired registers were at the start of ppcState we could do an STP here.
// Too bad moving them would break savestate compatibility between x86_64 and AArch64
//m_float_emit->STP(64, INDEX_SIGNED, host_reg, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1]));
}
if (!maintain_state)
{
UnlockRegister(host_reg);

View File

@ -20,6 +20,8 @@ enum RegType
REG_REG, // Reg type is register
REG_IMM, // Reg is really a IMM
REG_LOWER_PAIR, // Only the lower pair of a paired register
REG_DUP, // The lower reg is the same as the upper one (physical upper doesn't actually have the duplicated value)
REG_IS_LOADED, // We don't care what type it is, as long as the lower 64bits are loaded
};
enum FlushMode
@ -65,6 +67,11 @@ public:
m_type = REG_LOWER_PAIR;
m_reg = reg;
}
// Records that this guest register now lives in host register `reg` as a
// REG_DUP entry: the lower 64 bits are treated as also being the value of the
// upper 64 bits, even though the physical upper half of the host register
// does not actually hold the duplicated value.
void LoadDup(ARM64Reg reg)
{
	m_reg = reg;
	m_type = REG_DUP;
}
void LoadToImm(u32 imm)
{
m_type = REG_IMM;
@ -262,12 +269,9 @@ public:
// Returns a guest register inside of a host register
// Will dump an immediate to the host register as well
ARM64Reg R(u32 preg, bool only_lower = true);
ARM64Reg R(u32 preg, RegType type = REG_LOWER_PAIR);
void BindToRegister(u32 preg, bool do_load, bool only_lower = true);
// Returns if the register is only the lower 64bit register
bool IsLower(u32 preg) const { return m_guest_registers[preg].GetType() == REG_LOWER_PAIR; }
void BindToRegister(u32 preg, bool do_load, RegType type = REG_LOWER_PAIR);
BitSet32 GetCallerSavedUsed() override;

View File

@ -18,8 +18,7 @@ struct BackPatchInfo
FLAG_SIZE_F64 = (1 << 6),
FLAG_REVERSE = (1 << 7),
FLAG_EXTEND = (1 << 8),
FLAG_ONLY_LOWER = (1 << 9),
FLAG_SIZE_F32I = (1 << 10),
FLAG_SIZE_F32I = (1 << 9),
};
static u32 GetFlagSize(u32 flags)