JitArm64: Track singles in fabs.
This commit is contained in:
parent
84395b65f6
commit
c30a66b2d5
|
@ -33,7 +33,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
|
|||
bool use_c = op5 >= 25; // fmul and all kind of fmaddXX
|
||||
bool use_b = op5 != 25; // fmul uses no B
|
||||
|
||||
bool inputs_are_singles = fpr.IsSingle(a) && (!use_b || fpr.IsSingle(b)) && (!use_c || fpr.IsSingle(c));
|
||||
bool inputs_are_singles = fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) && (!use_c || fpr.IsSingle(c, !packed));
|
||||
|
||||
ARM64Reg VA, VB, VC, VD;
|
||||
|
||||
|
@ -105,13 +105,13 @@ void JitArm64::fp_logic(UGeckoInstruction inst)
|
|||
if (op10 == 72 && b == d)
|
||||
return;
|
||||
|
||||
bool is_single = fpr.IsSingle(b);
|
||||
bool single = fpr.IsSingle(b, !packed);
|
||||
u8 size = single ? 32 : 64;
|
||||
|
||||
if (packed)
|
||||
{
|
||||
RegType type = is_single ? REG_REG_SINGLE : REG_REG;
|
||||
u8 size = is_single ? 32 : 64;
|
||||
ARM64Reg (*reg_encoder)(ARM64Reg) = is_single ? EncodeRegToDouble : EncodeRegToQuad;
|
||||
RegType type = single ? REG_REG_SINGLE : REG_REG;
|
||||
ARM64Reg (*reg_encoder)(ARM64Reg) = single ? EncodeRegToDouble : EncodeRegToQuad;
|
||||
|
||||
ARM64Reg VB = reg_encoder(fpr.R(b, type));
|
||||
ARM64Reg VD = reg_encoder(fpr.RW(d, type));
|
||||
|
@ -128,16 +128,20 @@ void JitArm64::fp_logic(UGeckoInstruction inst)
|
|||
}
|
||||
else
|
||||
{
|
||||
ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
|
||||
ARM64Reg VD = fpr.RW(d);
|
||||
RegType type = single ? REG_IS_LOADED_SINGLE : REG_IS_LOADED;
|
||||
RegType type2 = single ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR;
|
||||
ARM64Reg (*reg_encoder)(ARM64Reg) = single ? EncodeRegToSingle : EncodeRegToDouble;
|
||||
|
||||
ARM64Reg VB = fpr.R(b, type);
|
||||
ARM64Reg VD = fpr.RW(d, type2);
|
||||
|
||||
switch (op10)
|
||||
{
|
||||
case 40: m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break;
|
||||
case 72: m_float_emit.INS(64, VD, 0, VB, 0); break;
|
||||
case 136: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB));
|
||||
m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); break;
|
||||
case 264: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break;
|
||||
case 40: m_float_emit.FNEG(reg_encoder(VD), reg_encoder(VB)); break;
|
||||
case 72: m_float_emit.INS(size, VD, 0, VB, 0); break;
|
||||
case 136: m_float_emit.FABS(reg_encoder(VD), reg_encoder(VB));
|
||||
m_float_emit.FNEG(reg_encoder(VD), reg_encoder(VD)); break;
|
||||
case 264: m_float_emit.FABS(reg_encoder(VD), reg_encoder(VB)); break;
|
||||
default: _assert_msg_(DYNA_REC, 0, "fp_logic"); break;
|
||||
}
|
||||
}
|
||||
|
@ -169,7 +173,7 @@ void JitArm64::frspx(UGeckoInstruction inst)
|
|||
|
||||
u32 b = inst.FB, d = inst.FD;
|
||||
|
||||
if (fpr.IsSingle(b))
|
||||
if (fpr.IsSingle(b, true))
|
||||
{
|
||||
// Source is already in single precision, so no need to do anything but to copy to PSR1.
|
||||
ARM64Reg VB = fpr.R(b, REG_IS_LOADED_SINGLE);
|
||||
|
@ -196,7 +200,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
|
|||
u32 a = inst.FA, b = inst.FB;
|
||||
int crf = inst.CRFD;
|
||||
|
||||
bool singles = fpr.IsSingle(a) && fpr.IsSingle(b);
|
||||
bool singles = fpr.IsSingle(a, true) && fpr.IsSingle(b, true);
|
||||
RegType type = singles ? REG_IS_LOADED_SINGLE : REG_IS_LOADED;
|
||||
ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToSingle : EncodeRegToDouble;
|
||||
|
||||
|
|
|
@ -270,7 +270,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
|
|||
gpr.Lock(W0, W1, W30);
|
||||
fpr.Lock(Q0);
|
||||
|
||||
bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS);
|
||||
bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS, true);
|
||||
|
||||
ARM64Reg V0 = fpr.R(inst.FS, single ? REG_IS_LOADED_SINGLE : REG_IS_LOADED);
|
||||
|
||||
|
|
|
@ -327,6 +327,18 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type)
|
|||
{
|
||||
return host_reg;
|
||||
}
|
||||
case REG_LOWER_PAIR_SINGLE:
|
||||
{
|
||||
// We're asked for the lower single, so just return the register.
|
||||
if (type == REG_IS_LOADED_SINGLE)
|
||||
return host_reg;
|
||||
|
||||
// Else convert this register back to a double.
|
||||
m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
|
||||
reg.LoadLowerReg(host_reg);
|
||||
|
||||
// fall through
|
||||
}
|
||||
case REG_LOWER_PAIR:
|
||||
{
|
||||
if (type == REG_REG)
|
||||
|
@ -417,31 +429,40 @@ ARM64Reg Arm64FPRCache::RW(u32 preg, RegType type)
|
|||
}
|
||||
|
||||
// Only the lower value will be overwritten, so we must be extra careful to store PSR1 if dirty.
|
||||
if (type == REG_LOWER_PAIR && was_dirty)
|
||||
if ((type == REG_LOWER_PAIR || type == REG_LOWER_PAIR_SINGLE) && was_dirty)
|
||||
{
|
||||
// We must *not* change host_reg as this register might still be in use. So it's fine to
|
||||
// store this register, but it's *not* fine to convert it to double. So for double conversion,
|
||||
// a temporary register needs to be used.
|
||||
ARM64Reg host_reg = reg.GetReg();
|
||||
ARM64Reg flush_reg = host_reg;
|
||||
|
||||
switch (reg.GetType())
|
||||
{
|
||||
case REG_REG_SINGLE:
|
||||
m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
|
||||
flush_reg = GetReg();
|
||||
m_float_emit->FCVTL(64, EncodeRegToDouble(flush_reg), EncodeRegToDouble(host_reg));
|
||||
// fall through
|
||||
case REG_REG:
|
||||
// We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit store.
|
||||
// It would take longer to do an insert to a temporary and a 64bit store than to just do this.
|
||||
m_float_emit->STR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
|
||||
m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][0]));
|
||||
break;
|
||||
case REG_DUP_SINGLE:
|
||||
m_float_emit->FCVT(64, 32, EncodeRegToDouble(reg.GetReg()), EncodeRegToDouble(reg.GetReg()));
|
||||
flush_reg = GetReg();
|
||||
m_float_emit->FCVT(64, 32, EncodeRegToDouble(flush_reg), EncodeRegToDouble(host_reg));
|
||||
// fall through
|
||||
case REG_DUP:
|
||||
// Store PSR1 (which is equal to PSR0) in memory.
|
||||
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1]));
|
||||
m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][1]));
|
||||
break;
|
||||
default:
|
||||
// All other types don't store anything in PSR1.
|
||||
break;
|
||||
}
|
||||
|
||||
if (host_reg != flush_reg)
|
||||
Unlock(flush_reg);
|
||||
}
|
||||
|
||||
reg.Load(reg.GetReg(), type);
|
||||
|
@ -502,11 +523,15 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state)
|
|||
m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
|
||||
type = REG_REG;
|
||||
}
|
||||
if (type == REG_DUP_SINGLE)
|
||||
if (type == REG_DUP_SINGLE || type == REG_LOWER_PAIR_SINGLE)
|
||||
{
|
||||
if (dirty)
|
||||
m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
|
||||
type = REG_DUP;
|
||||
|
||||
if (type == REG_DUP_SINGLE)
|
||||
type = REG_DUP;
|
||||
else
|
||||
type = REG_LOWER_PAIR;
|
||||
}
|
||||
|
||||
if (type == REG_REG || type == REG_LOWER_PAIR)
|
||||
|
@ -560,10 +585,10 @@ BitSet32 Arm64FPRCache::GetCallerSavedUsed()
|
|||
return registers;
|
||||
}
|
||||
|
||||
bool Arm64FPRCache::IsSingle(u32 preg)
|
||||
bool Arm64FPRCache::IsSingle(u32 preg, bool lower_only)
|
||||
{
|
||||
RegType type = m_guest_registers[preg].GetType();
|
||||
return type == REG_REG_SINGLE || type == REG_DUP_SINGLE;
|
||||
return type == REG_REG_SINGLE || type == REG_DUP_SINGLE || (lower_only && type == REG_LOWER_PAIR_SINGLE);
|
||||
}
|
||||
|
||||
void Arm64FPRCache::FixSinglePrecision(u32 preg)
|
||||
|
|
|
@ -24,6 +24,7 @@ enum RegType
|
|||
REG_DUP, // The lower reg is the same as the upper one (physical upper doesn't actually have the duplicated value)
|
||||
REG_IS_LOADED, // We don't care what type it is, as long as the lower 64bits are loaded
|
||||
REG_REG_SINGLE, // Both registers are loaded as single
|
||||
REG_LOWER_PAIR_SINGLE, // Only the lower pair of a paired register, as single
|
||||
REG_DUP_SINGLE, // The lower one contains both registers, as single
|
||||
REG_IS_LOADED_SINGLE, // We only want to access the lower one as single
|
||||
};
|
||||
|
@ -296,7 +297,7 @@ public:
|
|||
|
||||
BitSet32 GetCallerSavedUsed() override;
|
||||
|
||||
bool IsSingle(u32 preg);
|
||||
bool IsSingle(u32 preg, bool lower_only = false);
|
||||
|
||||
void FixSinglePrecision(u32 preg);
|
||||
|
||||
|
|
Loading…
Reference in New Issue