JitArm64: Track singles in fabs.

This commit is contained in:
degasus 2016-02-21 10:38:24 +01:00
parent 84395b65f6
commit c30a66b2d5
4 changed files with 55 additions and 25 deletions

View File

@ -33,7 +33,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
bool use_c = op5 >= 25; // fmul and all kinds of fmaddXX bool use_c = op5 >= 25; // fmul and all kinds of fmaddXX
bool use_b = op5 != 25; // fmul uses no B bool use_b = op5 != 25; // fmul uses no B
bool inputs_are_singles = fpr.IsSingle(a) && (!use_b || fpr.IsSingle(b)) && (!use_c || fpr.IsSingle(c)); bool inputs_are_singles = fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) && (!use_c || fpr.IsSingle(c, !packed));
ARM64Reg VA, VB, VC, VD; ARM64Reg VA, VB, VC, VD;
@ -105,13 +105,13 @@ void JitArm64::fp_logic(UGeckoInstruction inst)
if (op10 == 72 && b == d) if (op10 == 72 && b == d)
return; return;
bool is_single = fpr.IsSingle(b); bool single = fpr.IsSingle(b, !packed);
u8 size = single ? 32 : 64;
if (packed) if (packed)
{ {
RegType type = is_single ? REG_REG_SINGLE : REG_REG; RegType type = single ? REG_REG_SINGLE : REG_REG;
u8 size = is_single ? 32 : 64; ARM64Reg (*reg_encoder)(ARM64Reg) = single ? EncodeRegToDouble : EncodeRegToQuad;
ARM64Reg (*reg_encoder)(ARM64Reg) = is_single ? EncodeRegToDouble : EncodeRegToQuad;
ARM64Reg VB = reg_encoder(fpr.R(b, type)); ARM64Reg VB = reg_encoder(fpr.R(b, type));
ARM64Reg VD = reg_encoder(fpr.RW(d, type)); ARM64Reg VD = reg_encoder(fpr.RW(d, type));
@ -128,16 +128,20 @@ void JitArm64::fp_logic(UGeckoInstruction inst)
} }
else else
{ {
ARM64Reg VB = fpr.R(b, REG_IS_LOADED); RegType type = single ? REG_IS_LOADED_SINGLE : REG_IS_LOADED;
ARM64Reg VD = fpr.RW(d); RegType type2 = single ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR;
ARM64Reg (*reg_encoder)(ARM64Reg) = single ? EncodeRegToSingle : EncodeRegToDouble;
ARM64Reg VB = fpr.R(b, type);
ARM64Reg VD = fpr.RW(d, type2);
switch (op10) switch (op10)
{ {
case 40: m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break; case 40: m_float_emit.FNEG(reg_encoder(VD), reg_encoder(VB)); break;
case 72: m_float_emit.INS(64, VD, 0, VB, 0); break; case 72: m_float_emit.INS(size, VD, 0, VB, 0); break;
case 136: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); case 136: m_float_emit.FABS(reg_encoder(VD), reg_encoder(VB));
m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); break; m_float_emit.FNEG(reg_encoder(VD), reg_encoder(VD)); break;
case 264: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break; case 264: m_float_emit.FABS(reg_encoder(VD), reg_encoder(VB)); break;
default: _assert_msg_(DYNA_REC, 0, "fp_logic"); break; default: _assert_msg_(DYNA_REC, 0, "fp_logic"); break;
} }
} }
@ -169,7 +173,7 @@ void JitArm64::frspx(UGeckoInstruction inst)
u32 b = inst.FB, d = inst.FD; u32 b = inst.FB, d = inst.FD;
if (fpr.IsSingle(b)) if (fpr.IsSingle(b, true))
{ {
// Source is already in single precision, so no need to do anything but to copy to PSR1. // Source is already in single precision, so no need to do anything but to copy to PSR1.
ARM64Reg VB = fpr.R(b, REG_IS_LOADED_SINGLE); ARM64Reg VB = fpr.R(b, REG_IS_LOADED_SINGLE);
@ -196,7 +200,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
u32 a = inst.FA, b = inst.FB; u32 a = inst.FA, b = inst.FB;
int crf = inst.CRFD; int crf = inst.CRFD;
bool singles = fpr.IsSingle(a) && fpr.IsSingle(b); bool singles = fpr.IsSingle(a, true) && fpr.IsSingle(b, true);
RegType type = singles ? REG_IS_LOADED_SINGLE : REG_IS_LOADED; RegType type = singles ? REG_IS_LOADED_SINGLE : REG_IS_LOADED;
ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToSingle : EncodeRegToDouble; ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToSingle : EncodeRegToDouble;

View File

@ -270,7 +270,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
gpr.Lock(W0, W1, W30); gpr.Lock(W0, W1, W30);
fpr.Lock(Q0); fpr.Lock(Q0);
bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS); bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS, true);
ARM64Reg V0 = fpr.R(inst.FS, single ? REG_IS_LOADED_SINGLE : REG_IS_LOADED); ARM64Reg V0 = fpr.R(inst.FS, single ? REG_IS_LOADED_SINGLE : REG_IS_LOADED);

View File

@ -327,6 +327,18 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type)
{ {
return host_reg; return host_reg;
} }
case REG_LOWER_PAIR_SINGLE:
{
// We're asked for the lower single, so just return the register.
if (type == REG_IS_LOADED_SINGLE)
return host_reg;
// Else convert this register back to a double.
m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
reg.LoadLowerReg(host_reg);
// fall through
}
case REG_LOWER_PAIR: case REG_LOWER_PAIR:
{ {
if (type == REG_REG) if (type == REG_REG)
@ -417,31 +429,40 @@ ARM64Reg Arm64FPRCache::RW(u32 preg, RegType type)
} }
// Only the lower value will be overwritten, so we must be extra careful to store PSR1 if dirty. // Only the lower value will be overwritten, so we must be extra careful to store PSR1 if dirty.
if (type == REG_LOWER_PAIR && was_dirty) if ((type == REG_LOWER_PAIR || type == REG_LOWER_PAIR_SINGLE) && was_dirty)
{ {
// We must *not* change host_reg as this register might still be in use. So it's fine to
// store this register, but it's *not* fine to convert it to double. So for double conversion,
// a temporary register needs to be used.
ARM64Reg host_reg = reg.GetReg(); ARM64Reg host_reg = reg.GetReg();
ARM64Reg flush_reg = host_reg;
switch (reg.GetType()) switch (reg.GetType())
{ {
case REG_REG_SINGLE: case REG_REG_SINGLE:
m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); flush_reg = GetReg();
m_float_emit->FCVTL(64, EncodeRegToDouble(flush_reg), EncodeRegToDouble(host_reg));
// fall through // fall through
case REG_REG: case REG_REG:
// We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit store. // We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit store.
// It would take longer to do an insert to a temporary and a 64bit store than to just do this. // It would take longer to do an insert to a temporary and a 64bit store than to just do this.
m_float_emit->STR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0])); m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][0]));
break; break;
case REG_DUP_SINGLE: case REG_DUP_SINGLE:
m_float_emit->FCVT(64, 32, EncodeRegToDouble(reg.GetReg()), EncodeRegToDouble(reg.GetReg())); flush_reg = GetReg();
m_float_emit->FCVT(64, 32, EncodeRegToDouble(flush_reg), EncodeRegToDouble(host_reg));
// fall through // fall through
case REG_DUP: case REG_DUP:
// Store PSR1 (which is equal to PSR0) in memory. // Store PSR1 (which is equal to PSR0) in memory.
m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1])); m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][1]));
break; break;
default: default:
// All other types don't store anything in PSR1. // All other types don't store anything in PSR1.
break; break;
} }
if (host_reg != flush_reg)
Unlock(flush_reg);
} }
reg.Load(reg.GetReg(), type); reg.Load(reg.GetReg(), type);
@ -502,11 +523,15 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state)
m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
type = REG_REG; type = REG_REG;
} }
if (type == REG_DUP_SINGLE) if (type == REG_DUP_SINGLE || type == REG_LOWER_PAIR_SINGLE)
{ {
if (dirty) if (dirty)
m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg)); m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
type = REG_DUP;
if (type == REG_DUP_SINGLE)
type = REG_DUP;
else
type = REG_LOWER_PAIR;
} }
if (type == REG_REG || type == REG_LOWER_PAIR) if (type == REG_REG || type == REG_LOWER_PAIR)
@ -560,10 +585,10 @@ BitSet32 Arm64FPRCache::GetCallerSavedUsed()
return registers; return registers;
} }
bool Arm64FPRCache::IsSingle(u32 preg) bool Arm64FPRCache::IsSingle(u32 preg, bool lower_only)
{ {
RegType type = m_guest_registers[preg].GetType(); RegType type = m_guest_registers[preg].GetType();
return type == REG_REG_SINGLE || type == REG_DUP_SINGLE; return type == REG_REG_SINGLE || type == REG_DUP_SINGLE || (lower_only && type == REG_LOWER_PAIR_SINGLE);
} }
void Arm64FPRCache::FixSinglePrecision(u32 preg) void Arm64FPRCache::FixSinglePrecision(u32 preg)

View File

@ -24,6 +24,7 @@ enum RegType
REG_DUP, // The lower reg is the same as the upper one (physical upper doesn't actually have the duplicated value) REG_DUP, // The lower reg is the same as the upper one (physical upper doesn't actually have the duplicated value)
REG_IS_LOADED, // We don't care what type it is, as long as the lower 64bits are loaded REG_IS_LOADED, // We don't care what type it is, as long as the lower 64bits are loaded
REG_REG_SINGLE, // Both registers are loaded as single REG_REG_SINGLE, // Both registers are loaded as single
REG_LOWER_PAIR_SINGLE, // Only the lower pair of a paired register, as single
REG_DUP_SINGLE, // The lower one contains both registers, as single REG_DUP_SINGLE, // The lower one contains both registers, as single
REG_IS_LOADED_SINGLE, // We only want to access the lower one as single REG_IS_LOADED_SINGLE, // We only want to access the lower one as single
}; };
@ -296,7 +297,7 @@ public:
BitSet32 GetCallerSavedUsed() override; BitSet32 GetCallerSavedUsed() override;
bool IsSingle(u32 preg); bool IsSingle(u32 preg, bool lower_only = false);
void FixSinglePrecision(u32 preg); void FixSinglePrecision(u32 preg);