JitArm64: Track singles in fabs.

2016-02-21 10:38:24 +01:00 · 2016-02-21 10:38:24 +01:00 · c30a66b2d5
parent 84395b65f6
commit c30a66b2d5
4 changed files with 55 additions and 25 deletions
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_FloatingPoint.cpp
@ -33,7 +33,7 @@ void JitArm64::fp_arith(UGeckoInstruction inst)
 	bool use_c = op5 >= 25; // fmul and all kind of fmaddXX
 	bool use_b = op5 != 25; // fmul uses no B

-	bool inputs_are_singles = fpr.IsSingle(a) && (!use_b || fpr.IsSingle(b)) && (!use_c || fpr.IsSingle(c));
+	bool inputs_are_singles = fpr.IsSingle(a, !packed) && (!use_b || fpr.IsSingle(b, !packed)) && (!use_c || fpr.IsSingle(c, !packed));

 	ARM64Reg VA, VB, VC, VD;

@ -105,13 +105,13 @@ void JitArm64::fp_logic(UGeckoInstruction inst)
 	if (op10 == 72 && b == d)
 		return;

-	bool is_single = fpr.IsSingle(b);
+	bool single = fpr.IsSingle(b, !packed);
+	u8 size = single ? 32 : 64;

 	if (packed)
 	{
-		RegType type = is_single ? REG_REG_SINGLE : REG_REG;
-		u8 size = is_single ? 32 : 64;
-		ARM64Reg (*reg_encoder)(ARM64Reg) = is_single ? EncodeRegToDouble : EncodeRegToQuad;
+		RegType type = single ? REG_REG_SINGLE : REG_REG;
+		ARM64Reg (*reg_encoder)(ARM64Reg) = single ? EncodeRegToDouble : EncodeRegToQuad;

 		ARM64Reg VB = reg_encoder(fpr.R(b, type));
 		ARM64Reg VD = reg_encoder(fpr.RW(d, type));
@ -128,16 +128,20 @@ void JitArm64::fp_logic(UGeckoInstruction inst)
 	}
 	else
 	{
-		ARM64Reg VB = fpr.R(b, REG_IS_LOADED);
-		ARM64Reg VD = fpr.RW(d);
+		RegType type = single ? REG_IS_LOADED_SINGLE : REG_IS_LOADED;
+		RegType type2 = single ? REG_LOWER_PAIR_SINGLE : REG_LOWER_PAIR;
+		ARM64Reg (*reg_encoder)(ARM64Reg) = single ? EncodeRegToSingle : EncodeRegToDouble;
+
+		ARM64Reg VB = fpr.R(b, type);
+		ARM64Reg VD = fpr.RW(d, type2);

 		switch (op10)
 		{
-		case  40: m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break;
-		case  72: m_float_emit.INS(64, VD, 0, VB, 0); break;
-		case 136: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB));
-		          m_float_emit.FNEG(EncodeRegToDouble(VD), EncodeRegToDouble(VD)); break;
-		case 264: m_float_emit.FABS(EncodeRegToDouble(VD), EncodeRegToDouble(VB)); break;
+		case  40: m_float_emit.FNEG(reg_encoder(VD), reg_encoder(VB)); break;
+		case  72: m_float_emit.INS(size, VD, 0, VB, 0); break;
+		case 136: m_float_emit.FABS(reg_encoder(VD), reg_encoder(VB));
+		          m_float_emit.FNEG(reg_encoder(VD), reg_encoder(VD)); break;
+		case 264: m_float_emit.FABS(reg_encoder(VD), reg_encoder(VB)); break;
 		default: _assert_msg_(DYNA_REC, 0, "fp_logic"); break;
 		}
 	}
@ -169,7 +173,7 @@ void JitArm64::frspx(UGeckoInstruction inst)

 	u32 b = inst.FB, d = inst.FD;

-	if (fpr.IsSingle(b))
+	if (fpr.IsSingle(b, true))
 	{
 		// Source is already in single precision, so no need to do anything but to copy to PSR1.
 		ARM64Reg VB = fpr.R(b, REG_IS_LOADED_SINGLE);
@ -196,7 +200,7 @@ void JitArm64::fcmpX(UGeckoInstruction inst)
 	u32 a = inst.FA, b = inst.FB;
 	int crf = inst.CRFD;

-	bool singles = fpr.IsSingle(a) && fpr.IsSingle(b);
+	bool singles = fpr.IsSingle(a, true) && fpr.IsSingle(b, true);
 	RegType type = singles ? REG_IS_LOADED_SINGLE : REG_IS_LOADED;
 	ARM64Reg (*reg_encoder)(ARM64Reg) = singles ? EncodeRegToSingle : EncodeRegToDouble;

--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_LoadStoreFloating.cpp
@ -270,7 +270,7 @@ void JitArm64::stfXX(UGeckoInstruction inst)
 	gpr.Lock(W0, W1, W30);
 	fpr.Lock(Q0);

-	bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS);
+	bool single = (flags & BackPatchInfo::FLAG_SIZE_F32) && fpr.IsSingle(inst.FS, true);

 	ARM64Reg V0 = fpr.R(inst.FS, single ? REG_IS_LOADED_SINGLE : REG_IS_LOADED);

--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.cpp
@ -327,6 +327,18 @@ ARM64Reg Arm64FPRCache::R(u32 preg, RegType type)
 	{
 		return host_reg;
 	}
+	case REG_LOWER_PAIR_SINGLE:
+	{
+		// We're asked for the lower single, so just return the register.
+		if (type == REG_IS_LOADED_SINGLE)
+			return host_reg;
+
+		// Else convert this register back to a double.
+		m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
+		reg.LoadLowerReg(host_reg);
+
+		// fall through
+	}
 	case REG_LOWER_PAIR:
 	{
 		if (type == REG_REG)
@ -417,31 +429,40 @@ ARM64Reg Arm64FPRCache::RW(u32 preg, RegType type)
 	}

 	// Only the lower value will be overwritten, so we must be extra careful to store PSR1 if dirty.
-	if (type == REG_LOWER_PAIR && was_dirty)
+	if ((type == REG_LOWER_PAIR || type == REG_LOWER_PAIR_SINGLE) && was_dirty)
 	{
+		// We must *not* change host_reg as this register might still be in use. So it's fine to
+		// store this register, but it's *not* fine to convert it to double. So for double conversion,
+		// a temporary register needs to be used.
 		ARM64Reg host_reg = reg.GetReg();
+		ARM64Reg flush_reg = host_reg;

 		switch (reg.GetType())
 		{
 		case REG_REG_SINGLE:
-			m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
+			flush_reg = GetReg();
+			m_float_emit->FCVTL(64, EncodeRegToDouble(flush_reg), EncodeRegToDouble(host_reg));
 			// fall through
 		case REG_REG:
 			// We are doing a full 128bit store because it takes 2 cycles on a Cortex-A57 to do a 128bit store.
 			// It would take longer to do an insert to a temporary and a 64bit store than to just do this.
-			m_float_emit->STR(128, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][0]));
+			m_float_emit->STR(128, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][0]));
 			break;
 		case REG_DUP_SINGLE:
-			m_float_emit->FCVT(64, 32, EncodeRegToDouble(reg.GetReg()), EncodeRegToDouble(reg.GetReg()));
+			flush_reg = GetReg();
+			m_float_emit->FCVT(64, 32, EncodeRegToDouble(flush_reg), EncodeRegToDouble(host_reg));
 			// fall through
 		case REG_DUP:
 			// Store PSR1 (which is equal to PSR0) in memory.
-			m_float_emit->STR(64, INDEX_UNSIGNED, host_reg, X29, PPCSTATE_OFF(ps[preg][1]));
+			m_float_emit->STR(64, INDEX_UNSIGNED, flush_reg, X29, PPCSTATE_OFF(ps[preg][1]));
 			break;
 		default:
 			// All other types doesn't store anything in PSR1.
 			break;
 		}
+
+		if (host_reg != flush_reg)
+			Unlock(flush_reg);
 	}

 	reg.Load(reg.GetReg(), type);
@ -502,11 +523,15 @@ void Arm64FPRCache::FlushRegister(u32 preg, bool maintain_state)
 			m_float_emit->FCVTL(64, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
 		type = REG_REG;
 	}
-	if (type == REG_DUP_SINGLE)
+	if (type == REG_DUP_SINGLE || type == REG_LOWER_PAIR_SINGLE)
 	{
 		if (dirty)
 			m_float_emit->FCVT(64, 32, EncodeRegToDouble(host_reg), EncodeRegToDouble(host_reg));
-		type = REG_DUP;
+
+		if (type == REG_DUP_SINGLE)
+			type = REG_DUP;
+		else
+			type = REG_LOWER_PAIR;
 	}

 	if (type == REG_REG || type == REG_LOWER_PAIR)
@ -560,10 +585,10 @@ BitSet32 Arm64FPRCache::GetCallerSavedUsed()
 	return registers;
 }

-bool Arm64FPRCache::IsSingle(u32 preg)
+bool Arm64FPRCache::IsSingle(u32 preg, bool lower_only)
 {
 	RegType type = m_guest_registers[preg].GetType();
-	return type == REG_REG_SINGLE || type == REG_DUP_SINGLE;
+	return type == REG_REG_SINGLE || type == REG_DUP_SINGLE || (lower_only && type == REG_LOWER_PAIR_SINGLE);
 }

 void Arm64FPRCache::FixSinglePrecision(u32 preg)
--- a/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
+++ b/Source/Core/Core/PowerPC/JitArm64/JitArm64_RegCache.h
@ -24,6 +24,7 @@ enum RegType
 	REG_DUP, // The lower reg is the same as the upper one (physical upper doesn't actually have the duplicated value)
 	REG_IS_LOADED, // We don't care what type it is, as long as the lower 64bits are loaded
 	REG_REG_SINGLE, // Both registers are loaded as single
+	REG_LOWER_PAIR_SINGLE, // Only the lower pair of a paired register, as single
 	REG_DUP_SINGLE, // The lower one contains both registers, as single
 	REG_IS_LOADED_SINGLE, // We only want to access the lower one as single
 };
@ -296,7 +297,7 @@ public:

 	BitSet32 GetCallerSavedUsed() override;

-	bool IsSingle(u32 preg);
+	bool IsSingle(u32 preg, bool lower_only = false);

 	void FixSinglePrecision(u32 preg);