Merge pull request #2189 from magumagu/paired-loadstore-cleanup

Fix paired loadstore to use correct load/store calls.
2015-03-17 11:26:01 +11:00 · 2015-03-17 11:26:01 +11:00 · b103aa7122
parent b170173aca dda5e610eb
commit b103aa7122
5 changed files with 205 additions and 253 deletions
--- a/Source/Core/Core/PowerPC/Gekko.h
+++ b/Source/Core/Core/PowerPC/Gekko.h
@@ -305,6 +305,9 @@ union UGeckoInstruction
 enum EQuantizeType : u32
 {
 	QUANTIZE_FLOAT = 0,
 	QUANTIZE_INVALID1 = 1,
 	QUANTIZE_INVALID2 = 2,
 	QUANTIZE_INVALID3 = 3,
 	QUANTIZE_U8    = 4,
 	QUANTIZE_U16   = 5,
 	QUANTIZE_S8    = 6,
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter.h
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter.h
@@ -311,8 +311,8 @@ private:
 	static u32 Helper_Get_EA_UX(const UGeckoInstruction _inst);
 	// Paired-single load/store helpers. The '+' declarations take the effective
 	// address plus raw instruction fields (instI, instRD / instRS, instW) in place
 	// of an already-decoded quantize type and scale, and Helper_Dequantize no
 	// longer returns a value.
-	static float Helper_Dequantize(const u32 _Addr, const EQuantizeType _quantizeType, const unsigned int _uScale);
+	static void Helper_Dequantize(u32 addr, u32 instI, u32 instRD, u32 instW);
-	static void  Helper_Quantize  (const u32 _Addr, const double _fValue, const EQuantizeType _quantizeType, const unsigned _uScale);
+	static void Helper_Quantize(u32 addr, u32 instI, u32 instRS, u32 instW);
 	// other helper
 	static u32 Helper_Mask(int mb, int me);
--- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp
+++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp
@@ -48,212 +48,260 @@ const float m_quantizeTable[] =
 	1.0 / (1ULL <<  4), 1.0 / (1ULL <<  3), 1.0 / (1ULL <<  2), 1.0 / (1ULL <<  1),
 };
-void Interpreter::Helper_Quantize(const u32 _Addr, const double _fValue, const EQuantizeType _quantizeType, const unsigned int _uScale)
+template<typename SType> SType ScaleAndClamp(double ps, u32 stScale)
 {
 	// ScaleAndClamp ('+' lines): narrow ps to float and multiply it by the
 	// m_quantizeTable entry selected by stScale.
-	switch (_quantizeType)
+	float convPS = (float)ps * m_quantizeTable[stScale];
 	// Bounds are SType's numeric_limits min/max converted to float.
 	float min = (float)std::numeric_limits<SType>::min();
 	float max = (float)std::numeric_limits<SType>::max();
 	// convPS is passed by pointer; presumably MathUtil::Clamp limits it in place
 	// to [min, max] before the truncating cast below - confirm against MathUtil.
 	MathUtil::Clamp(&convPS, min, max);
 	return (SType)convPS;
 }
 // Single-value read of type T from addr. Declared only; the definitions visible
 // in this file are the explicit specializations for u8, u16 and u32.
 template<typename T> static T ReadUnpaired(u32 addr);
 // u8 specialization: forwards to PowerPC::Read_U8.
 template<> u8 ReadUnpaired<u8>(u32 addr)
 {
 	const u8 loaded = PowerPC::Read_U8(addr);
 	return loaded;
 }
 // u16 specialization: forwards to PowerPC::Read_U16.
 template<> u16 ReadUnpaired<u16>(u32 addr)
 {
 	const u16 loaded = PowerPC::Read_U16(addr);
 	return loaded;
 }
 // u32 specialization: forwards to PowerPC::Read_U32.
 template<> u32 ReadUnpaired<u32>(u32 addr)
 {
 	const u32 loaded = PowerPC::Read_U32(addr);
 	return loaded;
 }
 // Reads two consecutive T values starting at addr and returns them as a pair.
 // Declared only; the definitions visible in this file are the explicit
 // specializations for u8, u16 and u32.
 template<typename T> static std::pair<T, T> ReadPair(u32 addr);
 // u8 pair: a single Read_U16 whose upper 8 bits become .first and whose
 // lower 8 bits become .second.
 template<> std::pair<u8, u8> ReadPair<u8>(u32 addr)
 {
 	const u16 packed = PowerPC::Read_U16(addr);
 	const u8 first = static_cast<u8>(packed >> 8);
 	const u8 second = static_cast<u8>(packed);
 	return { first, second };
 }
 // u16 pair: a single Read_U32 whose upper 16 bits become .first and whose
 // lower 16 bits become .second.
 template<> std::pair<u16, u16> ReadPair<u16>(u32 addr)
 {
 	const u32 packed = PowerPC::Read_U32(addr);
 	const u16 first = static_cast<u16>(packed >> 16);
 	const u16 second = static_cast<u16>(packed);
 	return { first, second };
 }
 // u32 pair: a single Read_U64 whose upper 32 bits become .first and whose
 // lower 32 bits become .second.
 template<> std::pair<u32, u32> ReadPair<u32>(u32 addr)
 {
 	const u64 packed = PowerPC::Read_U64(addr);
 	const u32 first = static_cast<u32>(packed >> 32);
 	const u32 second = static_cast<u32>(packed);
 	return { first, second };
 }
 // Single-value write of val to addr. Declared only; the definitions visible in
 // this file are the explicit specializations for u8, u16 and u32.
 template<typename T> static void WriteUnpaired(T val, u32 addr);
 // u8 specialization: forwards to PowerPC::Write_U8.
 template<> void WriteUnpaired<u8>(u8 val, u32 addr)
 {
 	const u8 payload = val;
 	PowerPC::Write_U8(payload, addr);
 }
 // u16 specialization: forwards to PowerPC::Write_U16.
 template<> void WriteUnpaired<u16>(u16 val, u32 addr)
 {
 	const u16 payload = val;
 	PowerPC::Write_U16(payload, addr);
 }
 // u32 specialization: forwards to PowerPC::Write_U32.
 template<> void WriteUnpaired<u32>(u32 val, u32 addr)
 {
 	const u32 payload = val;
 	PowerPC::Write_U32(payload, addr);
 }
 // Writes val1 followed by val2 starting at addr. Declared only; the definitions
 // visible in this file are the explicit specializations for u8, u16 and u32.
 template<typename T> static void WritePair(T val1, T val2, u32 addr);
 // u8 pair: val1 goes in the upper 8 bits and val2 in the lower 8 bits of one
 // Write_U16.
 template<> void WritePair<u8>(u8 val1, u8 val2, u32 addr)
 {
 	const u16 upper = static_cast<u16>(static_cast<u16>(val1) << 8);
 	const u16 lower = static_cast<u16>(val2);
 	PowerPC::Write_U16(upper | lower, addr);
 }
 // u16 pair: val1 goes in the upper 16 bits and val2 in the lower 16 bits of
 // one Write_U32.
 template<> void WritePair<u16>(u16 val1, u16 val2, u32 addr)
 {
 	const u32 upper = static_cast<u32>(val1) << 16;
 	const u32 lower = static_cast<u32>(val2);
 	PowerPC::Write_U32(upper | lower, addr);
 }
 // u32 pair: val1 goes in the upper 32 bits and val2 in the lower 32 bits of
 // one Write_U64.
 template<> void WritePair<u32>(u32 val1, u32 val2, u32 addr)
 {
 	const u64 upper = static_cast<u64>(val1) << 32;
 	const u64 lower = static_cast<u64>(val2);
 	PowerPC::Write_U64(upper | lower, addr);
 }
 // Converts ps0 (and, for a paired store, ps1) to T via ScaleAndClamp and writes
 // the result at addr as T's unsigned counterpart. A non-zero instW selects the
 // single-value store, in which case ps1 is never converted.
 template<typename T>
 void QuantizeAndStore(double ps0, double ps1, u32 addr, u32 instW, u32 stScale)
 {
 	using Unsigned = typename std::make_unsigned<T>::type;
 	const Unsigned first = static_cast<Unsigned>(ScaleAndClamp<T>(ps0, stScale));
 	if (instW != 0)
 	{
 		WriteUnpaired<Unsigned>(first, addr);
 		return;
 	}
 	const Unsigned second = static_cast<Unsigned>(ScaleAndClamp<T>(ps1, stScale));
 	WritePair<Unsigned>(first, second, addr);
 }
 // Stores paired-single register instRS at addr, using the store type and scale
 // read from the GQR selected by instI. A non-zero instW stores ps0 only.
 // NOTE(review): this diff rendering interleaves what look like stale pre-change
 // lines (those using _fValue, _uScale, _Addr, fResult) with the new body;
 // confirm against the real file before relying on the exact text below.
 void Interpreter::Helper_Quantize(u32 addr, u32 instI, u32 instRS, u32 instW)
 {
 	const UGQR gqr(rSPR(SPR_GQR0 + instI));
 	const EQuantizeType stType = gqr.st_type;
 	const unsigned int stScale = gqr.st_scale;
 	double ps0 = rPS0(instRS);
 	double ps1 = rPS1(instRS);
 	switch (stType)
 	{
 	// Float stores skip scaling: each double is converted with ConvertToSingleFTZ
 	// and written as a raw u32 (one value, or a pair in a single write).
 	case QUANTIZE_FLOAT:
-		PowerPC::Write_U32(ConvertToSingleFTZ(*(u64*)&_fValue), _Addr);
+	{
-		break;
+		u32 convPS0 = ConvertToSingleFTZ(MathUtil::IntDouble(ps0).i);
-
+		if (instW)
 	// used for THP player
 	case QUANTIZE_U8:
 		{
-			float fResult = (float)_fValue * m_quantizeTable[_uScale];
+			WriteUnpaired<u32>(convPS0, addr);
 			MathUtil::Clamp(&fResult, 0.0f, 255.0f);
 			PowerPC::Write_U8((u8)fResult, _Addr);
 		}
 		else
 		{
 			u32 convPS1 = ConvertToSingleFTZ(MathUtil::IntDouble(ps1).i);
 			WritePair<u32>(convPS0, convPS1, addr);
 		}
 		break;
 	}
 	// Integer store types all go through QuantizeAndStore<T>.
 	case QUANTIZE_U8:
 		QuantizeAndStore<u8>(ps0, ps1, addr, instW, stScale);
 		break;
 	case QUANTIZE_U16:
-		{
+		QuantizeAndStore<u16>(ps0, ps1, addr, instW, stScale);
 			float fResult = (float)_fValue * m_quantizeTable[_uScale];
 			MathUtil::Clamp(&fResult, 0.0f, 65535.0f);
 			PowerPC::Write_U16((u16)fResult, _Addr);
 		}
 		break;
 	case QUANTIZE_S8:
-		{
+		QuantizeAndStore<s8>(ps0, ps1, addr, instW, stScale);
 			float fResult = (float)_fValue * m_quantizeTable[_uScale];
 			MathUtil::Clamp(&fResult, -128.0f, 127.0f);
 			PowerPC::Write_U8((u8)(s8)fResult, _Addr);
 		}
 		break;
 	case QUANTIZE_S16:
-		{
+		QuantizeAndStore<s16>(ps0, ps1, addr, instW, stScale);
 			float fResult = (float)_fValue * m_quantizeTable[_uScale];
 			MathUtil::Clamp(&fResult, -32768.0f, 32767.0f);
 			PowerPC::Write_U16((u16)(s16)fResult, _Addr);
 		}
 		break;
-	default:
+	case QUANTIZE_INVALID1:
-		_dbg_assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read");
+	case QUANTIZE_INVALID2:
 	case QUANTIZE_INVALID3:
 		// NOTE(review): the message says "dequantize ... to read" although this is
 		// the store (quantize) path - looks copied from the load helper; confirm.
 		_assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read");
 		break;
 	}
 }
-float Interpreter::Helper_Dequantize(const u32 _Addr, const EQuantizeType _quantizeType, const unsigned int _uScale)
+template<typename T>
 std::pair<float, float> LoadAndDequantize(u32 addr, u32 instW, u32 ldScale)
 {
 	// LoadAndDequantize ('+' / context lines): reads one value (instW != 0) or a
 	// pair as the unsigned counterpart of T, casts each raw value to T, converts
 	// it to float and multiplies by m_dequantizeTable[ldScale]. In the
 	// single-value case ps1 is fixed to 1.0f.
-	// dequantize the value
+	typedef typename std::make_unsigned<T>::type U;
-	float fResult;
+	float ps0, ps1;
-	switch (_quantizeType)
+	if (instW)
 	{
 		U value = ReadUnpaired<U>(addr);
 		ps0 = (float)(T)(value) * m_dequantizeTable[ldScale];
 		ps1 = 1.0f;
 	}
 	else
 	{
 		std::pair<U, U> value = ReadPair<U>(addr);
 		ps0 = (float)(T)(value.first) * m_dequantizeTable[ldScale];
 		ps1 = (float)(T)(value.second) * m_dequantizeTable[ldScale];
 	}
 	return { ps0, ps1 };
 }
 // Loads a paired-single value from addr into register instRD, using the load
 // type and scale read from the GQR selected by instI. With a non-zero instW only
 // ps0 is loaded and ps1 becomes 1.0f. The register is left unmodified when
 // EXCEPTION_DSI is set after the load.
 // NOTE(review): '-' lines below are the removed pre-change body shown by the
 // diff (they reference _Addr, _uScale, fResult).
 void Interpreter::Helper_Dequantize(u32 addr, u32 instI, u32 instRD, u32 instW)
 {
 	UGQR gqr(rSPR(SPR_GQR0 + instI));
 	EQuantizeType ldType = gqr.ld_type;
 	unsigned int ldScale = gqr.ld_scale;
 	float ps0, ps1;
 	switch (ldType)
 	{
 	// Float loads are not scaled: the raw u32 bits are reinterpreted as float.
 	case QUANTIZE_FLOAT:
 		if (instW)
 		{
-			u32 dwValue = PowerPC::Read_U32(_Addr);
+			u32 value = ReadUnpaired<u32>(addr);
-			fResult = *(float*)&dwValue;
+			ps0 = MathUtil::IntFloat(value).f;
 			ps1 = 1.0f;
 		}
 		else
 		{
 			std::pair<u32, u32> value = ReadPair<u32>(addr);
 			ps0 = MathUtil::IntFloat(value.first).f;
 			ps1 = MathUtil::IntFloat(value.second).f;
 		}
 		break;
 	// Integer load types all go through LoadAndDequantize<T>.
 	case QUANTIZE_U8:
-		fResult = static_cast<float>(PowerPC::Read_U8(_Addr)) * m_dequantizeTable[_uScale];
+		std::tie(ps0, ps1) = LoadAndDequantize<u8>(addr, instW, ldScale);
 		break;
 	case QUANTIZE_U16:
-		fResult = static_cast<float>(PowerPC::Read_U16(_Addr)) * m_dequantizeTable[_uScale];
+		std::tie(ps0, ps1) = LoadAndDequantize<u16>(addr, instW, ldScale);
 		break;
 	case QUANTIZE_S8:
-		fResult = static_cast<float>((s8)PowerPC::Read_U8(_Addr)) * m_dequantizeTable[_uScale];
+		std::tie(ps0, ps1) = LoadAndDequantize<s8>(addr, instW, ldScale);
 		break;
 		// used for THP player
 	case QUANTIZE_S16:
-		fResult = static_cast<float>((s16)PowerPC::Read_U16(_Addr)) * m_dequantizeTable[_uScale];
+		std::tie(ps0, ps1) = LoadAndDequantize<s16>(addr, instW, ldScale);
 		break;
-	default:
+	case QUANTIZE_INVALID1:
-		_dbg_assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read");
+	case QUANTIZE_INVALID2:
-		fResult = 0;
+	case QUANTIZE_INVALID3:
 		_assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read");
 		ps0 = 0.f;
 		ps1 = 0.f;
 		break;
 	}
-	return fResult;
+
 	// Skip the register write-back when EXCEPTION_DSI is flagged.
 	if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
 	{
 		return;
 	}
 	rPS0(instRD) = ps0;
 	rPS1(instRD) = ps1;
 }
 // psq_l: EA is rGPR[RA] + SIMM_12, or SIMM_12 alone when RA is 0. The '+' line
 // hands the whole load to Helper_Dequantize.
 // NOTE(review): the lines after that call still use a three-argument,
 // value-returning Helper_Dequantize(addr, type, scale); they look like stale
 // pre-change text kept as context by this diff rendering - confirm against the
 // real file.
 void Interpreter::psq_l(UGeckoInstruction _inst)
 {
 	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
 	const EQuantizeType ldType = gqr.ld_type;
 	const unsigned int ldScale = gqr.ld_scale;
 	const u32 EA = _inst.RA ?
 		(rGPR[_inst.RA] + _inst.SIMM_12) : (u32)_inst.SIMM_12;
-
+	Helper_Dequantize(EA, _inst.I, _inst.RD, _inst.W);
 	int c = 4;
 	if (ldType == QUANTIZE_U8  || ldType == QUANTIZE_S8)
 		c = 0x1;
 	else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16)
 		c = 0x2;
 	if (_inst.W == 0)
 	{
 		float ps0 = Helper_Dequantize(EA,     ldType, ldScale);
 		float ps1 = Helper_Dequantize(EA + c, ldType, ldScale);
 		if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
 		{
 			return;
 		}
 		rPS0(_inst.RD) = ps0;
 		rPS1(_inst.RD) = ps1;
 	}
 	else
 	{
 		float ps0 = Helper_Dequantize(EA, ldType, ldScale);
 		if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
 		{
 			return;
 		}
 		rPS0(_inst.RD) = ps0;
 		rPS1(_inst.RD) = 1.0f;
 	}
 }
 // psq_lu: EA is always rGPR[RA] + SIMM_12; the load goes through
 // Helper_Dequantize and, per the '+' lines, the function returns early when
 // EXCEPTION_DSI is set. Otherwise rGPR[RA] is updated to EA at the end.
 // NOTE(review): the remaining context lines (the `c` computation and the
 // three-argument, value-returning Helper_Dequantize calls) look like stale
 // pre-change text kept by this diff rendering - confirm against the real file.
 void Interpreter::psq_lu(UGeckoInstruction _inst)
 {
 	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
 	const EQuantizeType ldType = gqr.ld_type;
 	const unsigned int ldScale = gqr.ld_scale;
 	const u32 EA = rGPR[_inst.RA] + _inst.SIMM_12;
 	Helper_Dequantize(EA, _inst.I, _inst.RD, _inst.W);
-	int c = 4;
+	if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
 	if (ldType == QUANTIZE_U8 || ldType == QUANTIZE_S8)
 		c = 0x1;
 	else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16)
 		c = 0x2;
 	if (_inst.W == 0)
 	{
-		float ps0 = Helper_Dequantize(EA,     ldType, ldScale);
+		return;
 		float ps1 = Helper_Dequantize(EA + c, ldType, ldScale);
 		if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
 		{
 			return;
 		}
 		rPS0(_inst.RD) = ps0;
 		rPS1(_inst.RD) = ps1;
 	}
 	else
 	{
 		float ps0 = Helper_Dequantize(EA, ldType, ldScale);
 		if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
 		{
 			return;
 		}
 		rPS0(_inst.RD) = ps0;
 		rPS1(_inst.RD) = 1.0f;
 	}
 	rGPR[_inst.RA] = EA;
 }
 // psq_st: EA is rGPR[RA] + SIMM_12, or SIMM_12 alone when RA is 0. The '+' line
 // hands the whole store to Helper_Quantize.
 // NOTE(review): the lines after that call still use the old
 // Helper_Quantize(addr, value, type, scale) argument list; they look like stale
 // pre-change text kept as context by this diff rendering - confirm against the
 // real file.
 void Interpreter::psq_st(UGeckoInstruction _inst)
 {
 	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
 	const EQuantizeType stType = gqr.st_type;
 	const unsigned int stScale = gqr.st_scale;
 	const u32 EA = _inst.RA ?
 		(rGPR[_inst.RA] + _inst.SIMM_12) : (u32)_inst.SIMM_12;
-
+	Helper_Quantize(EA, _inst.I, _inst.RS, _inst.W);
 	int c = 4;
 	if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8)
 		c = 0x1;
 	else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16)
 		c = 0x2;
 	if (_inst.W == 0)
 	{
 		Helper_Quantize(EA,     rPS0(_inst.RS), stType, stScale);
 		Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale);
 	}
 	else
 	{
 		Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
 	}
 }
 void Interpreter::psq_stu(UGeckoInstruction _inst)
 {
 	const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
 	const EQuantizeType stType = gqr.st_type;
 	const unsigned int stScale = gqr.st_scale;
 	const u32 EA = rGPR[_inst.RA] + _inst.SIMM_12;
 	Helper_Quantize(EA, _inst.I, _inst.RS, _inst.W);
 	int c = 4;
 	if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8)
 		c = 0x1;
 	else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16)
 		c = 0x2;
 	if (_inst.W == 0)
 	{
 		Helper_Quantize(EA,     rPS0(_inst.RS), stType, stScale);
 		Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale);
 	}
 	else
 	{
 		Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
 	}
 	if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
 	{
 		return;
@@ -263,132 +311,36 @@ void Interpreter::psq_stu(UGeckoInstruction _inst)
 // psq_lx: indexed form. EA is rGPR[RA] + rGPR[RB], or rGPR[RB] alone when RA is
 // 0; the GQR index and single/pair flag come from the Ix and Wx fields. The '+'
 // line hands the whole load to Helper_Dequantize.
 // NOTE(review): the lines after that call still use a three-argument,
 // value-returning Helper_Dequantize(addr, type, scale); they look like stale
 // pre-change text kept as context by this diff rendering - confirm against the
 // real file.
 void Interpreter::psq_lx(UGeckoInstruction _inst)
 {
 	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
 	const EQuantizeType ldType = gqr.ld_type;
 	const unsigned int ldScale = gqr.ld_scale;
 	const u32 EA = _inst.RA ? (rGPR[_inst.RA] + rGPR[_inst.RB]) : rGPR[_inst.RB];
-
+	Helper_Dequantize(EA, _inst.Ix, _inst.RD, _inst.Wx);
 	int c = 4;
 	if (ldType == QUANTIZE_U8 || ldType == QUANTIZE_S8)
 		c = 0x1;
 	else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16)
 		c = 0x2;
 	if (_inst.Wx == 0)
 	{
 		float ps0 = Helper_Dequantize(EA,     ldType, ldScale);
 		float ps1 = Helper_Dequantize(EA + c, ldType, ldScale);
 		if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
 		{
 			return;
 		}
 		rPS0(_inst.RD) = ps0;
 		rPS1(_inst.RD) = ps1;
 	}
 	else
 	{
 		float ps0 = Helper_Dequantize(EA, ldType, ldScale);
 		float ps1 = 1.0f;
 		if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
 		{
 			return;
 		}
 		rPS0(_inst.RD) = ps0;
 		rPS1(_inst.RD) = ps1;
 	}
 }
 // psq_stx: indexed form. EA is rGPR[RA] + rGPR[RB], or rGPR[RB] alone when RA is
 // 0; the GQR index and single/pair flag come from the Ix and Wx fields. The '+'
 // line hands the whole store to Helper_Quantize.
 // NOTE(review): the lines after that call still use the old
 // Helper_Quantize(addr, value, type, scale) argument list; they look like stale
 // pre-change text kept as context by this diff rendering - confirm against the
 // real file.
 void Interpreter::psq_stx(UGeckoInstruction _inst)
 {
 	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
 	const EQuantizeType stType = gqr.st_type;
 	const unsigned int stScale = gqr.st_scale;
 	const u32 EA = _inst.RA ? (rGPR[_inst.RA] + rGPR[_inst.RB]) : rGPR[_inst.RB];
-
+	Helper_Quantize(EA, _inst.Ix, _inst.RS, _inst.Wx);
 	int c = 4;
 	if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8)
 		c = 0x1;
 	else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16)
 		c = 0x2;
 	if (_inst.Wx == 0)
 	{
 		Helper_Quantize(EA,     rPS0(_inst.RS), stType, stScale);
 		Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale);
 	}
 	else
 	{
 		Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
 	}
 }
 // psq_lux: indexed update form. EA is always rGPR[RA] + rGPR[RB]; the load goes
 // through Helper_Dequantize and, per the '+' lines, the function returns early
 // when EXCEPTION_DSI is set. Otherwise rGPR[RA] is updated to EA at the end.
 // NOTE(review): the remaining context lines (the `c` computation and the
 // three-argument, value-returning Helper_Dequantize calls) look like stale
 // pre-change text kept by this diff rendering - confirm against the real file.
 void Interpreter::psq_lux(UGeckoInstruction _inst)
 {
 	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
 	const EQuantizeType ldType = gqr.ld_type;
 	const unsigned int ldScale = gqr.ld_scale;
 	const u32 EA = rGPR[_inst.RA] + rGPR[_inst.RB];
 	Helper_Dequantize(EA, _inst.Ix, _inst.RD, _inst.Wx);
-	int c = 4;
+	if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
 	if (ldType == QUANTIZE_U8 || ldType == QUANTIZE_S8)
 		c = 0x1;
 	else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16)
 		c = 0x2;
 	if (_inst.Wx == 0)
 	{
-		float ps0 = Helper_Dequantize(EA,     ldType, ldScale);
+		return;
 		float ps1 = Helper_Dequantize(EA + c, ldType, ldScale);
 		if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
 		{
 			return;
 		}
 		rPS0(_inst.RD) = ps0;
 		rPS1(_inst.RD) = ps1;
 	}
 	else
 	{
 		float ps0 = Helper_Dequantize(EA, ldType, ldScale);
 		if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
 		{
 			return;
 		}
 		rPS0(_inst.RD) = ps0;
 		rPS1(_inst.RD) = 1.0f;
 	}
 	rGPR[_inst.RA] = EA;
 }
 // psq_stux: indexed update form. EA is always rGPR[RA] + rGPR[RB]; the store
 // goes through Helper_Quantize, and rGPR[RA] is updated to EA unless
 // EXCEPTION_DSI is set afterwards.
 // NOTE(review): the lines between the four-argument Helper_Quantize call and the
 // DSI check still use the old Helper_Quantize(addr, value, type, scale) argument
 // list; they look like stale pre-change text kept as context by this diff
 // rendering - confirm against the real file.
 void Interpreter::psq_stux(UGeckoInstruction _inst)
 {
 	const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
 	const EQuantizeType stType = gqr.st_type;
 	const unsigned int stScale = gqr.st_scale;
 	const u32 EA = rGPR[_inst.RA] + rGPR[_inst.RB];
 	Helper_Quantize(EA, _inst.Ix, _inst.RS, _inst.Wx);
 	int c = 4;
 	if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8)
 		c = 0x1;
 	else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16)
 		c = 0x2;
 	if (_inst.Wx == 0)
 	{
 		Helper_Quantize(EA,     rPS0(_inst.RS), stType, stScale);
 		Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale);
 	}
 	else
 	{
 		Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
 	}
 	// Leave rGPR[RA] untouched when the store flagged EXCEPTION_DSI.
 	if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
 	{
 		return;
 	}
 	rGPR[_inst.RA] = EA;
-
+}
 }  // namespace
--- a/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp
+++ b/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp
@@ -27,22 +27,19 @@ using namespace ArmGen;
 JitArmAsmRoutineManager asm_routines;
-static void WriteDual32(u32 value1, u32 value2, u32 address)
+static void WriteDual8(u32 val1, u32 val2, u32 addr)
 {
 	// WriteDual8 ('+' lines): both values are truncated to u8 and packed, val1 in
 	// the upper 8 bits, into a single Write_U16.
 	// NOTE(review): the trailing context line still uses the old parameter names
 	// value2/address - it looks like stale pre-change text kept by this diff
 	// rendering; confirm against the real file.
-	PowerPC::Write_U32(value1, address);
+	PowerPC::Write_U16(((u16)(u8)val1 << 8) | (u16)(u8)val2, addr);
 	PowerPC::Write_U32(value2, address + 4);
 }
-static void WriteDual16(u32 value1, u32 value2, u32 address)
+static void WriteDual16(u32 val1, u32 val2, u32 addr)
 {
 	// WriteDual16 ('+' lines): both values are truncated to u16 and packed, val1
 	// in the upper 16 bits, into a single Write_U32.
 	// NOTE(review): the trailing context line still uses the old parameter names
 	// value2/address - it looks like stale pre-change text kept by this diff
 	// rendering; confirm against the real file.
-	PowerPC::Write_U16(value1, address);
+	PowerPC::Write_U32(((u32)(u16)val1 << 16) | (u32)(u16)val2, addr);
 	PowerPC::Write_U16(value2, address + 2);
 }
-static void WriteDual8(u32 value1, u32 value2, u32 address)
+static void WriteDual32(u32 val1, u32 val2, u32 addr)
 {
 	// WriteDual32 ('+' lines): val1 goes in the upper 32 bits and val2 in the
 	// lower 32 bits of a single Write_U64.
 	// NOTE(review): the trailing context line still uses the old parameter names
 	// value2/address - it looks like stale pre-change text kept by this diff
 	// rendering; confirm against the real file.
-	PowerPC::Write_U8(value1, address);
+	PowerPC::Write_U64(((u64)val1 << 32) | (u64)val2, addr);
 	PowerPC::Write_U8(value2, address + 1);
 }
 void JitArmAsmRoutineManager::Generate()
--- a/Source/Core/Core/PowerPC/MMU.cpp
+++ b/Source/Core/Core/PowerPC/MMU.cpp
@@ -238,7 +238,7 @@ __forceinline static void WriteToHardware(u32 em_address, const T data)
 	{
 		// First, let's check for FIFO writes, since they are probably the most common
 		// reason we end up in this function:
-		if (flag == FLAG_WRITE && (em_address & 0xFFFFF000) == 0xCC008000)
+		if (flag == FLAG_WRITE && em_address == 0xCC008000)
 		{
 			switch (sizeof(T))
 			{