diff --git a/Source/Core/Core/PowerPC/Gekko.h b/Source/Core/Core/PowerPC/Gekko.h index c9eb9b277c..14f4083af6 100644 --- a/Source/Core/Core/PowerPC/Gekko.h +++ b/Source/Core/Core/PowerPC/Gekko.h @@ -305,6 +305,9 @@ union UGeckoInstruction enum EQuantizeType : u32 { QUANTIZE_FLOAT = 0, + QUANTIZE_INVALID1 = 1, + QUANTIZE_INVALID2 = 2, + QUANTIZE_INVALID3 = 3, QUANTIZE_U8 = 4, QUANTIZE_U16 = 5, QUANTIZE_S8 = 6, diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter.h b/Source/Core/Core/PowerPC/Interpreter/Interpreter.h index 3afaaf9644..d88081fedd 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter.h +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter.h @@ -312,8 +312,8 @@ private: static u32 Helper_Get_EA_UX(const UGeckoInstruction _inst); // paired helper - static float Helper_Dequantize(const u32 _Addr, const EQuantizeType _quantizeType, const unsigned int _uScale); - static void Helper_Quantize (const u32 _Addr, const double _fValue, const EQuantizeType _quantizeType, const unsigned _uScale); + static void Helper_Dequantize(u32 addr, u32 instI, u32 instRD, u32 instW); + static void Helper_Quantize(u32 addr, u32 instI, u32 instRS, u32 instW); // other helper static u32 Helper_Mask(int mb, int me); diff --git a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp index 2a0975a411..f1a01ffaee 100644 --- a/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp +++ b/Source/Core/Core/PowerPC/Interpreter/Interpreter_LoadStorePaired.cpp @@ -48,212 +48,260 @@ const float m_quantizeTable[] = 1.0 / (1ULL << 4), 1.0 / (1ULL << 3), 1.0 / (1ULL << 2), 1.0 / (1ULL << 1), }; -void Interpreter::Helper_Quantize(const u32 _Addr, const double _fValue, const EQuantizeType _quantizeType, const unsigned int _uScale) +template SType ScaleAndClamp(double ps, u32 stScale) { - switch (_quantizeType) + float convPS = (float)ps * m_quantizeTable[stScale]; + float min = (float)std::numeric_limits::min(); + float max = (float)std::numeric_limits::max(); + MathUtil::Clamp(&convPS, min, max); + return (SType)convPS; +} + +template static T ReadUnpaired(u32 addr); + +template<> u8 ReadUnpaired(u32 addr) +{ + return PowerPC::Read_U8(addr); +} + +template<> u16 ReadUnpaired(u32 addr) +{ + return PowerPC::Read_U16(addr); +} + +template<> u32 ReadUnpaired(u32 addr) +{ + return PowerPC::Read_U32(addr); +} + +template static std::pair ReadPair(u32 addr); + +template<> std::pair ReadPair(u32 addr) +{ + u16 val = PowerPC::Read_U16(addr); + return { (u8)(val >> 8), (u8)val }; +} + +template<> std::pair ReadPair(u32 addr) +{ + u32 val = PowerPC::Read_U32(addr); + return { (u16)(val >> 16), (u16)val }; +} + +template<> std::pair ReadPair(u32 addr) +{ + u64 val = PowerPC::Read_U64(addr); + return { (u32)(val >> 32), (u32)val }; +} + +template static void WriteUnpaired(T val, u32 addr); + +template<> void WriteUnpaired(u8 val, u32 addr) +{ + PowerPC::Write_U8(val, addr); +} + +template<> void WriteUnpaired(u16 val, u32 addr) +{ + PowerPC::Write_U16(val, addr); +} + +template<> void WriteUnpaired(u32 val, u32 addr) +{ + PowerPC::Write_U32(val, addr); +} + +template static void WritePair(T val1, T val2, u32 addr); + +template<> void WritePair(u8 val1, u8 val2, u32 addr) +{ + PowerPC::Write_U16(((u16)val1 << 8) | (u16)val2, addr); +} + +template<> void WritePair(u16 val1, u16 val2, u32 addr) +{ + PowerPC::Write_U32(((u32)val1 << 16) | (u32)val2, addr); +} + +template<> void WritePair(u32 val1, u32 val2, u32 addr) +{ + PowerPC::Write_U64(((u64)val1 << 32) | (u64)val2, addr); +} + +template +void QuantizeAndStore(double ps0, double ps1, u32 addr, u32 instW, u32 stScale) +{ + typedef typename std::make_unsigned::type U; + U convPS0 = (U)ScaleAndClamp(ps0, stScale); + if (instW) + { + WriteUnpaired(convPS0, addr); + } + else + { + U convPS1 = (U)ScaleAndClamp(ps1, stScale); + WritePair(convPS0, convPS1, addr); + } +} + +void Interpreter::Helper_Quantize(u32 addr, u32 instI, u32 instRS, u32 instW) +{ + const UGQR gqr(rSPR(SPR_GQR0 + instI)); + const EQuantizeType stType = gqr.st_type; + const unsigned int stScale = gqr.st_scale; + + double ps0 = rPS0(instRS); + double ps1 = rPS1(instRS); + switch (stType) { case QUANTIZE_FLOAT: - PowerPC::Write_U32(ConvertToSingleFTZ(*(u64*)&_fValue), _Addr); - break; - - // used for THP player - case QUANTIZE_U8: + { + u32 convPS0 = ConvertToSingleFTZ(MathUtil::IntDouble(ps0).i); + if (instW) { - float fResult = (float)_fValue * m_quantizeTable[_uScale]; - MathUtil::Clamp(&fResult, 0.0f, 255.0f); - PowerPC::Write_U8((u8)fResult, _Addr); + WriteUnpaired(convPS0, addr); } + else + { + u32 convPS1 = ConvertToSingleFTZ(MathUtil::IntDouble(ps1).i); + WritePair(convPS0, convPS1, addr); + } + break; + } + + case QUANTIZE_U8: + QuantizeAndStore(ps0, ps1, addr, instW, stScale); break; case QUANTIZE_U16: - { - float fResult = (float)_fValue * m_quantizeTable[_uScale]; - MathUtil::Clamp(&fResult, 0.0f, 65535.0f); - PowerPC::Write_U16((u16)fResult, _Addr); - } + QuantizeAndStore(ps0, ps1, addr, instW, stScale); break; case QUANTIZE_S8: - { - float fResult = (float)_fValue * m_quantizeTable[_uScale]; - MathUtil::Clamp(&fResult, -128.0f, 127.0f); - PowerPC::Write_U8((u8)(s8)fResult, _Addr); - } + QuantizeAndStore(ps0, ps1, addr, instW, stScale); break; case QUANTIZE_S16: - { - float fResult = (float)_fValue * m_quantizeTable[_uScale]; - MathUtil::Clamp(&fResult, -32768.0f, 32767.0f); - PowerPC::Write_U16((u16)(s16)fResult, _Addr); - } + QuantizeAndStore(ps0, ps1, addr, instW, stScale); break; - default: - _dbg_assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read"); + case QUANTIZE_INVALID1: + case QUANTIZE_INVALID2: + case QUANTIZE_INVALID3: + _assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read"); break; } } -float Interpreter::Helper_Dequantize(const u32 _Addr, const EQuantizeType _quantizeType, const unsigned int _uScale) +template +std::pair LoadAndDequantize(u32 addr, u32 instW, u32 ldScale) { - // dequantize the value - float fResult; - switch (_quantizeType) + typedef typename std::make_unsigned::type U; + float ps0, ps1; + if (instW) + { + U value = ReadUnpaired(addr); + ps0 = (float)(T)(value) * m_dequantizeTable[ldScale]; + ps1 = 1.0f; + } + else + { + std::pair value = ReadPair(addr); + ps0 = (float)(T)(value.first) * m_dequantizeTable[ldScale]; + ps1 = (float)(T)(value.second) * m_dequantizeTable[ldScale]; + } + return { ps0, ps1 }; +} + +void Interpreter::Helper_Dequantize(u32 addr, u32 instI, u32 instRD, u32 instW) +{ + UGQR gqr(rSPR(SPR_GQR0 + instI)); + EQuantizeType ldType = gqr.ld_type; + unsigned int ldScale = gqr.ld_scale; + + float ps0, ps1; + switch (ldType) { case QUANTIZE_FLOAT: + if (instW) { - u32 dwValue = PowerPC::Read_U32(_Addr); - fResult = *(float*)&dwValue; + u32 value = ReadUnpaired(addr); + ps0 = MathUtil::IntFloat(value).f; + ps1 = 1.0f; + } + else + { + std::pair value = ReadPair(addr); + ps0 = MathUtil::IntFloat(value.first).f; + ps1 = MathUtil::IntFloat(value.second).f; } break; case QUANTIZE_U8: - fResult = static_cast(PowerPC::Read_U8(_Addr)) * m_dequantizeTable[_uScale]; + std::tie(ps0, ps1) = LoadAndDequantize(addr, instW, ldScale); break; case QUANTIZE_U16: - fResult = static_cast(PowerPC::Read_U16(_Addr)) * m_dequantizeTable[_uScale]; + std::tie(ps0, ps1) = LoadAndDequantize(addr, instW, ldScale); break; case QUANTIZE_S8: - fResult = static_cast((s8)PowerPC::Read_U8(_Addr)) * m_dequantizeTable[_uScale]; + std::tie(ps0, ps1) = LoadAndDequantize(addr, instW, ldScale); break; - // used for THP player case QUANTIZE_S16: - fResult = static_cast((s16)PowerPC::Read_U16(_Addr)) * m_dequantizeTable[_uScale]; + std::tie(ps0, ps1) = LoadAndDequantize(addr, instW, ldScale); break; - default: - _dbg_assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read"); - fResult = 0; + case QUANTIZE_INVALID1: + case QUANTIZE_INVALID2: + case QUANTIZE_INVALID3: + _assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read"); + ps0 = 0.f; + ps1 = 0.f; break; } - return fResult; + + if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) + { + return; + } + + rPS0(instRD) = ps0; + rPS1(instRD) = ps1; } void Interpreter::psq_l(UGeckoInstruction _inst) { - const UGQR gqr(rSPR(SPR_GQR0 + _inst.I)); - const EQuantizeType ldType = gqr.ld_type; - const unsigned int ldScale = gqr.ld_scale; const u32 EA = _inst.RA ? (rGPR[_inst.RA] + _inst.SIMM_12) : (u32)_inst.SIMM_12; - - int c = 4; - if (ldType == QUANTIZE_U8 || ldType == QUANTIZE_S8) - c = 0x1; - else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16) - c = 0x2; - - if (_inst.W == 0) - { - float ps0 = Helper_Dequantize(EA, ldType, ldScale); - float ps1 = Helper_Dequantize(EA + c, ldType, ldScale); - if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) - { - return; - } - rPS0(_inst.RD) = ps0; - rPS1(_inst.RD) = ps1; - } - else - { - float ps0 = Helper_Dequantize(EA, ldType, ldScale); - if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) - { - return; - } - rPS0(_inst.RD) = ps0; - rPS1(_inst.RD) = 1.0f; - } + Helper_Dequantize(EA, _inst.I, _inst.RD, _inst.W); } void Interpreter::psq_lu(UGeckoInstruction _inst) { - const UGQR gqr(rSPR(SPR_GQR0 + _inst.I)); - const EQuantizeType ldType = gqr.ld_type; - const unsigned int ldScale = gqr.ld_scale; const u32 EA = rGPR[_inst.RA] + _inst.SIMM_12; + Helper_Dequantize(EA, _inst.I, _inst.RD, _inst.W); - int c = 4; - if (ldType == QUANTIZE_U8 || ldType == QUANTIZE_S8) - c = 0x1; - else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16) - c = 0x2; - - if (_inst.W == 0) + if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) { - float ps0 = Helper_Dequantize(EA, ldType, ldScale); - float ps1 = Helper_Dequantize(EA + c, ldType, ldScale); - if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) - { - return; - } - rPS0(_inst.RD) = ps0; - rPS1(_inst.RD) = ps1; - } - else - { - float ps0 = Helper_Dequantize(EA, ldType, ldScale); - if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) - { - return; - } - rPS0(_inst.RD) = ps0; - rPS1(_inst.RD) = 1.0f; + return; } rGPR[_inst.RA] = EA; } void Interpreter::psq_st(UGeckoInstruction _inst) { - const UGQR gqr(rSPR(SPR_GQR0 + _inst.I)); - const EQuantizeType stType = gqr.st_type; - const unsigned int stScale = gqr.st_scale; const u32 EA = _inst.RA ? (rGPR[_inst.RA] + _inst.SIMM_12) : (u32)_inst.SIMM_12; - - int c = 4; - if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8) - c = 0x1; - else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16) - c = 0x2; - - if (_inst.W == 0) - { - Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale); - Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale); - } - else - { - Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale); - } + Helper_Quantize(EA, _inst.I, _inst.RS, _inst.W); } void Interpreter::psq_stu(UGeckoInstruction _inst) { - const UGQR gqr(rSPR(SPR_GQR0 + _inst.I)); - const EQuantizeType stType = gqr.st_type; - const unsigned int stScale = gqr.st_scale; const u32 EA = rGPR[_inst.RA] + _inst.SIMM_12; + Helper_Quantize(EA, _inst.I, _inst.RS, _inst.W); - int c = 4; - if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8) - c = 0x1; - else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16) - c = 0x2; - - if (_inst.W == 0) - { - Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale); - Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale); - } - else - { - Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale); - } if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) { return; @@ -263,132 +311,36 @@ void Interpreter::psq_stu(UGeckoInstruction _inst) void Interpreter::psq_lx(UGeckoInstruction _inst) { - const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix)); - const EQuantizeType ldType = gqr.ld_type; - const unsigned int ldScale = gqr.ld_scale; const u32 EA = _inst.RA ? (rGPR[_inst.RA] + rGPR[_inst.RB]) : rGPR[_inst.RB]; - - int c = 4; - if (ldType == QUANTIZE_U8 || ldType == QUANTIZE_S8) - c = 0x1; - else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16) - c = 0x2; - - if (_inst.Wx == 0) - { - float ps0 = Helper_Dequantize(EA, ldType, ldScale); - float ps1 = Helper_Dequantize(EA + c, ldType, ldScale); - - if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) - { - return; - } - - rPS0(_inst.RD) = ps0; - rPS1(_inst.RD) = ps1; - } - else - { - float ps0 = Helper_Dequantize(EA, ldType, ldScale); - float ps1 = 1.0f; - - if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) - { - return; - } - - rPS0(_inst.RD) = ps0; - rPS1(_inst.RD) = ps1; - } + Helper_Dequantize(EA, _inst.Ix, _inst.RD, _inst.Wx); } void Interpreter::psq_stx(UGeckoInstruction _inst) { - const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix)); - const EQuantizeType stType = gqr.st_type; - const unsigned int stScale = gqr.st_scale; const u32 EA = _inst.RA ? (rGPR[_inst.RA] + rGPR[_inst.RB]) : rGPR[_inst.RB]; - - int c = 4; - if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8) - c = 0x1; - else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16) - c = 0x2; - - if (_inst.Wx == 0) - { - Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale); - Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale); - } - else - { - Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale); - } + Helper_Quantize(EA, _inst.Ix, _inst.RS, _inst.Wx); } void Interpreter::psq_lux(UGeckoInstruction _inst) { - const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix)); - const EQuantizeType ldType = gqr.ld_type; - const unsigned int ldScale = gqr.ld_scale; const u32 EA = rGPR[_inst.RA] + rGPR[_inst.RB]; + Helper_Dequantize(EA, _inst.Ix, _inst.RD, _inst.Wx); - int c = 4; - if (ldType == QUANTIZE_U8 || ldType == QUANTIZE_S8) - c = 0x1; - else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16) - c = 0x2; - - if (_inst.Wx == 0) + if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) { - float ps0 = Helper_Dequantize(EA, ldType, ldScale); - float ps1 = Helper_Dequantize(EA + c, ldType, ldScale); - if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) - { - return; - } - rPS0(_inst.RD) = ps0; - rPS1(_inst.RD) = ps1; - } - else - { - float ps0 = Helper_Dequantize(EA, ldType, ldScale); - if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) - { - return; - } - rPS0(_inst.RD) = ps0; - rPS1(_inst.RD) = 1.0f; + return; } rGPR[_inst.RA] = EA; } void Interpreter::psq_stux(UGeckoInstruction _inst) { - const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix)); - const EQuantizeType stType = gqr.st_type; - const unsigned int stScale = gqr.st_scale; const u32 EA = rGPR[_inst.RA] + rGPR[_inst.RB]; + Helper_Quantize(EA, _inst.Ix, _inst.RS, _inst.Wx); - int c = 4; - if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8) - c = 0x1; - else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16) - c = 0x2; - - if (_inst.Wx == 0) - { - Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale); - Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale); - } - else - { - Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale); - } if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI) { return; } rGPR[_inst.RA] = EA; - -} // namespace======= +} diff --git a/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp b/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp index 13b97d6cb3..f00478858a 100644 --- a/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/JitArm32/JitAsm.cpp @@ -27,22 +27,19 @@ using namespace ArmGen; JitArmAsmRoutineManager asm_routines; -static void WriteDual32(u32 value1, u32 value2, u32 address) +static void WriteDual8(u32 val1, u32 val2, u32 addr) { - PowerPC::Write_U32(value1, address); - PowerPC::Write_U32(value2, address + 4); + PowerPC::Write_U16(((u16)(u8)val1 << 8) | (u16)(u8)val2, addr); } -static void WriteDual16(u32 value1, u32 value2, u32 address) +static void WriteDual16(u32 val1, u32 val2, u32 addr) { - PowerPC::Write_U16(value1, address); - PowerPC::Write_U16(value2, address + 2); + PowerPC::Write_U32(((u32)(u16)val1 << 16) | (u32)(u16)val2, addr); } -static void WriteDual8(u32 value1, u32 value2, u32 address) +static void WriteDual32(u32 val1, u32 val2, u32 addr) { - PowerPC::Write_U8(value1, address); - PowerPC::Write_U8(value2, address + 1); + PowerPC::Write_U64(((u64)val1 << 32) | (u64)val2, addr); } void JitArmAsmRoutineManager::Generate() diff --git a/Source/Core/Core/PowerPC/MMU.cpp b/Source/Core/Core/PowerPC/MMU.cpp index 18b7aad61c..c4993c7e2e 100644 --- a/Source/Core/Core/PowerPC/MMU.cpp +++ b/Source/Core/Core/PowerPC/MMU.cpp @@ -238,7 +238,7 @@ __forceinline static void WriteToHardware(u32 em_address, const T data) { // First, let's check for FIFO writes, since they are probably the most common // reason we end up in this function: - if (flag == FLAG_WRITE && (em_address & 0xFFFFF000) == 0xCC008000) + if (flag == FLAG_WRITE && em_address == 0xCC008000) { switch (sizeof(T)) {