Fix paired loadstore to use correct load/store calls.

psq_st performs one store, and psq_ld one load, from the perspective of the
MMU; getting this wrong leads to potentially incorrect behavior (incorrect page
faults, weirdness with the gather pipe, etc.).  Fix this, and stop masking
the address when checking for gather pipe writes.

Also a bunch of cleanup.
This commit is contained in:
magumagu 2015-03-08 13:44:26 -07:00
parent 647cd3c8e0
commit dda5e610eb
5 changed files with 205 additions and 253 deletions

View File

@ -305,6 +305,9 @@ union UGeckoInstruction
enum EQuantizeType : u32
{
QUANTIZE_FLOAT = 0,
QUANTIZE_INVALID1 = 1,
QUANTIZE_INVALID2 = 2,
QUANTIZE_INVALID3 = 3,
QUANTIZE_U8 = 4,
QUANTIZE_U16 = 5,
QUANTIZE_S8 = 6,

View File

@ -312,8 +312,8 @@ private:
static u32 Helper_Get_EA_UX(const UGeckoInstruction _inst);
// paired helper
static float Helper_Dequantize(const u32 _Addr, const EQuantizeType _quantizeType, const unsigned int _uScale);
static void Helper_Quantize (const u32 _Addr, const double _fValue, const EQuantizeType _quantizeType, const unsigned _uScale);
static void Helper_Dequantize(u32 addr, u32 instI, u32 instRD, u32 instW);
static void Helper_Quantize(u32 addr, u32 instI, u32 instRS, u32 instW);
// other helper
static u32 Helper_Mask(int mb, int me);

View File

@ -48,212 +48,260 @@ const float m_quantizeTable[] =
1.0 / (1ULL << 4), 1.0 / (1ULL << 3), 1.0 / (1ULL << 2), 1.0 / (1ULL << 1),
};
void Interpreter::Helper_Quantize(const u32 _Addr, const double _fValue, const EQuantizeType _quantizeType, const unsigned int _uScale)
template<typename SType> SType ScaleAndClamp(double ps, u32 stScale)
{
switch (_quantizeType)
float convPS = (float)ps * m_quantizeTable[stScale];
float min = (float)std::numeric_limits<SType>::min();
float max = (float)std::numeric_limits<SType>::max();
MathUtil::Clamp(&convPS, min, max);
return (SType)convPS;
}
template<typename T> static T ReadUnpaired(u32 addr);
template<> u8 ReadUnpaired<u8>(u32 addr)
{
return PowerPC::Read_U8(addr);
}
template<> u16 ReadUnpaired<u16>(u32 addr)
{
return PowerPC::Read_U16(addr);
}
template<> u32 ReadUnpaired<u32>(u32 addr)
{
return PowerPC::Read_U32(addr);
}
template<typename T> static std::pair<T, T> ReadPair(u32 addr);
template<> std::pair<u8, u8> ReadPair<u8>(u32 addr)
{
u16 val = PowerPC::Read_U16(addr);
return { (u8)(val >> 8), (u8)val };
}
template<> std::pair<u16, u16> ReadPair<u16>(u32 addr)
{
u32 val = PowerPC::Read_U32(addr);
return { (u16)(val >> 16), (u16)val };
}
template<> std::pair<u32, u32> ReadPair<u32>(u32 addr)
{
u64 val = PowerPC::Read_U64(addr);
return { (u32)(val >> 32), (u32)val };
}
template<typename T> static void WriteUnpaired(T val, u32 addr);
template<> void WriteUnpaired<u8>(u8 val, u32 addr)
{
PowerPC::Write_U8(val, addr);
}
template<> void WriteUnpaired<u16>(u16 val, u32 addr)
{
PowerPC::Write_U16(val, addr);
}
template<> void WriteUnpaired<u32>(u32 val, u32 addr)
{
PowerPC::Write_U32(val, addr);
}
template<typename T> static void WritePair(T val1, T val2, u32 addr);
template<> void WritePair<u8>(u8 val1, u8 val2, u32 addr)
{
PowerPC::Write_U16(((u16)val1 << 8) | (u16)val2, addr);
}
template<> void WritePair<u16>(u16 val1, u16 val2, u32 addr)
{
PowerPC::Write_U32(((u32)val1 << 16) | (u32)val2, addr);
}
template<> void WritePair<u32>(u32 val1, u32 val2, u32 addr)
{
PowerPC::Write_U64(((u64)val1 << 32) | (u64)val2, addr);
}
template<typename T>
void QuantizeAndStore(double ps0, double ps1, u32 addr, u32 instW, u32 stScale)
{
typedef typename std::make_unsigned<T>::type U;
U convPS0 = (U)ScaleAndClamp<T>(ps0, stScale);
if (instW)
{
WriteUnpaired<U>(convPS0, addr);
}
else
{
U convPS1 = (U)ScaleAndClamp<T>(ps1, stScale);
WritePair<U>(convPS0, convPS1, addr);
}
}
void Interpreter::Helper_Quantize(u32 addr, u32 instI, u32 instRS, u32 instW)
{
const UGQR gqr(rSPR(SPR_GQR0 + instI));
const EQuantizeType stType = gqr.st_type;
const unsigned int stScale = gqr.st_scale;
double ps0 = rPS0(instRS);
double ps1 = rPS1(instRS);
switch (stType)
{
case QUANTIZE_FLOAT:
PowerPC::Write_U32(ConvertToSingleFTZ(*(u64*)&_fValue), _Addr);
break;
// used for THP player
case QUANTIZE_U8:
{
u32 convPS0 = ConvertToSingleFTZ(MathUtil::IntDouble(ps0).i);
if (instW)
{
float fResult = (float)_fValue * m_quantizeTable[_uScale];
MathUtil::Clamp(&fResult, 0.0f, 255.0f);
PowerPC::Write_U8((u8)fResult, _Addr);
WriteUnpaired<u32>(convPS0, addr);
}
else
{
u32 convPS1 = ConvertToSingleFTZ(MathUtil::IntDouble(ps1).i);
WritePair<u32>(convPS0, convPS1, addr);
}
break;
}
case QUANTIZE_U8:
QuantizeAndStore<u8>(ps0, ps1, addr, instW, stScale);
break;
case QUANTIZE_U16:
{
float fResult = (float)_fValue * m_quantizeTable[_uScale];
MathUtil::Clamp(&fResult, 0.0f, 65535.0f);
PowerPC::Write_U16((u16)fResult, _Addr);
}
QuantizeAndStore<u16>(ps0, ps1, addr, instW, stScale);
break;
case QUANTIZE_S8:
{
float fResult = (float)_fValue * m_quantizeTable[_uScale];
MathUtil::Clamp(&fResult, -128.0f, 127.0f);
PowerPC::Write_U8((u8)(s8)fResult, _Addr);
}
QuantizeAndStore<s8>(ps0, ps1, addr, instW, stScale);
break;
case QUANTIZE_S16:
{
float fResult = (float)_fValue * m_quantizeTable[_uScale];
MathUtil::Clamp(&fResult, -32768.0f, 32767.0f);
PowerPC::Write_U16((u16)(s16)fResult, _Addr);
}
QuantizeAndStore<s16>(ps0, ps1, addr, instW, stScale);
break;
default:
_dbg_assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read");
case QUANTIZE_INVALID1:
case QUANTIZE_INVALID2:
case QUANTIZE_INVALID3:
_assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read");
break;
}
}
float Interpreter::Helper_Dequantize(const u32 _Addr, const EQuantizeType _quantizeType, const unsigned int _uScale)
template<typename T>
std::pair<float, float> LoadAndDequantize(u32 addr, u32 instW, u32 ldScale)
{
// dequantize the value
float fResult;
switch (_quantizeType)
typedef typename std::make_unsigned<T>::type U;
float ps0, ps1;
if (instW)
{
U value = ReadUnpaired<U>(addr);
ps0 = (float)(T)(value) * m_dequantizeTable[ldScale];
ps1 = 1.0f;
}
else
{
std::pair<U, U> value = ReadPair<U>(addr);
ps0 = (float)(T)(value.first) * m_dequantizeTable[ldScale];
ps1 = (float)(T)(value.second) * m_dequantizeTable[ldScale];
}
return { ps0, ps1 };
}
void Interpreter::Helper_Dequantize(u32 addr, u32 instI, u32 instRD, u32 instW)
{
UGQR gqr(rSPR(SPR_GQR0 + instI));
EQuantizeType ldType = gqr.ld_type;
unsigned int ldScale = gqr.ld_scale;
float ps0, ps1;
switch (ldType)
{
case QUANTIZE_FLOAT:
if (instW)
{
u32 dwValue = PowerPC::Read_U32(_Addr);
fResult = *(float*)&dwValue;
u32 value = ReadUnpaired<u32>(addr);
ps0 = MathUtil::IntFloat(value).f;
ps1 = 1.0f;
}
else
{
std::pair<u32, u32> value = ReadPair<u32>(addr);
ps0 = MathUtil::IntFloat(value.first).f;
ps1 = MathUtil::IntFloat(value.second).f;
}
break;
case QUANTIZE_U8:
fResult = static_cast<float>(PowerPC::Read_U8(_Addr)) * m_dequantizeTable[_uScale];
std::tie(ps0, ps1) = LoadAndDequantize<u8>(addr, instW, ldScale);
break;
case QUANTIZE_U16:
fResult = static_cast<float>(PowerPC::Read_U16(_Addr)) * m_dequantizeTable[_uScale];
std::tie(ps0, ps1) = LoadAndDequantize<u16>(addr, instW, ldScale);
break;
case QUANTIZE_S8:
fResult = static_cast<float>((s8)PowerPC::Read_U8(_Addr)) * m_dequantizeTable[_uScale];
std::tie(ps0, ps1) = LoadAndDequantize<s8>(addr, instW, ldScale);
break;
// used for THP player
case QUANTIZE_S16:
fResult = static_cast<float>((s16)PowerPC::Read_U16(_Addr)) * m_dequantizeTable[_uScale];
std::tie(ps0, ps1) = LoadAndDequantize<s16>(addr, instW, ldScale);
break;
default:
_dbg_assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read");
fResult = 0;
case QUANTIZE_INVALID1:
case QUANTIZE_INVALID2:
case QUANTIZE_INVALID3:
_assert_msg_(POWERPC, 0, "PS dequantize - unknown type to read");
ps0 = 0.f;
ps1 = 0.f;
break;
}
return fResult;
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(instRD) = ps0;
rPS1(instRD) = ps1;
}
void Interpreter::psq_l(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
const EQuantizeType ldType = gqr.ld_type;
const unsigned int ldScale = gqr.ld_scale;
const u32 EA = _inst.RA ?
(rGPR[_inst.RA] + _inst.SIMM_12) : (u32)_inst.SIMM_12;
int c = 4;
if (ldType == QUANTIZE_U8 || ldType == QUANTIZE_S8)
c = 0x1;
else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16)
c = 0x2;
if (_inst.W == 0)
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
float ps1 = Helper_Dequantize(EA + c, ldType, ldScale);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = ps1;
}
else
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = 1.0f;
}
Helper_Dequantize(EA, _inst.I, _inst.RD, _inst.W);
}
void Interpreter::psq_lu(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
const EQuantizeType ldType = gqr.ld_type;
const unsigned int ldScale = gqr.ld_scale;
const u32 EA = rGPR[_inst.RA] + _inst.SIMM_12;
Helper_Dequantize(EA, _inst.I, _inst.RD, _inst.W);
int c = 4;
if (ldType == QUANTIZE_U8 || ldType == QUANTIZE_S8)
c = 0x1;
else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16)
c = 0x2;
if (_inst.W == 0)
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
float ps1 = Helper_Dequantize(EA + c, ldType, ldScale);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = ps1;
}
else
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = 1.0f;
return;
}
rGPR[_inst.RA] = EA;
}
void Interpreter::psq_st(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
const EQuantizeType stType = gqr.st_type;
const unsigned int stScale = gqr.st_scale;
const u32 EA = _inst.RA ?
(rGPR[_inst.RA] + _inst.SIMM_12) : (u32)_inst.SIMM_12;
int c = 4;
if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8)
c = 0x1;
else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16)
c = 0x2;
if (_inst.W == 0)
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale);
}
else
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
}
Helper_Quantize(EA, _inst.I, _inst.RS, _inst.W);
}
void Interpreter::psq_stu(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.I));
const EQuantizeType stType = gqr.st_type;
const unsigned int stScale = gqr.st_scale;
const u32 EA = rGPR[_inst.RA] + _inst.SIMM_12;
Helper_Quantize(EA, _inst.I, _inst.RS, _inst.W);
int c = 4;
if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8)
c = 0x1;
else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16)
c = 0x2;
if (_inst.W == 0)
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale);
}
else
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
}
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
@ -263,132 +311,36 @@ void Interpreter::psq_stu(UGeckoInstruction _inst)
void Interpreter::psq_lx(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
const EQuantizeType ldType = gqr.ld_type;
const unsigned int ldScale = gqr.ld_scale;
const u32 EA = _inst.RA ? (rGPR[_inst.RA] + rGPR[_inst.RB]) : rGPR[_inst.RB];
int c = 4;
if (ldType == QUANTIZE_U8 || ldType == QUANTIZE_S8)
c = 0x1;
else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16)
c = 0x2;
if (_inst.Wx == 0)
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
float ps1 = Helper_Dequantize(EA + c, ldType, ldScale);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = ps1;
}
else
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
float ps1 = 1.0f;
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = ps1;
}
Helper_Dequantize(EA, _inst.Ix, _inst.RD, _inst.Wx);
}
void Interpreter::psq_stx(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
const EQuantizeType stType = gqr.st_type;
const unsigned int stScale = gqr.st_scale;
const u32 EA = _inst.RA ? (rGPR[_inst.RA] + rGPR[_inst.RB]) : rGPR[_inst.RB];
int c = 4;
if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8)
c = 0x1;
else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16)
c = 0x2;
if (_inst.Wx == 0)
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale);
}
else
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
}
Helper_Quantize(EA, _inst.Ix, _inst.RS, _inst.Wx);
}
void Interpreter::psq_lux(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
const EQuantizeType ldType = gqr.ld_type;
const unsigned int ldScale = gqr.ld_scale;
const u32 EA = rGPR[_inst.RA] + rGPR[_inst.RB];
Helper_Dequantize(EA, _inst.Ix, _inst.RD, _inst.Wx);
int c = 4;
if (ldType == QUANTIZE_U8 || ldType == QUANTIZE_S8)
c = 0x1;
else if (ldType == QUANTIZE_U16 || ldType == QUANTIZE_S16)
c = 0x2;
if (_inst.Wx == 0)
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
float ps1 = Helper_Dequantize(EA + c, ldType, ldScale);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = ps1;
}
else
{
float ps0 = Helper_Dequantize(EA, ldType, ldScale);
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rPS0(_inst.RD) = ps0;
rPS1(_inst.RD) = 1.0f;
return;
}
rGPR[_inst.RA] = EA;
}
void Interpreter::psq_stux(UGeckoInstruction _inst)
{
const UGQR gqr(rSPR(SPR_GQR0 + _inst.Ix));
const EQuantizeType stType = gqr.st_type;
const unsigned int stScale = gqr.st_scale;
const u32 EA = rGPR[_inst.RA] + rGPR[_inst.RB];
Helper_Quantize(EA, _inst.Ix, _inst.RS, _inst.Wx);
int c = 4;
if (stType == QUANTIZE_U8 || stType == QUANTIZE_S8)
c = 0x1;
else if (stType == QUANTIZE_U16 || stType == QUANTIZE_S16)
c = 0x2;
if (_inst.Wx == 0)
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
Helper_Quantize(EA + c, rPS1(_inst.RS), stType, stScale);
}
else
{
Helper_Quantize(EA, rPS0(_inst.RS), stType, stScale);
}
if (PowerPC::ppcState.Exceptions & EXCEPTION_DSI)
{
return;
}
rGPR[_inst.RA] = EA;
} // namespace=======
}

View File

@ -27,22 +27,19 @@ using namespace ArmGen;
JitArmAsmRoutineManager asm_routines;
static void WriteDual32(u32 value1, u32 value2, u32 address)
static void WriteDual8(u32 val1, u32 val2, u32 addr)
{
PowerPC::Write_U32(value1, address);
PowerPC::Write_U32(value2, address + 4);
PowerPC::Write_U16(((u16)(u8)val1 << 8) | (u16)(u8)val2, addr);
}
static void WriteDual16(u32 value1, u32 value2, u32 address)
static void WriteDual16(u32 val1, u32 val2, u32 addr)
{
PowerPC::Write_U16(value1, address);
PowerPC::Write_U16(value2, address + 2);
PowerPC::Write_U32(((u32)(u16)val1 << 16) | (u32)(u16)val2, addr);
}
static void WriteDual8(u32 value1, u32 value2, u32 address)
static void WriteDual32(u32 val1, u32 val2, u32 addr)
{
PowerPC::Write_U8(value1, address);
PowerPC::Write_U8(value2, address + 1);
PowerPC::Write_U64(((u64)val1 << 32) | (u64)val2, addr);
}
void JitArmAsmRoutineManager::Generate()

View File

@ -238,7 +238,7 @@ __forceinline static void WriteToHardware(u32 em_address, const T data)
{
// First, let's check for FIFO writes, since they are probably the most common
// reason we end up in this function:
if (flag == FLAG_WRITE && (em_address & 0xFFFFF000) == 0xCC008000)
if (flag == FLAG_WRITE && em_address == 0xCC008000)
{
switch (sizeof(T))
{