GTE: Fix MVMVA flags due to missing 43-bit-sign-extend
This commit is contained in:
parent
d3893bc9f2
commit
8841934009
|
@ -190,6 +190,15 @@ constexpr bool ConvertToBoolUnchecked(TValue value)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Generic sign extension.
//
// Interprets the low NBITS bits of `value` as a two's-complement number and
// returns that number widened to the full width of T. Bits above NBITS in the
// input are ignored.
//
// NBITS must be in the range [1, bit-width of T]; this is enforced at compile
// time. T may be a signed or unsigned integer type; for unsigned T the result
// carries the same bit pattern as the signed result would.
template<int NBITS, typename T>
constexpr T SignExtendN(T value)
{
  static_assert(std::is_integral_v<T>, "SignExtendN requires an integer type");

  using U = std::make_unsigned_t<T>;
  constexpr int total_bits = static_cast<int>(8 * sizeof(T));
  static_assert(NBITS > 0 && NBITS <= total_bits, "NBITS must be within the width of T");

  // http://graphics.stanford.edu/~seander/bithacks.html#VariableSignExtend
  // The mask/xor/subtract form is used instead of the shift-left/shift-right
  // pair: left-shifting a negative signed value is undefined before C++20,
  // whereas everything below is done on unsigned (or promoted non-negative)
  // operands.
  constexpr U field_mask = static_cast<U>(static_cast<U>(~U{0}) >> (total_bits - NBITS));
  constexpr U sign_bit = static_cast<U>(U{1} << (NBITS - 1));

  // Keep only the NBITS-wide field, then flip and subtract the sign bit so a
  // set sign bit propagates into all higher bits.
  const U field = static_cast<U>(static_cast<U>(value) & field_mask);
  return static_cast<T>(static_cast<U>((field ^ sign_bit) - sign_bit));
}
|
||||||
|
|
||||||
// Enum class bitwise operators
|
// Enum class bitwise operators
|
||||||
#define IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(type_) \
|
#define IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(type_) \
|
||||||
inline constexpr type_ operator&(type_ lhs, type_ rhs) \
|
inline constexpr type_ operator&(type_ lhs, type_ rhs) \
|
||||||
|
|
|
@ -405,11 +405,12 @@ void Core::RTPS(const s16 V[3], bool sf, bool lm, bool last)
|
||||||
{
|
{
|
||||||
const u8 shift = sf ? 12 : 0;
|
const u8 shift = sf ? 12 : 0;
|
||||||
#define dot3(i) \
|
#define dot3(i) \
|
||||||
CheckMACResult<i + 1>( \
|
SignExtendMACResult<i + 1>( \
|
||||||
(s64(m_regs.TR[i]) << 12) + \
|
(s64(m_regs.TR[i]) << 12) + \
|
||||||
CheckMACResult<i + 1>(CheckMACResult<i + 1>(CheckMACResult<i + 1>(s64(s32(m_regs.RT[i][0]) * s32(V[0]))) + \
|
SignExtendMACResult<i + 1>( \
|
||||||
s64(s32(m_regs.RT[i][1]) * s32(V[1]))) + \
|
SignExtendMACResult<i + 1>(SignExtendMACResult<i + 1>(s64(s32(m_regs.RT[i][0]) * s32(V[0]))) + \
|
||||||
s64(s32(m_regs.RT[i][2]) * s32(V[2]))))
|
s64(s32(m_regs.RT[i][1]) * s32(V[1]))) + \
|
||||||
|
s64(s32(m_regs.RT[i][2]) * s32(V[2]))))
|
||||||
|
|
||||||
// IR1 = MAC1 = (TRX*1000h + RT11*VX0 + RT12*VY0 + RT13*VZ0) SAR (sf*12)
|
// IR1 = MAC1 = (TRX*1000h + RT11*VX0 + RT12*VY0 + RT13*VZ0) SAR (sf*12)
|
||||||
// IR2 = MAC2 = (TRY*1000h + RT21*VX0 + RT22*VY0 + RT23*VZ0) SAR (sf*12)
|
// IR2 = MAC2 = (TRY*1000h + RT21*VX0 + RT22*VY0 + RT23*VZ0) SAR (sf*12)
|
||||||
|
@ -451,14 +452,17 @@ void Core::RTPS(const s16 V[3], bool sf, bool lm, bool last)
|
||||||
|
|
||||||
// MAC0=(((H*20000h/SZ3)+1)/2)*IR1+OFX, SX2=MAC0/10000h ;ScrX FIFO -400h..+3FFh
|
// MAC0=(((H*20000h/SZ3)+1)/2)*IR1+OFX, SX2=MAC0/10000h ;ScrX FIFO -400h..+3FFh
|
||||||
// MAC0=(((H*20000h/SZ3)+1)/2)*IR2+OFY, SY2=MAC0/10000h ;ScrY FIFO -400h..+3FFh
|
// MAC0=(((H*20000h/SZ3)+1)/2)*IR2+OFY, SY2=MAC0/10000h ;ScrY FIFO -400h..+3FFh
|
||||||
const s64 Sx = TruncateAndSetMAC<0>(s64(result) * s64(m_regs.IR1) + s64(m_regs.OFX), 0);
|
const s64 Sx = s64(result) * s64(m_regs.IR1) + s64(m_regs.OFX);
|
||||||
const s64 Sy = TruncateAndSetMAC<0>(s64(result) * s64(m_regs.IR2) + s64(m_regs.OFY), 0);
|
const s64 Sy = s64(result) * s64(m_regs.IR2) + s64(m_regs.OFY);
|
||||||
|
TruncateAndSetMAC<0>(Sx, 0);
|
||||||
|
TruncateAndSetMAC<1>(Sy, 0);
|
||||||
PushSXY(s32(Sx >> 16), s32(Sy >> 16));
|
PushSXY(s32(Sx >> 16), s32(Sy >> 16));
|
||||||
|
|
||||||
if (last)
|
if (last)
|
||||||
{
|
{
|
||||||
// MAC0=(((H*20000h/SZ3)+1)/2)*DQA+DQB, IR0=MAC0/1000h ;Depth cueing 0..+1000h
|
// MAC0=(((H*20000h/SZ3)+1)/2)*DQA+DQB, IR0=MAC0/1000h ;Depth cueing 0..+1000h
|
||||||
const s64 Sz = TruncateAndSetMAC<0>(s64(result) * s64(m_regs.DQA) + s64(m_regs.DQB), 0);
|
const s64 Sz = s64(result) * s64(m_regs.DQA) + s64(m_regs.DQB);
|
||||||
|
TruncateAndSetMAC<0>(Sz, 0);
|
||||||
TruncateAndSetIR<0>(s32(Sz >> 12), true);
|
TruncateAndSetIR<0>(s32(Sz >> 12), true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -517,8 +521,7 @@ void Core::Execute_AVSZ3(Instruction inst)
|
||||||
{
|
{
|
||||||
m_regs.FLAG.Clear();
|
m_regs.FLAG.Clear();
|
||||||
|
|
||||||
const s64 result =
|
const s64 result = s64(m_regs.ZSF3) * s32(u32(m_regs.SZ1) + u32(m_regs.SZ2) + u32(m_regs.SZ3));
|
||||||
TruncateAndSetMAC<0>(s64(m_regs.ZSF3) * s32(u32(m_regs.SZ1) + u32(m_regs.SZ2) + u32(m_regs.SZ3)), 0);
|
|
||||||
TruncateAndSetMAC<0>(result, 0);
|
TruncateAndSetMAC<0>(result, 0);
|
||||||
SetOTZ(s32(result >> 12));
|
SetOTZ(s32(result >> 12));
|
||||||
|
|
||||||
|
@ -529,8 +532,7 @@ void Core::Execute_AVSZ4(Instruction inst)
|
||||||
{
|
{
|
||||||
m_regs.FLAG.Clear();
|
m_regs.FLAG.Clear();
|
||||||
|
|
||||||
const s64 result = TruncateAndSetMAC<0>(
|
const s64 result = s64(m_regs.ZSF4) * s32(u32(m_regs.SZ0) + u32(m_regs.SZ1) + u32(m_regs.SZ2) + u32(m_regs.SZ3));
|
||||||
s64(m_regs.ZSF4) * s32(u32(m_regs.SZ0) + u32(m_regs.SZ1) + u32(m_regs.SZ2) + u32(m_regs.SZ3)), 0);
|
|
||||||
TruncateAndSetMAC<0>(result, 0);
|
TruncateAndSetMAC<0>(result, 0);
|
||||||
SetOTZ(s32(result >> 12));
|
SetOTZ(s32(result >> 12));
|
||||||
|
|
||||||
|
@ -540,41 +542,31 @@ void Core::Execute_AVSZ4(Instruction inst)
|
||||||
void Core::MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
|
void Core::MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
|
||||||
{
|
{
|
||||||
#define dot3(i) \
|
#define dot3(i) \
|
||||||
TruncateAndSetMAC<i + 1>( \
|
TruncateAndSetMACAndIR<i + 1>(SignExtendMACResult<i + 1>((s64(M[i][0]) * s64(Vx)) + (s64(M[i][1]) * s64(Vy))) + \
|
||||||
CheckMACResult<i + 1>(CheckMACResult<i + 1>(s64(s32(M[i][0]) * s32(Vx))) + s64(s32(M[i][1]) * s32(Vy))) + \
|
(s64(M[i][2]) * s64(Vz)), \
|
||||||
s64(s32(M[i][2]) * s32(Vz)), \
|
shift, lm)
|
||||||
shift)
|
|
||||||
|
|
||||||
dot3(0);
|
dot3(0);
|
||||||
dot3(1);
|
dot3(1);
|
||||||
dot3(2);
|
dot3(2);
|
||||||
|
|
||||||
#undef dot3
|
#undef dot3
|
||||||
|
|
||||||
TruncateAndSetIR<1>(m_regs.MAC1, lm);
|
|
||||||
TruncateAndSetIR<2>(m_regs.MAC2, lm);
|
|
||||||
TruncateAndSetIR<3>(m_regs.MAC3, lm);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Core::MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
|
void Core::MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm)
|
||||||
{
|
{
|
||||||
#define dot3(i) \
|
#define dot3(i) \
|
||||||
TruncateAndSetMAC<i + 1>( \
|
TruncateAndSetMACAndIR<i + 1>( \
|
||||||
(s64(T[i]) << 12) + \
|
SignExtendMACResult<i + 1>(SignExtendMACResult<i + 1>((s64(T[i]) << 12) + (s64(M[i][0]) * s64(Vx))) + \
|
||||||
CheckMACResult<i + 1>( \
|
(s64(M[i][1]) * s64(Vy))) + \
|
||||||
CheckMACResult<i + 1>(CheckMACResult<i + 1>(s64(s32(M[i][0]) * s32(Vx))) + s64(s32(M[i][1]) * s32(Vy))) + \
|
(s64(M[i][2]) * s64(Vz)), \
|
||||||
s64(s32(M[i][2]) * s32(Vz))), \
|
shift, lm)
|
||||||
shift)
|
|
||||||
|
|
||||||
dot3(0);
|
dot3(0);
|
||||||
dot3(1);
|
dot3(1);
|
||||||
dot3(2);
|
dot3(2);
|
||||||
|
|
||||||
#undef dot3
|
#undef dot3
|
||||||
|
|
||||||
TruncateAndSetIR<1>(m_regs.MAC1, lm);
|
|
||||||
TruncateAndSetIR<2>(m_regs.MAC2, lm);
|
|
||||||
TruncateAndSetIR<3>(m_regs.MAC3, lm);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Core::NCCS(const s16 V[3], bool sf, bool lm)
|
void Core::NCCS(const s16 V[3], bool sf, bool lm)
|
||||||
|
|
|
@ -35,15 +35,22 @@ private:
|
||||||
static constexpr s32 IR123_MIN_VALUE = -(INT64_C(1) << 15);
|
static constexpr s32 IR123_MIN_VALUE = -(INT64_C(1) << 15);
|
||||||
static constexpr s32 IR123_MAX_VALUE = (INT64_C(1) << 15) - 1;
|
static constexpr s32 IR123_MAX_VALUE = (INT64_C(1) << 15) - 1;
|
||||||
|
|
||||||
// Checks for underflow/overflow. Returns the value untouched so it can be threaded through an expression.
|
// Checks for underflow/overflow.
|
||||||
template<u32 index>
|
template<u32 index>
|
||||||
s64 CheckMACResult(s64 value);
|
void CheckMACOverflow(s64 value);
|
||||||
|
|
||||||
|
// Checks for underflow/overflow, sign-extending to 31 bits (MAC0) or 44 bits (MAC1-3).
|
||||||
|
template<u32 index>
|
||||||
|
s64 SignExtendMACResult(s64 value);
|
||||||
|
|
||||||
template<u32 index>
|
template<u32 index>
|
||||||
s64 TruncateAndSetMAC(s64 value, u8 shift);
|
void TruncateAndSetMAC(s64 value, u8 shift);
|
||||||
|
|
||||||
template<u32 index>
|
template<u32 index>
|
||||||
s16 TruncateAndSetIR(s32 value, bool lm);
|
void TruncateAndSetMACAndIR(s64 value, u8 shift, bool lm);
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
void TruncateAndSetIR(s32 value, bool lm);
|
||||||
|
|
||||||
template<u32 index>
|
template<u32 index>
|
||||||
u8 TruncateRGB(s32 value);
|
u8 TruncateRGB(s32 value);
|
||||||
|
@ -55,7 +62,7 @@ private:
|
||||||
|
|
||||||
// 3x3 matrix * 3x1 vector, updates MAC[1-3] and IR[1-3]
|
// 3x3 matrix * 3x1 vector, updates MAC[1-3] and IR[1-3]
|
||||||
void MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm);
|
void MulMatVec(const s16 M[3][3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm);
|
||||||
|
|
||||||
// 3x3 matrix * 3x1 vector with translation, updates MAC[1-3] and IR[1-3]
|
// 3x3 matrix * 3x1 vector with translation, updates MAC[1-3] and IR[1-3]
|
||||||
void MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm);
|
void MulMatVec(const s16 M[3][3], const s32 T[3], const s16 Vx, const s16 Vy, const s16 Vz, u8 shift, bool lm);
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
#include "gte.h"
|
#include "gte.h"
|
||||||
|
|
||||||
template<u32 index>
|
template<u32 index>
|
||||||
s64 GTE::Core::CheckMACResult(s64 value)
|
void GTE::Core::CheckMACOverflow(s64 value)
|
||||||
{
|
{
|
||||||
constexpr s64 MIN_VALUE = (index == 0) ? MAC0_MIN_VALUE : MAC123_MIN_VALUE;
|
constexpr s64 MIN_VALUE = (index == 0) ? MAC0_MIN_VALUE : MAC123_MIN_VALUE;
|
||||||
constexpr s64 MAX_VALUE = (index == 0) ? MAC0_MAX_VALUE : MAC123_MAX_VALUE;
|
constexpr s64 MAX_VALUE = (index == 0) ? MAC0_MAX_VALUE : MAC123_MAX_VALUE;
|
||||||
|
@ -27,24 +27,28 @@ s64 GTE::Core::CheckMACResult(s64 value)
|
||||||
else if constexpr (index == 3)
|
else if constexpr (index == 3)
|
||||||
m_regs.FLAG.mac3_overflow = true;
|
m_regs.FLAG.mac3_overflow = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
return value;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<u32 index>
|
template<u32 index>
|
||||||
s64 GTE::Core::TruncateAndSetMAC(s64 value, u8 shift)
|
s64 GTE::Core::SignExtendMACResult(s64 value)
|
||||||
{
|
{
|
||||||
value = CheckMACResult<index>(value);
|
CheckMACOverflow<index>(value);
|
||||||
|
return SignExtendN < index == 0 ? 31 : 44 > (value);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
void GTE::Core::TruncateAndSetMAC(s64 value, u8 shift)
|
||||||
|
{
|
||||||
|
CheckMACOverflow<index>(value);
|
||||||
|
|
||||||
// shift should be done before storing to avoid losing precision
|
// shift should be done before storing to avoid losing precision
|
||||||
value >>= shift;
|
value >>= shift;
|
||||||
|
|
||||||
m_regs.dr32[24 + index] = Truncate32(static_cast<u64>(value));
|
m_regs.dr32[24 + index] = Truncate32(static_cast<u64>(value));
|
||||||
return value;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<u32 index>
|
template<u32 index>
|
||||||
s16 GTE::Core::TruncateAndSetIR(s32 value, bool lm)
|
void GTE::Core::TruncateAndSetIR(s32 value, bool lm)
|
||||||
{
|
{
|
||||||
constexpr s32 MIN_VALUE = (index == 0) ? IR0_MIN_VALUE : IR123_MIN_VALUE;
|
constexpr s32 MIN_VALUE = (index == 0) ? IR0_MIN_VALUE : IR123_MIN_VALUE;
|
||||||
constexpr s32 MAX_VALUE = (index == 0) ? IR0_MAX_VALUE : IR123_MAX_VALUE;
|
constexpr s32 MAX_VALUE = (index == 0) ? IR0_MAX_VALUE : IR123_MAX_VALUE;
|
||||||
|
@ -76,7 +80,22 @@ s16 GTE::Core::TruncateAndSetIR(s32 value, bool lm)
|
||||||
|
|
||||||
// store sign-extended 16-bit value as 32-bit
|
// store sign-extended 16-bit value as 32-bit
|
||||||
m_regs.dr32[8 + index] = value;
|
m_regs.dr32[8 + index] = value;
|
||||||
return static_cast<s16>(value);
|
}
|
||||||
|
|
||||||
|
template<u32 index>
|
||||||
|
void GTE::Core::TruncateAndSetMACAndIR(s64 value, u8 shift, bool lm)
|
||||||
|
{
|
||||||
|
CheckMACOverflow<index>(value);
|
||||||
|
|
||||||
|
// shift should be done before storing to avoid losing precision
|
||||||
|
value >>= shift;
|
||||||
|
|
||||||
|
// set MAC
|
||||||
|
const s32 value32 = static_cast<s32>(value);
|
||||||
|
m_regs.dr32[24 + index] = value32;
|
||||||
|
|
||||||
|
// set IR
|
||||||
|
TruncateAndSetIR<index>(value32, lm);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<u32 index>
|
template<u32 index>
|
||||||
|
|
Loading…
Reference in New Issue