GTE: Implement NCDS (but incorrectly)

This commit is contained in:
Connor McLaughlin 2019-09-22 21:41:11 +10:00
parent f2d62fcce0
commit fbd7fcec48
6 changed files with 220 additions and 66 deletions

View File

@ -270,6 +270,10 @@ void Core::ExecuteInstruction(Instruction inst)
Execute_NCLIP(inst); Execute_NCLIP(inst);
break; break;
case 0x13:
Execute_NCDS(inst);
break;
case 0x28: case 0x28:
Execute_SQR(inst); Execute_SQR(inst);
break; break;
@ -415,6 +419,13 @@ void Core::PushSZ(s32 value)
m_regs.dr32[19] = static_cast<u32>(value); // SZ3 <- value m_regs.dr32[19] = static_cast<u32>(value); // SZ3 <- value
} }
// Pushes a new entry onto the three-deep colour FIFO (RGB0 <- RGB1 <- RGB2 <- new).
// The new entry is packed with r in bits 0-7, g in bits 8-15, b in bits 16-23
// and c in bits 24-31.
void Core::PushRGB(u8 r, u8 g, u8 b, u8 c)
{
  // Build the packed word up front; it only depends on the arguments.
  const auto red_bits = ZeroExtend32(r);
  const auto green_bits = ZeroExtend32(g) << 8;
  const auto blue_bits = ZeroExtend32(b) << 16;
  const auto code_bits = ZeroExtend32(c) << 24;

  // Age the existing entries, oldest first, then store the new one.
  m_regs.RGB0 = m_regs.RGB1;
  m_regs.RGB1 = m_regs.RGB2;
  m_regs.RGB2 = red_bits | green_bits | blue_bits | code_bits;
}
s32 Core::Divide(s32 dividend, s32 divisor) s32 Core::Divide(s32 dividend, s32 divisor)
{ {
DebugAssert(divisor != 0); DebugAssert(divisor != 0);
@ -567,4 +578,71 @@ void Core::Execute_AVSZ4(Instruction inst)
m_regs.FLAG.UpdateError(); m_regs.FLAG.UpdateError();
} }
// Returns the dot product of two 3-component s16 vectors as a 64-bit value.
// Each component product is formed in 32 bits (s16 * s16 always fits) and the
// three products are accumulated in 64 bits.
s64 Core::VecDot(const s16 A[3], const s16 B[3])
{
  s64 accumulator = 0;
  for (u32 component = 0; component < 3; component++)
  {
    const s32 product = s32(A[component]) * s32(B[component]);
    accumulator += s64(product);
  }
  return accumulator;
}
// Returns the dot product of the 3-component s16 vector A with the vector
// (B_x, B_y, B_z), given as individual components. Products are formed in
// 32 bits and summed in 64 bits.
s64 Core::VecDot(const s16 A[3], s16 B_x, s16 B_y, s16 B_z)
{
  const s32 term_x = s32(A[0]) * s32(B_x);
  const s32 term_y = s32(A[1]) * s32(B_y);
  const s32 term_z = s32(A[2]) * s32(B_z);
  return s64(term_x) + s64(term_y) + s64(term_z);
}
void Core::NCDS(const s16 V[3], bool sf, bool lm)
{
const u8 shift = sf ? 12 : 0;
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (LLM*V0) SAR (sf*12)
m_regs.MAC1 = TruncateMAC<1>(VecDot(m_regs.LLM[0], V) >> shift);
m_regs.MAC2 = TruncateMAC<2>(VecDot(m_regs.LLM[1], V) >> shift);
m_regs.MAC3 = TruncateMAC<3>(VecDot(m_regs.LLM[2], V) >> shift);
SetIR(0, m_regs.MAC1, lm);
SetIR(1, m_regs.MAC2, lm);
SetIR(2, m_regs.MAC3, lm);
// [IR1,IR2,IR3] = [MAC1,MAC2,MAC3] = (BK*1000h + LCM*IR) SAR (sf*12)
// TODO: First multiply should check overflow
m_regs.MAC1 = TruncateMAC<1>(
((ZeroExtend64(m_regs.RBK) * 0x1000) + VecDot(m_regs.LCM[0], m_regs.IR1, m_regs.IR2, m_regs.IR3)) >> shift);
m_regs.MAC2 = TruncateMAC<2>(
((ZeroExtend64(m_regs.GBK) * 0x1000) + VecDot(m_regs.LCM[1], m_regs.IR1, m_regs.IR2, m_regs.IR3)) >> shift);
m_regs.MAC3 = TruncateMAC<3>(
((ZeroExtend64(m_regs.BBK) * 0x1000) + VecDot(m_regs.LCM[2], m_regs.IR1, m_regs.IR2, m_regs.IR3)) >> shift);
SetIR(1, m_regs.MAC1, lm);
SetIR(2, m_regs.MAC2, lm);
SetIR(3, m_regs.MAC3, lm);
// [MAC1,MAC2,MAC3] = [R*IR1,G*IR2,B*IR3] SHL 4 ;<--- for NCDx/NCCx
m_regs.MAC1 = TruncateMAC<1>((ZeroExtend64(m_regs.RGBC[0]) * static_cast<u16>(m_regs.IR1)) << 4);
m_regs.MAC2 = TruncateMAC<1>((ZeroExtend64(m_regs.RGBC[1]) * static_cast<u16>(m_regs.IR2)) << 4);
m_regs.MAC3 = TruncateMAC<1>((ZeroExtend64(m_regs.RGBC[2]) * static_cast<u16>(m_regs.IR3)) << 4);
SetIR(1, m_regs.MAC1, false);
SetIR(2, m_regs.MAC2, false);
SetIR(3, m_regs.MAC3, false);
// [MAC1,MAC2,MAC3] = MAC+(FC-MAC)*IR0 ;<--- for NCDx only
// [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12) ;<--- for NCDx/NCCx
m_regs.MAC1 = TruncateMAC<1>(m_regs.MAC1 + ((s32(m_regs.RFC) - m_regs.MAC1) * m_regs.IR0));
m_regs.MAC2 = TruncateMAC<2>(m_regs.MAC2 + ((s32(m_regs.GFC) - m_regs.MAC2) * m_regs.IR0));
m_regs.MAC3 = TruncateMAC<3>(m_regs.MAC3 + ((s32(m_regs.BFC) - m_regs.MAC3) * m_regs.IR0));
// [MAC1,MAC2,MAC3] = [MAC1,MAC2,MAC3] SAR (sf*12) ;<--- for NCDx/NCCx
m_regs.MAC1 >>= shift;
m_regs.MAC2 >>= shift;
m_regs.MAC3 >>= shift;
// Color FIFO = [MAC1/16,MAC2/16,MAC3/16,CODE], [IR1,IR2,IR3] = [MAC1,MAC2,MAC3]
PushRGB(TruncateRGB<0>(m_regs.MAC1 / 16), TruncateRGB<1>(m_regs.MAC2 / 16), TruncateRGB<2>(m_regs.MAC3 / 16),
m_regs.RGBC[3]);
}
// Instruction handler for NCDS: clears FLAG, runs the NCDS calculation on V0
// using the sf/lm bits of the instruction word, then updates FLAG's error
// summary.
void Core::Execute_NCDS(Instruction inst)
{
  const bool shift_fraction = inst.sf;
  const bool limit_mode = inst.lm;

  // FLAG is rebuilt from scratch for every instruction.
  m_regs.FLAG.Clear();

  NCDS(m_regs.V0, shift_fraction, limit_mode);

  m_regs.FLAG.UpdateError();
}
} // namespace GTE } // namespace GTE

View File

@ -26,16 +26,30 @@ public:
void ExecuteInstruction(Instruction inst); void ExecuteInstruction(Instruction inst);
private: private:
template<u32 index>
s32 TruncateMAC(s64 value);
template<u32 index>
u8 TruncateRGB(s32 value);
template<u32 index>
void SetIR(s32 value, bool lm);
void SetMAC(u32 index, s64 value); void SetMAC(u32 index, s64 value);
void SetIR(u32 index, s32 value, bool lm); void SetIR(u32 index, s32 value, bool lm);
void SetIR0(s32 value); void SetIR0(s32 value);
void SetOTZ(s32 value); void SetOTZ(s32 value);
void PushSXY(s32 x, s32 y); void PushSXY(s32 x, s32 y);
void PushSZ(s32 value); void PushSZ(s32 value);
void PushRGB(u8 r, u8 g, u8 b, u8 c);
s32 Divide(s32 dividend, s32 divisor); s32 Divide(s32 dividend, s32 divisor);
s32 SaturateDivide(s32 result); s32 SaturateDivide(s32 result);
static s64 VecDot(const s16 A[3], const s16 B[3]);
static s64 VecDot(const s16 A[3], s16 B_x, s16 B_y, s16 B_z);
void RTPS(const s16 V[3], bool sf); void RTPS(const s16 V[3], bool sf);
void NCDS(const s16 V[3], bool sf, bool lm);
void Execute_RTPS(Instruction inst); void Execute_RTPS(Instruction inst);
void Execute_RTPT(Instruction inst); void Execute_RTPT(Instruction inst);
@ -43,8 +57,11 @@ private:
void Execute_SQR(Instruction inst); void Execute_SQR(Instruction inst);
void Execute_AVSZ3(Instruction inst); void Execute_AVSZ3(Instruction inst);
void Execute_AVSZ4(Instruction inst); void Execute_AVSZ4(Instruction inst);
void Execute_NCDS(Instruction inst);
Regs m_regs = {}; Regs m_regs = {};
}; };
#include "gte.inl"
} // namespace GTE } // namespace GTE

57
src/pse/gte.inl Normal file
View File

@ -0,0 +1,57 @@
#include "gte.h"
// Clamps value to the 0..255 range of a colour component and returns it as u8.
// When clamping is needed, the saturation flag selected by the template
// argument is raised: index 0 -> red, index 1 -> green, anything else -> blue.
template<u32 index>
u8 GTE::Core::TruncateRGB(s32 value)
{
  // Fast path: already representable, no flag is touched.
  const bool representable = (value >= 0) && (value <= 0xFF);
  if (representable)
    return static_cast<u8>(value);

  if constexpr (index == 0)
    m_regs.FLAG.color_r_saturated = true;
  else if constexpr (index == 1)
    m_regs.FLAG.color_g_saturated = true;
  else
    m_regs.FLAG.color_b_saturated = true;

  // Out of range: pin to the nearest bound.
  const s32 clamped = (value < 0) ? 0 : 0xFF;
  return static_cast<u8>(clamped);
}
// Narrows a 64-bit intermediate result to s32 for storage in MAC<index>.
// Values inside the s32 range are returned unchanged. Values outside it are
// saturated to the nearest s32 bound and the matching mac<index>_underflow /
// mac<index>_overflow flag is raised (no flag is raised for index > 3).
template<u32 index>
s32 GTE::Core::TruncateMAC(s64 value)
{
  const bool below_range = (value < INT64_C(-2147483648));
  const bool above_range = (value > INT64_C(2147483647));
  if (!below_range && !above_range)
    return static_cast<s32>(value);

  // Exactly one of below_range/above_range is set from here on.
  if constexpr (index == 0)
  {
    if (below_range)
      m_regs.FLAG.mac0_underflow = true;
    else
      m_regs.FLAG.mac0_overflow = true;
  }
  else if constexpr (index == 1)
  {
    if (below_range)
      m_regs.FLAG.mac1_underflow = true;
    else
      m_regs.FLAG.mac1_overflow = true;
  }
  else if constexpr (index == 2)
  {
    if (below_range)
      m_regs.FLAG.mac2_underflow = true;
    else
      m_regs.FLAG.mac2_overflow = true;
  }
  else if constexpr (index == 3)
  {
    if (below_range)
      m_regs.FLAG.mac3_underflow = true;
    else
      m_regs.FLAG.mac3_overflow = true;
  }

  if (below_range)
    return static_cast<s32>(UINT32_C(0x80000000));

  return static_cast<s32>(UINT32_C(0x7FFFFFFF));
}
// Compile-time-indexed counterpart of SetIR(u32 index, s32 value, bool lm).
// Currently a stub: the body is empty, so value and lm are ignored and no
// register or flag is modified.
// NOTE(review): presumably meant to store value into IR<index> with saturation
// controlled by lm - confirm before relying on it.
template<u32 index>
void GTE::Core::SetIR(s32 value, bool lm)
{
}

View File

@ -101,12 +101,12 @@ union Regs
s16 RT[3][3]; // 32-36 s16 RT[3][3]; // 32-36
u16 pad17; // 36 u16 pad17; // 36
s32 TR[3]; // 37-39 s32 TR[3]; // 37-39
u16 L[3][3]; // 40-44 s16 LLM[3][3]; // 40-44
u16 pad18; // 44 u16 pad18; // 44
u32 RBK; // 45 u32 RBK; // 45
u32 GBK; // 46 u32 GBK; // 46
u32 BBK; // 47 u32 BBK; // 47
u16 LR[3][3]; // 48-52 s16 LCM[3][3]; // 48-52
u16 pad19; // 52 u16 pad19; // 52
u32 RFC; // 53 u32 RFC; // 53
u32 GFC; // 54 u32 GFC; // 54

View File

@ -91,6 +91,7 @@
<ItemGroup> <ItemGroup>
<None Include="cpu_core.inl" /> <None Include="cpu_core.inl" />
<None Include="bus.inl" /> <None Include="bus.inl" />
<None Include="gte.inl" />
</ItemGroup> </ItemGroup>
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{868B98C8-65A1-494B-8346-250A73A48C0A}</ProjectGuid> <ProjectGuid>{868B98C8-65A1-494B-8346-250A73A48C0A}</ProjectGuid>

View File

@ -43,5 +43,6 @@
<ItemGroup> <ItemGroup>
<None Include="cpu_core.inl" /> <None Include="cpu_core.inl" />
<None Include="bus.inl" /> <None Include="bus.inl" />
<None Include="gte.inl" />
</ItemGroup> </ItemGroup>
</Project> </Project>