From 22c76d43c31af395cef4f5004e3f7f9861f111f8 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sat, 25 May 2024 02:06:40 +1000 Subject: [PATCH] CPU/PGXP: Reduce vertex cache memory --- src/core/cpu_pgxp.cpp | 47 ++++++++++++++++++------------------------- src/core/cpu_pgxp.h | 2 +- src/core/gte.cpp | 2 +- 3 files changed, 22 insertions(+), 29 deletions(-) diff --git a/src/core/cpu_pgxp.cpp b/src/core/cpu_pgxp.cpp index f27ff4ab9..9baaf42ce 100644 --- a/src/core/cpu_pgxp.cpp +++ b/src/core/cpu_pgxp.cpp @@ -27,8 +27,8 @@ namespace { enum : u32 { - VERTEX_CACHE_WIDTH = 0x800 * 2, - VERTEX_CACHE_HEIGHT = 0x800 * 2, + VERTEX_CACHE_WIDTH = 2048, + VERTEX_CACHE_HEIGHT = 2048, VERTEX_CACHE_SIZE = VERTEX_CACHE_WIDTH * VERTEX_CACHE_HEIGHT, PGXP_MEM_SIZE = (static_cast<u32>(Bus::RAM_8MB_SIZE) + static_cast<u32>(CPU::SCRATCHPAD_SIZE)) / 4, PGXP_MEM_SCRATCH_OFFSET = Bus::RAM_8MB_SIZE / 4, @@ -70,8 +70,8 @@ union psx_value }; } // namespace -static void CacheVertex(s16 sx, s16 sy, const PGXP_value& vertex); -static PGXP_value* GetCachedVertex(short sx, short sy); +static void CacheVertex(u32 value, const PGXP_value& vertex); +static PGXP_value* GetCachedVertex(u32 value); static float TruncateVertexPosition(float p); static bool IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y); @@ -168,7 +168,7 @@ void CPU::PGXP::Reset() if (s_mem) std::memset(s_mem, 0, sizeof(PGXP_value) * PGXP_MEM_SIZE); - if (s_vertex_cache) + if (g_settings.gpu_pgxp_vertex_cache && s_vertex_cache) std::memset(s_vertex_cache, 0, sizeof(PGXP_value) * VERTEX_CACHE_SIZE); } @@ -461,7 +461,7 @@ void CPU::PGXP::LogValueStr(SmallStringBase& str, const char* name, u32 rval, co #endif -void CPU::PGXP::GTE_PushSXYZ2f(float x, float y, float z, u32 v) +void CPU::PGXP::GTE_RTPS(float x, float y, float z, u32 value) { // push values down FIFO SXY0 = SXY1; @@ -470,11 +470,11 @@ void CPU::PGXP::GTE_PushSXYZ2f(float x, float y, float z, u32 v) SXY2.x = x; SXY2.y = y; SXY2.z = z; - SXY2.value = v; + SXY2.value = 
value; SXY2.flags = VALID_ALL; if (g_settings.gpu_pgxp_vertex_cache) - CacheVertex(static_cast<s16>(Truncate16(v)), static_cast<s16>(Truncate16(v >> 16)), SXY2); + CacheVertex(value, SXY2); } #define VX(n) (psxRegs.CP2D.p[n << 1].sw.l) @@ -583,24 +583,21 @@ void CPU::PGXP::CPU_SWC2(u32 instr, u32 addr, u32 rtVal) WriteMem(&g_state.pgxp_gte[idx], addr); } -ALWAYS_INLINE_RELEASE void CPU::PGXP::CacheVertex(s16 sx, s16 sy, const PGXP_value& vertex) +ALWAYS_INLINE_RELEASE void CPU::PGXP::CacheVertex(u32 value, const PGXP_value& vertex) { - if (sx >= -0x800 && sx <= 0x7ff && sy >= -0x800 && sy <= 0x7ff) - { - // Write vertex into cache - s_vertex_cache[(sy + 0x800) * VERTEX_CACHE_WIDTH + (sx + 0x800)] = vertex; - } + const s16 sx = static_cast<s16>(value & 0xFFFFu); + const s16 sy = static_cast<s16>(value >> 16); + DebugAssert(sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023); + s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)] = vertex; } -ALWAYS_INLINE_RELEASE CPU::PGXP_value* CPU::PGXP::GetCachedVertex(short sx, short sy) +ALWAYS_INLINE_RELEASE CPU::PGXP_value* CPU::PGXP::GetCachedVertex(u32 value) { - if (sx >= -0x800 && sx <= 0x7ff && sy >= -0x800 && sy <= 0x7ff) - { - // Return pointer to cache entry - return &s_vertex_cache[(sy + 0x800) * VERTEX_CACHE_WIDTH + (sx + 0x800)]; - } - - return nullptr; + const s16 sx = static_cast<s16>(value & 0xFFFFu); + const s16 sy = static_cast<s16>(value >> 16); + return (sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023) ? 
+ &s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)] : + nullptr; } ALWAYS_INLINE_RELEASE float CPU::PGXP::TruncateVertexPosition(float p) @@ -646,11 +643,7 @@ bool CPU::PGXP::GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, i if (g_settings.gpu_pgxp_vertex_cache) { - const short psx_x = (short)(value & 0xFFFFu); - const short psx_y = (short)(value >> 16); - - // Look in cache for valid vertex - vert = GetCachedVertex(psx_x, psx_y); + vert = GetCachedVertex(value); if (vert && (vert->flags & VALID_XY) == VALID_XY) { *out_x = TruncateVertexPosition(vert->x) + static_cast<float>(xOffs); diff --git a/src/core/cpu_pgxp.h b/src/core/cpu_pgxp.h index a8305a56f..ae9a009a2 100644 --- a/src/core/cpu_pgxp.h +++ b/src/core/cpu_pgxp.h @@ -12,7 +12,7 @@ void Shutdown(); // -- GTE functions // Transforms -void GTE_PushSXYZ2f(float x, float y, float z, u32 v); +void GTE_RTPS(float x, float y, float z, u32 value); int GTE_NCLIP_valid(u32 sxy0, u32 sxy1, u32 sxy2); float GTE_NCLIP(); diff --git a/src/core/gte.cpp b/src/core/gte.cpp index 3582429dd..492594b2a 100644 --- a/src/core/gte.cpp +++ b/src/core/gte.cpp @@ -794,7 +794,7 @@ void GTE::RTPS(const s16 V[3], u8 shift, bool lm, bool last) precise_x = std::clamp(precise_x, -1024.0f, 1023.0f); precise_y = std::clamp(precise_y, -1024.0f, 1023.0f); - CPU::PGXP::GTE_PushSXYZ2f(precise_x, precise_y, precise_z, REGS.dr32[14]); + CPU::PGXP::GTE_RTPS(precise_x, precise_y, precise_z, REGS.dr32[14]); } if (last)