diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 5bad2255d..f41447d0b 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -33,6 +33,7 @@ Log_SetChannel(GPU); std::unique_ptr g_gpu; +alignas(HOST_PAGE_SIZE) u16 g_vram[VRAM_SIZE / sizeof(u16)]; const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable(); @@ -132,6 +133,10 @@ void GPU::Reset(bool clear_vram) m_crtc_state.in_vblank = false; m_crtc_state.interlaced_field = 0; m_crtc_state.interlaced_display_field = 0; + + if (clear_vram) + std::memset(g_vram, 0, sizeof(g_vram)); + SoftReset(); UpdateDisplay(); } @@ -300,7 +305,7 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ else { ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - sw.DoBytes(m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); + sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); } } @@ -1074,7 +1079,7 @@ u32 GPU::ReadGPUREAD() // Read with correct wrap-around behavior. const u16 read_x = (m_vram_transfer.x + m_vram_transfer.col) % VRAM_WIDTH; const u16 read_y = (m_vram_transfer.y + m_vram_transfer.row) % VRAM_HEIGHT; - value |= ZeroExtend32(m_vram_ptr[read_y * VRAM_WIDTH + read_x]) << (i * 16); + value |= ZeroExtend32(g_vram[read_y * VRAM_WIDTH + read_x]) << (i * 16); if (++m_vram_transfer.col == m_vram_transfer.width) { @@ -1357,7 +1362,7 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) for (u32 yoffs = 0; yoffs < height; yoffs++) { const u32 row = (y + yoffs) % VRAM_HEIGHT; - std::fill_n(&m_vram_ptr[row * VRAM_WIDTH + x], width, color16); + std::fill_n(&g_vram[row * VRAM_WIDTH + x], width, color16); } } else if (IsInterlacedRenderingEnabled()) @@ -1373,7 +1378,7 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) if ((row & u32(1)) == active_field) continue; - u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH]; + u16* row_ptr = &g_vram[row * VRAM_WIDTH]; for (u32 xoffs = 0; xoffs < width; xoffs++) { const u32 col = (x + xoffs) % VRAM_WIDTH; @@ -1386,7 +1391,7 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) for (u32 yoffs = 0; yoffs < height; yoffs++) { const u32 row = (y + yoffs) % VRAM_HEIGHT; - u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH]; + u16* row_ptr = &g_vram[row * VRAM_WIDTH]; for (u32 xoffs = 0; xoffs < width; xoffs++) { const u32 col = (x + xoffs) % VRAM_WIDTH; @@ -1402,7 +1407,7 @@ void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !set_mask && !check_mask) { const u16* src_ptr = static_cast(data); - u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x]; + u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x]; for (u32 yoffs = 0; yoffs < height; yoffs++) { std::copy_n(src_ptr, width, dst_ptr); @@ -1420,7 +1425,7 @@ void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool for (u32 row = 0; row < height;) { - u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH]; + u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH]; for (u32 col = 0; col < width;) { // TODO: Handle unaligned reads... @@ -1475,8 +1480,8 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he { for (u32 row = 0; row < height; row++) { - const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; for (s32 col = static_cast(width - 1); col >= 0; col--) { @@ -1491,8 +1496,8 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he { for (u32 row = 0; row < height; row++) { - const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; for (u32 col = 0; col < width; col++) { @@ -2175,11 +2180,11 @@ bool GPU::DumpVRAMToFile(const char* filename) const char* extension = std::strrchr(filename, '.'); if (extension && StringUtil::Strcasecmp(extension, ".png") == 0) { - return DumpVRAMToFile(filename, VRAM_WIDTH, VRAM_HEIGHT, sizeof(u16) * VRAM_WIDTH, m_vram_ptr, true); + return DumpVRAMToFile(filename, VRAM_WIDTH, VRAM_HEIGHT, sizeof(u16) * VRAM_WIDTH, g_vram, true); } else if (extension && StringUtil::Strcasecmp(extension, ".bin") == 0) { - return FileSystem::WriteBinaryFile(filename, m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); + return FileSystem::WriteBinaryFile(filename, g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); } else { diff --git a/src/core/gpu.h b/src/core/gpu.h index 42165fd3d..702711e53 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -11,6 +11,7 @@ #include "common/bitfield.h" #include "common/fifo_queue.h" #include "common/rectangle.h" +#include "common/types.h" #include #include @@ -359,9 +360,6 @@ protected: std::unique_ptr m_crtc_tick_event; std::unique_ptr m_command_tick_event; - // Pointer to VRAM, used for reads/writes. In the hardware backends, this is the shadow buffer. - u16* m_vram_ptr = nullptr; - union GPUSTAT { u32 bits; @@ -651,3 +649,4 @@ private: }; extern std::unique_ptr g_gpu; +extern u16 g_vram[VRAM_SIZE / sizeof(u16)]; diff --git a/src/core/gpu_backend.cpp b/src/core/gpu_backend.cpp index ffcda244e..f232d870a 100644 --- a/src/core/gpu_backend.cpp +++ b/src/core/gpu_backend.cpp @@ -23,7 +23,7 @@ bool GPUBackend::Initialize(bool force_thread) return true; } -void GPUBackend::Reset(bool clear_vram) +void GPUBackend::Reset() { Sync(true); m_drawing_area = {}; diff --git a/src/core/gpu_backend.h b/src/core/gpu_backend.h index f34f0e0b9..3115b4d22 100644 --- a/src/core/gpu_backend.h +++ b/src/core/gpu_backend.h @@ -22,12 +22,11 @@ public: GPUBackend(); virtual ~GPUBackend(); - ALWAYS_INLINE u16* GetVRAM() const { return m_vram_ptr; } ALWAYS_INLINE const Threading::Thread* GetThread() const { return m_use_gpu_thread ? &m_gpu_thread : nullptr; } virtual bool Initialize(bool force_thread); virtual void UpdateSettings(); - virtual void Reset(bool clear_vram); + virtual void Reset(); virtual void Shutdown(); GPUBackendFillVRAMCommand* NewFillVRAMCommand(); @@ -64,8 +63,6 @@ protected: void HandleCommand(const GPUBackendCommand* cmd); - u16* m_vram_ptr = nullptr; - Common::Rectangle m_drawing_area{}; Threading::KernelSemaphore m_sync_semaphore; diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 33b60c9bf..4b7101b00 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -582,7 +582,7 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand() { DumpVRAMToFile(TinyString::from_format("vram_to_cpu_copy_{}.png", s_vram_to_cpu_dump_id++), m_vram_transfer.width, m_vram_transfer.height, sizeof(u16) * VRAM_WIDTH, - &m_vram_ptr[m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x], true); + &g_vram[m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x], true); } // switch to pixel-by-pixel read state diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 235d83240..bbb1e4a72 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -130,8 +130,6 @@ private: GPU_HW::GPU_HW() : GPU() { - m_vram_ptr = m_vram_shadow.data(); - #ifdef _DEBUG s_draw_number = 0; #endif @@ -251,9 +249,8 @@ void GPU_HW::Reset(bool clear_vram) m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; - m_vram_shadow.fill(0); if (m_sw_renderer) - m_sw_renderer->Reset(clear_vram); + m_sw_renderer->Reset(); m_batch = {}; m_batch_ubo_data = {}; @@ -442,7 +439,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) Panic("Failed to recreate buffers."); RestoreDeviceContext(); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, false, false); UpdateDepthBufferFromMaskBit(); UpdateDisplay(); } @@ -2291,8 +2288,6 @@ void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw) if (current_enabled == new_enabled) return; - m_vram_ptr = m_vram_shadow.data(); - if (!new_enabled) { if (m_sw_renderer) @@ -2310,7 +2305,6 @@ void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw) { FlushRender(); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - std::memcpy(sw_renderer->GetVRAM(), m_vram_ptr, sizeof(u16) * VRAM_WIDTH * VRAM_HEIGHT); // Sync the drawing area. GPUBackendSetDrawingAreaCommand* cmd = sw_renderer->NewSetDrawingAreaCommand(); @@ -2319,7 +2313,6 @@ void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw) } m_sw_renderer = std::move(sw_renderer); - m_vram_ptr = m_sw_renderer->GetVRAM(); } void GPU_HW::FillBackendCommandParameters(GPUBackendCommand* cmd) const @@ -2429,7 +2422,7 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) // Stage the readback and copy it into our shadow buffer. g_gpu_device->DownloadTexture(m_vram_readback_texture.get(), 0, 0, encoded_width, encoded_height, - reinterpret_cast(&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]), + reinterpret_cast(&g_vram[copy_rect.top * VRAM_WIDTH + copy_rect.left]), VRAM_WIDTH * sizeof(u16)); RestoreDeviceContext(); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 83fdee466..53d960b2e 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -222,8 +222,6 @@ private: std::unique_ptr m_vram_upload_buffer; std::unique_ptr m_vram_write_texture; - FixedHeapArray m_vram_shadow; - std::unique_ptr m_sw_renderer; BatchVertex* m_batch_start_vertex_ptr = nullptr; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 1e55bbcc4..ee61bc1e2 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -24,10 +24,7 @@ ALWAYS_INLINE static constexpr std::tuple MinMax(T v1, T v2) return std::tie(v1, v2); } -GPU_SW::GPU_SW() -{ - m_vram_ptr = m_backend.GetVRAM(); -} +GPU_SW::GPU_SW() = default; GPU_SW::~GPU_SW() { @@ -84,7 +81,7 @@ void GPU_SW::Reset(bool clear_vram) { GPU::Reset(clear_vram); - m_backend.Reset(clear_vram); + m_backend.Reset(); } void GPU_SW::UpdateSettings(const Settings& old_settings) @@ -269,7 +266,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field const u32 rows = height >> interlaced_shift; dst_stride <<= interlaced_shift; - const u16* src_ptr = &m_vram_ptr[src_y * VRAM_WIDTH + src_x]; + const u16* src_ptr = &g_vram[src_y * VRAM_WIDTH + src_x]; const u32 src_step = VRAM_WIDTH << interleaved_shift; for (u32 row = 0; row < rows; row++) { @@ -286,7 +283,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field const u32 end_x = src_x + width; for (u32 row = 0; row < rows; row++) { - const u16* src_row_ptr = &m_vram_ptr[(src_y % VRAM_HEIGHT) * VRAM_WIDTH]; + const u16* src_row_ptr = &g_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH]; OutputPixelType* dst_row_ptr = reinterpret_cast(dst_ptr); for (u32 col = src_x; col < end_x; col++) @@ -352,7 +349,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh if ((src_x + width) <= VRAM_WIDTH && (src_y + (rows << interleaved_shift)) <= VRAM_HEIGHT) { - const u8* src_ptr = reinterpret_cast(&m_vram_ptr[src_y * VRAM_WIDTH + src_x]) + (skip_x * 3); + const u8* src_ptr = reinterpret_cast(&g_vram[src_y * VRAM_WIDTH + src_x]) + (skip_x * 3); const u32 src_stride = (VRAM_WIDTH << interleaved_shift) * sizeof(u16); for (u32 row = 0; row < rows; row++) { @@ -412,7 +409,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh { for (u32 row = 0; row < rows; row++) { - const u16* src_row_ptr = &m_vram_ptr[(src_y % VRAM_HEIGHT) * VRAM_WIDTH]; + const u16* src_row_ptr = &g_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH]; OutputPixelType* dst_row_ptr = reinterpret_cast(dst_ptr); for (u32 col = 0; col < width; col++) diff --git a/src/core/gpu_sw_backend.cpp b/src/core/gpu_sw_backend.cpp index 8d839497d..6eb760bca 100644 --- a/src/core/gpu_sw_backend.cpp +++ b/src/core/gpu_sw_backend.cpp @@ -1,6 +1,7 @@ // SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0) +#include "gpu.h" #include "gpu_sw_backend.h" #include "system.h" @@ -8,11 +9,7 @@ #include -GPU_SW_Backend::GPU_SW_Backend() : GPUBackend() -{ - m_vram.fill(0); - m_vram_ptr = m_vram.data(); -} +GPU_SW_Backend::GPU_SW_Backend() = default; GPU_SW_Backend::~GPU_SW_Backend() = default; @@ -21,12 +18,9 @@ bool GPU_SW_Backend::Initialize(bool force_thread) return GPUBackend::Initialize(force_thread); } -void GPU_SW_Backend::Reset(bool clear_vram) +void GPU_SW_Backend::Reset() { - GPUBackend::Reset(clear_vram); - - if (clear_vram) - m_vram.fill(0); + GPUBackend::Reset(); } void GPU_SW_Backend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) @@ -728,7 +722,7 @@ void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GP for (u32 yoffs = 0; yoffs < height; yoffs++) { const u32 row = (y + yoffs) % VRAM_HEIGHT; - std::fill_n(&m_vram_ptr[row * VRAM_WIDTH + x], width, color16); + std::fill_n(&g_vram[row * VRAM_WIDTH + x], width, color16); } } else if (params.interlaced_rendering) @@ -741,7 +735,7 @@ void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GP if ((row & u32(1)) == active_field) continue; - u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH]; + u16* row_ptr = &g_vram[row * VRAM_WIDTH]; for (u32 xoffs = 0; xoffs < width; xoffs++) { const u32 col = (x + xoffs) % VRAM_WIDTH; @@ -754,7 +748,7 @@ void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GP for (u32 yoffs = 0; yoffs < height; yoffs++) { const u32 row = (y + yoffs) % VRAM_HEIGHT; - u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH]; + u16* row_ptr = &g_vram[row * VRAM_WIDTH]; for (u32 xoffs = 0; xoffs < width; xoffs++) { const u32 col = (x + xoffs) % VRAM_WIDTH; @@ -771,7 +765,7 @@ void GPU_SW_Backend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !params.IsMaskingEnabled()) { const u16* src_ptr = static_cast(data); - u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x]; + u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x]; for (u32 yoffs = 0; yoffs < height; yoffs++) { std::copy_n(src_ptr, width, dst_ptr); @@ -788,7 +782,7 @@ void GPU_SW_Backend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* for (u32 row = 0; row < height;) { - u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH]; + u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH]; for (u32 col = 0; col < width;) { // TODO: Handle unaligned reads... @@ -844,8 +838,8 @@ void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wi { for (u32 row = 0; row < height; row++) { - const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; for (s32 col = static_cast(width - 1); col >= 0; col--) { @@ -860,8 +854,8 @@ void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wi { for (u32 row = 0; row < height; row++) { - const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; for (u32 col = 0; col < width; col++) { diff --git a/src/core/gpu_sw_backend.h b/src/core/gpu_sw_backend.h index e6f09c708..88dfb70a8 100644 --- a/src/core/gpu_sw_backend.h +++ b/src/core/gpu_sw_backend.h @@ -14,12 +14,12 @@ public: ~GPU_SW_Backend() override; bool Initialize(bool force_thread) override; - void Reset(bool clear_vram) override; + void Reset() override; - ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const { return m_vram[VRAM_WIDTH * y + x]; } - ALWAYS_INLINE_RELEASE const u16* GetPixelPtr(const u32 x, const u32 y) const { return &m_vram[VRAM_WIDTH * y + x]; } - ALWAYS_INLINE_RELEASE u16* GetPixelPtr(const u32 x, const u32 y) { return &m_vram[VRAM_WIDTH * y + x]; } - ALWAYS_INLINE_RELEASE void SetPixel(const u32 x, const u32 y, const u16 value) { m_vram[VRAM_WIDTH * y + x] = value; } + ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const { return g_vram[VRAM_WIDTH * y + x]; } + ALWAYS_INLINE_RELEASE const u16* GetPixelPtr(const u32 x, const u32 y) const { return &g_vram[VRAM_WIDTH * y + x]; } + ALWAYS_INLINE_RELEASE u16* GetPixelPtr(const u32 x, const u32 y) { return &g_vram[VRAM_WIDTH * y + x]; } + ALWAYS_INLINE_RELEASE void SetPixel(const u32 x, const u32 y, const u16 value) { g_vram[VRAM_WIDTH * y + x] = value; } // this is actually (31 * 255) >> 4) == 494, but to simplify addressing we use the next power of two (512) static constexpr u32 DITHER_LUT_SIZE = 512; @@ -165,6 +165,4 @@ protected: const GPUBackendDrawLineCommand::Vertex* p0, const GPUBackendDrawLineCommand::Vertex* p1); DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool dithering_enable); - - std::array m_vram; };