GPU: Make VRAM a global object

This commit is contained in:
Stenzek 2023-12-23 16:53:15 +10:00
parent 7340324ed9
commit 411213d6a0
No known key found for this signature in database
10 changed files with 50 additions and 69 deletions

View File

@ -33,6 +33,7 @@
Log_SetChannel(GPU);
std::unique_ptr<GPU> g_gpu;
alignas(HOST_PAGE_SIZE) u16 g_vram[VRAM_SIZE / sizeof(u16)];
const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable();
@ -132,6 +133,10 @@ void GPU::Reset(bool clear_vram)
m_crtc_state.in_vblank = false;
m_crtc_state.interlaced_field = 0;
m_crtc_state.interlaced_display_field = 0;
if (clear_vram)
std::memset(g_vram, 0, sizeof(g_vram));
SoftReset();
UpdateDisplay();
}
@ -300,7 +305,7 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ
else
{
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
sw.DoBytes(m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
}
}
@ -1074,7 +1079,7 @@ u32 GPU::ReadGPUREAD()
// Read with correct wrap-around behavior.
const u16 read_x = (m_vram_transfer.x + m_vram_transfer.col) % VRAM_WIDTH;
const u16 read_y = (m_vram_transfer.y + m_vram_transfer.row) % VRAM_HEIGHT;
value |= ZeroExtend32(m_vram_ptr[read_y * VRAM_WIDTH + read_x]) << (i * 16);
value |= ZeroExtend32(g_vram[read_y * VRAM_WIDTH + read_x]) << (i * 16);
if (++m_vram_transfer.col == m_vram_transfer.width)
{
@ -1357,7 +1362,7 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
for (u32 yoffs = 0; yoffs < height; yoffs++)
{
const u32 row = (y + yoffs) % VRAM_HEIGHT;
std::fill_n(&m_vram_ptr[row * VRAM_WIDTH + x], width, color16);
std::fill_n(&g_vram[row * VRAM_WIDTH + x], width, color16);
}
}
else if (IsInterlacedRenderingEnabled())
@ -1373,7 +1378,7 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
if ((row & u32(1)) == active_field)
continue;
u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
u16* row_ptr = &g_vram[row * VRAM_WIDTH];
for (u32 xoffs = 0; xoffs < width; xoffs++)
{
const u32 col = (x + xoffs) % VRAM_WIDTH;
@ -1386,7 +1391,7 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
for (u32 yoffs = 0; yoffs < height; yoffs++)
{
const u32 row = (y + yoffs) % VRAM_HEIGHT;
u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
u16* row_ptr = &g_vram[row * VRAM_WIDTH];
for (u32 xoffs = 0; xoffs < width; xoffs++)
{
const u32 col = (x + xoffs) % VRAM_WIDTH;
@ -1402,7 +1407,7 @@ void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool
if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !set_mask && !check_mask)
{
const u16* src_ptr = static_cast<const u16*>(data);
u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x];
u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x];
for (u32 yoffs = 0; yoffs < height; yoffs++)
{
std::copy_n(src_ptr, width, dst_ptr);
@ -1420,7 +1425,7 @@ void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool
for (u32 row = 0; row < height;)
{
u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
for (u32 col = 0; col < width;)
{
// TODO: Handle unaligned reads...
@ -1475,8 +1480,8 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he
{
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
{
@ -1491,8 +1496,8 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he
{
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
for (u32 col = 0; col < width; col++)
{
@ -2175,11 +2180,11 @@ bool GPU::DumpVRAMToFile(const char* filename)
const char* extension = std::strrchr(filename, '.');
if (extension && StringUtil::Strcasecmp(extension, ".png") == 0)
{
return DumpVRAMToFile(filename, VRAM_WIDTH, VRAM_HEIGHT, sizeof(u16) * VRAM_WIDTH, m_vram_ptr, true);
return DumpVRAMToFile(filename, VRAM_WIDTH, VRAM_HEIGHT, sizeof(u16) * VRAM_WIDTH, g_vram, true);
}
else if (extension && StringUtil::Strcasecmp(extension, ".bin") == 0)
{
return FileSystem::WriteBinaryFile(filename, m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
return FileSystem::WriteBinaryFile(filename, g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
}
else
{

View File

@ -11,6 +11,7 @@
#include "common/bitfield.h"
#include "common/fifo_queue.h"
#include "common/rectangle.h"
#include "common/types.h"
#include <algorithm>
#include <array>
@ -359,9 +360,6 @@ protected:
std::unique_ptr<TimingEvent> m_crtc_tick_event;
std::unique_ptr<TimingEvent> m_command_tick_event;
// Pointer to VRAM, used for reads/writes. In the hardware backends, this is the shadow buffer.
u16* m_vram_ptr = nullptr;
union GPUSTAT
{
u32 bits;
@ -651,3 +649,4 @@ private:
};
extern std::unique_ptr<GPU> g_gpu;
extern u16 g_vram[VRAM_SIZE / sizeof(u16)];

View File

@ -23,7 +23,7 @@ bool GPUBackend::Initialize(bool force_thread)
return true;
}
void GPUBackend::Reset(bool clear_vram)
void GPUBackend::Reset()
{
Sync(true);
m_drawing_area = {};

View File

@ -22,12 +22,11 @@ public:
GPUBackend();
virtual ~GPUBackend();
ALWAYS_INLINE u16* GetVRAM() const { return m_vram_ptr; }
ALWAYS_INLINE const Threading::Thread* GetThread() const { return m_use_gpu_thread ? &m_gpu_thread : nullptr; }
virtual bool Initialize(bool force_thread);
virtual void UpdateSettings();
virtual void Reset(bool clear_vram);
virtual void Reset();
virtual void Shutdown();
GPUBackendFillVRAMCommand* NewFillVRAMCommand();
@ -64,8 +63,6 @@ protected:
void HandleCommand(const GPUBackendCommand* cmd);
u16* m_vram_ptr = nullptr;
Common::Rectangle<u32> m_drawing_area{};
Threading::KernelSemaphore m_sync_semaphore;

View File

@ -582,7 +582,7 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand()
{
DumpVRAMToFile(TinyString::from_format("vram_to_cpu_copy_{}.png", s_vram_to_cpu_dump_id++), m_vram_transfer.width,
m_vram_transfer.height, sizeof(u16) * VRAM_WIDTH,
&m_vram_ptr[m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x], true);
&g_vram[m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x], true);
}
// switch to pixel-by-pixel read state

View File

@ -130,8 +130,6 @@ private:
GPU_HW::GPU_HW() : GPU()
{
m_vram_ptr = m_vram_shadow.data();
#ifdef _DEBUG
s_draw_number = 0;
#endif
@ -251,9 +249,8 @@ void GPU_HW::Reset(bool clear_vram)
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
m_vram_shadow.fill(0);
if (m_sw_renderer)
m_sw_renderer->Reset(clear_vram);
m_sw_renderer->Reset();
m_batch = {};
m_batch_ubo_data = {};
@ -442,7 +439,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings)
Panic("Failed to recreate buffers.");
RestoreDeviceContext();
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, false, false);
UpdateDepthBufferFromMaskBit();
UpdateDisplay();
}
@ -2291,8 +2288,6 @@ void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw)
if (current_enabled == new_enabled)
return;
m_vram_ptr = m_vram_shadow.data();
if (!new_enabled)
{
if (m_sw_renderer)
@ -2310,7 +2305,6 @@ void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw)
{
FlushRender();
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
std::memcpy(sw_renderer->GetVRAM(), m_vram_ptr, sizeof(u16) * VRAM_WIDTH * VRAM_HEIGHT);
// Sync the drawing area.
GPUBackendSetDrawingAreaCommand* cmd = sw_renderer->NewSetDrawingAreaCommand();
@ -2319,7 +2313,6 @@ void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw)
}
m_sw_renderer = std::move(sw_renderer);
m_vram_ptr = m_sw_renderer->GetVRAM();
}
void GPU_HW::FillBackendCommandParameters(GPUBackendCommand* cmd) const
@ -2429,7 +2422,7 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
// Stage the readback and copy it into our shadow buffer.
g_gpu_device->DownloadTexture(m_vram_readback_texture.get(), 0, 0, encoded_width, encoded_height,
reinterpret_cast<u32*>(&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]),
reinterpret_cast<u32*>(&g_vram[copy_rect.top * VRAM_WIDTH + copy_rect.left]),
VRAM_WIDTH * sizeof(u16));
RestoreDeviceContext();

View File

@ -222,8 +222,6 @@ private:
std::unique_ptr<GPUTextureBuffer> m_vram_upload_buffer;
std::unique_ptr<GPUTexture> m_vram_write_texture;
FixedHeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow;
std::unique_ptr<GPU_SW_Backend> m_sw_renderer;
BatchVertex* m_batch_start_vertex_ptr = nullptr;

View File

@ -24,10 +24,7 @@ ALWAYS_INLINE static constexpr std::tuple<T, T> MinMax(T v1, T v2)
return std::tie(v1, v2);
}
GPU_SW::GPU_SW()
{
m_vram_ptr = m_backend.GetVRAM();
}
GPU_SW::GPU_SW() = default;
GPU_SW::~GPU_SW()
{
@ -84,7 +81,7 @@ void GPU_SW::Reset(bool clear_vram)
{
GPU::Reset(clear_vram);
m_backend.Reset(clear_vram);
m_backend.Reset();
}
void GPU_SW::UpdateSettings(const Settings& old_settings)
@ -269,7 +266,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field
const u32 rows = height >> interlaced_shift;
dst_stride <<= interlaced_shift;
const u16* src_ptr = &m_vram_ptr[src_y * VRAM_WIDTH + src_x];
const u16* src_ptr = &g_vram[src_y * VRAM_WIDTH + src_x];
const u32 src_step = VRAM_WIDTH << interleaved_shift;
for (u32 row = 0; row < rows; row++)
{
@ -286,7 +283,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field
const u32 end_x = src_x + width;
for (u32 row = 0; row < rows; row++)
{
const u16* src_row_ptr = &m_vram_ptr[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
const u16* src_row_ptr = &g_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
OutputPixelType* dst_row_ptr = reinterpret_cast<OutputPixelType*>(dst_ptr);
for (u32 col = src_x; col < end_x; col++)
@ -352,7 +349,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh
if ((src_x + width) <= VRAM_WIDTH && (src_y + (rows << interleaved_shift)) <= VRAM_HEIGHT)
{
const u8* src_ptr = reinterpret_cast<const u8*>(&m_vram_ptr[src_y * VRAM_WIDTH + src_x]) + (skip_x * 3);
const u8* src_ptr = reinterpret_cast<const u8*>(&g_vram[src_y * VRAM_WIDTH + src_x]) + (skip_x * 3);
const u32 src_stride = (VRAM_WIDTH << interleaved_shift) * sizeof(u16);
for (u32 row = 0; row < rows; row++)
{
@ -412,7 +409,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh
{
for (u32 row = 0; row < rows; row++)
{
const u16* src_row_ptr = &m_vram_ptr[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
const u16* src_row_ptr = &g_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
OutputPixelType* dst_row_ptr = reinterpret_cast<OutputPixelType*>(dst_ptr);
for (u32 col = 0; col < width; col++)

View File

@ -1,6 +1,7 @@
// SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
// SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)
#include "gpu.h"
#include "gpu_sw_backend.h"
#include "system.h"
@ -8,11 +9,7 @@
#include <algorithm>
GPU_SW_Backend::GPU_SW_Backend() : GPUBackend()
{
m_vram.fill(0);
m_vram_ptr = m_vram.data();
}
GPU_SW_Backend::GPU_SW_Backend() = default;
GPU_SW_Backend::~GPU_SW_Backend() = default;
@ -21,12 +18,9 @@ bool GPU_SW_Backend::Initialize(bool force_thread)
return GPUBackend::Initialize(force_thread);
}
void GPU_SW_Backend::Reset(bool clear_vram)
void GPU_SW_Backend::Reset()
{
GPUBackend::Reset(clear_vram);
if (clear_vram)
m_vram.fill(0);
GPUBackend::Reset();
}
void GPU_SW_Backend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
@ -728,7 +722,7 @@ void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GP
for (u32 yoffs = 0; yoffs < height; yoffs++)
{
const u32 row = (y + yoffs) % VRAM_HEIGHT;
std::fill_n(&m_vram_ptr[row * VRAM_WIDTH + x], width, color16);
std::fill_n(&g_vram[row * VRAM_WIDTH + x], width, color16);
}
}
else if (params.interlaced_rendering)
@ -741,7 +735,7 @@ void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GP
if ((row & u32(1)) == active_field)
continue;
u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
u16* row_ptr = &g_vram[row * VRAM_WIDTH];
for (u32 xoffs = 0; xoffs < width; xoffs++)
{
const u32 col = (x + xoffs) % VRAM_WIDTH;
@ -754,7 +748,7 @@ void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GP
for (u32 yoffs = 0; yoffs < height; yoffs++)
{
const u32 row = (y + yoffs) % VRAM_HEIGHT;
u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
u16* row_ptr = &g_vram[row * VRAM_WIDTH];
for (u32 xoffs = 0; xoffs < width; xoffs++)
{
const u32 col = (x + xoffs) % VRAM_WIDTH;
@ -771,7 +765,7 @@ void GPU_SW_Backend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !params.IsMaskingEnabled())
{
const u16* src_ptr = static_cast<const u16*>(data);
u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x];
u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x];
for (u32 yoffs = 0; yoffs < height; yoffs++)
{
std::copy_n(src_ptr, width, dst_ptr);
@ -788,7 +782,7 @@ void GPU_SW_Backend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
for (u32 row = 0; row < height;)
{
u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
for (u32 col = 0; col < width;)
{
// TODO: Handle unaligned reads...
@ -844,8 +838,8 @@ void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wi
{
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
{
@ -860,8 +854,8 @@ void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wi
{
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
for (u32 col = 0; col < width; col++)
{

View File

@ -14,12 +14,12 @@ public:
~GPU_SW_Backend() override;
bool Initialize(bool force_thread) override;
void Reset(bool clear_vram) override;
void Reset() override;
ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const { return m_vram[VRAM_WIDTH * y + x]; }
ALWAYS_INLINE_RELEASE const u16* GetPixelPtr(const u32 x, const u32 y) const { return &m_vram[VRAM_WIDTH * y + x]; }
ALWAYS_INLINE_RELEASE u16* GetPixelPtr(const u32 x, const u32 y) { return &m_vram[VRAM_WIDTH * y + x]; }
ALWAYS_INLINE_RELEASE void SetPixel(const u32 x, const u32 y, const u16 value) { m_vram[VRAM_WIDTH * y + x] = value; }
ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const { return g_vram[VRAM_WIDTH * y + x]; }
ALWAYS_INLINE_RELEASE const u16* GetPixelPtr(const u32 x, const u32 y) const { return &g_vram[VRAM_WIDTH * y + x]; }
ALWAYS_INLINE_RELEASE u16* GetPixelPtr(const u32 x, const u32 y) { return &g_vram[VRAM_WIDTH * y + x]; }
ALWAYS_INLINE_RELEASE void SetPixel(const u32 x, const u32 y, const u16 value) { g_vram[VRAM_WIDTH * y + x] = value; }
// this is actually (31 * 255) >> 4) == 494, but to simplify addressing we use the next power of two (512)
static constexpr u32 DITHER_LUT_SIZE = 512;
@ -165,6 +165,4 @@ protected:
const GPUBackendDrawLineCommand::Vertex* p0,
const GPUBackendDrawLineCommand::Vertex* p1);
DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool dithering_enable);
std::array<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram;
};