GPU: Make VRAM a global object

2023-12-23 16:53:15 +10:00 · 2023-12-23 16:53:15 +10:00 · 411213d6a0
parent 7340324ed9
commit 411213d6a0
10 changed files with 50 additions and 69 deletions
--- a/src/core/gpu.cpp
+++ b/src/core/gpu.cpp
@ -33,6 +33,7 @@
 Log_SetChannel(GPU);

 std::unique_ptr<GPU> g_gpu;
+alignas(HOST_PAGE_SIZE) u16 g_vram[VRAM_SIZE / sizeof(u16)];

 const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable();

@ -132,6 +133,10 @@ void GPU::Reset(bool clear_vram)
  m_crtc_state.in_vblank = false;
  m_crtc_state.interlaced_field = 0;
  m_crtc_state.interlaced_display_field = 0;
+
+  if (clear_vram)
+    std::memset(g_vram, 0, sizeof(g_vram));
+
  SoftReset();
  UpdateDisplay();
 }
@ -300,7 +305,7 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ
    else
    {
      ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
-      sw.DoBytes(m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
+      sw.DoBytes(g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
    }
  }

@ -1074,7 +1079,7 @@ u32 GPU::ReadGPUREAD()
    // Read with correct wrap-around behavior.
    const u16 read_x = (m_vram_transfer.x + m_vram_transfer.col) % VRAM_WIDTH;
    const u16 read_y = (m_vram_transfer.y + m_vram_transfer.row) % VRAM_HEIGHT;
-    value |= ZeroExtend32(m_vram_ptr[read_y * VRAM_WIDTH + read_x]) << (i * 16);
+    value |= ZeroExtend32(g_vram[read_y * VRAM_WIDTH + read_x]) << (i * 16);

    if (++m_vram_transfer.col == m_vram_transfer.width)
    {
@ -1357,7 +1362,7 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
    for (u32 yoffs = 0; yoffs < height; yoffs++)
    {
      const u32 row = (y + yoffs) % VRAM_HEIGHT;
-      std::fill_n(&m_vram_ptr[row * VRAM_WIDTH + x], width, color16);
+      std::fill_n(&g_vram[row * VRAM_WIDTH + x], width, color16);
    }
  }
  else if (IsInterlacedRenderingEnabled())
@ -1373,7 +1378,7 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
      if ((row & u32(1)) == active_field)
        continue;

-      u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
+      u16* row_ptr = &g_vram[row * VRAM_WIDTH];
      for (u32 xoffs = 0; xoffs < width; xoffs++)
      {
        const u32 col = (x + xoffs) % VRAM_WIDTH;
@ -1386,7 +1391,7 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
    for (u32 yoffs = 0; yoffs < height; yoffs++)
    {
      const u32 row = (y + yoffs) % VRAM_HEIGHT;
-      u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
+      u16* row_ptr = &g_vram[row * VRAM_WIDTH];
      for (u32 xoffs = 0; xoffs < width; xoffs++)
      {
        const u32 col = (x + xoffs) % VRAM_WIDTH;
@ -1402,7 +1407,7 @@ void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool
  if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !set_mask && !check_mask)
  {
    const u16* src_ptr = static_cast<const u16*>(data);
-    u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x];
+    u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x];
    for (u32 yoffs = 0; yoffs < height; yoffs++)
    {
      std::copy_n(src_ptr, width, dst_ptr);
@ -1420,7 +1425,7 @@ void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, bool

    for (u32 row = 0; row < height;)
    {
-      u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
+      u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
      for (u32 col = 0; col < width;)
      {
        // TODO: Handle unaligned reads...
@ -1475,8 +1480,8 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he
  {
    for (u32 row = 0; row < height; row++)
    {
-      const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
-      u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
+      const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
+      u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];

      for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
      {
@ -1491,8 +1496,8 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he
  {
    for (u32 row = 0; row < height; row++)
    {
-      const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
-      u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
+      const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
+      u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];

      for (u32 col = 0; col < width; col++)
      {
@ -2175,11 +2180,11 @@ bool GPU::DumpVRAMToFile(const char* filename)
  const char* extension = std::strrchr(filename, '.');
  if (extension && StringUtil::Strcasecmp(extension, ".png") == 0)
  {
-    return DumpVRAMToFile(filename, VRAM_WIDTH, VRAM_HEIGHT, sizeof(u16) * VRAM_WIDTH, m_vram_ptr, true);
+    return DumpVRAMToFile(filename, VRAM_WIDTH, VRAM_HEIGHT, sizeof(u16) * VRAM_WIDTH, g_vram, true);
  }
  else if (extension && StringUtil::Strcasecmp(extension, ".bin") == 0)
  {
-    return FileSystem::WriteBinaryFile(filename, m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
+    return FileSystem::WriteBinaryFile(filename, g_vram, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
  }
  else
  {
--- a/src/core/gpu.h
+++ b/src/core/gpu.h
@ -11,6 +11,7 @@
 #include "common/bitfield.h"
 #include "common/fifo_queue.h"
 #include "common/rectangle.h"
+#include "common/types.h"

 #include <algorithm>
 #include <array>
@ -359,9 +360,6 @@ protected:
  std::unique_ptr<TimingEvent> m_crtc_tick_event;
  std::unique_ptr<TimingEvent> m_command_tick_event;

-  // Pointer to VRAM, used for reads/writes. In the hardware backends, this is the shadow buffer.
-  u16* m_vram_ptr = nullptr;
-
  union GPUSTAT
  {
    u32 bits;
@ -651,3 +649,4 @@ private:
 };

 extern std::unique_ptr<GPU> g_gpu;
+extern u16 g_vram[VRAM_SIZE / sizeof(u16)];
--- a/src/core/gpu_backend.cpp
+++ b/src/core/gpu_backend.cpp
@ -23,7 +23,7 @@ bool GPUBackend::Initialize(bool force_thread)
  return true;
 }

-void GPUBackend::Reset(bool clear_vram)
+void GPUBackend::Reset()
 {
  Sync(true);
  m_drawing_area = {};
--- a/src/core/gpu_backend.h
+++ b/src/core/gpu_backend.h
@ -22,12 +22,11 @@ public:
  GPUBackend();
  virtual ~GPUBackend();

-  ALWAYS_INLINE u16* GetVRAM() const { return m_vram_ptr; }
  ALWAYS_INLINE const Threading::Thread* GetThread() const { return m_use_gpu_thread ? &m_gpu_thread : nullptr; }

  virtual bool Initialize(bool force_thread);
  virtual void UpdateSettings();
-  virtual void Reset(bool clear_vram);
+  virtual void Reset();
  virtual void Shutdown();

  GPUBackendFillVRAMCommand* NewFillVRAMCommand();
@ -64,8 +63,6 @@ protected:

  void HandleCommand(const GPUBackendCommand* cmd);

-  u16* m_vram_ptr = nullptr;
-
  Common::Rectangle<u32> m_drawing_area{};

  Threading::KernelSemaphore m_sync_semaphore;
--- a/src/core/gpu_commands.cpp
+++ b/src/core/gpu_commands.cpp
@ -582,7 +582,7 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand()
  {
    DumpVRAMToFile(TinyString::from_format("vram_to_cpu_copy_{}.png", s_vram_to_cpu_dump_id++), m_vram_transfer.width,
                   m_vram_transfer.height, sizeof(u16) * VRAM_WIDTH,
-                   &m_vram_ptr[m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x], true);
+                   &g_vram[m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x], true);
  }

  // switch to pixel-by-pixel read state
--- a/src/core/gpu_hw.cpp
+++ b/src/core/gpu_hw.cpp
@ -130,8 +130,6 @@ private:

 GPU_HW::GPU_HW() : GPU()
 {
-  m_vram_ptr = m_vram_shadow.data();
-
 #ifdef _DEBUG
  s_draw_number = 0;
 #endif
@ -251,9 +249,8 @@ void GPU_HW::Reset(bool clear_vram)

  m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;

-  m_vram_shadow.fill(0);
  if (m_sw_renderer)
-    m_sw_renderer->Reset(clear_vram);
+    m_sw_renderer->Reset();

  m_batch = {};
  m_batch_ubo_data = {};
@ -442,7 +439,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings)
      Panic("Failed to recreate buffers.");

    RestoreDeviceContext();
-    UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_ptr, false, false);
+    UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, false, false);
    UpdateDepthBufferFromMaskBit();
    UpdateDisplay();
  }
@ -2291,8 +2288,6 @@ void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw)
  if (current_enabled == new_enabled)
    return;

-  m_vram_ptr = m_vram_shadow.data();
-
  if (!new_enabled)
  {
    if (m_sw_renderer)
@ -2310,7 +2305,6 @@ void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw)
  {
    FlushRender();
    ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
-    std::memcpy(sw_renderer->GetVRAM(), m_vram_ptr, sizeof(u16) * VRAM_WIDTH * VRAM_HEIGHT);

    // Sync the drawing area.
    GPUBackendSetDrawingAreaCommand* cmd = sw_renderer->NewSetDrawingAreaCommand();
@ -2319,7 +2313,6 @@ void GPU_HW::UpdateSoftwareRenderer(bool copy_vram_from_hw)
  }

  m_sw_renderer = std::move(sw_renderer);
-  m_vram_ptr = m_sw_renderer->GetVRAM();
 }

 void GPU_HW::FillBackendCommandParameters(GPUBackendCommand* cmd) const
@ -2429,7 +2422,7 @@ void GPU_HW::ReadVRAM(u32 x, u32 y, u32 width, u32 height)

  // Stage the readback and copy it into our shadow buffer.
  g_gpu_device->DownloadTexture(m_vram_readback_texture.get(), 0, 0, encoded_width, encoded_height,
-                                reinterpret_cast<u32*>(&m_vram_shadow[copy_rect.top * VRAM_WIDTH + copy_rect.left]),
+                                reinterpret_cast<u32*>(&g_vram[copy_rect.top * VRAM_WIDTH + copy_rect.left]),
                                VRAM_WIDTH * sizeof(u16));

  RestoreDeviceContext();
--- a/src/core/gpu_hw.h
+++ b/src/core/gpu_hw.h
@ -222,8 +222,6 @@ private:
  std::unique_ptr<GPUTextureBuffer> m_vram_upload_buffer;
  std::unique_ptr<GPUTexture> m_vram_write_texture;

-  FixedHeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow;
-
  std::unique_ptr<GPU_SW_Backend> m_sw_renderer;

  BatchVertex* m_batch_start_vertex_ptr = nullptr;
--- a/src/core/gpu_sw.cpp
+++ b/src/core/gpu_sw.cpp
@ -24,10 +24,7 @@ ALWAYS_INLINE static constexpr std::tuple<T, T> MinMax(T v1, T v2)
    return std::tie(v1, v2);
 }

-GPU_SW::GPU_SW()
-{
-  m_vram_ptr = m_backend.GetVRAM();
-}
+GPU_SW::GPU_SW() = default;

 GPU_SW::~GPU_SW()
 {
@ -84,7 +81,7 @@ void GPU_SW::Reset(bool clear_vram)
 {
  GPU::Reset(clear_vram);

-  m_backend.Reset(clear_vram);
+  m_backend.Reset();
 }

 void GPU_SW::UpdateSettings(const Settings& old_settings)
@ -269,7 +266,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field
    const u32 rows = height >> interlaced_shift;
    dst_stride <<= interlaced_shift;

-    const u16* src_ptr = &m_vram_ptr[src_y * VRAM_WIDTH + src_x];
+    const u16* src_ptr = &g_vram[src_y * VRAM_WIDTH + src_x];
    const u32 src_step = VRAM_WIDTH << interleaved_shift;
    for (u32 row = 0; row < rows; row++)
    {
@ -286,7 +283,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 field
    const u32 end_x = src_x + width;
    for (u32 row = 0; row < rows; row++)
    {
-      const u16* src_row_ptr = &m_vram_ptr[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
+      const u16* src_row_ptr = &g_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
      OutputPixelType* dst_row_ptr = reinterpret_cast<OutputPixelType*>(dst_ptr);

      for (u32 col = src_x; col < end_x; col++)
@ -352,7 +349,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh

  if ((src_x + width) <= VRAM_WIDTH && (src_y + (rows << interleaved_shift)) <= VRAM_HEIGHT)
  {
-    const u8* src_ptr = reinterpret_cast<const u8*>(&m_vram_ptr[src_y * VRAM_WIDTH + src_x]) + (skip_x * 3);
+    const u8* src_ptr = reinterpret_cast<const u8*>(&g_vram[src_y * VRAM_WIDTH + src_x]) + (skip_x * 3);
    const u32 src_stride = (VRAM_WIDTH << interleaved_shift) * sizeof(u16);
    for (u32 row = 0; row < rows; row++)
    {
@ -412,7 +409,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 heigh
  {
    for (u32 row = 0; row < rows; row++)
    {
-      const u16* src_row_ptr = &m_vram_ptr[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
+      const u16* src_row_ptr = &g_vram[(src_y % VRAM_HEIGHT) * VRAM_WIDTH];
      OutputPixelType* dst_row_ptr = reinterpret_cast<OutputPixelType*>(dst_ptr);

      for (u32 col = 0; col < width; col++)
--- a/src/core/gpu_sw_backend.cpp
+++ b/src/core/gpu_sw_backend.cpp
@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: 2019-2022 Connor McLaughlin <stenzek@gmail.com>
 // SPDX-License-Identifier: (GPL-3.0 OR CC-BY-NC-ND-4.0)

+#include "gpu.h"
 #include "gpu_sw_backend.h"
 #include "system.h"

@ -8,11 +9,7 @@

 #include <algorithm>

-GPU_SW_Backend::GPU_SW_Backend() : GPUBackend()
-{
-  m_vram.fill(0);
-  m_vram_ptr = m_vram.data();
-}
+GPU_SW_Backend::GPU_SW_Backend() = default;

 GPU_SW_Backend::~GPU_SW_Backend() = default;

@ -21,12 +18,9 @@ bool GPU_SW_Backend::Initialize(bool force_thread)
  return GPUBackend::Initialize(force_thread);
 }

-void GPU_SW_Backend::Reset(bool clear_vram)
+void GPU_SW_Backend::Reset()
 {
-  GPUBackend::Reset(clear_vram);
-
-  if (clear_vram)
-    m_vram.fill(0);
+  GPUBackend::Reset();
 }

 void GPU_SW_Backend::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
@ -728,7 +722,7 @@ void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GP
    for (u32 yoffs = 0; yoffs < height; yoffs++)
    {
      const u32 row = (y + yoffs) % VRAM_HEIGHT;
-      std::fill_n(&m_vram_ptr[row * VRAM_WIDTH + x], width, color16);
+      std::fill_n(&g_vram[row * VRAM_WIDTH + x], width, color16);
    }
  }
  else if (params.interlaced_rendering)
@ -741,7 +735,7 @@ void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GP
      if ((row & u32(1)) == active_field)
        continue;

-      u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
+      u16* row_ptr = &g_vram[row * VRAM_WIDTH];
      for (u32 xoffs = 0; xoffs < width; xoffs++)
      {
        const u32 col = (x + xoffs) % VRAM_WIDTH;
@ -754,7 +748,7 @@ void GPU_SW_Backend::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GP
    for (u32 yoffs = 0; yoffs < height; yoffs++)
    {
      const u32 row = (y + yoffs) % VRAM_HEIGHT;
-      u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
+      u16* row_ptr = &g_vram[row * VRAM_WIDTH];
      for (u32 xoffs = 0; xoffs < width; xoffs++)
      {
        const u32 col = (x + xoffs) % VRAM_WIDTH;
@ -771,7 +765,7 @@ void GPU_SW_Backend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
  if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !params.IsMaskingEnabled())
  {
    const u16* src_ptr = static_cast<const u16*>(data);
-    u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x];
+    u16* dst_ptr = &g_vram[y * VRAM_WIDTH + x];
    for (u32 yoffs = 0; yoffs < height; yoffs++)
    {
      std::copy_n(src_ptr, width, dst_ptr);
@ -788,7 +782,7 @@ void GPU_SW_Backend::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*

    for (u32 row = 0; row < height;)
    {
-      u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
+      u16* dst_row_ptr = &g_vram[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
      for (u32 col = 0; col < width;)
      {
        // TODO: Handle unaligned reads...
@ -844,8 +838,8 @@ void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wi
  {
    for (u32 row = 0; row < height; row++)
    {
-      const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
-      u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
+      const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
+      u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];

      for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
      {
@ -860,8 +854,8 @@ void GPU_SW_Backend::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wi
  {
    for (u32 row = 0; row < height; row++)
    {
-      const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
-      u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
+      const u16* src_row_ptr = &g_vram[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
+      u16* dst_row_ptr = &g_vram[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];

      for (u32 col = 0; col < width; col++)
      {
--- a/src/core/gpu_sw_backend.h
+++ b/src/core/gpu_sw_backend.h
@ -14,12 +14,12 @@ public:
  ~GPU_SW_Backend() override;

  bool Initialize(bool force_thread) override;
-  void Reset(bool clear_vram) override;
+  void Reset() override;

-  ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const { return m_vram[VRAM_WIDTH * y + x]; }
-  ALWAYS_INLINE_RELEASE const u16* GetPixelPtr(const u32 x, const u32 y) const { return &m_vram[VRAM_WIDTH * y + x]; }
-  ALWAYS_INLINE_RELEASE u16* GetPixelPtr(const u32 x, const u32 y) { return &m_vram[VRAM_WIDTH * y + x]; }
-  ALWAYS_INLINE_RELEASE void SetPixel(const u32 x, const u32 y, const u16 value) { m_vram[VRAM_WIDTH * y + x] = value; }
+  ALWAYS_INLINE_RELEASE u16 GetPixel(const u32 x, const u32 y) const { return g_vram[VRAM_WIDTH * y + x]; }
+  ALWAYS_INLINE_RELEASE const u16* GetPixelPtr(const u32 x, const u32 y) const { return &g_vram[VRAM_WIDTH * y + x]; }
+  ALWAYS_INLINE_RELEASE u16* GetPixelPtr(const u32 x, const u32 y) { return &g_vram[VRAM_WIDTH * y + x]; }
+  ALWAYS_INLINE_RELEASE void SetPixel(const u32 x, const u32 y, const u16 value) { g_vram[VRAM_WIDTH * y + x] = value; }

  // this is actually (31 * 255) >> 4) == 494, but to simplify addressing we use the next power of two (512)
  static constexpr u32 DITHER_LUT_SIZE = 512;
@ -165,6 +165,4 @@ protected:
                                                    const GPUBackendDrawLineCommand::Vertex* p0,
                                                    const GPUBackendDrawLineCommand::Vertex* p1);
  DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool dithering_enable);
-
-  std::array<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram;
 };