GPU: Implement mask bit handling in software renderer

Still needs implementation in the hardware renderers.
Connor McLaughlin 2019-11-24 18:47:40 +10:00
parent 6c6bf8714c
commit 9d6d00480c
4 changed files with 55 additions and 16 deletions


@@ -137,10 +137,19 @@ bool GPU::DoState(StateWrapper& sw)
if (sw.IsReading())
{
// Need to clear the mask bits since we want to pull it in from the copy.
const u32 old_GPUSTAT = m_GPUSTAT.bits;
m_GPUSTAT.check_mask_before_draw = false;
m_GPUSTAT.set_mask_while_drawing = false;
// Still need a temporary here.
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> temp;
sw.DoBytes(temp.data(), VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, temp.data());
// Restore mask setting.
m_GPUSTAT.bits = old_GPUSTAT;
UpdateDisplay();
UpdateSliceTicks();
}
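
The save-state path above has to clear both masking bits before calling UpdateVRAM(), otherwise the restored VRAM would be filtered through the guest's current mask settings; the original GPUSTAT value is put back afterwards. A minimal sketch of that save/clear/restore pattern — GpuStatusBits and WriteVramWithMaskingDisabled are illustrative names, not DuckStation code:

```cpp
#include <cstdint>

struct GpuStatusBits
{
  uint32_t bits = 0;
  static constexpr uint32_t SET_MASK_WHILE_DRAWING = 1u << 11;
  static constexpr uint32_t CHECK_MASK_BEFORE_DRAW = 1u << 12;
};

template <typename WriteVramFn>
void WriteVramWithMaskingDisabled(GpuStatusBits& gpustat, WriteVramFn&& write_vram)
{
  const uint32_t saved = gpustat.bits;                      // remember the guest's settings
  gpustat.bits &= ~(GpuStatusBits::SET_MASK_WHILE_DRAWING |
                    GpuStatusBits::CHECK_MASK_BEFORE_DRAW); // force a verbatim copy
  write_vram();                                             // e.g. UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, temp.data())
  gpustat.bits = saved;                                     // restore the saved register value
}
```
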
@@ -673,7 +682,7 @@ void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) {}
void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
{
// Fast path when the copy is not oversized.
if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT)
if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !m_GPUSTAT.IsMaskingEnabled())
{
const u16* src_ptr = static_cast<const u16*>(data);
u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x];
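
The fast path only applies when the rectangle fits entirely inside VRAM and masking is disabled, so each row can be copied contiguously. A standalone sketch under those assumptions — CopyRectFastPath and the raw vram pointer are illustrative, not the emulator's API:

```cpp
#include <algorithm>
#include <cstdint>

constexpr uint32_t VRAM_WIDTH = 1024;
constexpr uint32_t VRAM_HEIGHT = 512;

void CopyRectFastPath(uint16_t* vram, uint32_t x, uint32_t y, uint32_t width, uint32_t height,
                      const uint16_t* src)
{
  uint16_t* dst = &vram[y * VRAM_WIDTH + x];
  for (uint32_t row = 0; row < height; row++)
  {
    std::copy_n(src, width, dst); // one contiguous row: no wrap-around, no mask test
    src += width;
    dst += VRAM_WIDTH;
  }
}
```
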
@@ -688,13 +697,18 @@ void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
{
// Slow path when we need to handle wrap-around.
const u16* src_ptr = static_cast<const u16*>(data);
const u16 mask_and = m_GPUSTAT.GetMaskAND();
const u16 mask_or = m_GPUSTAT.GetMaskOR();
for (u32 row = 0; row < height;)
{
u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
for (u32 col = 0; col < width;)
{
// TODO: Handle unaligned reads...
dst_row_ptr[(x + col++) % VRAM_WIDTH] = *(src_ptr++);
u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH];
if (((*pixel_ptr) & mask_and) == mask_and)
*pixel_ptr = *(src_ptr++) | mask_or;
}
}
}
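
The slow path applies the per-pixel rule documented in gpu.h: the destination is tested against GetMaskAND() and the stored value is OR'd with GetMaskOR(). A minimal helper that mirrors that store, as a sketch rather than the committed code — StoreMaskedPixel is an illustrative name:

```cpp
#include <cstdint>

// Mirrors the masked store in the loop above. With masking disabled both mask
// values are zero, so the test always passes and nothing extra is OR'd in.
inline void StoreMaskedPixel(uint16_t& dst, uint16_t src, uint16_t mask_and, uint16_t mask_or)
{
  if ((dst & mask_and) == mask_and)
    dst = static_cast<uint16_t>(src | mask_or);
}
```
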
@@ -869,8 +883,8 @@ void GPU::DrawDebugStateWindow()
{
ImGui::Text("Dither: %s", m_GPUSTAT.dither_enable ? "Enabled" : "Disabled");
ImGui::Text("Draw To Display Area: %s", m_GPUSTAT.dither_enable ? "Yes" : "No");
ImGui::Text("Draw Set Mask Bit: %s", m_GPUSTAT.draw_set_mask_bit ? "Yes" : "No");
ImGui::Text("Draw To Masked Pixels: %s", m_GPUSTAT.draw_to_masked_pixels ? "Yes" : "No");
ImGui::Text("Draw Set Mask Bit: %s", m_GPUSTAT.set_mask_while_drawing ? "Yes" : "No");
ImGui::Text("Draw To Masked Pixels: %s", m_GPUSTAT.check_mask_before_draw ? "Yes" : "No");
ImGui::Text("Reverse Flag: %s", m_GPUSTAT.reverse_flag ? "Yes" : "No");
ImGui::Text("Texture Disable: %s", m_GPUSTAT.texture_disable ? "Yes" : "No");
ImGui::Text("PAL Mode: %s", m_GPUSTAT.pal_mode ? "Yes" : "No");


@@ -326,8 +326,8 @@ protected:
BitField<u32, TextureMode, 7, 2> texture_color_mode;
BitField<u32, bool, 9, 1> dither_enable;
BitField<u32, bool, 10, 1> draw_to_display_area;
BitField<u32, bool, 11, 1> draw_set_mask_bit;
BitField<u32, bool, 12, 1> draw_to_masked_pixels;
BitField<u32, bool, 11, 1> set_mask_while_drawing;
BitField<u32, bool, 12, 1> check_mask_before_draw;
BitField<u32, bool, 13, 1> interlaced_field;
BitField<u32, bool, 14, 1> reverse_flag;
BitField<u32, bool, 15, 1> texture_disable;
@@ -346,7 +346,12 @@ protected:
BitField<u32, DMADirection, 29, 2> dma_direction;
BitField<u32, bool, 31, 1> drawing_even_line;
bool In480iMode() const { return vertical_interlace & vertical_resolution; }
bool IsMaskingEnabled() const { return (bits & ((1 << 11) | (1 << 12))) != 0; }
bool In480iMode() const { return (bits & ((1 << 22) | (1 << 19))) != 0; }
// During transfer/render operations, if ((dst_pixel & mask_and) == mask_and) { pixel = src_pixel | mask_or }
u16 GetMaskAND() const { return check_mask_before_draw ? 0x8000 : 0x0000; }
u16 GetMaskOR() const { return set_mask_while_drawing ? 0x8000 : 0x0000; }
} m_GPUSTAT = {};
struct RenderState
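
The renamed bitfields plus the three new helpers are the whole interface the renderers need. A stripped-down sketch without the BitField template — the struct and accessor names are illustrative — showing how GPUSTAT bits 11 and 12 feed IsMaskingEnabled(), GetMaskAND() and GetMaskOR():

```cpp
#include <cstdint>

struct GpuStatusSketch
{
  uint32_t bits = 0;

  bool set_mask_while_drawing() const { return (bits & (1u << 11)) != 0; } // GPUSTAT bit 11
  bool check_mask_before_draw() const { return (bits & (1u << 12)) != 0; } // GPUSTAT bit 12

  bool IsMaskingEnabled() const { return (bits & ((1u << 11) | (1u << 12))) != 0; }
  uint16_t GetMaskAND() const { return check_mask_before_draw() ? 0x8000 : 0x0000; }
  uint16_t GetMaskOR() const { return set_mask_while_drawing() ? 0x8000 : 0x0000; }
};
```
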


@@ -212,10 +212,15 @@ bool GPU::HandleSetMaskBitCommand(const u32*& command_ptr, u32 command_size)
{
const u32 param = *(command_ptr++) & 0x00FFFFFF;
m_GPUSTAT.draw_set_mask_bit = (param & 0x01) != 0;
m_GPUSTAT.draw_to_masked_pixels = (param & 0x01) != 0;
Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.draw_set_mask_bit),
BoolToUInt32(m_GPUSTAT.draw_to_masked_pixels));
constexpr u32 gpustat_mask = (1 << 11) | (1 << 12);
const u32 gpustat_bits = (param & 0x03) << 11;
if ((m_GPUSTAT.bits & gpustat_mask) != gpustat_bits)
{
FlushRender();
m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits;
}
Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing),
BoolToUInt32(m_GPUSTAT.check_mask_before_draw));
EndCommand();
return true;
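
HandleSetMaskBitCommand now writes both GPUSTAT bits in one go: the low two bits of the GP0(E6h) parameter are shifted straight into bits 11-12, and the renderer is flushed only when the value actually changes. A hedged sketch of that update — ApplySetMaskBit and the flush_render callback are illustrative names:

```cpp
#include <cstdint>

constexpr uint32_t GPUSTAT_MASK_BITS = (1u << 11) | (1u << 12);

void ApplySetMaskBit(uint32_t& gpustat_bits, uint32_t param, void (*flush_render)())
{
  const uint32_t new_bits = (param & 0x03u) << 11; // bit 0 -> set mask, bit 1 -> check mask
  if ((gpustat_bits & GPUSTAT_MASK_BITS) != new_bits)
  {
    flush_render(); // pending primitives must still use the old mask settings
    gpustat_bits = (gpustat_bits & ~GPUSTAT_MASK_BITS) | new_bits;
  }
}
```
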


@@ -52,11 +52,22 @@ void GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
{
for (u32 yoffs = 0; yoffs < height; yoffs++)
// This doesn't have a fast path, but do we really need one? It's not common.
const u16 mask_and = m_GPUSTAT.GetMaskAND();
const u16 mask_or = m_GPUSTAT.GetMaskOR();
for (u32 row = 0; row < height; row++)
{
const u16* src_ptr = GetPixelPtr(src_x, src_y + yoffs);
u16* dst_ptr = GetPixelPtr(dst_x, dst_y + yoffs);
std::copy_n(src_ptr, width, dst_ptr);
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
for (u32 col = 0; col < width; col++)
{
const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH];
u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH];
if ((*dst_pixel_ptr & mask_and) == mask_and)
*dst_pixel_ptr = src_pixel | mask_or;
}
}
}
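
The rewritten CopyVRAM applies the same AND/OR rule to VRAM-to-VRAM copies and also gains coordinate wrap-around. A small usage sketch of what the rule does for one settings combination ("check mask" off, "set mask" on); the buffer and pixel values are illustrative, not emulator state:

```cpp
#include <cstdint>
#include <vector>

int main()
{
  constexpr uint32_t W = 1024, H = 512;
  std::vector<uint16_t> vram(W * H, 0x0000);
  vram[0] = 0x1234; // source pixel at (0, 0)

  const uint16_t mask_and = 0x0000; // check disabled: every destination passes the test
  const uint16_t mask_or = 0x8000;  // set mask while drawing

  // Copy (0,0) -> (1,0) using the same per-pixel rule as GPU_SW::CopyVRAM above.
  uint16_t& dst = vram[1];
  if ((dst & mask_and) == mask_and)
    dst = static_cast<uint16_t>(vram[0] | mask_or);

  return (dst == static_cast<uint16_t>(0x9234)) ? 0 : 1; // copied value with bit 15 set
}
```
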
@@ -502,7 +513,11 @@ void GPU_SW::ShadePixel(RenderCommand rc, u32 x, u32 y, u8 color_r, u8 color_g,
#undef BLEND_AVERAGE
}
SetPixel(static_cast<u32>(x), static_cast<u32>(y), color.bits);
const u16 mask_and = m_GPUSTAT.GetMaskAND();
if ((color.bits & mask_and) != mask_and)
return;
SetPixel(static_cast<u32>(x), static_cast<u32>(y), color.bits | m_GPUSTAT.GetMaskOR());
}
std::unique_ptr<GPU> GPU::CreateSoftwareRenderer()
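
ShadePixel follows the same pattern at the end of the pixel pipeline: test against GetMaskAND(), early-out if the test fails, and OR in GetMaskOR() when the pixel is actually written. A generic sketch of that early-out — WritePixelWithMask and the PlotFn callback are illustrative, standing in for the renderer's SetPixel:

```cpp
#include <cstdint>

template <typename PlotFn>
void WritePixelWithMask(uint32_t x, uint32_t y, uint16_t value, uint16_t mask_and,
                        uint16_t mask_or, PlotFn&& plot)
{
  if ((value & mask_and) != mask_and)
    return;                                              // masked out: leave VRAM untouched
  plot(x, y, static_cast<uint16_t>(value | mask_or));    // commit with the mask bit applied
}
```
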