This commit is contained in:
Connor McLaughlin 2020-09-05 00:23:52 +10:00
parent fa3307e5f1
commit 5a2e4ed08f
36 changed files with 2709 additions and 1829 deletions

View File

@ -1,5 +1,6 @@
#pragma once
#include <algorithm>
#include <cstring>
#include <limits>
#include <tuple>
#include <type_traits>
@ -63,10 +64,7 @@ struct Rectangle
/// Copy-assignment operator: copies the four edges from rhs, member by member.
/// Member-wise assignment is used on its own (no std::memcpy) because it can be evaluated in a
/// constant expression, matching the constexpr qualifier, and it is well-defined when rhs is
/// *this, where std::memcpy's no-overlap precondition would be violated.
/// NOTE(review): assumes left/top/right/bottom are the only data members - confirm against the struct.
constexpr Rectangle& operator=(const Rectangle& rhs)
{
  left = rhs.left;
  top = rhs.top;
  right = rhs.right;
  bottom = rhs.bottom;
  return *this;
}

View File

@ -28,6 +28,8 @@ add_library(core
dma.h
gpu.cpp
gpu.h
gpu_backend.cpp
gpu_backend.h
gpu_commands.cpp
gpu_hw.cpp
gpu_hw.h

View File

@ -579,14 +579,14 @@ ALWAYS_INLINE static TickCount DoGPUAccess(u32 offset, u32& value)
{
if constexpr (type == MemoryAccessType::Read)
{
value = g_gpu->ReadRegister(offset);
value = g_gpu.ReadRegister(offset);
FixupUnalignedWordAccessW32(offset, value);
return 2;
}
else
{
FixupUnalignedWordAccessW32(offset, value);
g_gpu->WriteRegister(offset, value);
g_gpu.WriteRegister(offset, value);
return 0;
}
}

View File

@ -60,7 +60,9 @@
<ClCompile Include="cpu_recompiler_register_cache.cpp" />
<ClCompile Include="cpu_types.cpp" />
<ClCompile Include="digital_controller.cpp" />
<ClCompile Include="gpu_backend.cpp" />
<ClCompile Include="gpu_commands.cpp" />
<ClCompile Include="gpu_hw.cpp" />
<ClCompile Include="gpu_hw_d3d11.cpp" />
<ClCompile Include="gpu_hw_shadergen.cpp" />
<ClCompile Include="gpu_hw_vulkan.cpp" />
@ -68,7 +70,6 @@
<ClCompile Include="gte.cpp" />
<ClCompile Include="dma.cpp" />
<ClCompile Include="gpu.cpp" />
<ClCompile Include="gpu_hw.cpp" />
<ClCompile Include="gpu_hw_opengl.cpp" />
<ClCompile Include="host_display.cpp" />
<ClCompile Include="host_interface.cpp" />
@ -109,6 +110,9 @@
<ClInclude Include="cpu_recompiler_thunks.h" />
<ClInclude Include="cpu_recompiler_types.h" />
<ClInclude Include="digital_controller.h" />
<ClInclude Include="gpu_backend.h" />
<ClInclude Include="gpu_types.h" />
<ClInclude Include="gpu_hw.h" />
<ClInclude Include="gpu_hw_d3d11.h" />
<ClInclude Include="gpu_hw_shadergen.h" />
<ClInclude Include="gpu_hw_vulkan.h" />
@ -117,7 +121,6 @@
<ClInclude Include="cpu_types.h" />
<ClInclude Include="dma.h" />
<ClInclude Include="gpu.h" />
<ClInclude Include="gpu_hw.h" />
<ClInclude Include="gpu_hw_opengl.h" />
<ClInclude Include="gte_types.h" />
<ClInclude Include="host_display.h" />

View File

@ -8,7 +8,6 @@
<ClCompile Include="dma.cpp" />
<ClCompile Include="gpu.cpp" />
<ClCompile Include="gpu_hw_opengl.cpp" />
<ClCompile Include="gpu_hw.cpp" />
<ClCompile Include="host_interface.cpp" />
<ClCompile Include="interrupt_controller.cpp" />
<ClCompile Include="cdrom.cpp" />
@ -49,6 +48,8 @@
<ClCompile Include="cheats.cpp" />
<ClCompile Include="shadergen.cpp" />
<ClCompile Include="memory_card_image.cpp" />
<ClCompile Include="gpu_backend.cpp" />
<ClCompile Include="gpu_hw.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="types.h" />
@ -61,7 +62,6 @@
<ClInclude Include="dma.h" />
<ClInclude Include="gpu.h" />
<ClInclude Include="gpu_hw_opengl.h" />
<ClInclude Include="gpu_hw.h" />
<ClInclude Include="host_interface.h" />
<ClInclude Include="interrupt_controller.h" />
<ClInclude Include="cdrom.h" />
@ -101,5 +101,8 @@
<ClInclude Include="cheats.h" />
<ClInclude Include="shadergen.h" />
<ClInclude Include="memory_card_image.h" />
<ClInclude Include="gpu_backend.h" />
<ClInclude Include="gpu_hw.h" />
<ClInclude Include="gpu_types.h" />
</ItemGroup>
</Project>
</Project>

View File

@ -129,7 +129,6 @@ static void ExecuteImpl()
{
CodeBlockKey next_block_key;
g_state.frame_done = false;
while (!g_state.frame_done)
{
TimingEvents::UpdateCPUDowncount();
@ -240,7 +239,6 @@ void Execute()
void ExecuteRecompiler()
{
g_state.frame_done = false;
while (!g_state.frame_done)
{
TimingEvents::UpdateCPUDowncount();

View File

@ -66,6 +66,8 @@ void Shutdown()
void Reset()
{
g_state.frame_done = true;
g_state.pending_ticks = 0;
g_state.downcount = MAX_SLICE_SIZE;
@ -1368,7 +1370,6 @@ restart_instruction:
template<PGXPMode pgxp_mode>
static void ExecuteImpl()
{
g_state.frame_done = false;
while (!g_state.frame_done)
{
TimingEvents::UpdateCPUDowncount();

View File

@ -66,7 +66,7 @@ struct State
bool branch_was_taken = false;
bool exception_raised = false;
bool interrupt_delay = false;
bool frame_done = false;
bool frame_done = true;
// load delays
Reg load_delay_reg = Reg::count;

View File

@ -449,17 +449,17 @@ TickCount DMA::TransferMemoryToDevice(Channel channel, u32 address, u32 incremen
{
case Channel::GPU:
{
if (g_gpu->BeginDMAWrite())
if (g_gpu.BeginDMAWrite())
{
u8* ram_pointer = Bus::g_ram;
for (u32 i = 0; i < word_count; i++)
{
u32 value;
std::memcpy(&value, &ram_pointer[address], sizeof(u32));
g_gpu->DMAWrite(address, value);
g_gpu.DMAWrite(address, value);
address = (address + increment) & ADDRESS_MASK;
}
g_gpu->EndDMAWrite();
g_gpu.EndDMAWrite();
}
}
break;
@ -516,7 +516,7 @@ TickCount DMA::TransferDeviceToMemory(Channel channel, u32 address, u32 incremen
switch (channel)
{
case Channel::GPU:
g_gpu->DMARead(dest_pointer, word_count);
g_gpu.DMARead(dest_pointer, word_count);
break;
case Channel::CDROM:

View File

@ -4,6 +4,7 @@
#include "common/log.h"
#include "common/state_wrapper.h"
#include "dma.h"
#include "gpu_backend.h"
#include "host_display.h"
#include "host_interface.h"
#include "interrupt_controller.h"
@ -16,7 +17,7 @@
#endif
Log_SetChannel(GPU);
std::unique_ptr<GPU> g_gpu;
GPU g_gpu;
const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable();
@ -24,9 +25,8 @@ GPU::GPU() = default;
GPU::~GPU() = default;
bool GPU::Initialize(HostDisplay* host_display)
void GPU::Initialize()
{
m_host_display = host_display;
m_force_progressive_scan = g_settings.gpu_disable_interlacing;
m_force_ntsc_timings = g_settings.gpu_force_ntsc_timings;
m_crtc_state.display_aspect_ratio = Settings::GetDisplayAspectRatioValue(g_settings.display_aspect_ratio);
@ -38,7 +38,14 @@ bool GPU::Initialize(HostDisplay* host_display)
m_max_run_ahead = g_settings.gpu_max_run_ahead;
m_console_is_pal = System::IsPALRegion();
UpdateCRTCConfig();
return true;
}
void GPU::Shutdown()
{
m_command_tick_event.reset();
m_crtc_tick_event.reset();
m_fifo.Clear();
std::vector<u32>().swap(m_blit_buffer);
}
void GPU::UpdateSettings()
@ -58,13 +65,8 @@ void GPU::UpdateSettings()
// Crop mode calls this, so recalculate the display area
UpdateCRTCDisplayParameters();
}
void GPU::UpdateResolutionScale() {}
std::tuple<u32, u32> GPU::GetEffectiveDisplayResolution()
{
return std::tie(m_crtc_state.display_vram_width, m_crtc_state.display_vram_height);
g_gpu_backend->PushCommand(g_gpu_backend->NewUpdateSettingsCommand());
}
void GPU::Reset()
@ -72,6 +74,8 @@ void GPU::Reset()
SoftReset();
m_set_texture_disable_mask = false;
m_GPUREAD_latch = 0;
g_gpu_backend->PushCommand(g_gpu_backend->NewResetCommand());
}
void GPU::SoftReset()
@ -81,7 +85,7 @@ void GPU::SoftReset()
m_GPUSTAT.bits = 0x14802000;
m_GPUSTAT.pal_mode = System::IsPALRegion();
m_drawing_area.Set(0, 0, 0, 0);
m_drawing_area_changed = true;
UpdateDrawingArea();
m_drawing_offset = {};
std::memset(&m_crtc_state.regs, 0, sizeof(m_crtc_state.regs));
m_crtc_state.regs.horizontal_display_range = 0xC60260;
@ -98,9 +102,8 @@ void GPU::SoftReset()
m_fifo.Clear();
m_blit_buffer.clear();
m_blit_remaining_words = 0;
SetDrawMode(0);
SetTexturePalette(0);
SetTextureWindow(0);
m_draw_mode.bits = 0;
m_texture_window.bits = 0;
UpdateDMARequest();
UpdateCRTCConfig();
UpdateCRTCTickEvent();
@ -117,27 +120,14 @@ bool GPU::DoState(StateWrapper& sw)
sw.Do(&m_GPUSTAT.bits);
sw.Do(&m_draw_mode.mode_reg.bits);
sw.Do(&m_draw_mode.palette_reg);
sw.Do(&m_draw_mode.texture_window_value);
sw.Do(&m_draw_mode.texture_page_x);
sw.Do(&m_draw_mode.texture_page_y);
sw.Do(&m_draw_mode.texture_palette_x);
sw.Do(&m_draw_mode.texture_palette_y);
sw.Do(&m_draw_mode.texture_window_mask_x);
sw.Do(&m_draw_mode.texture_window_mask_y);
sw.Do(&m_draw_mode.texture_window_offset_x);
sw.Do(&m_draw_mode.texture_window_offset_y);
sw.Do(&m_draw_mode.texture_x_flip);
sw.Do(&m_draw_mode.texture_y_flip);
sw.Do(&m_drawing_offset.x);
sw.Do(&m_drawing_offset.y);
sw.Do(&m_drawing_area.left);
sw.Do(&m_drawing_area.top);
sw.Do(&m_drawing_area.right);
sw.Do(&m_drawing_area.bottom);
sw.Do(&m_drawing_offset.x);
sw.Do(&m_drawing_offset.y);
sw.Do(&m_drawing_offset.x);
sw.Do(&m_draw_mode.bits);
sw.Do(&m_texture_window.bits);
sw.Do(&m_console_is_pal);
sw.Do(&m_set_texture_disable_mask);
@ -195,9 +185,7 @@ bool GPU::DoState(StateWrapper& sw)
if (sw.IsReading())
{
m_draw_mode.texture_page_changed = true;
m_draw_mode.texture_window_changed = true;
m_drawing_area_changed = true;
UpdateDrawingArea();
UpdateDMARequest();
}
@ -206,37 +194,15 @@ bool GPU::DoState(StateWrapper& sw)
if (sw.IsReading())
{
// Need to clear the mask bits since we want to pull it in from the copy.
const u32 old_GPUSTAT = m_GPUSTAT.bits;
m_GPUSTAT.check_mask_before_draw = false;
m_GPUSTAT.set_mask_while_drawing = false;
// Still need a temporary here.
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> temp;
sw.DoBytes(temp.data(), VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, temp.data());
// Restore mask setting.
m_GPUSTAT.bits = old_GPUSTAT;
UpdateCRTCConfig();
UpdateDisplay();
UpdateCRTCTickEvent();
UpdateCommandTickEvent();
}
else
{
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
sw.DoBytes(m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
}
return !sw.HasError();
}
void GPU::ResetGraphicsAPIState() {}
void GPU::RestoreGraphicsAPIState() {}
void GPU::UpdateDMARequest()
{
switch (m_blitter_state)
@ -818,9 +784,9 @@ void GPU::UpdateCommandTickEvent()
bool GPU::ConvertScreenCoordinatesToBeamTicksAndLines(s32 window_x, s32 window_y, u32* out_tick, u32* out_line) const
{
const auto [display_x, display_y] = m_host_display->ConvertWindowCoordinatesToDisplayCoordinates(
window_x, window_y, m_host_display->GetWindowWidth(), m_host_display->GetWindowHeight(),
m_host_display->GetDisplayTopMargin());
HostDisplay* display = g_host_interface->GetDisplay();
const auto [display_x, display_y] = display->ConvertWindowCoordinatesToDisplayCoordinates(
window_x, window_y, display->GetWindowWidth(), display->GetWindowHeight(), display->GetDisplayTopMargin());
Log_DebugPrintf("win %d,%d -> disp %d,%d (size %u,%u frac %f,%f)", window_x, window_y, display_x, display_y,
m_crtc_state.display_width, m_crtc_state.display_height,
static_cast<float>(display_x) / static_cast<float>(m_crtc_state.display_width),
@ -850,7 +816,7 @@ u32 GPU::ReadGPUREAD()
// Read with correct wrap-around behavior.
const u16 read_x = (m_vram_transfer.x + m_vram_transfer.col) % VRAM_WIDTH;
const u16 read_y = (m_vram_transfer.y + m_vram_transfer.row) % VRAM_HEIGHT;
value |= ZeroExtend32(m_vram_ptr[read_y * VRAM_WIDTH + read_x]) << (i * 16);
value |= ZeroExtend32(g_gpu_backend->GetVRAM()[read_y * VRAM_WIDTH + read_x]) << (i * 16);
if (++m_vram_transfer.col == m_vram_transfer.width)
{
@ -1064,7 +1030,7 @@ void GPU::HandleGetGPUInfoCommand(u32 value)
case 0x02: // Get Texture Window
{
Log_DebugPrintf("Get texture window");
m_GPUREAD_latch = m_draw_mode.texture_window_value;
m_GPUREAD_latch = m_texture_window.bits;
}
break;
@ -1098,227 +1064,6 @@ void GPU::HandleGetGPUInfoCommand(u32 value)
}
}
void GPU::ClearDisplay() {}
void GPU::UpdateDisplay() {}
void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height) {}
/// Fills a width x height rectangle of VRAM starting at (x, y) with a single colour.
/// The 32-bit colour is converted to a 16-bit VRAM pixel first. Rows always wrap at VRAM_HEIGHT;
/// columns wrap at VRAM_WIDTH whenever the fast path cannot be used.
void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{
  const u16 fill_value = RGBA8888ToRGBA5551(color);

  // Writes one full row of the fill, wrapping horizontally at the VRAM edge.
  const auto fill_row_wrapped = [this, x, width, fill_value](u32 row) {
    u16* const row_base = &m_vram_ptr[row * VRAM_WIDTH];
    for (u32 i = 0; i < width; i++)
      row_base[(x + i) % VRAM_WIDTH] = fill_value;
  };

  if (IsInterlacedRenderingEnabled())
  {
    // Per hardware tests, fills misbehave on the first two lines when the offset matches the
    // displayed field, so bring the CRTC up to date before sampling the active line.
    if (IsCRTCScanlinePending())
      SynchronizeCRTC();

    // Rows whose low bit matches the active line's low bit are left untouched.
    const u32 active_lsb = GetActiveLineLSB();
    for (u32 i = 0; i < height; i++)
    {
      const u32 row = (y + i) % VRAM_HEIGHT;
      if ((row & u32(1)) != active_lsb)
        fill_row_wrapped(row);
    }
    return;
  }

  if ((x + width) <= VRAM_WIDTH)
  {
    // No horizontal wrap needed: each row is one contiguous run.
    for (u32 i = 0; i < height; i++)
    {
      const u32 row = (y + i) % VRAM_HEIGHT;
      std::fill_n(&m_vram_ptr[row * VRAM_WIDTH + x], width, fill_value);
    }
    return;
  }

  // The fill crosses the right edge, so wrap column by column.
  for (u32 i = 0; i < height; i++)
    fill_row_wrapped((y + i) % VRAM_HEIGHT);
}
/// Copies a width x height block of 16-bit pixels into VRAM at (x, y).
/// @param data Source pixels, tightly packed row by row (width pixels per row).
/// When the block fits inside VRAM and masking is disabled, rows are copied directly. Otherwise
/// the copy wraps at the VRAM edges and honours the mask settings from GPUSTAT: a destination
/// pixel is only replaced when (pixel & GetMaskAND()) is zero, and GetMaskOR() is OR'd into
/// every pixel that is written.
void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
{
  const u16* src_ptr = static_cast<const u16*>(data);

  // Fast path when the copy is not oversized and no masking is in effect.
  if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !m_GPUSTAT.IsMaskingEnabled())
  {
    u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x];
    for (u32 row = 0; row < height; row++)
    {
      std::copy_n(src_ptr, width, dst_ptr);
      src_ptr += width;
      dst_ptr += VRAM_WIDTH;
    }
    return;
  }

  // Slow path when we need to handle wrap-around and/or masking.
  const u16 mask_and = m_GPUSTAT.GetMaskAND();
  const u16 mask_or = m_GPUSTAT.GetMaskOR();
  for (u32 row = 0; row < height; row++)
  {
    u16* dst_row_ptr = &m_vram_ptr[((y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
    for (u32 col = 0; col < width; col++)
    {
      // TODO: Handle unaligned reads...
      // Always consume the source pixel, even when the destination is protected by the mask.
      // Advancing only on a successful write would shift all following source pixels.
      const u16 src_pixel = *(src_ptr++);
      u16* pixel_ptr = &dst_row_ptr[(x + col) % VRAM_WIDTH];
      if (((*pixel_ptr) & mask_and) == 0)
        *pixel_ptr = src_pixel | mask_or;
    }
  }
}
/// Copies a width x height rectangle within VRAM from (src_x, src_y) to (dst_x, dst_y).
/// Destination pixels are only replaced when (pixel & GetMaskAND()) is zero, and GetMaskOR() is
/// OR'd into every pixel written. Rows wrap at VRAM_HEIGHT and columns at VRAM_WIDTH.
void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
{
// Break up oversized copies. This behavior has not been verified on console.
if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH)
{
u32 remaining_rows = height;
u32 current_src_y = src_y;
u32 current_dst_y = dst_y;
while (remaining_rows > 0)
{
// Largest run of rows that reaches neither the bottom of the source nor of the destination.
const u32 rows_to_copy =
std::min<u32>(remaining_rows, std::min<u32>(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y));
u32 remaining_columns = width;
u32 current_src_x = src_x;
u32 current_dst_x = dst_x;
while (remaining_columns > 0)
{
// Largest run of columns that reaches neither the right edge of the source nor of the destination.
const u32 columns_to_copy =
std::min<u32>(remaining_columns, std::min<u32>(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x));
// Recurse on the piece; assuming the coordinates are already below VRAM_WIDTH, it no longer
// crosses the right edge and so takes the non-splitting path below.
CopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy);
current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH;
current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH;
remaining_columns -= columns_to_copy;
}
current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT;
current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT;
remaining_rows -= rows_to_copy;
}
return;
}
// This doesn't have a fast path, but do we really need one? It's not common.
const u16 mask_and = m_GPUSTAT.GetMaskAND();
const u16 mask_or = m_GPUSTAT.GetMaskOR();
// Copy in reverse when src_x < dst_x, this is verified on console.
if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH))
{
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
// Walk the columns right-to-left; a signed index is used so the loop can stop after column 0.
for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
{
const u16 src_pixel = src_row_ptr[(src_x + static_cast<u32>(col)) % VRAM_WIDTH];
u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast<u32>(col)) % VRAM_WIDTH];
if ((*dst_pixel_ptr & mask_and) == 0)
*dst_pixel_ptr = src_pixel | mask_or;
}
}
}
else
{
// Forward (left-to-right) copy.
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
for (u32 col = 0; col < width; col++)
{
const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH];
u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH];
if ((*dst_pixel_ptr & mask_and) == 0)
*dst_pixel_ptr = src_pixel | mask_or;
}
}
}
}
void GPU::DispatchRenderCommand() {}
void GPU::FlushRender() {}
/// Applies a new draw-mode register value.
/// The value is masked to the register's valid bits, the cached texture page base is refreshed
/// when the page bits change, and the low draw-mode bits are mirrored into GPUSTAT.
void GPU::SetDrawMode(u16 value)
{
  DrawMode::Reg incoming{static_cast<u16>(value & DrawMode::Reg::MASK)};

  // The texture-disable bit is only honoured while the disable mask is set.
  if (!m_set_texture_disable_mask)
    incoming.texture_disable = false;

  const u16 previous_bits = m_draw_mode.mode_reg.bits;
  if (incoming.bits == previous_bits)
    return;

  // Any difference within the texture-page bits means the cached page origin is stale.
  const bool page_bits_differ = ((incoming.bits ^ previous_bits) & DrawMode::Reg::TEXTURE_PAGE_MASK) != 0;
  if (page_bits_differ)
  {
    m_draw_mode.texture_page_x = incoming.GetTexturePageXBase();
    m_draw_mode.texture_page_y = incoming.GetTexturePageYBase();
    m_draw_mode.texture_page_changed = true;
  }

  m_draw_mode.mode_reg.bits = incoming.bits;

  // Flush while GPUSTAT still holds the old draw-to-displayed-field setting.
  if (m_GPUSTAT.draw_to_displayed_field != incoming.draw_to_displayed_field)
    FlushRender();

  // Bits 0..10 of the draw mode are returned in the GPU status register.
  const u32 untouched_status = m_GPUSTAT.bits & ~(DrawMode::Reg::GPUSTAT_MASK);
  const u32 mirrored_mode = ZeroExtend32(incoming.bits) & DrawMode::Reg::GPUSTAT_MASK;
  m_GPUSTAT.bits = untouched_status | mirrored_mode;
  m_GPUSTAT.texture_disable = m_draw_mode.mode_reg.texture_disable;
}
/// Stores a new texture palette value and decodes its X/Y position.
/// Does nothing when the masked value matches what is already latched.
void GPU::SetTexturePalette(u16 value)
{
  value &= DrawMode::PALETTE_MASK;

  if (m_draw_mode.palette_reg != value)
  {
    // Low six bits select X in steps of 16; the remaining bits are the Y coordinate.
    m_draw_mode.texture_palette_x = ZeroExtend32(value & 0x3F) * 16;
    m_draw_mode.texture_palette_y = ZeroExtend32(value >> 6);
    m_draw_mode.palette_reg = value;
    m_draw_mode.texture_page_changed = true;
  }
}
/// Stores a new texture window value and decodes its four 5-bit fields.
/// Pending rendering is flushed before the window changes; an unchanged value is ignored.
void GPU::SetTextureWindow(u32 value)
{
  const u32 window_bits = value & DrawMode::TEXTURE_WINDOW_MASK;
  if (window_bits == m_draw_mode.texture_window_value)
    return;

  FlushRender();

  // Each field is five bits wide: mask X, mask Y, offset X, offset Y, from bit 0 upwards.
  const auto field_at = [window_bits](u32 shift) { return (window_bits >> shift) & UINT32_C(0x1F); };
  m_draw_mode.texture_window_mask_x = field_at(0);
  m_draw_mode.texture_window_mask_y = field_at(5);
  m_draw_mode.texture_window_offset_x = field_at(10);
  m_draw_mode.texture_window_offset_y = field_at(15);

  m_draw_mode.texture_window_value = window_bits;
  m_draw_mode.texture_window_changed = true;
}
bool GPU::DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha)
{
auto fp = FileSystem::OpenManagedCFile(filename, "wb");
@ -1417,7 +1162,7 @@ void GPU::DrawDebugStateWindow()
ImGui::Columns(1);
}
DrawRendererStats(is_idle_frame);
g_gpu_backend->DrawRendererStats(is_idle_frame);
if (ImGui::CollapsingHeader("GPU", ImGuiTreeNodeFlags_DefaultOpen))
{
@ -1468,5 +1213,3 @@ void GPU::DrawDebugStateWindow()
ImGui::End();
#endif
}
void GPU::DrawRendererStats(bool is_idle_frame) {}

View File

@ -2,6 +2,7 @@
#include "common/bitfield.h"
#include "common/fifo_queue.h"
#include "common/rectangle.h"
#include "gpu_types.h"
#include "timers.h"
#include "types.h"
#include <algorithm>
@ -13,12 +14,12 @@
class StateWrapper;
class HostDisplay;
class TimingEvent;
class Timers;
class GPU
class GPUBackend;
class GPU final
{
public:
enum class BlitterState : u8
@ -37,66 +38,12 @@ public:
GPUREADtoCPU = 3
};
// Primitive type selector, decoded from the `primitive` bit-field of RenderCommand (which starts at bit 29).
enum class Primitive : u8
{
Reserved = 0,
Polygon = 1,
Line = 2,
Rectangle = 3
};
// Rectangle size selector, held in RenderCommand::rectangle_size (two bits starting at bit 27);
// that field is only meaningful for rectangle primitives.
// NOTE(review): the R1x1/R8x8/R16x16 names presumably denote fixed pixel sizes - confirm against the draw code.
enum class DrawRectangleSize : u8
{
Variable = 0,
R1x1 = 1,
R8x8 = 2,
R16x16 = 3
};
enum class TextureMode : u8
{
Palette4Bit = 0,
Palette8Bit = 1,
Direct16Bit = 2,
Reserved_Direct16Bit = 3,
// Not register values.
RawTextureBit = 4,
RawPalette4Bit = RawTextureBit | Palette4Bit,
RawPalette8Bit = RawTextureBit | Palette8Bit,
RawDirect16Bit = RawTextureBit | Direct16Bit,
Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit,
Disabled = 8 // Not a register value
};
enum class TransparencyMode : u8
{
HalfBackgroundPlusHalfForeground = 0,
BackgroundPlusForeground = 1,
BackgroundMinusForeground = 2,
BackgroundPlusQuarterForeground = 3,
Disabled = 4 // Not a register value
};
enum : u32
{
VRAM_WIDTH = 1024,
VRAM_HEIGHT = 512,
VRAM_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16),
VRAM_WIDTH_MASK = VRAM_WIDTH - 1,
VRAM_HEIGHT_MASK = VRAM_HEIGHT - 1,
VRAM_COORD_MASK = 0x3FF,
MAX_FIFO_SIZE = 4096,
TEXTURE_PAGE_WIDTH = 256,
TEXTURE_PAGE_HEIGHT = 256,
MAX_PRIMITIVE_WIDTH = 1024,
MAX_PRIMITIVE_HEIGHT = 512,
DOT_TIMER_INDEX = 0,
HBLANK_TIMER_INDEX = 1,
MAX_RESOLUTION_SCALE = 16,
DITHER_MATRIX_SIZE = 4
MAX_RESOLUTION_SCALE = 16
};
enum : u16
@ -117,17 +64,12 @@ public:
// Base class constructor.
GPU();
virtual ~GPU();
~GPU();
virtual bool IsHardwareRenderer() const = 0;
virtual bool Initialize(HostDisplay* host_display);
virtual void Reset();
virtual bool DoState(StateWrapper& sw);
// Graphics API state reset/restore - call when drawing the UI etc.
virtual void ResetGraphicsAPIState();
virtual void RestoreGraphicsAPIState();
void Initialize();
void Shutdown();
void Reset();
bool DoState(StateWrapper& sw);
// Render statistics debug window.
void DrawDebugStateWindow();
@ -164,6 +106,20 @@ public:
return (!m_force_progressive_scan) & m_GPUSTAT.SkipDrawingToActiveField();
}
/// Picks the interlacing mode for scan-out/display.
/// None when interlaced display is off; otherwise InterleavedFields if the GPUSTAT
/// vertical_resolution bit is set, else SeparateFields.
ALWAYS_INLINE GPUInterlacedDisplayMode GetInterlacedDisplayMode() const
{
  if (!IsInterlacedDisplayEnabled())
    return GPUInterlacedDisplayMode::None;

  if (m_GPUSTAT.vertical_resolution)
    return GPUInterlacedDisplayMode::InterleavedFields;

  return GPUInterlacedDisplayMode::SeparateFields;
}
/// Returns the number of pending GPU ticks.
TickCount GetPendingCRTCTicks() const;
TickCount GetPendingCommandTicks() const;
@ -178,25 +134,9 @@ public:
void SynchronizeCRTC();
/// Recompile shaders/recreate framebuffers when needed.
virtual void UpdateSettings();
void UpdateSettings();
/// Updates the resolution scale when it's set to automatic.
virtual void UpdateResolutionScale();
/// Returns the effective display resolution of the GPU.
virtual std::tuple<u32, u32> GetEffectiveDisplayResolution();
// gpu_hw_d3d11.cpp
static std::unique_ptr<GPU> CreateHardwareD3D11Renderer();
// gpu_hw_opengl.cpp
static std::unique_ptr<GPU> CreateHardwareOpenGLRenderer();
// gpu_hw_vulkan.cpp
static std::unique_ptr<GPU> CreateHardwareVulkanRenderer();
// gpu_sw.cpp
static std::unique_ptr<GPU> CreateSoftwareRenderer();
// Converts window coordinates into horizontal ticks and scanlines. Returns false if out of range. Used for lightguns.
bool ConvertScreenCoordinatesToBeamTicksAndLines(s32 window_x, s32 window_y, u32* out_tick, u32* out_line) const;
@ -204,7 +144,7 @@ public:
// Returns the video clock frequency.
TickCount GetCRTCFrequency() const;
protected:
private:
TickCount CRTCTicksToSystemTicks(TickCount crtc_ticks, TickCount fractional_ticks) const;
TickCount SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const;
@ -215,161 +155,9 @@ protected:
}
ALWAYS_INLINE static constexpr TickCount SystemTicksToGPUTicks(TickCount sysclk_ticks) { return sysclk_ticks << 1; }
// Helper/format conversion functions.
static constexpr u8 Convert5To8(u8 x5) { return (x5 << 3) | (x5 & 7); }
static constexpr u8 Convert8To5(u8 x8) { return (x8 >> 3); }
/// Expands a 16-bit 5:5:5:1 pixel (red in the low bits, alpha in bit 15) to a packed 32-bit
/// value with red in bits 0-7, green in 8-15, blue in 16-23 and alpha in 24-31.
static constexpr u32 RGBA5551ToRGBA8888(u16 color)
{
  const u8 r5 = Truncate8(color & 31);
  const u8 g5 = Truncate8((color >> 5) & 31);
  const u8 b5 = Truncate8((color >> 10) & 31);
  const u8 a1 = Truncate8((color >> 15) & 1);

  // Widen each channel by shifting up three and refilling the low bits with the channel's own
  // low three bits: 00012345 -> 12345345.
  const u8 r8 = static_cast<u8>((r5 << 3) | (r5 & 0b111));
  const u8 g8 = static_cast<u8>((g5 << 3) | (g5 & 0b111));
  const u8 b8 = static_cast<u8>((b5 << 3) | (b5 & 0b111));

  // The single alpha bit becomes fully opaque or fully transparent.
  const u8 a8 = a1 ? 255 : 0;

  return ZeroExtend32(r8) | (ZeroExtend32(g8) << 8) | (ZeroExtend32(b8) << 16) | (ZeroExtend32(a8) << 24);
}
/// Packs a 32-bit colour (red in bits 0-7, alpha in bits 24-31) into a 16-bit 5:5:5:1 pixel,
/// keeping the top five bits of each colour channel and the top bit of alpha.
static constexpr u16 RGBA8888ToRGBA5551(u32 color)
{
  u16 packed = Truncate16((color >> 3) & 0x1Fu); // red   -> bits 0-4
  packed |= Truncate16((color >> 11) & 0x1Fu) << 5;  // green -> bits 5-9
  packed |= Truncate16((color >> 19) & 0x1Fu) << 10; // blue  -> bits 10-14
  packed |= Truncate16((color >> 31) & 0x01u) << 15; // alpha -> bit 15
  return packed;
}
static constexpr std::tuple<u8, u8> UnpackTexcoord(u16 texcoord)
{
return std::make_tuple(static_cast<u8>(texcoord), static_cast<u8>(texcoord >> 8));
}
static constexpr std::tuple<u8, u8, u8> UnpackColorRGB24(u32 rgb24)
{
return std::make_tuple(static_cast<u8>(rgb24), static_cast<u8>(rgb24 >> 8), static_cast<u8>(rgb24 >> 16));
}
static constexpr u32 PackColorRGB24(u8 r, u8 g, u8 b)
{
return ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16);
}
static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer,
bool remove_alpha);
/// Bit-level view of a 32-bit render command word.
/// The low 24 bits carry the colour of the first vertex; the upper bits carry per-primitive
/// flags. Several fields share bit 27 because its meaning depends on the primitive type.
union RenderCommand
{
  u32 bits;

  BitField<u32, u32, 0, 24> color_for_first_vertex;
  BitField<u32, bool, 24, 1> raw_texture_enable; // not valid for lines
  BitField<u32, bool, 25, 1> transparency_enable;
  BitField<u32, bool, 26, 1> texture_enable;
  BitField<u32, DrawRectangleSize, 27, 2> rectangle_size; // only for rectangles
  BitField<u32, bool, 27, 1> quad_polygon;                // only for polygons
  BitField<u32, bool, 27, 1> polyline;                    // only for lines
  BitField<u32, bool, 28, 1> shading_enable;              // 0 = flat, 1 = Gouraud
  // Top three bits (29..31) of the word. The width must be 3: a wider field starting at bit 29
  // would run past the end of the 32-bit backing value.
  BitField<u32, Primitive, 29, 3> primitive;

  /// Returns true if texturing should be enabled. Lines are never textured.
  bool IsTexturingEnabled() const { return (primitive != Primitive::Line) ? texture_enable : false; }

  /// Returns true if dithering should be enabled. Depends on the primitive type:
  /// polygons dither when shaded or when textured without the raw-texture flag,
  /// lines always dither, rectangles (and anything else) never do.
  bool IsDitheringEnabled() const
  {
    switch (primitive)
    {
      case Primitive::Polygon:
        return shading_enable || (texture_enable && !raw_texture_enable);

      case Primitive::Line:
        return true;

      case Primitive::Rectangle:
      default:
        return false;
    }
  }
};
union VertexPosition
{
u32 bits;
BitField<u32, s32, 0, 12> x;
BitField<u32, s32, 16, 12> y;
};
// Sprites/rectangles should be clipped to 12 bits before drawing.
// Sign-extends x via SignExtendN<11, s32>.
// NOTE(review): the template argument is 11, while the comment above and the 12-bit x/y fields of
// VertexPosition say 12 - confirm which width is intended.
static constexpr s32 TruncateVertexPosition(s32 x) { return SignExtendN<11, s32>(x); }
struct NativeVertex
{
s16 x;
s16 y;
u32 color;
u16 texcoord;
};
// Bit-level view of one 16-bit VRAM pixel: three 5-bit colour channels (r in bits 0-4, g in 5-9,
// b in 10-14) plus a single flag bit `c` in bit 15.
union VRAMPixel
{
u16 bits;
BitField<u16, u8, 0, 5> r;
BitField<u16, u8, 5, 5> g;
BitField<u16, u8, 10, 5> b;
BitField<u16, bool, 15, 1> c;
// Channel accessors widened to 8 bits via Convert5To8.
u8 GetR8() const { return Convert5To8(r); }
u8 GetG8() const { return Convert5To8(g); }
u8 GetB8() const { return Convert5To8(b); }
// Packs channel values into the pixel without range-checking them; values wider than five bits
// spill into the neighbouring field.
void Set(u8 r_, u8 g_, u8 b_, bool c_ = false)
{
bits = (ZeroExtend16(r_)) | (ZeroExtend16(g_) << 5) | (ZeroExtend16(b_) << 10) | (static_cast<u16>(c_) << 15);
}
// Same as Set(), but each channel is first clamped to the 5-bit maximum (0x1F).
void ClampAndSet(u8 r_, u8 g_, u8 b_, bool c_ = false)
{
Set(std::min<u8>(r_, 0x1F), std::min<u8>(g_, 0x1F), std::min<u8>(b_, 0x1F), c_);
}
// Sets the pixel from a packed 24-bit colour (red in the low byte), keeping the top five bits of each channel.
void SetRGB24(u32 rgb24, bool c_ = false)
{
bits = Truncate16(((rgb24 >> 3) & 0x1F) | (((rgb24 >> 11) & 0x1F) << 5) | (((rgb24 >> 19) & 0x1F) << 10)) |
(static_cast<u16>(c_) << 15);
}
// Sets the pixel from separate 8-bit channels, keeping the top five bits of each.
void SetRGB24(u8 r8, u8 g8, u8 b8, bool c_ = false)
{
bits = (ZeroExtend16(r8 >> 3)) | (ZeroExtend16(g8 >> 3) << 5) | (ZeroExtend16(b8 >> 3) << 10) |
(static_cast<u16>(c_) << 15);
}
// Adds the DITHER_MATRIX entry for (x & 3, y & 3) to each 8-bit channel, clamps to 0..255,
// then stores the result via SetRGB24.
void SetRGB24Dithered(u32 x, u32 y, u8 r8, u8 g8, u8 b8, bool c_ = false)
{
const s32 offset = DITHER_MATRIX[y & 3][x & 3];
r8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(r8)) + offset, 0, 255));
g8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(g8)) + offset, 0, 255));
b8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(b8)) + offset, 0, 255));
SetRGB24(r8, g8, b8, c_);
}
// Returns the colour as packed 24-bit RGB (red in the low byte); each 5-bit channel is widened
// by shifting up three and refilling the low bits with its own low three bits.
u32 ToRGB24() const
{
const u32 r_ = ZeroExtend32(r.GetValue());
const u32 g_ = ZeroExtend32(g.GetValue());
const u32 b_ = ZeroExtend32(b.GetValue());
return ((r_ << 3) | (r_ & 7)) | (((g_ << 3) | (g_ & 7)) << 8) | (((b_ << 3) | (b_ & 7)) << 16);
}
};
void SoftReset();
// Sets dots per scanline
@ -390,21 +178,6 @@ protected:
void CRTCTickEvent(TickCount ticks);
void CommandTickEvent(TickCount ticks);
/// Returns 0 if the currently-displayed field is on odd lines (1,3,5,...) or 1 if even (2,4,6,...).
ALWAYS_INLINE u32 GetInterlacedDisplayField() const { return ZeroExtend32(m_crtc_state.interlaced_field); }
/// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1.
ALWAYS_INLINE u32 GetActiveLineLSB() const { return ZeroExtend32(m_crtc_state.active_line_lsb); }
/// Sets/decodes GP0(E1h) (set draw mode).
void SetDrawMode(u16 bits);
/// Sets/decodes polygon/rectangle texture palette value.
void SetTexturePalette(u16 bits);
/// Sets/decodes texture window bits.
void SetTextureWindow(u32 value);
u32 ReadGPUREAD();
void FinishVRAMWrite();
@ -425,17 +198,6 @@ protected:
void ExecuteCommands();
void HandleGetGPUInfoCommand(u32 value);
// Rendering in the backend
virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height);
virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color);
virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data);
virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height);
virtual void DispatchRenderCommand();
virtual void FlushRender();
virtual void ClearDisplay();
virtual void UpdateDisplay();
virtual void DrawRendererStats(bool is_idle_frame);
// These are **very** approximate.
ALWAYS_INLINE void AddDrawTriangleTicks(u32 width, u32 height, bool shaded, bool textured, bool semitransparent)
{
@ -470,21 +232,16 @@ protected:
AddCommandTicks(std::max(width, height));
}
HostDisplay* m_host_display = nullptr;
std::unique_ptr<TimingEvent> m_crtc_tick_event;
std::unique_ptr<TimingEvent> m_command_tick_event;
// Pointer to VRAM, used for reads/writes. In the hardware backends, this is the shadow buffer.
u16* m_vram_ptr = nullptr;
union GPUSTAT
{
u32 bits;
BitField<u32, u8, 0, 4> texture_page_x_base;
BitField<u32, u8, 4, 1> texture_page_y_base;
BitField<u32, TransparencyMode, 5, 2> semi_transparency_mode;
BitField<u32, TextureMode, 7, 2> texture_color_mode;
BitField<u32, GPUTransparencyMode, 5, 2> semi_transparency_mode;
BitField<u32, GPUTextureMode, 7, 2> texture_color_mode;
BitField<u32, bool, 9, 1> dither_enable;
BitField<u32, bool, 10, 1> draw_to_displayed_field;
BitField<u32, bool, 11, 1> set_mask_while_drawing;
@ -537,105 +294,18 @@ protected:
}
} m_GPUSTAT = {};
struct DrawMode
{
static constexpr u16 PALETTE_MASK = UINT16_C(0b0111111111111111);
static constexpr u32 TEXTURE_WINDOW_MASK = UINT32_C(0b11111111111111111111);
// bits in GP0(E1h) or texpage part of polygon
// Bit-level view of the 16-bit draw mode value (GP0(E1h) or the texpage part of a polygon).
union Reg
{
// Bits of the value that are kept when it is latched (bits 0..12).
// NOTE(review): texture_y_flip below sits at bit 13, outside this mask - confirm whether that is intended.
static constexpr u16 MASK = 0b1111111111111;
// Bits 0..4: texture page X and Y base.
static constexpr u16 TEXTURE_PAGE_MASK = UINT16_C(0b0000000000011111);
// Polygon texpage commands only affect bits 0-8, 11
static constexpr u16 POLYGON_TEXPAGE_MASK = 0b0000100111111111;
// Bits 0..10 are returned in the GPU status register, latched at E1h/polygon draw time.
static constexpr u32 GPUSTAT_MASK = 0b11111111111;
u16 bits;
BitField<u16, u8, 0, 4> texture_page_x_base;
BitField<u16, u8, 4, 1> texture_page_y_base;
BitField<u16, TransparencyMode, 5, 2> transparency_mode;
BitField<u16, TextureMode, 7, 2> texture_mode;
BitField<u16, bool, 9, 1> dither_enable;
BitField<u16, bool, 10, 1> draw_to_displayed_field;
BitField<u16, bool, 11, 1> texture_disable;
BitField<u16, bool, 12, 1> texture_x_flip;
BitField<u16, bool, 13, 1> texture_y_flip;
// Texture page origin in VRAM pixels: the X base counts in steps of 64, the Y base in steps of 256.
u32 GetTexturePageXBase() const { return ZeroExtend32(texture_page_x_base.GetValue()) * 64; }
u32 GetTexturePageYBase() const { return ZeroExtend32(texture_page_y_base.GetValue()) * 256; }
};
// original values
Reg mode_reg;
u16 palette_reg; // from vertex
u32 texture_window_value;
// decoded values
u32 texture_page_x;
u32 texture_page_y;
u32 texture_palette_x;
u32 texture_palette_y;
u8 texture_window_mask_x; // in 8 pixel steps
u8 texture_window_mask_y; // in 8 pixel steps
u8 texture_window_offset_x; // in 8 pixel steps
u8 texture_window_offset_y; // in 8 pixel steps
bool texture_x_flip;
bool texture_y_flip;
bool texture_page_changed;
bool texture_window_changed;
/// Returns the texture/palette rendering mode.
TextureMode GetTextureMode() const { return mode_reg.texture_mode; }
/// Returns the semi-transparency mode when enabled.
TransparencyMode GetTransparencyMode() const { return mode_reg.transparency_mode; }
/// Returns true if the texture mode requires a palette.
bool IsUsingPalette() const { return (mode_reg.bits & (2 << 7)) == 0; }
/// Returns a rectangle comprising the texture page area.
Common::Rectangle<u32> GetTexturePageRectangle() const
{
static constexpr std::array<u32, 4> texture_page_widths = {
{TEXTURE_PAGE_WIDTH / 4, TEXTURE_PAGE_WIDTH / 2, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_WIDTH}};
return Common::Rectangle<u32>::FromExtents(texture_page_x, texture_page_y,
texture_page_widths[static_cast<u8>(mode_reg.texture_mode.GetValue())],
TEXTURE_PAGE_HEIGHT);
}
/// Returns a rectangle comprising the texture palette area.
/// The palette is a single row starting at the decoded palette position; it is 16 entries wide for
/// mode 0, 256 entries wide for mode 1, and empty for the remaining modes.
Common::Rectangle<u32> GetTexturePaletteRectangle() const
{
  // Palette width for each of the four possible texture mode values.
  static constexpr std::array<u32, 4> entries_by_mode = {{16, 256, 0, 0}};

  const u8 mode_index = static_cast<u8>(mode_reg.texture_mode.GetValue());
  const u32 palette_width = entries_by_mode[mode_index];
  return Common::Rectangle<u32>::FromExtents(texture_palette_x, texture_palette_y, palette_width, 1);
}
// Accessors for the texture_page_changed flag: query, raise and clear.
bool IsTexturePageChanged() const { return texture_page_changed; }
void SetTexturePageChanged() { texture_page_changed = true; }
void ClearTexturePageChangedFlag() { texture_page_changed = false; }
// Accessors for the texture_window_changed flag: query, raise and clear.
bool IsTextureWindowChanged() const { return texture_window_changed; }
void SetTextureWindowChanged() { texture_window_changed = true; }
void ClearTextureWindowChangedFlag() { texture_window_changed = false; }
} m_draw_mode = {};
Common::Rectangle<u32> m_drawing_area{0, 0, VRAM_WIDTH, VRAM_HEIGHT};
// Signed X/Y drawing offset, zero-initialised. HandleRenderPolygonCommand() adds it to each incoming
// vertex position before the vertex is handed to the backend.
struct DrawingOffset
{
s32 x;
s32 y;
} m_drawing_offset = {};
Common::Rectangle<u32> m_drawing_area{0, 0, VRAM_WIDTH, VRAM_HEIGHT};
GPUDrawModeReg m_draw_mode{};
GPUTextureWindowReg m_texture_window{};
bool m_console_is_pal = false;
bool m_set_texture_disable_mask = false;
bool m_drawing_area_changed = false;
bool m_force_progressive_scan = false;
bool m_force_ntsc_timings = false;
@ -733,7 +403,7 @@ protected:
HeapFIFOQueue<u64, MAX_FIFO_SIZE> m_fifo;
std::vector<u32> m_blit_buffer;
u32 m_blit_remaining_words;
RenderCommand m_render_command{};
GPURenderCommand m_render_command{};
// m_fifo stores 64-bit entries; these helpers hand back only the 32-bit truncation of the entry.
// FifoPop() removes the front entry, FifoPeek() leaves it in place.
// NOTE(review): presumably the upper half carries a source address (see the "maddr_and_pos" pop in
// HandleRenderPolygonCommand) - confirm against the FIFO writer.
ALWAYS_INLINE u32 FifoPop() { return Truncate32(m_fifo.Pop()); }
ALWAYS_INLINE u32 FifoPeek() { return Truncate32(m_fifo.Peek()); }
@ -754,11 +424,17 @@ protected:
Stats m_stats = {};
Stats m_last_stats = {};
private:
using GP0CommandHandler = bool (GPU::*)();
using GP0CommandHandlerTable = std::array<GP0CommandHandler, 256>;
static GP0CommandHandlerTable GenerateGP0CommandHandlerTable();
void ClearDisplay();
void UpdateDisplay();
void FillBackendCommandParameters(GPUBackendCommand* cmd) const;
void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const;
void UpdateDrawingArea();
void FlushRender();
// Rendering commands, returns false if not enough data is provided
bool HandleUnknownGP0Command();
bool HandleNOPCommand();
@ -774,6 +450,7 @@ private:
bool HandleRenderRectangleCommand();
bool HandleRenderLineCommand();
bool HandleRenderPolyLineCommand();
void FinishPolyLineRenderCommand();
bool HandleFillRectangleCommand();
bool HandleCopyRectangleCPUToVRAMCommand();
bool HandleCopyRectangleVRAMToCPUCommand();
@ -782,6 +459,4 @@ private:
static const GP0CommandHandlerTable s_GP0_command_handler_table;
};
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPU::TextureMode);
extern std::unique_ptr<GPU> g_gpu;
extern GPU g_gpu;

679
src/core/gpu_backend.cpp Normal file
View File

@ -0,0 +1,679 @@
#include "gpu_backend.h"
#include "common/log.h"
#include "common/state_wrapper.h"
#include "settings.h"
#include "gpu_hw_opengl.h"
#include "gpu_hw_vulkan.h"
#include "gpu_sw.h"
#ifdef WIN32
#include "gpu_hw_d3d11.h"
#endif
Log_SetChannel(GPUBackend);
std::unique_ptr<GPUBackend> g_gpu_backend;
// Construction and destruction have no custom logic; they are defined out-of-line in this translation unit.
GPUBackend::GPUBackend() = default;
GPUBackend::~GPUBackend() = default;
/// Instantiates (but does not initialize) the backend implementation for the requested renderer.
/// The D3D11 backend is only available when WIN32 is defined. Software, and any renderer value that
/// is not matched explicitly, yields the software backend.
static std::unique_ptr<GPUBackend> CreateBackend(GPURenderer backend)
{
#ifdef WIN32
  if (backend == GPURenderer::HardwareD3D11)
    return std::make_unique<GPU_HW_D3D11>();
#endif

  if (backend == GPURenderer::HardwareOpenGL)
    return std::make_unique<GPU_HW_OpenGL>();

  if (backend == GPURenderer::HardwareVulkan)
    return std::make_unique<GPU_HW_Vulkan>();

  // GPURenderer::Software and everything unrecognised.
  return std::make_unique<GPU_SW>();
}
/// Creates and initializes the global backend (g_gpu_backend) for the requested renderer.
/// If the requested backend cannot be created or initialized, a software backend is tried instead.
/// Returns false, leaving g_gpu_backend empty, only when the software fallback also fails to initialize.
bool GPUBackend::Create(GPURenderer backend)
{
  g_gpu_backend = CreateBackend(backend);
  if (g_gpu_backend && g_gpu_backend->Initialize())
    return true;

  Log_ErrorPrintf("Failed to initialize GPU backend, falling back to software");
  g_gpu_backend.reset();
  g_gpu_backend = CreateBackend(GPURenderer::Software);
  if (g_gpu_backend->Initialize())
    return true;

  // Nothing usable; don't leave a half-initialized backend behind.
  g_gpu_backend.reset();
  return false;
}
/// Base-class initialization: performs no work and always reports success.
/// Declared virtual in the header so derived backends can override it; GPUBackend::Create() treats a
/// false return as an initialization failure.
bool GPUBackend::Initialize()
{
return true;
}
/// Restores the backend's mirrored drawing/display state to its defaults: an empty drawing area,
/// a 1:1 aspect ratio, zeroed display geometry, no interlacing, and the display disabled in 15-bit mode.
void GPUBackend::Reset()
{
  m_drawing_area = {};
  m_display_aspect_ratio = 1.0f;

  // Display size and origin.
  m_display_width = m_display_height = 0;
  m_display_origin_left = m_display_origin_top = 0;

  // Displayed VRAM rectangle and start coordinates.
  m_display_vram_left = m_display_vram_top = 0;
  m_display_vram_width = m_display_vram_height = 0;
  m_display_vram_start_x = m_display_vram_start_y = 0;

  // Interlacing and display mode flags.
  m_display_interlace = GPUInterlacedDisplayMode::None;
  m_display_interlace_field = 0;
  m_display_enabled = m_display_24bit = false;
}
// Default no-op implementations of the virtual hooks declared in gpu_backend.h; the base class has
// no settings-dependent or graphics-API state of its own.
void GPUBackend::UpdateSettings() {}
void GPUBackend::ResetGraphicsAPIState() {}
void GPUBackend::RestoreGraphicsAPIState() {}
/// Base-class default: reports that this is not a hardware renderer. Virtual, so derived backends may override.
bool GPUBackend::IsHardwareRenderer() const
{
return false;
}
// Default no-op; the base class keeps no resolution-scale state.
void GPUBackend::UpdateResolutionScale() {}
/// Returns the effective display resolution as (width, height).
/// The base implementation reports the size of the displayed VRAM rectangle, widened from u16 to u32.
std::tuple<u32, u32> GPUBackend::GetEffectiveDisplayResolution()
{
  return std::tuple<u32, u32>(m_display_vram_width, m_display_vram_height);
}
// Default no-op; the base class has no renderer statistics to draw.
void GPUBackend::DrawRendererStats(bool is_idle_frame) {}
/// Serializes or deserializes the backend state through the given state wrapper.
/// The VRAM contents come first, followed by the drawing area and the mirrored display state.
/// The order of the sw.Do() calls defines the save-state layout and must stay in sync between
/// reading and writing.
/// @return true when the wrapper reports no error after all fields were processed.
bool GPUBackend::DoState(StateWrapper& sw)
{
if (sw.IsReading())
{
// Still need a temporary here.
// The image is read into a scratch buffer and then pushed through the virtual UpdateVRAM() path
// (with default-constructed parameters) rather than being written into m_vram_ptr directly.
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> temp;
sw.DoBytes(temp.data(), VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, temp.data(), {});
}
else
{
// Writing: finish pending rendering and issue a full-VRAM read before the bytes behind
// m_vram_ptr are written out.
FlushRender();
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
sw.DoBytes(m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
}
// Drawing area, stored edge by edge.
sw.Do(&m_drawing_area.left);
sw.Do(&m_drawing_area.top);
sw.Do(&m_drawing_area.right);
sw.Do(&m_drawing_area.bottom);
// Mirrored display state (same fields that the UpdateDisplay command fills in).
sw.Do(&m_display_aspect_ratio);
sw.Do(&m_display_width);
sw.Do(&m_display_height);
sw.Do(&m_display_origin_left);
sw.Do(&m_display_origin_top);
sw.Do(&m_display_vram_left);
sw.Do(&m_display_vram_top);
sw.Do(&m_display_vram_width);
sw.Do(&m_display_vram_height);
sw.Do(&m_display_vram_start_x);
sw.Do(&m_display_vram_start_y);
sw.Do(&m_display_interlace);
sw.Do(&m_display_interlace_field);
sw.Do(&m_display_enabled);
sw.Do(&m_display_24bit);
return !sw.HasError();
}
/// Reserves FIFO space for a Reset command and stamps its type and size.
/// The command is not submitted until it is passed to PushCommand().
GPUBackendResetCommand* GPUBackend::NewResetCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendResetCommand));
  auto* const cmd = static_cast<GPUBackendResetCommand*>(storage);
  cmd->type = GPUBackendCommandType::Reset;
  cmd->size = cmd->Size();
  return cmd;
}
/// Reserves FIFO space for an UpdateSettings command and stamps its type and size.
/// The command is not submitted until it is passed to PushCommand().
GPUBackendUpdateSettingsCommand* GPUBackend::NewUpdateSettingsCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendUpdateSettingsCommand));
  auto* const cmd = static_cast<GPUBackendUpdateSettingsCommand*>(storage);
  cmd->type = GPUBackendCommandType::UpdateSettings;
  cmd->size = cmd->Size();
  return cmd;
}
/// Reserves FIFO space for an UpdateResolutionScale command and stamps its type and size.
/// The command is not submitted until it is passed to PushCommand().
GPUBackendUpdateResolutionScaleCommand* GPUBackend::NewUpdateResolutionScaleCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendUpdateResolutionScaleCommand));
  auto* const cmd = static_cast<GPUBackendUpdateResolutionScaleCommand*>(storage);
  cmd->type = GPUBackendCommandType::UpdateResolutionScale;
  cmd->size = cmd->Size();
  return cmd;
}
/// Reserves FIFO space for a ReadVRAM command and stamps its type and size.
/// The caller fills in the rectangle and submits the command with PushCommand().
GPUBackendReadVRAMCommand* GPUBackend::NewReadVRAMCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendReadVRAMCommand));
  auto* const cmd = static_cast<GPUBackendReadVRAMCommand*>(storage);
  cmd->type = GPUBackendCommandType::ReadVRAM;
  cmd->size = cmd->Size();
  return cmd;
}
/// Reserves FIFO space for a FillVRAM command and stamps its type and size.
/// The caller fills in the rectangle, color and parameters and submits the command with PushCommand().
GPUBackendFillVRAMCommand* GPUBackend::NewFillVRAMCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendFillVRAMCommand));
  auto* const cmd = static_cast<GPUBackendFillVRAMCommand*>(storage);
  cmd->type = GPUBackendCommandType::FillVRAM;
  cmd->size = cmd->Size();
  return cmd;
}
/// Reserves FIFO space for an UpdateVRAM command followed by its pixel payload, and stamps the
/// command's type and total size.
/// @param num_words Number of u16 payload elements to reserve after the command header.
GPUBackendUpdateVRAMCommand* GPUBackend::NewUpdateVRAMCommand(u32 num_words)
{
  // Header plus trailing payload; this is also what the consumer uses to step over the command.
  const u32 total_size = sizeof(GPUBackendUpdateVRAMCommand) + (num_words * sizeof(u16));
  auto* const cmd = static_cast<GPUBackendUpdateVRAMCommand*>(AllocateCommand(total_size));
  cmd->type = GPUBackendCommandType::UpdateVRAM;
  cmd->size = total_size;
  return cmd;
}
/// Reserves FIFO space for a CopyVRAM command and stamps its type and size.
/// The caller fills in the source/destination rectangle and submits the command with PushCommand().
GPUBackendCopyVRAMCommand* GPUBackend::NewCopyVRAMCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendCopyVRAMCommand));
  auto* const cmd = static_cast<GPUBackendCopyVRAMCommand*>(storage);
  cmd->type = GPUBackendCommandType::CopyVRAM;
  cmd->size = cmd->Size();
  return cmd;
}
/// Reserves FIFO space for a SetDrawingArea command and stamps its type and size.
/// The caller fills in new_area and submits the command with PushCommand().
GPUBackendSetDrawingAreaCommand* GPUBackend::NewSetDrawingAreaCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendSetDrawingAreaCommand));
  auto* const cmd = static_cast<GPUBackendSetDrawingAreaCommand*>(storage);
  cmd->type = GPUBackendCommandType::SetDrawingArea;
  cmd->size = cmd->Size();
  return cmd;
}
/// Reserves FIFO space for a DrawPolygon command with room for the given number of trailing vertices,
/// and stamps its type, total size and vertex count.
/// @param num_vertices Number of vertices that will follow the command header (stored truncated to 16 bits).
GPUBackendDrawPolygonCommand* GPUBackend::NewDrawPolygonCommand(u32 num_vertices)
{
  // Header plus the variable-length vertex array.
  const u32 total_size =
    sizeof(GPUBackendDrawPolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPolygonCommand::Vertex));
  auto* const cmd = static_cast<GPUBackendDrawPolygonCommand*>(AllocateCommand(total_size));
  cmd->type = GPUBackendCommandType::DrawPolygon;
  cmd->size = total_size;
  cmd->num_vertices = Truncate16(num_vertices);
  return cmd;
}
/// Reserves FIFO space for a DrawRectangle command and stamps its type and size.
/// The caller fills in the rectangle parameters and submits the command with PushCommand().
GPUBackendDrawRectangleCommand* GPUBackend::NewDrawRectangleCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendDrawRectangleCommand));
  auto* const cmd = static_cast<GPUBackendDrawRectangleCommand*>(storage);
  cmd->type = GPUBackendCommandType::DrawRectangle;
  cmd->size = cmd->Size();
  return cmd;
}
/// Reserves FIFO space for a DrawLine command with room for the given number of trailing vertices,
/// and stamps its type, total size and vertex count.
/// @param num_vertices Number of vertices that will follow the command header (stored truncated to 16 bits).
GPUBackendDrawLineCommand* GPUBackend::NewDrawLineCommand(u32 num_vertices)
{
  const u32 size = sizeof(GPUBackendDrawLineCommand) + (num_vertices * sizeof(GPUBackendDrawLineCommand::Vertex));
  GPUBackendDrawLineCommand* cmd = static_cast<GPUBackendDrawLineCommand*>(AllocateCommand(size));
  cmd->type = GPUBackendCommandType::DrawLine;

  // Use the size computed above, exactly as the other variable-length commands do. Calling
  // cmd->Size() here would run before num_vertices has been written into the freshly allocated
  // (uninitialised) command, so the recorded size could not account for the trailing vertices.
  cmd->size = size;
  cmd->num_vertices = Truncate16(num_vertices);
  return cmd;
}
/// Reserves FIFO space for a ClearDisplay command and stamps its type and size.
/// The command is not submitted until it is passed to PushCommand().
GPUBackendClearDisplayCommand* GPUBackend::NewClearDisplayCommand()
{
  // Allocate for the command type that is actually written; the previous code requested
  // sizeof(GPUBackendUpdateVRAMCommand), which does not match the object placed in the FIFO.
  GPUBackendClearDisplayCommand* cmd =
    static_cast<GPUBackendClearDisplayCommand*>(AllocateCommand(sizeof(GPUBackendClearDisplayCommand)));
  cmd->type = GPUBackendCommandType::ClearDisplay;
  cmd->size = cmd->Size();
  return cmd;
}
/// Reserves FIFO space for an UpdateDisplay command and stamps its type and size.
/// The caller fills in the display state and submits the command with PushCommand().
GPUBackendUpdateDisplayCommand* GPUBackend::NewUpdateDisplayCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendUpdateDisplayCommand));
  auto* const cmd = static_cast<GPUBackendUpdateDisplayCommand*>(storage);
  cmd->type = GPUBackendCommandType::UpdateDisplay;
  cmd->size = cmd->Size();
  return cmd;
}
/// Reserves FIFO space for a FlushRender command and stamps its type and size.
/// The command is not submitted until it is passed to PushCommand().
GPUBackendFlushRenderCommand* GPUBackend::NewFlushRenderCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendFlushRenderCommand));
  auto* const cmd = static_cast<GPUBackendFlushRenderCommand*>(storage);
  cmd->type = GPUBackendCommandType::FlushRender;
  cmd->size = cmd->Size();
  return cmd;
}
/// Returns a pointer to `size` bytes at the current write position of the command FIFO.
/// The write pointer itself is not advanced here; that happens in PushCommand().
/// When the remaining space cannot hold the command plus a trailing sync command, Sync() is called
/// and the allocation is retried.
void* GPUBackend::AllocateCommand(u32 size)
{
  // Always keep room for a sync command behind the allocation, since Sync() writes one in place.
  const auto required_size = size + sizeof(GPUBackendSyncCommand);

  u32 write_ptr = m_command_fifo_write_ptr.load();
  while (required_size > (COMMAND_QUEUE_SIZE - write_ptr))
  {
    Sync();
    write_ptr = m_command_fifo_write_ptr.load();
  }

  return &m_command_fifo_data[write_ptr];
}
/// Returns the number of bytes between the FIFO read and write pointers, i.e. the amount of
/// command data that has been pushed but not yet consumed.
u32 GPUBackend::GetPendingCommandSize() const
{
  const u32 consumed = m_command_fifo_read_ptr.load();
  const u32 produced = m_command_fifo_write_ptr.load();
  return produced - consumed;
}
/// Submits a command previously obtained from one of the New*Command() helpers.
/// With g_settings.cpu_thread disabled the command is executed immediately on the calling thread
/// (sync commands are ignored). Otherwise the FIFO write pointer is advanced past the command and
/// the GPU thread is woken for sync/frame-done commands or once enough data is pending.
void GPUBackend::PushCommand(GPUBackendCommand* cmd)
{
  if (!g_settings.cpu_thread)
  {
    // single-thread mode
    if (cmd->type != GPUBackendCommandType::Sync)
      HandleCommand(cmd);

    return;
  }

  // Publish the command by moving the write pointer over it.
  const u32 new_write_ptr = m_command_fifo_write_ptr.fetch_add(cmd->size) + cmd->size;
  DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE);

  const bool must_wake =
    (cmd->type == GPUBackendCommandType::Sync || cmd->type == GPUBackendCommandType::FrameDone);
  if (must_wake || (new_write_ptr - m_command_fifo_read_ptr.load()) >= THRESHOLD_TO_WAKE_GPU)
    WakeGPUThread();
}
/// Notifies the GPU thread's condition variable, but only if the thread has flagged itself as sleeping.
/// The flag is inspected while holding m_sync_mutex.
void GPUBackend::WakeGPUThread()
{
  std::unique_lock<std::mutex> lock(m_sync_mutex);
  if (m_gpu_thread_sleeping.load())
    m_wake_gpu_thread_cv.notify_one();
}
/// Blocks the caller until the GPU thread has consumed everything in the command FIFO.
/// Does nothing when g_settings.cpu_thread is disabled, since commands are then executed
/// immediately in PushCommand().
void GPUBackend::Sync()
{
if (!g_settings.cpu_thread)
return;
// since we do this on wrap-around, it can't go through the regular path
// (AllocateCommand() calls Sync() when the FIFO is full, so the sync command is written in place;
// AllocateCommand() always reserves room for it, which the assertion below checks.)
const u32 write_ptr = m_command_fifo_write_ptr.load();
Assert((COMMAND_QUEUE_SIZE - write_ptr) >= sizeof(GPUBackendSyncCommand));
GPUBackendSyncCommand* cmd = reinterpret_cast<GPUBackendSyncCommand*>(&m_command_fifo_data[write_ptr]);
cmd->type = GPUBackendCommandType::Sync;
cmd->size = cmd->Size();
PushCommand(cmd);
// ProcessGPUCommands() signals this event after it has reset both FIFO pointers to zero.
m_sync_event.Wait();
m_sync_event.Reset();
}
/// Queues a FrameDone marker so that the GPU thread's RunGPUFrame() loop knows where the frame ends.
/// Does nothing when g_settings.cpu_thread is disabled.
void GPUBackend::CPUFrameDone()
{
  if (g_settings.cpu_thread)
  {
    void* const storage = AllocateCommand(sizeof(GPUBackendFrameDoneCommand));
    auto* const cmd = static_cast<GPUBackendFrameDoneCommand*>(storage);
    cmd->type = GPUBackendCommandType::FrameDone;
    cmd->size = cmd->Size();
    PushCommand(cmd);
  }
}
/// Drains the command FIFO, executing each command in order.
/// Returns when the FIFO is empty, after handling a Sync command, or after a FrameDone command
/// (in which case m_frame_done is set).
void GPUBackend::ProcessGPUCommands()
{
for (;;)
{
// Snapshot the pointers; anything pushed after this snapshot is picked up by the next outer iteration.
const u32 write_ptr = m_command_fifo_write_ptr.load();
u32 read_ptr = m_command_fifo_read_ptr.load();
if (read_ptr == write_ptr)
return;
while (read_ptr < write_ptr)
{
const GPUBackendCommand* cmd = reinterpret_cast<const GPUBackendCommand*>(&m_command_fifo_data[read_ptr]);
// Each command records its own size, which is how the reader steps to the next one.
read_ptr += cmd->size;
if (cmd->type == GPUBackendCommandType::Sync)
{
// A sync is expected to be the last command in the FIFO (the producer blocks in Sync() after
// pushing it), so both pointers can be rewound to the start before the producer is released.
Assert(read_ptr == m_command_fifo_write_ptr.load());
m_command_fifo_read_ptr.store(0);
m_command_fifo_write_ptr.store(0);
m_sync_event.Signal();
return;
}
else if (cmd->type == GPUBackendCommandType::FrameDone)
{
// Publish progress and stop; RunGPUFrame() checks m_frame_done to leave its loop.
m_frame_done = true;
m_command_fifo_read_ptr.store(read_ptr);
return;
}
else
{
HandleCommand(cmd);
}
}
// The read pointer is only published once the whole snapshot has been consumed.
m_command_fifo_read_ptr.store(read_ptr);
}
}
void GPUBackend::RunGPUFrame()
{
m_frame_done = false;
for (;;)
{
g_gpu_backend->ProcessGPUCommands();
if (m_frame_done)
break;
std::unique_lock<std::mutex> lock(m_sync_mutex);
m_gpu_thread_sleeping.store(true);
m_wake_gpu_thread_cv.wait(lock);
m_gpu_thread_sleeping.store(false);
}
}
/// Drains whatever is still queued and rewinds both FIFO pointers to the start of the buffer.
/// The assertion requires that the FIFO is fully consumed after the drain.
/// NOTE(review): presumably called while the producer is not pushing commands - confirm with callers.
void GPUBackend::EndGPUFrame()
{
g_gpu_backend->ProcessGPUCommands();
Assert(m_command_fifo_read_ptr.load() == m_command_fifo_write_ptr.load());
m_command_fifo_read_ptr.store(0);
m_command_fifo_write_ptr.store(0);
}
// Default no-op; HandleCommand() calls this after m_drawing_area changes so that backends which
// override it can react.
void GPUBackend::SetScissorFromDrawingArea() {}
/// Executes a single backend command by dispatching on its type to the corresponding virtual method.
/// VRAM transfer commands and drawing-area changes flush pending rendering first; draw commands
/// are forwarded as-is. Sync and FrameDone are consumed by ProcessGPUCommands() and never reach
/// a case here.
/// NOTE(review): Reset, UpdateSettings and UpdateResolutionScale commands can be created via the
/// New*Command() helpers but have no case below, so they currently fall into `default` and are
/// ignored - confirm whether that is intentional.
void GPUBackend::HandleCommand(const GPUBackendCommand* cmd)
{
switch (cmd->type)
{
case GPUBackendCommandType::ReadVRAM:
{
FlushRender();
const GPUBackendReadVRAMCommand* ccmd = static_cast<const GPUBackendReadVRAMCommand*>(cmd);
ReadVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height));
}
break;
case GPUBackendCommandType::FillVRAM:
{
FlushRender();
const GPUBackendFillVRAMCommand* ccmd = static_cast<const GPUBackendFillVRAMCommand*>(cmd);
FillVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height),
ccmd->color, ccmd->params);
}
break;
case GPUBackendCommandType::UpdateVRAM:
{
FlushRender();
const GPUBackendUpdateVRAMCommand* ccmd = static_cast<const GPUBackendUpdateVRAMCommand*>(cmd);
UpdateVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height),
ccmd->data, ccmd->params);
}
break;
case GPUBackendCommandType::CopyVRAM:
{
FlushRender();
const GPUBackendCopyVRAMCommand* ccmd = static_cast<const GPUBackendCopyVRAMCommand*>(cmd);
CopyVRAM(ZeroExtend32(ccmd->src_x), ZeroExtend32(ccmd->src_y), ZeroExtend32(ccmd->dst_x),
ZeroExtend32(ccmd->dst_y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->params);
}
break;
case GPUBackendCommandType::SetDrawingArea:
{
// Flush before the drawing area changes, then store the new area and let the backend react to it.
FlushRender();
m_drawing_area = static_cast<const GPUBackendSetDrawingAreaCommand*>(cmd)->new_area;
SetScissorFromDrawingArea();
}
break;
case GPUBackendCommandType::DrawPolygon:
{
DrawPolygon(static_cast<const GPUBackendDrawPolygonCommand*>(cmd));
}
break;
case GPUBackendCommandType::DrawRectangle:
{
DrawRectangle(static_cast<const GPUBackendDrawRectangleCommand*>(cmd));
}
break;
case GPUBackendCommandType::DrawLine:
{
DrawLine(static_cast<const GPUBackendDrawLineCommand*>(cmd));
}
break;
case GPUBackendCommandType::ClearDisplay:
{
ClearDisplay();
}
break;
case GPUBackendCommandType::UpdateDisplay:
{
// Mirror the display state carried by the command into the backend before presenting.
const GPUBackendUpdateDisplayCommand* ccmd = static_cast<const GPUBackendUpdateDisplayCommand*>(cmd);
m_display_aspect_ratio = ccmd->display_aspect_ratio;
m_display_width = ccmd->display_width;
m_display_height = ccmd->display_height;
m_display_origin_left = ccmd->display_origin_left;
m_display_origin_top = ccmd->display_origin_top;
m_display_vram_left = ccmd->display_vram_left;
m_display_vram_top = ccmd->display_vram_top;
m_display_vram_width = ccmd->display_vram_width;
m_display_vram_height = ccmd->display_vram_height;
m_display_vram_start_x = ccmd->display_vram_start_x;
m_display_vram_start_y = ccmd->display_vram_start_y;
m_display_interlace = ccmd->display_interlace;
m_display_interlace_field = ccmd->display_interlace_field;
m_display_enabled = ccmd->display_enabled;
m_display_24bit = ccmd->display_24bit;
UpdateDisplay();
}
break;
case GPUBackendCommandType::FlushRender:
{
FlushRender();
}
break;
default:
break;
}
}
/// CPU implementation of a VRAM rectangle fill.
/// The color is converted to RGBA5551 once and written to every pixel of the rectangle, wrapping
/// around both VRAM axes. With interlaced rendering enabled, rows whose least-significant bit
/// matches params.active_line_lsb are left untouched.
void GPUBackend::SoftwareFillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params)
{
  const u16 fill_value = RGBA8888ToRGBA5551(color);
  const bool interlaced = params.interlaced_rendering;

  // Fast path: no horizontal wrap and no field skipping, so each row is one contiguous fill.
  if (!interlaced && (x + width) <= VRAM_WIDTH)
  {
    for (u32 row_index = 0; row_index < height; row_index++)
    {
      const u32 vram_row = (y + row_index) % VRAM_HEIGHT;
      std::fill_n(&m_vram_ptr[vram_row * VRAM_WIDTH + x], width, fill_value);
    }

    return;
  }

  // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field.
  const u32 skipped_field = params.active_line_lsb;
  for (u32 row_index = 0; row_index < height; row_index++)
  {
    const u32 vram_row = (y + row_index) % VRAM_HEIGHT;
    if (interlaced && (vram_row & u32(1)) == skipped_field)
      continue;

    u16* const line = &m_vram_ptr[vram_row * VRAM_WIDTH];
    for (u32 column_index = 0; column_index < width; column_index++)
      line[(x + column_index) % VRAM_WIDTH] = fill_value;
  }
}
/// CPU implementation of a CPU->VRAM rectangle upload.
/// @param x,y          Top-left destination coordinate in VRAM.
/// @param width,height Size of the rectangle; `data` holds width*height tightly packed u16 pixels.
/// @param data         Source pixels, row-major.
/// @param params       Supplies the mask test (GetMaskAND) and mask set (GetMaskOR) bits.
/// Rectangles that fit inside VRAM with masking disabled are copied row by row; everything else
/// goes through a per-pixel path that wraps around both VRAM axes and applies the mask bits.
void GPUBackend::SoftwareUpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data,
                                    GPUBackendCommandParameters params)
{
  // Fast path when the copy is not oversized.
  if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !params.IsMaskingEnabled())
  {
    const u16* src_ptr = static_cast<const u16*>(data);
    u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x];
    for (u32 yoffs = 0; yoffs < height; yoffs++)
    {
      std::copy_n(src_ptr, width, dst_ptr);
      src_ptr += width;
      dst_ptr += VRAM_WIDTH;
    }
  }
  else
  {
    // Slow path when we need to handle wrap-around.
    const u16* src_ptr = static_cast<const u16*>(data);
    const u16 mask_and = params.GetMaskAND();
    const u16 mask_or = params.GetMaskOR();

    for (u32 row = 0; row < height; row++)
    {
      u16* dst_row_ptr = &m_vram_ptr[((y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
      for (u32 col = 0; col < width; col++)
      {
        // TODO: Handle unaligned reads...
        // The source pixel is consumed for every destination position, whether or not the write
        // is masked out. Advancing the source only on a successful write would shift all
        // following pixels of the image onto the wrong destination coordinates.
        const u16 src_pixel = *(src_ptr++);
        u16* pixel_ptr = &dst_row_ptr[(x + col) % VRAM_WIDTH];
        if (((*pixel_ptr) & mask_and) == 0)
          *pixel_ptr = src_pixel | mask_or;
      }
    }
  }
}
/// CPU implementation of a VRAM->VRAM rectangle copy.
/// Copies that would run past the right edge of VRAM (at the source or the destination) are split
/// into sub-rectangles that each fit, and every piece is copied by a recursive call. Pieces that
/// fit are copied pixel by pixel, honouring the mask test/set bits from `params`.
void GPUBackend::SoftwareCopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params)
{
// Break up oversized copies. This behavior has not been verified on console.
if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH)
{
u32 remaining_rows = height;
u32 current_src_y = src_y;
u32 current_dst_y = dst_y;
while (remaining_rows > 0)
{
// Take as many rows as fit before either the source or the destination reaches the bottom edge.
const u32 rows_to_copy =
std::min<u32>(remaining_rows, std::min<u32>(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y));
u32 remaining_columns = width;
u32 current_src_x = src_x;
u32 current_dst_x = dst_x;
while (remaining_columns > 0)
{
// Likewise for columns: stop at whichever of source/destination hits the right edge first.
const u32 columns_to_copy =
std::min<u32>(remaining_columns, std::min<u32>(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x));
SoftwareCopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy,
params);
current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH;
current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH;
remaining_columns -= columns_to_copy;
}
current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT;
current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT;
remaining_rows -= rows_to_copy;
}
return;
}
// This doesn't have a fast path, but do we really need one? It's not common.
const u16 mask_and = params.GetMaskAND();
const u16 mask_or = params.GetMaskOR();
// Copy in reverse when src_x < dst_x, this is verified on console.
if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH))
{
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
// Walk each row right-to-left; a signed index is used so the loop can terminate below zero.
for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
{
const u16 src_pixel = src_row_ptr[(src_x + static_cast<u32>(col)) % VRAM_WIDTH];
u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast<u32>(col)) % VRAM_WIDTH];
// Skip destination pixels that fail the mask test; written pixels get the mask-set bits ORed in.
if ((*dst_pixel_ptr & mask_and) == 0)
*dst_pixel_ptr = src_pixel | mask_or;
}
}
}
else
{
// Forward (left-to-right) copy with the same per-pixel mask handling.
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
for (u32 col = 0; col < width; col++)
{
const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH];
u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH];
if ((*dst_pixel_ptr & mask_and) == 0)
*dst_pixel_ptr = src_pixel | mask_or;
}
}
}
}

143
src/core/gpu_backend.h Normal file
View File

@ -0,0 +1,143 @@
#pragma once
#include "common/heap_array.h"
#include "common/event.h"
#include "gpu_types.h"
#include <atomic>
#include <condition_variable>
#include <memory>
#include <mutex>
class StateWrapper;
/// Base class for GPU rendering backends.
/// The emulated GPU builds commands with the New*Command() helpers and submits them with
/// PushCommand(). Depending on g_settings.cpu_thread they are either executed immediately or
/// queued in a byte FIFO that is drained by ProcessGPUCommands(). Concrete backends implement the
/// pure virtual VRAM/draw/display hooks.
class GPUBackend
{
public:
GPUBackend();
virtual ~GPUBackend();
/// Returns the pointer to the CPU-visible VRAM image (null until a derived backend sets m_vram_ptr).
ALWAYS_INLINE u16* GetVRAM() const { return m_vram_ptr; }
/// Creates and initializes the global backend instance, falling back to the software backend on failure.
static bool Create(GPURenderer backend);
/// Backend-specific initialization; the base implementation always succeeds.
virtual bool Initialize();
// Graphics API state reset/restore - call when drawing the UI etc.
virtual void ResetGraphicsAPIState();
virtual void RestoreGraphicsAPIState();
/// Returns true for hardware renderers; the base implementation returns false.
virtual bool IsHardwareRenderer() const;
/// Recompile shaders/recreate framebuffers when needed.
virtual void UpdateSettings();
/// Updates the resolution scale when it's set to automatic.
virtual void UpdateResolutionScale();
/// Returns the effective display resolution of the GPU.
virtual std::tuple<u32, u32> GetEffectiveDisplayResolution();
/// Draws backend statistics; the base implementation does nothing.
virtual void DrawRendererStats(bool is_idle_frame);
/// Saves or loads VRAM, the drawing area and the mirrored display state.
bool DoState(StateWrapper& sw);
// Command factories. Each returns a command located at the FIFO write position with its type and
// size already filled in; the command only takes effect once passed to PushCommand().
GPUBackendResetCommand* NewResetCommand();
GPUBackendUpdateSettingsCommand* NewUpdateSettingsCommand();
GPUBackendUpdateResolutionScaleCommand* NewUpdateResolutionScaleCommand();
GPUBackendReadVRAMCommand* NewReadVRAMCommand();
GPUBackendFillVRAMCommand* NewFillVRAMCommand();
GPUBackendUpdateVRAMCommand* NewUpdateVRAMCommand(u32 num_words);
GPUBackendCopyVRAMCommand* NewCopyVRAMCommand();
GPUBackendSetDrawingAreaCommand* NewSetDrawingAreaCommand();
GPUBackendDrawPolygonCommand* NewDrawPolygonCommand(u32 num_vertices);
GPUBackendDrawRectangleCommand* NewDrawRectangleCommand();
GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices);
GPUBackendClearDisplayCommand* NewClearDisplayCommand();
GPUBackendUpdateDisplayCommand* NewUpdateDisplayCommand();
GPUBackendFlushRenderCommand* NewFlushRenderCommand();
/// Submits a command: executed immediately without a CPU thread, otherwise published to the FIFO.
void PushCommand(GPUBackendCommand* cmd);
/// Blocks until the GPU thread has consumed all queued commands (no-op without a CPU thread).
void Sync();
/// Processes all pending GPU commands.
void ProcessGPUCommands();
/// Queues a frame-done marker for the GPU thread (no-op without a CPU thread).
void CPUFrameDone();
/// Processes commands, sleeping when idle, until a frame-done marker has been consumed.
void RunGPUFrame();
/// Drains remaining commands and rewinds the FIFO pointers.
void EndGPUFrame();
protected:
/// Returns storage for a command of `size` bytes at the FIFO write position, syncing first when full.
void* AllocateCommand(u32 size);
/// Returns the number of FIFO bytes pushed but not yet consumed.
u32 GetPendingCommandSize() const;
/// Notifies the GPU thread if it has flagged itself as sleeping.
void WakeGPUThread();
/// Resets the mirrored drawing/display state to defaults.
virtual void Reset();
// Backend operations invoked by HandleCommand(); concrete backends must implement these.
virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height) = 0;
virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) = 0;
virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data,
GPUBackendCommandParameters params) = 0;
virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) = 0;
virtual void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) = 0;
virtual void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) = 0;
virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0;
/// Called after m_drawing_area changes; the base implementation does nothing.
virtual void SetScissorFromDrawingArea();
virtual void ClearDisplay() = 0;
virtual void UpdateDisplay() = 0;
virtual void FlushRender() = 0;
/// Dispatches a single command to the matching backend operation.
void HandleCommand(const GPUBackendCommand* cmd);
// CPU implementations of the VRAM transfer operations, operating directly on m_vram_ptr.
void SoftwareFillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params);
void SoftwareUpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params);
void SoftwareCopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params);
// CPU-visible VRAM image; not assigned anywhere in the base class.
u16* m_vram_ptr = nullptr;
// Current drawing area, updated by the SetDrawingArea command.
Common::Rectangle<u32> m_drawing_area{};
float m_display_aspect_ratio = 1.0f;
// Size of the simulated screen in pixels. Depending on crop mode, this may include overscan area.
u16 m_display_width = 0;
u16 m_display_height = 0;
// Top-left corner where the VRAM is displayed. Depending on the CRTC config, this may indicate padding.
u16 m_display_origin_left = 0;
u16 m_display_origin_top = 0;
// Rectangle describing the displayed area of VRAM, in coordinates.
u16 m_display_vram_left = 0;
u16 m_display_vram_top = 0;
u16 m_display_vram_width = 0;
u16 m_display_vram_height = 0;
u16 m_display_vram_start_x = 0;
u16 m_display_vram_start_y = 0;
GPUInterlacedDisplayMode m_display_interlace = GPUInterlacedDisplayMode::None;
u8 m_display_interlace_field = 0;
bool m_display_enabled = false;
bool m_display_24bit = false;
// Set by ProcessGPUCommands() when a FrameDone command is consumed; cleared at the start of RunGPUFrame().
bool m_frame_done = false;
// Signalled by the consumer once a Sync command has been processed; Sync() waits on it.
Common::Event m_sync_event;
// True while RunGPUFrame() is blocked on m_wake_gpu_thread_cv; read and written under m_sync_mutex.
std::atomic_bool m_gpu_thread_sleeping{ false };
std::mutex m_sync_mutex;
// NOTE(review): m_sync_cpu_thread_cv and m_sync_done are not referenced by the visible
// gpu_backend.cpp code - confirm whether they are still needed.
std::condition_variable m_sync_cpu_thread_cv;
std::condition_variable m_wake_gpu_thread_cv;
bool m_sync_done = false;
enum : u32
{
// Capacity of the command FIFO in bytes.
COMMAND_QUEUE_SIZE = 8 * 1024 * 1024,
// Pending bytes at which PushCommand() wakes the GPU thread for ordinary commands.
THRESHOLD_TO_WAKE_GPU = 256
};
// Command storage; the FIFO is linear and is rewound to offset 0 on sync / end of frame.
HeapArray<u8, COMMAND_QUEUE_SIZE> m_command_fifo_data;
// Byte offsets into m_command_fifo_data, each aligned to its own 64-byte boundary.
alignas(64) std::atomic<u32> m_command_fifo_read_ptr{0};
alignas(64) std::atomic<u32> m_command_fifo_write_ptr{0};
};
extern std::unique_ptr<GPUBackend> g_gpu_backend;

View File

@ -2,7 +2,9 @@
#include "common/log.h"
#include "common/string_util.h"
#include "gpu.h"
#include "gpu_backend.h"
#include "interrupt_controller.h"
#include "pgxp.h"
#include "system.h"
Log_SetChannel(GPU);
@ -21,6 +23,15 @@ static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero)
return value == 0 ? value_for_zero : value;
}
/// Returns the two values ordered as (smaller, larger). When neither compares greater than the
/// other, the original argument order is kept.
template<typename T>
ALWAYS_INLINE static constexpr std::tuple<T, T> MinMax(T v1, T v2)
{
  return (v1 > v2) ? std::tuple<T, T>(v2, v1) : std::tuple<T, T>(v1, v2);
}
void GPU::ExecuteCommands()
{
m_syncing = true;
@ -91,7 +102,7 @@ void GPU::ExecuteCommands()
// drop terminator
m_fifo.RemoveOne();
Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount());
DispatchRenderCommand();
FinishPolyLineRenderCommand();
m_blit_buffer.clear();
EndCommand();
continue;
@ -132,16 +143,16 @@ GPU::GP0CommandHandlerTable GPU::GenerateGP0CommandHandlerTable()
table[0x1F] = &GPU::HandleInterruptRequestCommand;
for (u32 i = 0x20; i <= 0x7F; i++)
{
const RenderCommand rc{i << 24};
const GPURenderCommand rc{i << 24};
switch (rc.primitive)
{
case Primitive::Polygon:
case GPUPrimitive::Polygon:
table[i] = &GPU::HandleRenderPolygonCommand;
break;
case Primitive::Line:
case GPUPrimitive::Line:
table[i] = rc.polyline ? &GPU::HandleRenderPolyLineCommand : &GPU::HandleRenderLineCommand;
break;
case Primitive::Rectangle:
case GPUPrimitive::Rectangle:
table[i] = &GPU::HandleRenderRectangleCommand;
break;
default:
@ -218,7 +229,17 @@ bool GPU::HandleSetDrawModeCommand()
{
const u32 param = FifoPop() & 0x00FFFFFFu;
Log_DebugPrintf("Set draw mode %08X", param);
SetDrawMode(Truncate16(param));
GPUDrawModeReg new_mode_reg{static_cast<u16>(param & GPUDrawModeReg::MASK)};
if (!m_set_texture_disable_mask)
new_mode_reg.texture_disable = false;
// Bits 0..10 are returned in the GPU status register.
m_GPUSTAT.bits = (m_GPUSTAT.bits & ~(GPUDrawModeReg::GPUSTAT_MASK)) |
(ZeroExtend32(new_mode_reg.bits) & GPUDrawModeReg::GPUSTAT_MASK);
m_GPUSTAT.texture_disable = new_mode_reg.texture_disable;
m_draw_mode.bits = new_mode_reg.bits;
AddCommandTicks(1);
EndCommand();
return true;
@ -227,10 +248,10 @@ bool GPU::HandleSetDrawModeCommand()
bool GPU::HandleSetTextureWindowCommand()
{
const u32 param = FifoPop() & 0x00FFFFFFu;
SetTextureWindow(param);
Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_draw_mode.texture_window_mask_x,
m_draw_mode.texture_window_mask_y, m_draw_mode.texture_window_offset_x,
m_draw_mode.texture_window_offset_y);
m_texture_window.bits = param;
Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_texture_window.mask_x, m_texture_window.mask_y,
m_texture_window.offset_x, m_texture_window.offset_y);
AddCommandTicks(1);
EndCommand();
@ -245,11 +266,9 @@ bool GPU::HandleSetDrawingAreaTopLeftCommand()
Log_DebugPrintf("Set drawing area top-left: (%u, %u)", left, top);
if (m_drawing_area.left != left || m_drawing_area.top != top)
{
FlushRender();
m_drawing_area.left = left;
m_drawing_area.top = top;
m_drawing_area_changed = true;
UpdateDrawingArea();
}
AddCommandTicks(1);
@ -266,11 +285,9 @@ bool GPU::HandleSetDrawingAreaBottomRightCommand()
Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.right, m_drawing_area.bottom);
if (m_drawing_area.right != right || m_drawing_area.bottom != bottom)
{
FlushRender();
m_drawing_area.right = right;
m_drawing_area.bottom = bottom;
m_drawing_area_changed = true;
UpdateDrawingArea();
}
AddCommandTicks(1);
@ -304,10 +321,8 @@ bool GPU::HandleSetMaskBitCommand()
constexpr u32 gpustat_mask = (1 << 11) | (1 << 12);
const u32 gpustat_bits = (param & 0x03) << 11;
if ((m_GPUSTAT.bits & gpustat_mask) != gpustat_bits)
{
FlushRender();
m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits;
}
Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing),
BoolToUInt32(m_GPUSTAT.check_mask_before_draw));
@ -316,9 +331,64 @@ bool GPU::HandleSetMaskBitCommand()
return true;
}
/// Fills the parameter bits shared by backend commands from the current GPU state: the two mask
/// flags from GPUSTAT, the active line LSB from the CRTC state, and whether interlaced rendering
/// is enabled.
void GPU::FillBackendCommandParameters(GPUBackendCommand* cmd) const
{
// Clear everything first; the command storage comes straight from the FIFO and is not pre-initialised.
cmd->params.bits = 0;
cmd->params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw;
cmd->params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing;
cmd->params.active_line_lsb = m_crtc_state.active_line_lsb;
cmd->params.interlaced_rendering = IsInterlacedRenderingEnabled();
}
void GPU::ClearDisplay()
{
g_gpu_backend->PushCommand(g_gpu_backend->NewClearDisplayCommand());
}
/// Snapshots the current CRTC/display state into an UpdateDisplay command and submits it to the
/// backend, which mirrors the values into its own members before presenting.
void GPU::UpdateDisplay()
{
GPUBackendUpdateDisplayCommand* cmd = g_gpu_backend->NewUpdateDisplayCommand();
cmd->display_aspect_ratio = m_crtc_state.display_aspect_ratio;
cmd->display_width = m_crtc_state.display_width;
cmd->display_height = m_crtc_state.display_height;
cmd->display_origin_left = m_crtc_state.display_origin_left;
cmd->display_origin_top = m_crtc_state.display_origin_top;
cmd->display_vram_left = m_crtc_state.display_vram_left;
cmd->display_vram_top = m_crtc_state.display_vram_top;
cmd->display_vram_width = m_crtc_state.display_vram_width;
cmd->display_vram_height = m_crtc_state.display_vram_height;
// The VRAM start coordinates come from the raw CRTC X/Y registers rather than the derived state.
cmd->display_vram_start_x = m_crtc_state.regs.X;
cmd->display_vram_start_y = m_crtc_state.regs.Y;
cmd->display_interlace = GetInterlacedDisplayMode();
cmd->display_interlace_field = m_crtc_state.interlaced_display_field;
// GPUSTAT stores a "disable" flag; the backend receives the inverted "enabled" form.
cmd->display_enabled = !m_GPUSTAT.display_disable;
cmd->display_24bit = m_GPUSTAT.display_area_color_depth_24;
g_gpu_backend->PushCommand(cmd);
}
/// Fills the state common to all draw commands: the shared command parameters, the render command
/// word, and the current draw mode and texture window register values.
/// Callers may overwrite individual fields afterwards (HandleRenderPolygonCommand() replaces
/// draw_mode for textured polygons).
void GPU::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const
{
FillBackendCommandParameters(cmd);
cmd->rc.bits = rc.bits;
cmd->draw_mode.bits = m_draw_mode.bits;
cmd->window.bits = m_texture_window.bits;
}
/// Sends the current drawing area to the backend as a SetDrawingArea command.
void GPU::UpdateDrawingArea()
{
  auto* const area_cmd = g_gpu_backend->NewSetDrawingAreaCommand();
  area_cmd->new_area = m_drawing_area;
  g_gpu_backend->PushCommand(area_cmd);
}
void GPU::FlushRender()
{
g_gpu_backend->PushCommand(g_gpu_backend->NewFlushRenderCommand());
}
// Processes a polygon render command from the GPU command FIFO.
// The render command word is read with FifoPeek(0). A GPUBackendDrawPolygonCommand is filled with
// one entry per vertex (colour, position offset by m_drawing_offset, optional texcoord, optional
// PGXP precise position). Triangles whose extent reaches MAX_PRIMITIVE_WIDTH/HEIGHT are culled,
// the clipped bounds are recorded, draw ticks are charged, and the command is pushed to
// g_gpu_backend. Every visible exit path returns true.
// NOTE(review): this text comes from a rendered diff. The "@ ..." line marks elided source lines,
// and some adjacent statements look like before/after versions of the same line. Confirm against
// the real file before relying on the exact statement list.
bool GPU::HandleRenderPolygonCommand()
{
// NOTE(review): `rc` is declared twice below -- presumably the old and new variants of one line.
const RenderCommand rc{FifoPeek(0)};
const GPURenderCommand rc{FifoPeek(0)};
// shaded vertices use the colour from the first word for the first vertex
const u32 words_per_vertex = 1 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.shading_enable);
@ -341,39 +411,155 @@ bool GPU::HandleRenderPolygonCommand()
rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome",
ZeroExtend32(num_vertices), ZeroExtend32(words_per_vertex), setup_ticks);
// Allocate the backend command and fill in the state common to all draw commands.
GPUBackendDrawPolygonCommand* cmd = g_gpu_backend->NewDrawPolygonCommand(num_vertices);
FillDrawCommand(cmd, rc);
// set draw state up
if (rc.texture_enable)
{
// The texpage attribute is in the upper 16 bits of FIFO word 5 (shaded) or word 4 (unshaded).
const u16 texpage_attribute = Truncate16((rc.shading_enable ? FifoPeek(5) : FifoPeek(4)) >> 16);
// NOTE(review): the SetDrawMode/SetTexturePalette calls and the m_GPUSTAT / cmd->draw_mode /
// cmd->palette writes that follow look like two versions of the same state update -- confirm
// which set is present in the real file.
SetDrawMode((texpage_attribute & DrawMode::Reg::POLYGON_TEXPAGE_MASK) |
(m_draw_mode.mode_reg.bits & ~DrawMode::Reg::POLYGON_TEXPAGE_MASK));
SetTexturePalette(Truncate16(FifoPeek(2) >> 16));
m_GPUSTAT.bits = (m_GPUSTAT.bits & ~(GPUDrawModeReg::GPUSTAT_MASK)) |
(ZeroExtend32(texpage_attribute) & GPUDrawModeReg::GPUSTAT_MASK);
cmd->draw_mode.bits = ((texpage_attribute & GPUDrawModeReg::POLYGON_TEXPAGE_MASK) |
(m_draw_mode.bits & ~GPUDrawModeReg::POLYGON_TEXPAGE_MASK));
cmd->palette.bits = Truncate16(FifoPeek(2) >> 16);
}
else
{
cmd->palette.bits = 0;
}
m_stats.num_vertices += num_vertices;
m_stats.num_polygons++;
m_render_command.bits = rc.bits;
m_fifo.RemoveOne();
DispatchRenderCommand();
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable;
const bool pgxp = g_settings.gpu_pgxp_enable;
// Starts from the texture-correction setting and is cleared if any GetPreciseVertex() call
// below returns false.
bool valid_w = g_settings.gpu_pgxp_texture_correction;
for (u32 i = 0; i < num_vertices; i++)
{
GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i];
// Vertex 0 always uses the colour from the command word; later vertices pop their own colour
// word only when shading is enabled.
vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
// The FIFO entry is 64 bits wide: the low 32 bits are the packed position, the high 32 bits
// are passed to PGXP below.
const u64 maddr_and_pos = m_fifo.Pop();
const GPUVertexPosition vp{Truncate32(maddr_and_pos)};
vert->x = m_drawing_offset.x + vp.x;
vert->y = m_drawing_offset.y + vp.y;
// Default precise coordinates mirror the integer ones; PGXP may overwrite them below.
vert->precise_x = static_cast<float>(vert->x);
vert->precise_y = static_cast<float>(vert->y);
vert->precise_w = 1.0f;
vert->texcoord = textured ? Truncate16(FifoPop()) : 0;
// NOTE(review): native_x is not read anywhere in the visible part of this function.
const s32 native_x = m_drawing_offset.x + vp.x;
if (pgxp)
{
valid_w &= PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, vert->x, vert->y, m_drawing_offset.x,
m_drawing_offset.y, &vert->precise_x, &vert->precise_y, &vert->precise_w);
}
}
// If valid_w ended up false, reset w to 1.0 on every vertex.
if (pgxp && !valid_w)
{
for (u32 i = 0; i < num_vertices; i++)
cmd->vertices[i].precise_w = 1.0f;
}
// NOTE(review): cmd has already been allocated at this point; this path ends the command without
// pushing it to the backend.
if (!IsDrawingAreaIsValid())
{
EndCommand();
return true;
}
// Cull polygons which are too large.
// The min/max over vertices 1 and 2 is kept separately because it is reused for the second
// triangle (vertices 1, 2, 3) of a quad below.
const auto [min_x_12, max_x_12] = MinMax(cmd->vertices[1].x, cmd->vertices[2].x);
const auto [min_y_12, max_y_12] = MinMax(cmd->vertices[1].y, cmd->vertices[2].y);
const s32 min_x = std::min(min_x_12, cmd->vertices[0].x);
const s32 max_x = std::max(max_x_12, cmd->vertices[0].x);
const s32 min_y = std::min(min_y_12, cmd->vertices[0].y);
const s32 max_y = std::max(max_y_12, cmd->vertices[0].y);
if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large polygon: %d,%d %d,%d %d,%d", cmd->vertices[0].x, cmd->vertices[0].y,
cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y);
// A culled lone triangle ends the command here without pushing anything.
if (!rc.quad_polygon)
{
EndCommand();
return true;
}
// turn it into a degenerate triangle
std::memcpy(&cmd->vertices[0], &cmd->vertices[1], sizeof(GPUBackendDrawPolygonCommand::Vertex));
cmd->bounds.SetInvalid();
}
else
{
// Clamp the triangle's bounding box to the drawing area; right/bottom are made exclusive (+1).
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.right));
const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom = static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
cmd->bounds.Set(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), Truncate16(clip_bottom));
AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable, rc.texture_enable,
rc.transparency_enable);
}
// quads
if (rc.quad_polygon)
{
const s32 min_x_123 = std::min(min_x_12, cmd->vertices[3].x);
const s32 max_x_123 = std::max(max_x_12, cmd->vertices[3].x);
const s32 min_y_123 = std::min(min_y_12, cmd->vertices[3].y);
const s32 max_y_123 = std::max(max_y_12, cmd->vertices[3].y);
// Cull polygons which are too large.
if ((max_x_123 - min_x_123) >= MAX_PRIMITIVE_WIDTH || (max_y_123 - min_y_123) >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large polygon (quad second half): %d,%d %d,%d %d,%d", cmd->vertices[2].x,
cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[0].x,
cmd->vertices[0].y);
// turn it into a degenerate triangle
std::memcpy(&cmd->vertices[3], &cmd->vertices[2], sizeof(GPUBackendDrawPolygonCommand::Vertex));
// NOTE(review): SetInvalid() here also discards any bounds recorded for the first triangle
// above -- confirm this is intended.
cmd->bounds.SetInvalid();
}
else
{
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x_123, m_drawing_area.left, m_drawing_area.right));
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(max_x_123, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y_123, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(max_y_123, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
// NOTE(review): Include() is called here with (left, top, right, bottom), whereas
// FinishPolyLineRenderCommand passes (left, right, top, bottom). Check Rectangle::Include's
// parameter order; one of the two call sites is presumably wrong.
cmd->bounds.Include(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), Truncate16(clip_bottom));
AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable, rc.texture_enable,
rc.transparency_enable);
}
}
g_gpu_backend->PushCommand(cmd);
EndCommand();
return true;
}
// Processes a rectangle render command from the GPU command FIFO.
// Fills a GPUBackendDrawRectangleCommand with colour, position (offset by m_drawing_offset),
// optional texcoord/palette and the rectangle size, records the bounds clamped to the drawing
// area, charges rectangle ticks and pushes the command to g_gpu_backend.
// NOTE(review): this text comes from a rendered diff. The "@ ..." line marks elided source lines,
// and some adjacent statements look like before/after versions of the same line.
bool GPU::HandleRenderRectangleCommand()
{
// NOTE(review): `rc` is declared twice and the total_words initializer has two alternative
// right-hand sides below -- presumably old/new variants of the same lines.
const RenderCommand rc{FifoPeek(0)};
const GPURenderCommand rc{FifoPeek(0)};
const u32 total_words =
2 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.rectangle_size == DrawRectangleSize::Variable);
2 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.rectangle_size == GPUDrawRectangleSize::Variable);
CHECK_COMMAND_SIZE(total_words);
if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending())
SynchronizeCRTC();
if (rc.texture_enable)
SetTexturePalette(Truncate16(FifoPeek(2) >> 16));
const TickCount setup_ticks = 16;
AddCommandTicks(setup_ticks);
@ -384,17 +570,84 @@ bool GPU::HandleRenderRectangleCommand()
m_stats.num_vertices++;
m_stats.num_polygons++;
m_render_command.bits = rc.bits;
m_fifo.RemoveOne();
DispatchRenderCommand();
GPUBackendDrawRectangleCommand* cmd = g_gpu_backend->NewDrawRectangleCommand();
FillDrawCommand(cmd, rc);
cmd->color = rc.color_for_first_vertex;
// NOTE(review): FillDrawCommand() above already assigns draw_mode.bits and window.bits from the
// same members, so these two writes are redundant.
cmd->draw_mode.bits = m_draw_mode.bits;
cmd->window.bits = m_texture_window.bits;
const GPUVertexPosition vp{FifoPop()};
cmd->x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x);
cmd->y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y);
if (rc.texture_enable)
{
// One FIFO word carries the palette in the upper 16 bits and the texcoord in the lower 16.
const u32 texcoord_and_palette = FifoPop();
cmd->palette.bits = Truncate16(texcoord_and_palette >> 16);
cmd->texcoord = Truncate16(texcoord_and_palette);
}
else
{
cmd->palette.bits = 0;
cmd->texcoord = 0;
}
// Fixed sizes come from the command word; any other value reads width/height from the FIFO.
switch (rc.rectangle_size)
{
case GPUDrawRectangleSize::R1x1:
cmd->width = 1;
cmd->height = 1;
break;
case GPUDrawRectangleSize::R8x8:
cmd->width = 8;
cmd->height = 8;
break;
case GPUDrawRectangleSize::R16x16:
cmd->width = 16;
cmd->height = 16;
break;
default:
{
// Width is in the low half-word and height in the high half-word, each masked to VRAM size.
const u32 width_and_height = FifoPop();
cmd->width = static_cast<u16>(width_and_height & VRAM_WIDTH_MASK);
cmd->height = static_cast<u16>((width_and_height >> 16) & VRAM_HEIGHT_MASK);
if (cmd->width >= MAX_PRIMITIVE_WIDTH || cmd->height >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large rectangle: %d,%d %dx%d", cmd->x, cmd->y, cmd->width, cmd->height);
// NOTE(review): this path returns without calling EndCommand(), unlike the other early exits
// in this function -- confirm whether that is intended.
return true;
}
}
break;
}
// NOTE(review): cmd has already been allocated; this path ends the command without pushing it.
if (!IsDrawingAreaIsValid())
{
EndCommand();
return true;
}
// Clamp the rectangle to the drawing area; right/bottom get +1 after clamping.
const u32 clip_left = static_cast<u32>(std::clamp<s32>(cmd->x, m_drawing_area.left, m_drawing_area.right));
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(cmd->x + cmd->width, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(cmd->y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(cmd->y + cmd->height, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
cmd->bounds.Set(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), Truncate16(clip_bottom));
AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.transparency_enable);
g_gpu_backend->PushCommand(cmd);
EndCommand();
return true;
}
// Processes a single-line render command from the GPU command FIFO.
// Builds a two-vertex GPUBackendDrawLineCommand (colours and positions offset by
// m_drawing_offset), culls lines whose extent reaches MAX_PRIMITIVE_WIDTH/HEIGHT, records the
// bounds clamped to the drawing area and charges line ticks.
// NOTE(review): this text comes from a rendered diff. The "@ ..." line marks elided source lines,
// and the two `rc` declarations look like before/after versions of the same line.
bool GPU::HandleRenderLineCommand()
{
const RenderCommand rc{FifoPeek(0)};
const GPURenderCommand rc{FifoPeek(0)};
// Shaded lines carry an extra colour word for the second vertex.
const u32 total_words = rc.shading_enable ? 4 : 3;
CHECK_COMMAND_SIZE(total_words);
@ -409,7 +662,59 @@ bool GPU::HandleRenderLineCommand()
m_render_command.bits = rc.bits;
m_fifo.RemoveOne();
DispatchRenderCommand();
GPUBackendDrawLineCommand* cmd = g_gpu_backend->NewDrawLineCommand(2);
FillDrawCommand(cmd, rc);
cmd->palette.bits = 0;
if (rc.shading_enable)
{
// Word order in the FIFO: start position, end colour, end position.
cmd->vertices[0].color = rc.color_for_first_vertex;
const GPUVertexPosition start_pos{FifoPop()};
cmd->vertices[0].x = m_drawing_offset.x + start_pos.x;
cmd->vertices[0].y = m_drawing_offset.y + start_pos.y;
cmd->vertices[1].color = FifoPop() & UINT32_C(0x00FFFFFF);
const GPUVertexPosition end_pos{FifoPop()};
cmd->vertices[1].x = m_drawing_offset.x + end_pos.x;
cmd->vertices[1].y = m_drawing_offset.y + end_pos.y;
}
else
{
// Both vertices share the colour from the command word; only the two positions are popped.
cmd->vertices[0].color = rc.color_for_first_vertex;
cmd->vertices[1].color = rc.color_for_first_vertex;
const GPUVertexPosition start_pos{FifoPop()};
cmd->vertices[0].x = m_drawing_offset.x + start_pos.x;
cmd->vertices[0].y = m_drawing_offset.y + start_pos.y;
const GPUVertexPosition end_pos{FifoPop()};
cmd->vertices[1].x = m_drawing_offset.x + end_pos.x;
cmd->vertices[1].y = m_drawing_offset.y + end_pos.y;
}
if (!IsDrawingAreaIsValid())
{
EndCommand();
return true;
}
const auto [min_x, max_x] = MinMax(cmd->vertices[0].x, cmd->vertices[1].x);
const auto [min_y, max_y] = MinMax(cmd->vertices[0].y, cmd->vertices[1].y);
if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
{
// NOTE(review): the first two arguments are both vertices[0].y; the "%d,%d - %d,%d" format
// suggests vertices[0].x was intended for the first one.
Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", cmd->vertices[0].y, cmd->vertices[0].y, cmd->vertices[1].x,
cmd->vertices[1].y);
EndCommand();
return true;
}
// NOTE(review): both clamp limits for clip_left are m_drawing_area.left, so clip_left always
// equals m_drawing_area.left. The other three clamps use left/right or top/bottom pairs, which
// suggests the upper limit should be m_drawing_area.right.
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left));
const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom = static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
cmd->bounds.Set(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), Truncate16(clip_bottom));
AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable);
// NOTE(review): cmd is never passed to g_gpu_backend->PushCommand() in the visible code, unlike
// the polygon and rectangle handlers -- confirm whether the push is missing.
EndCommand();
return true;
}
@ -417,7 +722,7 @@ bool GPU::HandleRenderLineCommand()
// Entry point for a poly-line render command: checks that the minimum number of words is present
// in the FIFO before continuing. Most of the body is elided by the "@ ..." diff marker below.
// NOTE(review): the two `rc` declarations look like before/after versions of the same line.
bool GPU::HandleRenderPolyLineCommand()
{
// always read the first two vertices, we test for the terminator after that
const RenderCommand rc{FifoPeek(0)};
const GPURenderCommand rc{FifoPeek(0)};
// NOTE(review): HandleRenderLineCommand uses `rc.shading_enable ? 4 : 3` for the same two-vertex
// case; the operands here are the other way round -- confirm which is intended.
const u32 min_words = rc.shading_enable ? 3 : 4;
CHECK_COMMAND_SIZE(min_words);
@ -446,6 +751,52 @@ bool GPU::HandleRenderPolyLineCommand()
return true;
}
// Converts the buffered poly-line words in m_blit_buffer into one backend line command.
//
// Vertex 0 takes its colour from the render command word. Each following vertex is preceded by its
// own colour word when shading is enabled, otherwise it reuses the first colour. For each segment
// (i - 1, i) the extent is compared against MAX_PRIMITIVE_WIDTH/HEIGHT; segments within the limit
// extend cmd->bounds by their rectangle clamped to the drawing area and are charged line ticks.
// Nothing is emitted when the drawing area is invalid.
void GPU::FinishPolyLineRenderCommand()
{
  const u32 num_vertices = GetPolyLineVertexCount();
  if (!IsDrawingAreaIsValid())
    return;

  GPUBackendDrawLineCommand* cmd = g_gpu_backend->NewDrawLineCommand(num_vertices);
  FillDrawCommand(cmd, m_render_command);
  // Match HandleRenderLineCommand, which zeroes the palette for line commands.
  cmd->palette.bits = 0;

  u32 buffer_pos = 0;
  const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]};
  cmd->vertices[0].x = start_vp.x + m_drawing_offset.x;
  cmd->vertices[0].y = start_vp.y + m_drawing_offset.y;
  cmd->vertices[0].color = m_render_command.color_for_first_vertex;

  // Bounds start out empty and grow with every segment that is not culled.
  cmd->bounds.SetInvalid();

  const bool shaded = m_render_command.shading_enable;
  for (u32 i = 1; i < num_vertices; i++)
  {
    cmd->vertices[i].color =
      shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : m_render_command.color_for_first_vertex;

    const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]};
    cmd->vertices[i].x = m_drawing_offset.x + vp.x;
    cmd->vertices[i].y = m_drawing_offset.y + vp.y;

    // Extent of the segment between the previous and current vertex: x against x, y against y.
    const auto [min_x, max_x] = MinMax(cmd->vertices[i - 1].x, cmd->vertices[i].x);
    const auto [min_y, max_y] = MinMax(cmd->vertices[i - 1].y, cmd->vertices[i].y);
    if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
    {
      // NOTE(review): a culled segment contributes no bounds or ticks, but its vertices stay in
      // the command -- confirm the backend skips oversized segments itself.
      Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", cmd->vertices[i - 1].x, cmd->vertices[i - 1].y,
                      cmd->vertices[i].x, cmd->vertices[i].y);
    }
    else
    {
      // Clamp to the drawing area; right/bottom get +1 after clamping.
      const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.right));
      const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
      const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
      const u32 clip_bottom = static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;

      // NOTE(review): argument order (left, right, top, bottom) is kept from the original call;
      // HandleRenderPolygonCommand passes (left, top, right, bottom) -- check Rectangle::Include.
      cmd->bounds.Include(Truncate16(clip_left), Truncate16(clip_right), Truncate16(clip_top), Truncate16(clip_bottom));
      AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, m_render_command.shading_enable);
    }
  }

  // Hand the finished command to the backend; without this the poly-line is never drawn.
  g_gpu_backend->PushCommand(cmd);
}
// Processes a fill-rectangle (VRAM fill) command from the GPU command FIFO: reads colour,
// destination and size, charges command ticks and submits the fill.
// NOTE(review): this text comes from a rendered diff. The block that pops into locals
// (color/dst_x/...) and the block that pops into cmd->... both consume the same FIFO words, and
// FillVRAM() and the two AddCommandTicks() calls likewise look like before/after versions --
// confirm which set is present in the real file.
bool GPU::HandleFillRectangleCommand()
{
CHECK_COMMAND_SIZE(3);
@ -453,19 +804,22 @@ bool GPU::HandleFillRectangleCommand()
if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending())
SynchronizeCRTC();
FlushRender();
GPUBackendFillVRAMCommand* cmd = g_gpu_backend->NewFillVRAMCommand();
FillBackendCommandParameters(cmd);
// Variant writing to locals: x is masked with 0x3F0 and width is rounded up to a multiple of 16.
const u32 color = FifoPop() & 0x00FFFFFF;
const u32 dst_x = FifoPeek() & 0x3F0;
const u32 dst_y = (FifoPop() >> 16) & VRAM_COORD_MASK;
const u32 width = ((FifoPeek() & VRAM_WIDTH_MASK) + 0xF) & ~0xF;
const u32 height = (FifoPop() >> 16) & VRAM_HEIGHT_MASK;
// Variant writing directly into the backend command, using the same masks.
cmd->color = FifoPop() & 0x00FFFFFF;
cmd->x = Truncate16(FifoPeek() & 0x3F0);
cmd->y = Truncate16((FifoPop() >> 16) & VRAM_COORD_MASK);
cmd->width = Truncate16(((FifoPeek() & VRAM_WIDTH_MASK) + 0xF) & ~0xF);
cmd->height = Truncate16((FifoPop() >> 16) & VRAM_HEIGHT_MASK);
Log_DebugPrintf("Fill VRAM rectangle offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, width, height);
Log_DebugPrintf("Fill VRAM rectangle offset=(%u,%u), size=(%u,%u)", cmd->x, cmd->y, cmd->width, cmd->height);
FillVRAM(dst_x, dst_y, width, height, color);
// Tick cost: 46 + ((width / 8) + 9) * height.
AddCommandTicks(46 + ((cmd->width / 8) + 9) * cmd->height);
g_gpu_backend->PushCommand(cmd);
m_stats.num_vram_fills++;
AddCommandTicks(46 + ((width / 8) + 9) * height);
EndCommand();
return true;
}
@ -509,9 +863,17 @@ void GPU::FinishVRAMWrite()
if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending())
SynchronizeCRTC();
FlushRender();
// TODO: skip this copy
const u32 num_words = static_cast<u32>(m_blit_buffer.size()) * 2u;
GPUBackendUpdateVRAMCommand* cmd = g_gpu_backend->NewUpdateVRAMCommand(num_words);
FillBackendCommandParameters(cmd);
cmd->x = m_vram_transfer.x;
cmd->y = m_vram_transfer.y;
cmd->width = m_vram_transfer.width;
cmd->height = m_vram_transfer.height;
std::memcpy(cmd->data, m_blit_buffer.data(), sizeof(u16) * num_words);
g_gpu_backend->PushCommand(cmd);
UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height, m_blit_buffer.data());
m_blit_buffer.clear();
m_vram_transfer = {};
m_blitter_state = BlitterState::Idle;
@ -532,17 +894,20 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand()
m_vram_transfer.width, m_vram_transfer.height);
DebugAssert(m_vram_transfer.col == 0 && m_vram_transfer.row == 0);
// all rendering should be done first...
FlushRender();
// ensure VRAM shadow is up to date
ReadVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height);
GPUBackendReadVRAMCommand* cmd = g_gpu_backend->NewReadVRAMCommand();
cmd->x = m_vram_transfer.x;
cmd->y = m_vram_transfer.y;
cmd->width = m_vram_transfer.width;
cmd->height = m_vram_transfer.height;
g_gpu_backend->PushCommand(cmd);
g_gpu_backend->Sync();
if (g_settings.debugging.dump_vram_to_cpu_copies)
{
DumpVRAMToFile(StringUtil::StdStringFromFormat("vram_to_cpu_copy_%u.png", s_vram_to_cpu_dump_id++).c_str(),
m_vram_transfer.width, m_vram_transfer.height, sizeof(u16) * VRAM_WIDTH,
&m_vram_ptr[m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x], true);
g_gpu_backend->GetVRAM() + (m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x), true);
}
// switch to pixel-by-pixel read state
@ -557,20 +922,22 @@ bool GPU::HandleCopyRectangleVRAMToVRAMCommand()
CHECK_COMMAND_SIZE(4);
m_fifo.RemoveOne();
const u32 src_x = FifoPeek() & VRAM_COORD_MASK;
const u32 src_y = (FifoPop() >> 16) & VRAM_COORD_MASK;
const u32 dst_x = FifoPeek() & VRAM_COORD_MASK;
const u32 dst_y = (FifoPop() >> 16) & VRAM_COORD_MASK;
const u32 width = ReplaceZero(FifoPeek() & VRAM_WIDTH_MASK, 0x400);
const u32 height = ReplaceZero((FifoPop() >> 16) & VRAM_HEIGHT_MASK, 0x200);
GPUBackendCopyVRAMCommand* cmd = g_gpu_backend->NewCopyVRAMCommand();
cmd->src_x = Truncate16(FifoPeek() & VRAM_COORD_MASK);
cmd->src_y = Truncate16((FifoPop() >> 16) & VRAM_COORD_MASK);
cmd->dst_x = Truncate16(FifoPeek() & VRAM_COORD_MASK);
cmd->dst_y = Truncate16((FifoPop() >> 16) & VRAM_COORD_MASK);
cmd->width = Truncate16(ReplaceZero(FifoPeek() & VRAM_WIDTH_MASK, 0x400));
cmd->height = Truncate16(ReplaceZero((FifoPop() >> 16) & VRAM_HEIGHT_MASK, 0x200));
Log_DebugPrintf("Copy rectangle from VRAM to VRAM src=(%u,%u), dst=(%u,%u), size=(%u,%u)", src_x, src_y, dst_x, dst_y,
width, height);
Log_DebugPrintf("Copy rectangle from VRAM to VRAM src=(%u,%u), dst=(%u,%u), size=(%u,%u)", cmd->src_x, cmd->src_y,
cmd->dst_x, cmd->dst_y, cmd->width, cmd->height);
AddCommandTicks(ZeroExtend32(cmd->width) * ZeroExtend32(cmd->height) * 2);
g_gpu_backend->PushCommand(cmd);
FlushRender();
CopyVRAM(src_x, src_y, dst_x, dst_y, width, height);
m_stats.num_vram_copies++;
AddCommandTicks(width * height * 2);
EndCommand();
return true;
}

View File

@ -14,25 +14,13 @@
#endif
Log_SetChannel(GPU_HW);
// Returns the two arguments as an ordered (smaller, larger) tuple.
// When neither value compares greater than the other, the original (v1, v2) order is kept.
template<typename T>
ALWAYS_INLINE static constexpr std::tuple<T, T> MinMax(T v1, T v2)
{
  return (v1 > v2) ? std::tuple<T, T>(v2, v1) : std::tuple<T, T>(v1, v2);
}
// UV limits are wanted when PGXP is enabled, or when the texture filter is anything other than
// nearest-neighbour.
ALWAYS_INLINE static bool ShouldUseUVLimits()
{
  if (g_settings.gpu_pgxp_enable)
    return true;

  return g_settings.gpu_texture_filter != GPUTextureFilter::Nearest;
}
// NOTE(review): two definitions of the default constructor appear back to back. They look like the
// before/after versions from a diff (base class GPU vs. GPUBackend); only one can exist in the
// real file.
// Variant 1: derives from GPU and points m_vram_ptr at the shadow VRAM buffer on construction.
GPU_HW::GPU_HW() : GPU()
{
m_vram_ptr = m_vram_shadow.data();
}
// Variant 2: derives from GPUBackend and does no work in the constructor body.
GPU_HW::GPU_HW() : GPUBackend() {}
// Defaulted destructor.
GPU_HW::~GPU_HW() = default;
@ -41,13 +29,14 @@ bool GPU_HW::IsHardwareRenderer() const
return true;
}
bool GPU_HW::Initialize(HostDisplay* host_display)
bool GPU_HW::Initialize()
{
if (!GPU::Initialize(host_display))
if (!GPUBackend::Initialize())
return false;
m_vram_ptr = m_vram_shadow.data();
m_resolution_scale = CalculateResolutionScale();
m_render_api = host_display->GetRenderAPI();
m_render_api = g_host_interface->GetDisplay()->GetRenderAPI();
m_true_color = g_settings.gpu_true_color;
m_scaled_dithering = g_settings.gpu_scaled_dithering;
m_texture_filtering = g_settings.gpu_texture_filter;
@ -58,7 +47,7 @@ bool GPU_HW::Initialize(HostDisplay* host_display)
// Resets the hardware renderer state: calls the base-class Reset(), rewinds the batch vertex write
// pointer to the start of the batch buffer and marks the whole of VRAM dirty.
// NOTE(review): the two base-class Reset() calls look like before/after versions of one line in a
// diff, and the "@ ..." line marks elided statements.
void GPU_HW::Reset()
{
GPU::Reset();
GPUBackend::Reset();
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
@ -72,22 +61,6 @@ void GPU_HW::Reset()
SetFullVRAMDirtyRectangle();
}
// Serializes state through the base class. When a state is being loaded, the batch vertex pointer
// is rewound, the whole of VRAM is marked dirty and the batch vertex depth is reset.
// Returns false only if the base-class DoState() fails.
bool GPU_HW::DoState(StateWrapper& sw)
{
  const bool base_ok = GPU::DoState(sw);
  if (!base_ok)
    return false;

  // Nothing further to do unless a state is being loaded.
  if (!sw.IsReading())
    return true;

  // invalidate the whole VRAM read texture when loading state
  m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
  SetFullVRAMDirtyRectangle();
  ResetBatchVertexDepth();
  return true;
}
void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed)
{
const u32 resolution_scale = CalculateResolutionScale();
@ -100,10 +73,12 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed)
if (m_resolution_scale != resolution_scale)
{
#if FIXME
g_host_interface->AddFormattedOSDMessage(10.0f, "Resolution scale set to %ux (display %ux%u, VRAM %ux%u)",
resolution_scale, m_crtc_state.display_vram_width * resolution_scale,
resolution_scale * m_crtc_state.display_vram_height,
VRAM_WIDTH * resolution_scale, VRAM_HEIGHT * resolution_scale);
#endif
}
m_resolution_scale = resolution_scale;
@ -119,6 +94,7 @@ u32 GPU_HW::CalculateResolutionScale() const
if (g_settings.gpu_resolution_scale != 0)
return std::clamp<u32>(g_settings.gpu_resolution_scale, 1, m_max_resolution_scale);
#if FIXME
// auto scaling
const s32 height = (m_crtc_state.display_height != 0) ? static_cast<s32>(m_crtc_state.display_height) : 480;
const s32 preferred_scale =
@ -126,11 +102,14 @@ u32 GPU_HW::CalculateResolutionScale() const
Log_InfoPrintf("Height = %d, preferred scale = %d", height, preferred_scale);
return static_cast<u32>(std::clamp<s32>(preferred_scale, 1, m_max_resolution_scale));
#else
return 1;
#endif
}
void GPU_HW::UpdateResolutionScale()
{
GPU::UpdateResolutionScale();
GPUBackend::UpdateResolutionScale();
if (CalculateResolutionScale() != m_resolution_scale)
UpdateSettings();
@ -138,8 +117,7 @@ void GPU_HW::UpdateResolutionScale()
// Returns the displayed VRAM size as (width, height), each multiplied by m_resolution_scale.
// NOTE(review): two return statements follow, and as written only the first can execute. They look
// like the before/after versions of one statement in a diff (m_crtc_state.display_vram_* vs.
// m_display_vram_*) -- confirm which one the real file keeps.
std::tuple<u32, u32> GPU_HW::GetEffectiveDisplayResolution()
{
return std::make_tuple(m_crtc_state.display_vram_width * m_resolution_scale,
m_resolution_scale * m_crtc_state.display_vram_height);
return std::make_tuple(m_display_vram_width * m_resolution_scale, m_resolution_scale * m_display_vram_height);
}
void GPU_HW::PrintSettingsToLog()
@ -358,334 +336,119 @@ void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1
AddVertex(output[1]);
}
void GPU_HW::LoadVertices()
void GPU_HW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
{
if (m_GPUSTAT.check_mask_before_draw)
SetupDraw(cmd);
if (cmd->params.check_mask_before_draw)
m_current_depth++;
const RenderCommand rc{m_render_command.bits};
const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg) << 16);
const GPURenderCommand rc{cmd->rc.bits};
const u32 texpage = ZeroExtend32(cmd->draw_mode.bits) | (ZeroExtend32(cmd->palette.bits) << 16);
const float depth = GetCurrentNormalizedVertexDepth();
switch (rc.primitive)
DebugAssert(GetBatchVertexSpace() >= (rc.quad_polygon ? 6u : 3u));
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable;
const bool pgxp = g_settings.gpu_pgxp_enable;
std::array<BatchVertex, 4> vertices;
for (u32 i = 0; i < cmd->num_vertices; i++)
{
case Primitive::Polygon:
{
DebugAssert(GetBatchVertexSpace() >= (rc.quad_polygon ? 6u : 3u));
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable;
const bool pgxp = g_settings.gpu_pgxp_enable;
const u32 num_vertices = rc.quad_polygon ? 4 : 3;
std::array<BatchVertex, 4> vertices;
std::array<std::array<s32, 2>, 4> native_vertex_positions;
bool valid_w = g_settings.gpu_pgxp_texture_correction;
for (u32 i = 0; i < num_vertices; i++)
{
const u32 color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
const u64 maddr_and_pos = m_fifo.Pop();
const VertexPosition vp{Truncate32(maddr_and_pos)};
const u16 texcoord = textured ? Truncate16(FifoPop()) : 0;
const s32 native_x = m_drawing_offset.x + vp.x;
const s32 native_y = m_drawing_offset.y + vp.y;
native_vertex_positions[i][0] = native_x;
native_vertex_positions[i][1] = native_y;
vertices[i].Set(static_cast<float>(native_x), static_cast<float>(native_y), depth, 1.0f, color, texpage,
texcoord, 0xFFFF0000u);
if (pgxp)
{
valid_w &=
PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, native_x, native_y, m_drawing_offset.x,
m_drawing_offset.y, &vertices[i].x, &vertices[i].y, &vertices[i].w);
}
}
if (!valid_w)
{
for (BatchVertex& v : vertices)
v.w = 1.0f;
}
if (rc.quad_polygon && m_resolution_scale > 1)
HandleFlippedQuadTextureCoordinates(vertices.data());
if (m_using_uv_limits && textured)
ComputePolygonUVLimits(vertices.data(), num_vertices);
if (!IsDrawingAreaIsValid())
return;
// Cull polygons which are too large.
const auto [min_x_12, max_x_12] = MinMax(native_vertex_positions[1][0], native_vertex_positions[2][0]);
const auto [min_y_12, max_y_12] = MinMax(native_vertex_positions[1][1], native_vertex_positions[2][1]);
const s32 min_x = std::min(min_x_12, native_vertex_positions[0][0]);
const s32 max_x = std::max(max_x_12, native_vertex_positions[0][0]);
const s32 min_y = std::min(min_y_12, native_vertex_positions[0][1]);
const s32 max_y = std::max(max_y_12, native_vertex_positions[0][1]);
if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large polygon: %d,%d %d,%d %d,%d", native_vertex_positions[0][0],
native_vertex_positions[0][1], native_vertex_positions[1][0], native_vertex_positions[1][1],
native_vertex_positions[2][0], native_vertex_positions[2][1]);
}
else
{
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.right));
const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable, rc.texture_enable,
rc.transparency_enable);
std::memcpy(m_batch_current_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3);
m_batch_current_vertex_ptr += 3;
}
// quads
if (rc.quad_polygon)
{
const s32 min_x_123 = std::min(min_x_12, native_vertex_positions[3][0]);
const s32 max_x_123 = std::max(max_x_12, native_vertex_positions[3][0]);
const s32 min_y_123 = std::min(min_y_12, native_vertex_positions[3][1]);
const s32 max_y_123 = std::max(max_y_12, native_vertex_positions[3][1]);
// Cull polygons which are too large.
if ((max_x_123 - min_x_123) >= MAX_PRIMITIVE_WIDTH || (max_y_123 - min_y_123) >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large polygon (quad second half): %d,%d %d,%d %d,%d",
native_vertex_positions[2][0], native_vertex_positions[2][1], native_vertex_positions[1][0],
native_vertex_positions[1][1], native_vertex_positions[0][0], native_vertex_positions[0][1]);
}
else
{
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x_123, m_drawing_area.left, m_drawing_area.right));
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(max_x_123, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y_123, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(max_y_123, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable, rc.texture_enable,
rc.transparency_enable);
AddVertex(vertices[2]);
AddVertex(vertices[1]);
AddVertex(vertices[3]);
}
}
}
break;
case Primitive::Rectangle:
{
const u32 color = rc.color_for_first_vertex;
const VertexPosition vp{FifoPop()};
const s32 pos_x = TruncateVertexPosition(m_drawing_offset.x + vp.x);
const s32 pos_y = TruncateVertexPosition(m_drawing_offset.y + vp.y);
const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? Truncate16(FifoPop()) : 0);
u16 orig_tex_left = ZeroExtend16(texcoord_x);
u16 orig_tex_top = ZeroExtend16(texcoord_y);
s32 rectangle_width;
s32 rectangle_height;
switch (rc.rectangle_size)
{
case DrawRectangleSize::R1x1:
rectangle_width = 1;
rectangle_height = 1;
break;
case DrawRectangleSize::R8x8:
rectangle_width = 8;
rectangle_height = 8;
break;
case DrawRectangleSize::R16x16:
rectangle_width = 16;
rectangle_height = 16;
break;
default:
{
const u32 width_and_height = FifoPop();
rectangle_width = static_cast<s32>(width_and_height & VRAM_WIDTH_MASK);
rectangle_height = static_cast<s32>((width_and_height >> 16) & VRAM_HEIGHT_MASK);
if (rectangle_width >= MAX_PRIMITIVE_WIDTH || rectangle_height >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large rectangle: %d,%d %dx%d", pos_x, pos_y, rectangle_width,
rectangle_height);
return;
}
}
break;
}
// we can split the rectangle up into potentially 8 quads
DebugAssert(GetBatchVertexSpace() >= MAX_VERTICES_FOR_RECTANGLE);
if (!IsDrawingAreaIsValid())
return;
// Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat.
u16 tex_top = orig_tex_top;
for (s32 y_offset = 0; y_offset < rectangle_height;)
{
const s32 quad_height = std::min<s32>(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top);
const float quad_start_y = static_cast<float>(pos_y + y_offset);
const float quad_end_y = quad_start_y + static_cast<float>(quad_height);
const u16 tex_bottom = tex_top + static_cast<u16>(quad_height);
u16 tex_left = orig_tex_left;
for (s32 x_offset = 0; x_offset < rectangle_width;)
{
const s32 quad_width = std::min<s32>(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left);
const float quad_start_x = static_cast<float>(pos_x + x_offset);
const float quad_end_x = quad_start_x + static_cast<float>(quad_width);
const u16 tex_right = tex_left + static_cast<u16>(quad_width);
const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1);
AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits);
AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom, uv_limits);
x_offset += quad_width;
tex_left = 0;
}
y_offset += quad_height;
tex_top = 0;
}
const u32 clip_left = static_cast<u32>(std::clamp<s32>(pos_x, m_drawing_area.left, m_drawing_area.right));
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(pos_x + rectangle_width, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(pos_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(pos_y + rectangle_height, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.transparency_enable);
}
break;
case Primitive::Line:
{
if (!rc.polyline)
{
DebugAssert(GetBatchVertexSpace() >= 2);
u32 start_color, end_color;
VertexPosition start_pos, end_pos;
if (rc.shading_enable)
{
start_color = rc.color_for_first_vertex;
start_pos.bits = FifoPop();
end_color = FifoPop() & UINT32_C(0x00FFFFFF);
end_pos.bits = FifoPop();
}
else
{
start_color = end_color = rc.color_for_first_vertex;
start_pos.bits = FifoPop();
end_pos.bits = FifoPop();
}
if (!IsDrawingAreaIsValid())
return;
s32 start_x = start_pos.x + m_drawing_offset.x;
s32 start_y = start_pos.y + m_drawing_offset.y;
s32 end_x = end_pos.x + m_drawing_offset.x;
s32 end_y = end_pos.y + m_drawing_offset.y;
const auto [min_x, max_x] = MinMax(start_x, end_x);
const auto [min_y, max_y] = MinMax(start_y, end_y);
if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", start_x, start_y, end_x, end_y);
return;
}
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left));
const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable);
// TODO: Should we do a PGXP lookup here? Most lines are 2D.
DrawLine(static_cast<float>(start_x), static_cast<float>(start_y), start_color, static_cast<float>(end_x),
static_cast<float>(end_y), end_color, depth);
}
else
{
// Multiply by two because we don't use line strips.
const u32 num_vertices = GetPolyLineVertexCount();
DebugAssert(GetBatchVertexSpace() >= (num_vertices * 2));
if (!IsDrawingAreaIsValid())
return;
const bool shaded = rc.shading_enable;
u32 buffer_pos = 0;
const VertexPosition start_vp{m_blit_buffer[buffer_pos++]};
s32 start_x = start_vp.x + m_drawing_offset.x;
s32 start_y = start_vp.y + m_drawing_offset.y;
u32 start_color = rc.color_for_first_vertex;
for (u32 i = 1; i < num_vertices; i++)
{
const u32 end_color = shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : start_color;
const VertexPosition vp{m_blit_buffer[buffer_pos++]};
const s32 end_x = m_drawing_offset.x + vp.x;
const s32 end_y = m_drawing_offset.y + vp.y;
const auto [min_x, max_x] = MinMax(start_x, end_x);
const auto [min_y, max_y] = MinMax(start_y, end_y);
if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", start_x, start_y, end_x, end_y);
}
else
{
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left));
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable);
// TODO: Should we do a PGXP lookup here? Most lines are 2D.
DrawLine(static_cast<float>(start_x), static_cast<float>(start_y), start_color, static_cast<float>(end_x),
static_cast<float>(end_y), end_color, depth);
}
start_x = end_x;
start_y = end_y;
start_color = end_color;
}
}
}
break;
default:
UnreachableCode();
break;
const GPUBackendDrawPolygonCommand::Vertex& v = cmd->vertices[i];
vertices[i].Set(v.precise_x, v.precise_y, depth, v.precise_w, v.color, texpage, v.texcoord, 0xFFFF0000u);
}
if (rc.quad_polygon && m_resolution_scale > 1)
HandleFlippedQuadTextureCoordinates(vertices.data());
if (m_using_uv_limits && textured)
ComputePolygonUVLimits(vertices.data(), cmd->num_vertices);
std::memcpy(m_batch_current_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3);
m_batch_current_vertex_ptr += 3;
// quads
if (rc.quad_polygon)
{
AddVertex(vertices[2]);
AddVertex(vertices[1]);
AddVertex(vertices[3]);
}
IncludeVRAMDityRectangle(cmd->bounds);
}
/// Emits batch vertices for a rectangle draw command.
///
/// The rectangle is tiled into one or more quads (two triangles, six vertices each) so that no quad
/// runs past the edge of the texture page: the first tile starts at the command's texture
/// coordinate, every following tile restarts at texture coordinate 0 so the page repeats.
///
/// @param cmd Rectangle command; position, size, colour, texcoord, draw mode, palette and bounds are read from it.
void GPU_HW::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd)
{
  SetupDraw(cmd);

  // Use a new depth value so the mask test takes previously-masked pixels into consideration.
  if (cmd->params.check_mask_before_draw)
    m_current_depth++;

  const u32 color = cmd->color;
  // Draw mode bits in the low half, palette bits in the high half.
  const u32 texpage = ZeroExtend32(cmd->draw_mode.bits) | (ZeroExtend32(cmd->palette.bits) << 16);
  const float depth = GetCurrentNormalizedVertexDepth();
  const u16 orig_tex_left = cmd->texcoord & 0xFFu;
  const u16 orig_tex_top = cmd->texcoord >> 8;

  // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat.
  u16 tex_top = orig_tex_top;
  for (u16 y_offset = 0; y_offset < cmd->height;)
  {
    // Vertical extent of this row of tiles is limited by the page *height*.
    const u16 quad_height = std::min<u16>(cmd->height - y_offset, TEXTURE_PAGE_HEIGHT - tex_top);
    const float quad_start_y = static_cast<float>(cmd->y + y_offset);
    const float quad_end_y = quad_start_y + static_cast<float>(quad_height);
    const u16 tex_bottom = tex_top + static_cast<u16>(quad_height);

    u16 tex_left = orig_tex_left;
    for (u16 x_offset = 0; x_offset < cmd->width;)
    {
      // Horizontal extent of this tile is limited by the page *width*.
      const u16 quad_width = std::min<u16>(cmd->width - x_offset, TEXTURE_PAGE_WIDTH - tex_left);
      const float quad_start_x = static_cast<float>(cmd->x + x_offset);
      const float quad_end_x = quad_start_x + static_cast<float>(quad_width);
      const u16 tex_right = tex_left + static_cast<u16>(quad_width);

      // Limits are inclusive, hence the -1 on the right/bottom edges.
      const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1);

      // First triangle: top-left, top-right, bottom-left.
      AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits);
      AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
      AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);

      // Second triangle: bottom-left, top-right, bottom-right.
      AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
      AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
      AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom, uv_limits);

      x_offset += quad_width;
      tex_left = 0;
    }

    y_offset += quad_height;
    tex_top = 0;
  }

  IncludeVRAMDityRectangle(cmd->bounds);
}
/// Emits batch geometry for a line/polyline draw command.
///
/// Every consecutive pair of vertices in the command forms one segment, which is forwarded to the
/// coordinate-based DrawLine() overload. A command with fewer than two vertices draws nothing.
///
/// @param cmd Line command; num_vertices, vertices[] (x, y, color), params and bounds are read from it.
void GPU_HW::DrawLine(const GPUBackendDrawLineCommand* cmd)
{
  SetupDraw(cmd);

  // Use a new depth value so the mask test takes previously-masked pixels into consideration.
  if (cmd->params.check_mask_before_draw)
    m_current_depth++;

  const float depth = GetCurrentNormalizedVertexDepth();
  for (u32 i = 1; i < cmd->num_vertices; i++)
  {
    const GPUBackendDrawLineCommand::Vertex& start = cmd->vertices[i - 1u];
    const GPUBackendDrawLineCommand::Vertex& end = cmd->vertices[i];
    DrawLine(static_cast<float>(start.x), static_cast<float>(start.y), start.color, static_cast<float>(end.x),
             static_cast<float>(end.y), end.color, depth);
  }

  IncludeVRAMDityRectangle(cmd->bounds);
}
void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
@ -696,7 +459,8 @@ void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
*bottom = std::max<u32>((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1);
}
GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const
GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color,
GPUBackendCommandParameters params) const
{
// drop precision unless true colour is enabled
if (!m_true_color)
@ -705,7 +469,7 @@ GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32
VRAMFillUBOData uniforms;
std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) =
RGBA8ToFloat(color);
uniforms.u_interlaced_displayed_field = GetActiveLineLSB();
uniforms.u_interlaced_displayed_field = params.active_line_lsb;
return uniforms;
}
@ -725,7 +489,8 @@ Common::Rectangle<u32> GPU_HW::GetVRAMTransferBounds(u32 x, u32 y, u32 width, u3
return out_rc;
}
GPU_HW::VRAMWriteUBOData GPU_HW::GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset) const
GPU_HW::VRAMWriteUBOData GPU_HW::GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset,
GPUBackendCommandParameters params) const
{
const VRAMWriteUBOData uniforms = {(x % VRAM_WIDTH),
(y % VRAM_HEIGHT),
@ -734,23 +499,24 @@ GPU_HW::VRAMWriteUBOData GPU_HW::GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u3
width,
height,
buffer_offset,
m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00,
params.set_mask_while_drawing ? 0x8000u : 0x00,
GetCurrentNormalizedVertexDepth()};
return uniforms;
}
bool GPU_HW::UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const
bool GPU_HW::UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) const
{
// masking enabled, oversized, or overlapping
return (m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH ||
return (params.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH ||
((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH ||
((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT ||
Common::Rectangle<u32>::FromExtents(src_x, src_y, width, height)
.Intersects(Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height)));
}
GPU_HW::VRAMCopyUBOData GPU_HW::GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width,
u32 height) const
GPU_HW::VRAMCopyUBOData GPU_HW::GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) const
{
const VRAMCopyUBOData uniforms = {(src_x % VRAM_WIDTH) * m_resolution_scale,
(src_y % VRAM_HEIGHT) * m_resolution_scale,
@ -760,7 +526,7 @@ GPU_HW::VRAMCopyUBOData GPU_HW::GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst
((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale,
width * m_resolution_scale,
height * m_resolution_scale,
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u,
params.set_mask_while_drawing ? 1u : 0u,
GetCurrentNormalizedVertexDepth()};
return uniforms;
@ -770,6 +536,7 @@ void GPU_HW::IncludeVRAMDityRectangle(const Common::Rectangle<u32>& rect)
{
m_vram_dirty_rect.Include(rect);
#if FIXME
// the vram area can include the texture page, but the game can leave it as-is. in this case, set it as dirty so the
// shadow texture is updated
if (!m_draw_mode.IsTexturePageChanged() &&
@ -778,6 +545,13 @@ void GPU_HW::IncludeVRAMDityRectangle(const Common::Rectangle<u32>& rect)
{
m_draw_mode.SetTexturePageChanged();
}
#endif
}
/// 16-bit convenience overload: widens each edge of the rectangle to 32 bits and hands the result
/// to the Common::Rectangle<u32> overload.
void GPU_HW::IncludeVRAMDityRectangle(const Common::Rectangle<u16>& rect)
{
  const u32 wide_left = ZeroExtend32(rect.left);
  const u32 wide_top = ZeroExtend32(rect.top);
  const u32 wide_right = ZeroExtend32(rect.right);
  const u32 wide_bottom = ZeroExtend32(rect.bottom);

  const Common::Rectangle<u32> widened(wide_left, wide_top, wide_right, wide_bottom);
  IncludeVRAMDityRectangle(widened);
}
void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices)
@ -793,20 +567,20 @@ void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices)
MapBatchVertexPointer(required_vertices);
}
void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand()
void GPU_HW::EnsureVertexBufferSpace(const GPUBackendDrawCommand* cmd)
{
u32 required_vertices;
switch (m_render_command.primitive)
switch (cmd->type)
{
case Primitive::Polygon:
required_vertices = m_render_command.quad_polygon ? 6 : 3;
case GPUBackendCommandType::DrawPolygon:
required_vertices = cmd->rc.quad_polygon ? 6 : 3;
break;
case Primitive::Rectangle:
case GPUBackendCommandType::DrawRectangle:
required_vertices = MAX_VERTICES_FOR_RECTANGLE;
break;
case Primitive::Line:
case GPUBackendCommandType::DrawLine:
default:
required_vertices = m_render_command.polyline ? (GetPolyLineVertexCount() * 6u) : 6u;
required_vertices = static_cast<const GPUBackendDrawLineCommand*>(cmd)->num_vertices * 3u;
break;
}
@ -836,50 +610,55 @@ void GPU_HW::ResetBatchVertexDepth()
m_current_depth = 1;
}
// GPU_HW-level handling of a VRAM fill: only records the filled area as dirty. The area is built
// from (x, y, width, height) and clamped to (0, 0, VRAM_WIDTH, VRAM_HEIGHT) before being included
// in the dirty rectangle. Neither the fill colour nor the command parameters are used here.
void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params)
{
IncludeVRAMDityRectangle(
Common::Rectangle<u32>::FromExtents(x, y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT));
}
// GPU_HW-level handling of a VRAM upload: debug-asserts that the target area lies inside VRAM,
// records that area as dirty, and advances the depth counter when the mask check is enabled.
// The pixel data pointer itself is not read here.
void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params)
{
DebugAssert((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT);
IncludeVRAMDityRectangle(Common::Rectangle<u32>::FromExtents(x, y, width, height));
if (m_GPUSTAT.check_mask_before_draw)
if (params.check_mask_before_draw)
{
// set new vertex counter since we want this to take into consideration previous masked pixels
m_current_depth++;
}
}
// GPU_HW-level handling of a VRAM-to-VRAM copy: records the destination area (clamped to the VRAM
// bounds) as dirty and advances the depth counter when the mask check is enabled. The source
// coordinates are not used here.
void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params)
{
IncludeVRAMDityRectangle(
Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT));
if (m_GPUSTAT.check_mask_before_draw)
if (params.check_mask_before_draw)
{
// set new vertex counter since we want this to take into consideration previous masked pixels
m_current_depth++;
}
}
void GPU_HW::DispatchRenderCommand()
void GPU_HW::SetupDraw(const GPUBackendDrawCommand* cmd)
{
const RenderCommand rc{m_render_command.bits};
const GPURenderCommand rc{cmd->rc.bits};
TextureMode texture_mode;
GPUTextureMode texture_mode;
if (rc.IsTexturingEnabled())
{
// texture page changed - check that the new page doesn't intersect the drawing area
if (m_draw_mode.IsTexturePageChanged())
if ((cmd->draw_mode.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK) !=
(m_last_texture_page_bits.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK) ||
true)
{
m_draw_mode.ClearTexturePageChangedFlag();
m_last_texture_page_bits.bits = cmd->draw_mode.bits;
if (m_vram_dirty_rect.Valid() &&
(m_draw_mode.GetTexturePageRectangle().Intersects(m_vram_dirty_rect) ||
(m_draw_mode.IsUsingPalette() && m_draw_mode.GetTexturePaletteRectangle().Intersects(m_vram_dirty_rect))))
(m_last_texture_page_bits.GetTexturePageRectangle().Intersects(m_vram_dirty_rect) ||
(m_last_texture_page_bits.IsUsingPalette() &&
m_last_texture_page_bits.GetTexturePaletteRectangle().Intersects(m_vram_dirty_rect))))
{
// Log_DevPrintf("Invalidating VRAM read cache due to drawing area overlap");
if (!IsFlushed())
@ -889,32 +668,32 @@ void GPU_HW::DispatchRenderCommand()
}
}
texture_mode = m_draw_mode.GetTextureMode();
texture_mode = cmd->draw_mode.texture_mode;
if (rc.raw_texture_enable)
{
texture_mode =
static_cast<TextureMode>(static_cast<u8>(texture_mode) | static_cast<u8>(TextureMode::RawTextureBit));
static_cast<GPUTextureMode>(static_cast<u8>(texture_mode) | static_cast<u8>(GPUTextureMode::RawTextureBit));
}
}
else
{
texture_mode = TextureMode::Disabled;
texture_mode = GPUTextureMode::Disabled;
}
// has any state changed which requires a new batch?
const TransparencyMode transparency_mode =
rc.transparency_enable ? m_draw_mode.GetTransparencyMode() : TransparencyMode::Disabled;
const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false;
const GPUTransparencyMode transparency_mode =
rc.transparency_enable ? cmd->draw_mode.transparency_mode : GPUTransparencyMode::Disabled;
const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? cmd->draw_mode.dither_enable : false;
if (m_batch.texture_mode != texture_mode || m_batch.transparency_mode != transparency_mode ||
dithering_enable != m_batch.dithering)
{
FlushRender();
}
EnsureVertexBufferSpaceForCurrentCommand();
EnsureVertexBufferSpace(cmd);
// transparency mode change
if (m_batch.transparency_mode != transparency_mode && transparency_mode != TransparencyMode::Disabled)
if (m_batch.transparency_mode != transparency_mode && transparency_mode != GPUTransparencyMode::Disabled)
{
static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}};
m_batch_ubo_data.u_src_alpha_factor = transparent_alpha[static_cast<u32>(transparency_mode)][0];
@ -922,19 +701,19 @@ void GPU_HW::DispatchRenderCommand()
m_batch_ubo_dirty = true;
}
if (m_batch.check_mask_before_draw != m_GPUSTAT.check_mask_before_draw ||
m_batch.set_mask_while_drawing != m_GPUSTAT.set_mask_while_drawing)
if (m_batch.check_mask_before_draw != cmd->params.check_mask_before_draw ||
m_batch.set_mask_while_drawing != cmd->params.set_mask_while_drawing)
{
m_batch.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw;
m_batch.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing;
m_batch.check_mask_before_draw = cmd->params.check_mask_before_draw;
m_batch.set_mask_while_drawing = cmd->params.set_mask_while_drawing;
m_batch_ubo_data.u_set_mask_while_drawing = BoolToUInt32(m_batch.set_mask_while_drawing);
m_batch_ubo_dirty = true;
}
m_batch.interlacing = IsInterlacedRenderingEnabled();
m_batch.interlacing = cmd->params.interlaced_rendering;
if (m_batch.interlacing)
{
const u32 displayed_field = GetActiveLineLSB();
const u32 displayed_field = cmd->params.active_line_lsb;
m_batch_ubo_dirty |= (m_batch_ubo_data.u_interlaced_displayed_field != displayed_field);
m_batch_ubo_data.u_interlaced_displayed_field = displayed_field;
}
@ -944,18 +723,16 @@ void GPU_HW::DispatchRenderCommand()
m_batch.transparency_mode = transparency_mode;
m_batch.dithering = dithering_enable;
if (m_draw_mode.IsTextureWindowChanged())
if (m_last_texture_window_reg.bits != cmd->window.bits)
{
m_draw_mode.ClearTextureWindowChangedFlag();
m_last_texture_window_reg.bits = cmd->window.bits;
m_batch_ubo_data.u_texture_window_mask[0] = ZeroExtend32(m_draw_mode.texture_window_mask_x);
m_batch_ubo_data.u_texture_window_mask[1] = ZeroExtend32(m_draw_mode.texture_window_mask_y);
m_batch_ubo_data.u_texture_window_offset[0] = ZeroExtend32(m_draw_mode.texture_window_offset_x);
m_batch_ubo_data.u_texture_window_offset[1] = ZeroExtend32(m_draw_mode.texture_window_offset_y);
m_batch_ubo_data.u_texture_window_mask[0] = ZeroExtend32(cmd->window.mask_x.GetValue());
m_batch_ubo_data.u_texture_window_mask[1] = ZeroExtend32(cmd->window.mask_y.GetValue());
m_batch_ubo_data.u_texture_window_offset[0] = ZeroExtend32(cmd->window.offset_x.GetValue());
m_batch_ubo_data.u_texture_window_offset[1] = ZeroExtend32(cmd->window.offset_y.GetValue());
m_batch_ubo_dirty = true;
}
LoadVertices();
}
void GPU_HW::FlushRender()
@ -1020,8 +797,10 @@ void GPU_HW::DrawRendererStats(bool is_idle_frame)
ImGui::TextUnformatted("Effective Display Resolution:");
ImGui::NextColumn();
#if FIXME
ImGui::Text("%ux%u", m_crtc_state.display_vram_width * m_resolution_scale,
m_crtc_state.display_vram_height * m_resolution_scale);
#endif
ImGui::NextColumn();
ImGui::TextUnformatted("True Color:");

View File

@ -1,6 +1,6 @@
#pragma once
#include "common/heap_array.h"
#include "gpu.h"
#include "gpu_backend.h"
#include "host_display.h"
#include <sstream>
#include <string>
@ -8,7 +8,7 @@
#include <utility>
#include <vector>
class GPU_HW : public GPU
class GPU_HW : public GPUBackend
{
public:
enum class BatchRenderMode : u8
@ -19,22 +19,14 @@ public:
OnlyTransparent
};
enum class InterlacedRenderMode : u8
{
None,
InterleavedFields,
SeparateFields
};
GPU_HW();
virtual ~GPU_HW();
virtual bool IsHardwareRenderer() const override;
virtual bool Initialize(HostDisplay* host_display) override;
virtual bool Initialize() override;
virtual void Reset() override;
virtual bool DoState(StateWrapper& sw) override;
void UpdateResolutionScale() override final;
std::tuple<u32, u32> GetEffectiveDisplayResolution() override final;
@ -94,8 +86,8 @@ protected:
struct BatchConfig
{
TextureMode texture_mode;
TransparencyMode transparency_mode;
GPUTextureMode texture_mode;
GPUTransparencyMode transparency_mode;
bool dithering;
bool interlacing;
bool set_mask_while_drawing;
@ -105,15 +97,15 @@ protected:
// on a per-pixel basis, and the opaque pixels shouldn't be blended at all.
bool NeedsTwoPassRendering() const
{
return transparency_mode == GPU::TransparencyMode::BackgroundMinusForeground &&
texture_mode != TextureMode::Disabled;
return transparency_mode == GPUTransparencyMode::BackgroundMinusForeground &&
texture_mode != GPUTextureMode::Disabled;
}
// Returns the render mode for this batch.
BatchRenderMode GetRenderMode() const
{
return transparency_mode == TransparencyMode::Disabled ? BatchRenderMode::TransparencyDisabled :
BatchRenderMode::TransparentAndOpaque;
return transparency_mode == GPUTransparencyMode::Disabled ? BatchRenderMode::TransparencyDisabled :
BatchRenderMode::TransparentAndOpaque;
}
};
@ -179,7 +171,6 @@ protected:
virtual void UpdateVRAMReadTexture();
virtual void UpdateDepthBufferFromMaskBit() = 0;
virtual void SetScissorFromDrawingArea() = 0;
virtual void MapBatchVertexPointer(u32 required_vertices) = 0;
virtual void UnmapBatchVertexPointer(u32 used_vertices) = 0;
virtual void UploadUniformBuffer(const void* uniforms, u32 uniforms_size) = 0;
@ -187,12 +178,9 @@ protected:
u32 CalculateResolutionScale() const;
void SetFullVRAMDirtyRectangle()
{
m_vram_dirty_rect.Set(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
m_draw_mode.SetTexturePageChanged();
}
void SetFullVRAMDirtyRectangle() { m_vram_dirty_rect.Set(0, 0, VRAM_WIDTH, VRAM_HEIGHT); }
/// Resets the dirty-rectangle tracker by putting m_vram_dirty_rect into its invalid state.
void ClearVRAMDirtyRectangle() { m_vram_dirty_rect.SetInvalid(); }
void IncludeVRAMDityRectangle(const Common::Rectangle<u16>& rect);
void IncludeVRAMDityRectangle(const Common::Rectangle<u32>& rect);
/// Returns true when the current vertex write pointer is still at the start of the batch,
/// i.e. no vertices are pending in the current batch.
bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; }
@ -200,7 +188,7 @@ protected:
/// Returns the number of vertices between the current write pointer and the end of the batch buffer.
u32 GetBatchVertexSpace() const { return static_cast<u32>(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr); }
/// Returns the number of vertices between the start of the batch and the current write pointer.
u32 GetBatchVertexCount() const { return static_cast<u32>(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr); }
void EnsureVertexBufferSpace(u32 required_vertices);
void EnsureVertexBufferSpaceForCurrentCommand();
void EnsureVertexBufferSpace(const GPUBackendDrawCommand* cmd);
void ResetBatchVertexDepth();
/// Returns the value to be written to the depth buffer for the current operation for mask bit emulation.
@ -209,43 +197,41 @@ protected:
return 1.0f - (static_cast<float>(m_current_depth) / 65535.0f);
}
/// Returns the interlaced mode to use when scanning out/displaying.
ALWAYS_INLINE InterlacedRenderMode GetInterlacedRenderMode() const
{
if (IsInterlacedDisplayEnabled())
{
return m_GPUSTAT.vertical_resolution ? InterlacedRenderMode::InterleavedFields :
InterlacedRenderMode::SeparateFields;
}
else
{
return InterlacedRenderMode::None;
}
}
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void DispatchRenderCommand() override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) override;
void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override;
void DrawLine(const GPUBackendDrawLineCommand* cmd) override;
void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) override;
void FlushRender() override;
void DrawRendererStats(bool is_idle_frame) override;
void CalcScissorRect(int* left, int* top, int* right, int* bottom);
std::tuple<s32, s32> ScaleVRAMCoordinates(s32 x, s32 y) const
ALWAYS_INLINE std::tuple<s32, s32> ScaleVRAMCoordinates(s32 x, s32 y) const
{
return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale));
}
/// Returns the given rectangle multiplied by the current resolution scale.
/// Declared const (like ScaleVRAMCoordinates) since it only reads m_resolution_scale.
ALWAYS_INLINE Common::Rectangle<u32> ScaleVRAMRect(const Common::Rectangle<u32>& rect) const
{
  return rect * m_resolution_scale;
}
/// Computes the area affected by a VRAM transfer, including wrap-around of X.
Common::Rectangle<u32> GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height) const;
/// Returns true if the VRAM copy shader should be used (oversized copies, masking).
bool UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const;
bool UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) const;
VRAMFillUBOData GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const;
VRAMWriteUBOData GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset) const;
VRAMCopyUBOData GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const;
VRAMFillUBOData GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color,
GPUBackendCommandParameters params) const;
VRAMWriteUBOData GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset,
GPUBackendCommandParameters params) const;
VRAMCopyUBOData GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) const;
/// Expands a line into two triangles.
void DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth);
@ -257,6 +243,8 @@ protected:
static void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices);
static bool AreUVLimitsNeeded();
void SetupDraw(const GPUBackendDrawCommand* cmd);
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow;
BatchVertex* m_batch_start_vertex_ptr = nullptr;
@ -280,12 +268,16 @@ protected:
// Bounding box of VRAM area that the GPU has drawn into.
Common::Rectangle<u32> m_vram_dirty_rect;
GPUDrawModeReg m_last_texture_page_bits{};
GPUTextureWindowReg m_last_texture_window_reg{};
// Statistics
RendererStats m_renderer_stats = {};
RendererStats m_last_renderer_stats = {};
// Changed state
bool m_batch_ubo_dirty = true;
bool m_drawing_area_changed = false;
private:
enum : u32
@ -294,8 +286,6 @@ private:
MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(BatchVertex)
};
void LoadVertices();
ALWAYS_INLINE void AddVertex(const BatchVertex& v)
{
std::memcpy(m_batch_current_vertex_ptr, &v, sizeof(BatchVertex));

View File

@ -13,8 +13,8 @@ GPU_HW_D3D11::GPU_HW_D3D11() = default;
GPU_HW_D3D11::~GPU_HW_D3D11()
{
if (m_host_display)
m_host_display->ClearDisplayTexture();
if (g_host_interface->GetDisplay())
g_host_interface->GetDisplay()->ClearDisplayTexture();
m_context->ClearState();
@ -22,8 +22,9 @@ GPU_HW_D3D11::~GPU_HW_D3D11()
DestroyStateObjects();
}
bool GPU_HW_D3D11::Initialize(HostDisplay* host_display)
bool GPU_HW_D3D11::Initialize()
{
HostDisplay* host_display = g_host_interface->GetDisplay();
if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::D3D11)
{
Log_ErrorPrintf("Host render API is incompatible");
@ -32,7 +33,7 @@ bool GPU_HW_D3D11::Initialize(HostDisplay* host_display)
SetCapabilities();
if (!GPU_HW::Initialize(host_display))
if (!GPU_HW::Initialize())
return false;
m_device = static_cast<ID3D11Device*>(host_display->GetRenderDevice());
@ -92,8 +93,6 @@ void GPU_HW_D3D11::Reset()
void GPU_HW_D3D11::ResetGraphicsAPIState()
{
GPU_HW::ResetGraphicsAPIState();
m_context->GSSetShader(nullptr, nullptr, 0);
// In D3D11 we can't leave a buffer mapped across a Present() call.
@ -126,7 +125,7 @@ void GPU_HW_D3D11::UpdateSettings()
if (framebuffer_changed)
{
m_host_display->ClearDisplayTexture();
g_host_interface->GetDisplay()->ClearDisplayTexture();
CreateFramebuffer();
}
@ -333,8 +332,7 @@ bool GPU_HW_D3D11::CreateStateObjects()
for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++)
{
bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT());
if (transparency_mode != static_cast<u8>(TransparencyMode::Disabled) ||
m_texture_filtering != GPUTextureFilter::Nearest)
if (transparency_mode != static_cast<u8>(GPUTransparencyMode::Disabled) || m_texture_filtering != GPUTextureFilter::Nearest)
{
bl_desc.RenderTarget[0].BlendEnable = TRUE;
bl_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE;
@ -342,7 +340,7 @@ bool GPU_HW_D3D11::CreateStateObjects()
bl_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
bl_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO;
bl_desc.RenderTarget[0].BlendOp =
(transparency_mode == static_cast<u8>(TransparencyMode::BackgroundMinusForeground)) ?
(transparency_mode == static_cast<u8>(GPUTransparencyMode::BackgroundMinusForeground)) ?
D3D11_BLEND_OP_REV_SUBTRACT :
D3D11_BLEND_OP_ADD;
bl_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
@ -371,8 +369,8 @@ void GPU_HW_D3D11::DestroyStateObjects()
bool GPU_HW_D3D11::CompileShaders()
{
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering,
m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend);
GPU_HW_ShaderGen shadergen(g_host_interface->GetDisplay()->GetRenderAPI(), m_resolution_scale, m_true_color,
m_scaled_dithering, m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend);
Common::Timer compile_time;
const int progress_total = 1 + 1 + 2 + (4 * 9 * 2 * 2) + 7 + (2 * 3);
@ -442,7 +440,7 @@ bool GPU_HW_D3D11::CompileShaders()
for (u8 interlacing = 0; interlacing < 2; interlacing++)
{
const std::string ps = shadergen.GenerateBatchFragmentShader(
static_cast<BatchRenderMode>(render_mode), static_cast<TextureMode>(texture_mode),
static_cast<BatchRenderMode>(render_mode), static_cast<GPUTextureMode>(texture_mode),
ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing));
m_batch_pixel_shaders[render_mode][texture_mode][dithering][interlacing] =
@ -505,8 +503,8 @@ bool GPU_HW_D3D11::CompileShaders()
{
for (u8 interlacing = 0; interlacing < 3; interlacing++)
{
const std::string ps = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit),
static_cast<InterlacedRenderMode>(interlacing));
const std::string ps = shadergen.GenerateDisplayFragmentShader(
ConvertToBoolUnchecked(depth_24bit), static_cast<GPUInterlacedDisplayMode>(interlacing));
m_display_pixel_shaders[depth_24bit][interlacing] = m_shader_cache.GetPixelShader(m_device.Get(), ps);
if (!m_display_pixel_shaders[depth_24bit][interlacing])
return false;
@ -608,7 +606,7 @@ void GPU_HW_D3D11::DrawUtilityShader(ID3D11PixelShader* shader, const void* unif
void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices)
{
const bool textured = (m_batch.texture_mode != TextureMode::Disabled);
const bool textured = (m_batch.texture_mode != GPUTextureMode::Disabled);
m_context->VSSetShader(m_batch_vertex_shaders[BoolToUInt8(textured)].Get(), nullptr, 0);
@ -617,8 +615,8 @@ void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_verte
.Get(),
nullptr, 0);
const TransparencyMode transparency_mode =
(render_mode == BatchRenderMode::OnlyOpaque) ? TransparencyMode::Disabled : m_batch.transparency_mode;
const GPUTransparencyMode transparency_mode =
(render_mode == BatchRenderMode::OnlyOpaque) ? GPUTransparencyMode::Disabled : m_batch.transparency_mode;
m_context->OMSetBlendState(m_batch_blend_states[static_cast<u8>(transparency_mode)].Get(), nullptr, 0xFFFFFFFFu);
m_context->OMSetDepthStencilState(
m_batch.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
@ -637,46 +635,44 @@ void GPU_HW_D3D11::SetScissorFromDrawingArea()
void GPU_HW_D3D11::ClearDisplay()
{
GPU_HW::ClearDisplay();
static constexpr std::array<float, 4> clear_color = {0.0f, 0.0f, 0.0f, 1.0f};
m_context->ClearRenderTargetView(m_display_texture.GetD3DRTV(), clear_color.data());
}
// Chooses what the host display presents (Direct3D 11 backend).
//
// With g_settings.debugging.show_vram set, the whole VRAM texture is handed to the display.
// Otherwise one of three paths is taken:
//   1. display disabled      -> the display texture is cleared;
//   2. not 24-bit, not interlaced, and the scaled display rectangle fits inside the VRAM
//      texture -> a sub-rectangle of the VRAM texture is presented directly;
//   3. anything else         -> a display pixel shader is drawn and m_display_texture is presented.
// In the non-debug case SetDisplayParameters() is called last.
//
// NOTE(review): this text reads like a rendered diff whose +/- markers were stripped. Several
// statements appear twice in a row, once using m_host_display / m_crtc_state / m_GPUSTAT and once
// using display / m_display_* members. Confirm which lines are live before compiling.
void GPU_HW_D3D11::UpdateDisplay()
{
GPU_HW::UpdateDisplay();
HostDisplay* display = g_host_interface->GetDisplay();
if (g_settings.debugging.show_vram)
{
// Debug view: present the entire VRAM texture; aspect ratio is VRAM_WIDTH / VRAM_HEIGHT.
m_host_display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(),
0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 0, 0,
m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
}
else
{
// Display rectangle in VRAM coordinates, and the same rectangle multiplied by m_resolution_scale.
const u32 vram_offset_x = m_crtc_state.display_vram_left;
const u32 vram_offset_y = m_crtc_state.display_vram_top;
const u32 vram_offset_x = m_display_vram_left;
const u32 vram_offset_y = m_display_vram_top;
const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale;
const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale;
const u32 display_width = m_crtc_state.display_vram_width;
const u32 display_height = m_crtc_state.display_vram_height;
const u32 display_width = m_display_vram_width;
const u32 display_height = m_display_vram_height;
const u32 scaled_display_width = display_width * m_resolution_scale;
const u32 scaled_display_height = display_height * m_resolution_scale;
const InterlacedRenderMode interlaced = GetInterlacedRenderMode();
const GPUInterlacedDisplayMode interlaced = m_display_interlace;
if (IsDisplayDisabled())
if (!m_display_enabled)
{
m_host_display->ClearDisplayTexture();
display->ClearDisplayTexture();
}
else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None &&
else if (!m_display_24bit && interlaced == GPUInterlacedDisplayMode::None &&
(scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() &&
(scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight())
{
// Direct path: the VRAM texture itself is presented, restricted to the scaled display rectangle.
m_host_display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(),
m_vram_texture.GetHeight(), scaled_vram_offset_x, scaled_vram_offset_y,
scaled_display_width, scaled_display_height);
display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(),
scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width,
scaled_display_height);
}
else
{
@ -684,28 +680,26 @@ void GPU_HW_D3D11::UpdateDisplay()
// Shader path: the VRAM texture is bound as pixel shader resource 0 with depth testing disabled.
m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
// The field offset is only non-zero when an interlaced mode is active.
const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0;
const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale;
const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * m_resolution_scale;
const u32 reinterpret_field_offset =
(interlaced != GPUInterlacedDisplayMode::None) ? m_display_interlace_field : 0;
const u32 reinterpret_start_x = m_display_vram_start_x * m_resolution_scale;
const u32 reinterpret_crop_left = (m_display_vram_left - m_display_vram_start_x) * m_resolution_scale;
// Four u32 values handed to the shader: start x, start y (plus field offset), crop-left, field offset.
const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset,
reinterpret_crop_left, reinterpret_field_offset};
// The shader is looked up by [24-bit flag][interlace mode].
ID3D11PixelShader* display_pixel_shader =
m_display_pixel_shaders[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast<u8>(interlaced)].Get();
m_display_pixel_shaders[BoolToUInt8(m_display_24bit)][static_cast<u8>(interlaced)].Get();
SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height);
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(),
m_display_texture.GetHeight(), 0, 0, scaled_display_width,
scaled_display_height);
display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(),
m_display_texture.GetHeight(), 0, 0, scaled_display_width, scaled_display_height);
RestoreGraphicsAPIState();
}
m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height,
m_crtc_state.display_origin_left, m_crtc_state.display_origin_top,
m_crtc_state.display_vram_width, m_crtc_state.display_vram_height,
m_crtc_state.display_aspect_ratio);
// NOTE(review): the fifth and sixth arguments below are m_display_width / m_display_height, whereas
// the m_crtc_state-based call just above passes display_vram_width / display_vram_height and
// GPU_HW_OpenGL::UpdateDisplay passes m_display_vram_width / m_display_vram_height in the same
// positions. Looks like a slip — confirm against SetDisplayParameters' parameter list.
display->SetDisplayParameters(m_display_width, m_display_height, m_display_origin_left, m_display_origin_top,
m_display_width, m_display_height, m_display_aspect_ratio);
}
}
@ -742,50 +736,50 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
RestoreGraphicsAPIState();
}
// Fills the VRAM rectangle (x, y, width, height) with `color` by drawing a fill pixel shader.
//
// A rectangle that extends past VRAM_WIDTH or VRAM_HEIGHT is not drawn on the GPU. Instead the
// whole of VRAM is read back (ReadVRAM), the fill is applied through GPU::FillVRAM /
// SoftwareFillVRAM, and m_vram_shadow is uploaded again through UpdateVRAM.
//
// NOTE(review): two signatures and several paired statements follow, one set without and one set
// with the trailing GPUBackendCommandParameters argument. This looks like a diff with its +/-
// markers stripped; confirm which lines are live.
void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params)
{
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
{
// CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
GPU::FillVRAM(x, y, width, height, color);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data());
SoftwareFillVRAM(x, y, width, height, color, params);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), params);
return;
}
// In-bounds fill: the GPU_HW base implementation runs first, then the rectangle is drawn.
GPU_HW::FillVRAM(x, y, width, height, color);
GPU_HW::FillVRAM(x, y, width, height, color, params);
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color, params);
m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0);
// Viewport and scissor are given in scaled coordinates (multiplied by m_resolution_scale).
SetViewportAndScissor(x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale,
height * m_resolution_scale);
// The interlaced fill shader is selected when interlaced rendering is active, else the plain one.
DrawUtilityShader(IsInterlacedRenderingEnabled() ? m_vram_interlaced_fill_pixel_shader.Get() :
m_vram_fill_pixel_shader.Get(),
DrawUtilityShader(params.interlaced_rendering ? m_vram_interlaced_fill_pixel_shader.Get() :
m_vram_fill_pixel_shader.Get(),
&uniforms, sizeof(uniforms));
RestoreGraphicsAPIState();
}
// Writes width * height u16 values from `data` into VRAM at (x, y).
//
// The values are copied into m_texture_stream_buffer, which is then bound as a pixel shader
// resource and drawn into place with m_vram_write_pixel_shader. The scissor is limited to the
// scaled transfer bounds returned by GetVRAMTransferBounds().
//
// NOTE(review): two signatures and several paired statements follow, one set reading m_GPUSTAT and
// one set reading the `params` argument. This looks like a diff with its +/- markers stripped;
// confirm which lines are live.
void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params)
{
const Common::Rectangle<u32> bounds = GetVRAMTransferBounds(x, y, width, height);
GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data);
GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data, params);
// Stage the source pixels in the stream buffer; the copy size is num_pixels * sizeof(u16) bytes.
const u32 num_pixels = width * height;
const auto map_result = m_texture_stream_buffer.Map(m_context.Get(), sizeof(u16), num_pixels * sizeof(u16));
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
m_texture_stream_buffer.Unmap(m_context.Get(), num_pixels * sizeof(u16));
// map_result.index_aligned is forwarded to the shader uniforms.
const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned);
const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned, params);
// The "less" depth state is used when check_mask_before_draw is set, otherwise the "always" state.
m_context->OMSetDepthStencilState(
m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
params.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf());
// the viewport should already be set to the full vram, so just adjust the scissor
const Common::Rectangle<u32> scaled_bounds = bounds * m_resolution_scale;
const Common::Rectangle<u32> scaled_bounds(ScaleVRAMRect(bounds));
SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), scaled_bounds.GetHeight());
DrawUtilityShader(m_vram_write_pixel_shader.Get(), &uniforms, sizeof(uniforms));
@ -793,9 +787,10 @@ void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* d
RestoreGraphicsAPIState();
}
void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params)
{
if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height))
if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height, params))
{
const Common::Rectangle<u32> src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height);
const Common::Rectangle<u32> dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height);
@ -803,18 +798,18 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
UpdateVRAMReadTexture();
IncludeVRAMDityRectangle(dst_bounds);
const VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height);
const VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height, params);
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale);
const Common::Rectangle<u32> dst_bounds_scaled(ScaleVRAMRect(dst_bounds));
SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(),
dst_bounds_scaled.GetHeight());
m_context->OMSetDepthStencilState(
m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
params.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray());
DrawUtilityShader(m_vram_copy_pixel_shader.Get(), &uniforms, sizeof(uniforms));
RestoreGraphicsAPIState();
if (m_GPUSTAT.check_mask_before_draw)
if (params.check_mask_before_draw)
m_current_depth++;
return;
@ -826,7 +821,7 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
if (m_vram_dirty_rect.Intersects(Common::Rectangle<u32>::FromExtents(src_x, src_y, width, height)))
UpdateVRAMReadTexture();
GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height);
GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params);
src_x *= m_resolution_scale;
src_y *= m_resolution_scale;
@ -841,7 +836,7 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
void GPU_HW_D3D11::UpdateVRAMReadTexture()
{
const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale;
const Common::Rectangle<u32> scaled_rect(ScaleVRAMRect(m_vram_dirty_rect));
const CD3D11_BOX src_box(scaled_rect.left, scaled_rect.top, 0, scaled_rect.right, scaled_rect.bottom, 1);
m_context->CopySubresourceRegion(m_vram_read_texture, 0, scaled_rect.left, scaled_rect.top, 0, m_vram_texture, 0,
&src_box);
@ -864,7 +859,3 @@ void GPU_HW_D3D11::UpdateDepthBufferFromMaskBit()
RestoreGraphicsAPIState();
}
// Factory: constructs a GPU_HW_D3D11 and returns it owned by a std::unique_ptr<GPU>.
// NOTE(review): the enclosing hunk header (-864,7 +859,3) suggests this definition is removed by
// the change being shown — confirm before relying on it.
std::unique_ptr<GPU> GPU::CreateHardwareD3D11Renderer()
{
return std::make_unique<GPU_HW_D3D11>();
}

View File

@ -19,7 +19,7 @@ public:
GPU_HW_D3D11();
~GPU_HW_D3D11() override;
bool Initialize(HostDisplay* host_display) override;
bool Initialize() override;
void Reset() override;
void ResetGraphicsAPIState() override;
@ -30,9 +30,9 @@ protected:
void ClearDisplay() override;
void UpdateDisplay() override;
void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, GPUBackendCommandParameters params) override;
void UpdateVRAMReadTexture() override;
void UpdateDepthBufferFromMaskBit() override;
void SetScissorFromDrawingArea() override;

View File

@ -21,9 +21,9 @@ GPU_HW_OpenGL::~GPU_HW_OpenGL()
if (m_texture_buffer_r16ui_texture != 0)
glDeleteTextures(1, &m_texture_buffer_r16ui_texture);
if (m_host_display)
if (g_host_interface->GetDisplay())
{
m_host_display->ClearDisplayTexture();
g_host_interface->GetDisplay()->ClearDisplayTexture();
ResetGraphicsAPIState();
}
@ -32,8 +32,9 @@ GPU_HW_OpenGL::~GPU_HW_OpenGL()
glUseProgram(0);
}
bool GPU_HW_OpenGL::Initialize(HostDisplay* host_display)
bool GPU_HW_OpenGL::Initialize()
{
HostDisplay* host_display = g_host_interface->GetDisplay();
if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::OpenGL &&
host_display->GetRenderAPI() != HostDisplay::RenderAPI::OpenGLES)
{
@ -45,7 +46,7 @@ bool GPU_HW_OpenGL::Initialize(HostDisplay* host_display)
m_shader_cache.Open(IsGLES(), g_host_interface->GetShaderCacheBasePath());
if (!GPU_HW::Initialize(host_display))
if (!GPU_HW::Initialize())
return false;
if (!CreateFramebuffer())
@ -130,7 +131,7 @@ void GPU_HW_OpenGL::UpdateSettings()
if (framebuffer_changed)
{
m_host_display->ClearDisplayTexture();
g_host_interface->GetDisplay()->ClearDisplayTexture();
CreateFramebuffer();
}
if (shaders_changed)
@ -358,8 +359,8 @@ bool GPU_HW_OpenGL::CreateTextureBuffer()
bool GPU_HW_OpenGL::CompilePrograms()
{
const bool use_binding_layout = GPU_HW_ShaderGen::UseGLSLBindingLayout();
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering,
m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend);
GPU_HW_ShaderGen shadergen(g_host_interface->GetDisplay()->GetRenderAPI(), m_resolution_scale, m_true_color,
m_scaled_dithering, m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend);
Common::Timer compile_time;
const int progress_total = (4 * 9 * 2 * 2) + (2 * 3) + 5;
@ -383,10 +384,10 @@ bool GPU_HW_OpenGL::CompilePrograms()
{
for (u8 interlacing = 0; interlacing < 2; interlacing++)
{
const bool textured = (static_cast<TextureMode>(texture_mode) != TextureMode::Disabled);
const bool textured = (static_cast<GPUTextureMode>(texture_mode) != GPUTextureMode::Disabled);
const std::string batch_vs = shadergen.GenerateBatchVertexShader(textured);
const std::string fs = shadergen.GenerateBatchFragmentShader(
static_cast<BatchRenderMode>(render_mode), static_cast<TextureMode>(texture_mode),
static_cast<BatchRenderMode>(render_mode), static_cast<GPUTextureMode>(texture_mode),
ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing));
const auto link_callback = [this, textured, use_binding_layout](GL::Program& prog) {
@ -444,7 +445,7 @@ bool GPU_HW_OpenGL::CompilePrograms()
{
const std::string vs = shadergen.GenerateScreenQuadVertexShader();
const std::string fs = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit),
static_cast<InterlacedRenderMode>(interlaced));
static_cast<GPUInterlacedDisplayMode>(interlaced));
std::optional<GL::Program> prog =
m_shader_cache.GetProgram(vs, {}, fs, [this, use_binding_layout](GL::Program& prog) {
@ -558,23 +559,24 @@ void GPU_HW_OpenGL::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vert
[BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)];
prog.Bind();
if (m_batch.texture_mode != TextureMode::Disabled)
if (m_batch.texture_mode != GPUTextureMode::Disabled)
m_vram_read_texture.Bind();
if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == BatchRenderMode::OnlyOpaque)
if (m_batch.transparency_mode == GPUTransparencyMode::Disabled || render_mode == BatchRenderMode::OnlyOpaque)
{
glDisable(GL_BLEND);
}
else
{
glEnable(GL_BLEND);
glBlendEquationSeparate(
m_batch.transparency_mode == TransparencyMode::BackgroundMinusForeground ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD,
GL_FUNC_ADD);
glBlendEquationSeparate(m_batch.transparency_mode == GPUTransparencyMode::BackgroundMinusForeground ?
GL_FUNC_REVERSE_SUBTRACT :
GL_FUNC_ADD,
GL_FUNC_ADD);
glBlendFuncSeparate(GL_ONE, m_supports_dual_source_blend ? GL_SRC1_ALPHA : GL_SRC_ALPHA, GL_ONE, GL_ZERO);
}
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
glDepthFunc(m_batch.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
glDrawArrays(GL_TRIANGLES, m_batch_base_vertex, num_vertices);
}
@ -606,8 +608,6 @@ void GPU_HW_OpenGL::UploadUniformBuffer(const void* data, u32 data_size)
void GPU_HW_OpenGL::ClearDisplay()
{
GPU_HW::ClearDisplay();
m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
glDisable(GL_SCISSOR_TEST);
glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
@ -618,41 +618,40 @@ void GPU_HW_OpenGL::ClearDisplay()
// Chooses what the host display presents (OpenGL backend).
//
// With g_settings.debugging.show_vram set, the whole VRAM texture is handed to the display.
// Otherwise one of three paths is taken:
//   1. display disabled      -> the display texture is cleared;
//   2. not 24-bit, not interlaced, and the scaled display rectangle fits inside the VRAM
//      texture -> a sub-rectangle of the VRAM texture is presented directly;
//   3. anything else         -> a display program is drawn into m_display_texture, which is
//      then presented.
// Every SetDisplayTexture() call here passes a y origin measured from the texture height together
// with a negated height (presumably to flip the image vertically — confirm against HostDisplay).
//
// NOTE(review): this text reads like a rendered diff whose +/- markers were stripped. Several
// statements appear twice in a row, once using m_host_display / m_crtc_state / m_GPUSTAT and once
// using display / m_display_* members. Confirm which lines are live before compiling.
void GPU_HW_OpenGL::UpdateDisplay()
{
GPU_HW::UpdateDisplay();
HostDisplay* display = g_host_interface->GetDisplay();
if (g_settings.debugging.show_vram)
{
// Debug view: present the entire VRAM texture; aspect ratio is VRAM_WIDTH / VRAM_HEIGHT.
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_texture.GetGLId())),
m_vram_texture.GetWidth(), static_cast<s32>(m_vram_texture.GetHeight()), 0,
m_vram_texture.GetHeight(), m_vram_texture.GetWidth(),
-static_cast<s32>(m_vram_texture.GetHeight()));
m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_texture.GetGLId())),
m_vram_texture.GetWidth(), static_cast<s32>(m_vram_texture.GetHeight()), 0,
m_vram_texture.GetHeight(), m_vram_texture.GetWidth(),
-static_cast<s32>(m_vram_texture.GetHeight()));
display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
}
else
{
// Display rectangle in VRAM coordinates, and the same rectangle multiplied by m_resolution_scale.
const u32 vram_offset_x = m_crtc_state.display_vram_left;
const u32 vram_offset_y = m_crtc_state.display_vram_top;
const u32 vram_offset_x = m_display_vram_left;
const u32 vram_offset_y = m_display_vram_top;
const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale;
const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale;
const u32 display_width = m_crtc_state.display_vram_width;
const u32 display_height = m_crtc_state.display_vram_height;
const u32 scaled_display_width = display_width * m_resolution_scale;
const u32 scaled_display_height = display_height * m_resolution_scale;
const InterlacedRenderMode interlaced = GetInterlacedRenderMode();
// NOTE(review): in the m_display_* variant below, scaled_display_width / scaled_display_height are
// computed from m_display_width / m_display_height, not from the display_width / display_height
// locals (which hold m_display_vram_width / m_display_vram_height). The m_crtc_state variant
// above and GPU_HW_D3D11::UpdateDisplay both scale the VRAM width/height instead, and the
// display_width / display_height locals are then not referenced in the visible lines.
// Looks like a slip — confirm.
const u32 display_width = m_display_vram_width;
const u32 display_height = m_display_vram_height;
const u32 scaled_display_width = m_display_width * m_resolution_scale;
const u32 scaled_display_height = m_display_height * m_resolution_scale;
const GPUInterlacedDisplayMode interlaced = m_display_interlace;
if (IsDisplayDisabled())
if (!m_display_enabled)
{
m_host_display->ClearDisplayTexture();
display->ClearDisplayTexture();
}
else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == GPU_HW::InterlacedRenderMode::None &&
else if (!m_display_24bit && interlaced == GPUInterlacedDisplayMode::None &&
(scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() &&
(scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight())
{
// Direct path: the VRAM texture itself is presented, restricted to the scaled display rectangle.
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_texture.GetGLId())),
m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), scaled_vram_offset_x,
m_vram_texture.GetHeight() - scaled_vram_offset_y, scaled_display_width,
-static_cast<s32>(scaled_display_height));
display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_texture.GetGLId())),
m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), scaled_vram_offset_x,
m_vram_texture.GetHeight() - scaled_vram_offset_y, scaled_display_width,
-static_cast<s32>(scaled_display_height));
}
else
{
@ -660,16 +659,17 @@ void GPU_HW_OpenGL::UpdateDisplay()
// Shader path: scissor and depth tests are turned off while the display program draws.
glDisable(GL_SCISSOR_TEST);
glDisable(GL_DEPTH_TEST);
// The program is looked up by [24-bit flag][interlace mode].
m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast<u8>(interlaced)].Bind();
m_display_programs[BoolToUInt8(m_display_24bit)][static_cast<u8>(interlaced)].Bind();
m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
m_vram_texture.Bind();
// height_div2 is 1 only for SeparateFields, which halves the height used in the y computation below.
const u8 height_div2 = BoolToUInt8(interlaced == GPU_HW::InterlacedRenderMode::SeparateFields);
const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0;
const u8 height_div2 = BoolToUInt8(interlaced == GPUInterlacedDisplayMode::SeparateFields);
const u32 reinterpret_field_offset =
(interlaced != GPUInterlacedDisplayMode::None) ? m_display_interlace_field : 0;
// The y offset is measured from the texture height rather than from zero.
const u32 scaled_flipped_vram_offset_y = m_vram_texture.GetHeight() - scaled_vram_offset_y -
reinterpret_field_offset - (scaled_display_height >> height_div2);
const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale;
const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * m_resolution_scale;
const u32 reinterpret_start_x = m_display_vram_start_x * m_resolution_scale;
const u32 reinterpret_crop_left = (m_display_vram_left - m_display_vram_start_x) * m_resolution_scale;
// Four u32 values uploaded for the shader: start x, flipped start y, crop-left, field offset.
const u32 uniforms[4] = {reinterpret_start_x, scaled_flipped_vram_offset_y, reinterpret_crop_left,
reinterpret_field_offset};
UploadUniformBuffer(uniforms, sizeof(uniforms));
@ -679,10 +679,9 @@ void GPU_HW_OpenGL::UpdateDisplay()
// Three vertices are drawn with the attribute-less VAO bound.
glBindVertexArray(m_attributeless_vao_id);
glDrawArrays(GL_TRIANGLES, 0, 3);
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0,
scaled_display_height, scaled_display_width,
-static_cast<s32>(scaled_display_height));
display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0, scaled_display_height,
scaled_display_width, -static_cast<s32>(scaled_display_height));
// restore state
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id);
@ -692,10 +691,8 @@ void GPU_HW_OpenGL::UpdateDisplay()
glEnable(GL_SCISSOR_TEST);
}
m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height,
m_crtc_state.display_origin_left, m_crtc_state.display_origin_top,
m_crtc_state.display_vram_width, m_crtc_state.display_vram_height,
m_crtc_state.display_aspect_ratio);
display->SetDisplayParameters(m_display_width, m_display_height, m_display_origin_left, m_display_origin_top,
m_display_vram_width, m_display_vram_height, m_display_aspect_ratio);
}
}
@ -730,19 +727,19 @@ void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
RestoreGraphicsAPIState();
}
// Fills the VRAM rectangle (x, y, width, height) with `color` (OpenGL backend).
//
// A rectangle that extends past VRAM_WIDTH or VRAM_HEIGHT is not drawn on the GPU. Instead the
// whole of VRAM is read back (ReadVRAM), the fill is applied through GPU::FillVRAM /
// SoftwareFillVRAM, and m_vram_shadow is uploaded again through UpdateVRAM.
// In-bounds fills set glClearColor on the non-interlaced fast path, and bind
// m_vram_interlaced_fill_program when interlaced rendering is active.
//
// NOTE(review): two signatures and several paired statements follow, one set without and one set
// with the trailing GPUBackendCommandParameters argument, and hunk headers hide some statements.
// This looks like a diff with its +/- markers stripped; confirm which lines are live.
void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params)
{
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
{
// CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
GPU::FillVRAM(x, y, width, height, color);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data());
SoftwareFillVRAM(x, y, width, height, color, params);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), params);
return;
}
GPU_HW::FillVRAM(x, y, width, height, color);
GPU_HW::FillVRAM(x, y, width, height, color, params);
// scale coordinates
x *= m_resolution_scale;
@ -753,7 +750,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
// The scissor y is measured from the texture height rather than from zero.
glScissor(x, m_vram_texture.GetHeight() - y - height, width, height);
// fast path when not using interlaced rendering
if (!IsInterlacedRenderingEnabled())
if (!params.interlaced_rendering)
{
// Without m_true_color the colour is converted to RGBA5551 and back before being used.
const auto [r, g, b, a] = RGBA8ToFloat(m_true_color ? color : RGBA5551ToRGBA8888(RGBA8888ToRGBA5551(color)));
glClearColor(r, g, b, a);
@ -763,7 +760,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
}
else
{
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color, params);
m_vram_interlaced_fill_program.Bind();
UploadUniformBuffer(&uniforms, sizeof(uniforms));
@ -776,13 +773,14 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
}
}
// Writes width * height pixels from `data` into VRAM at (x, y) (OpenGL backend).
//
// Two paths are visible:
//   * when num_pixels is below m_max_texture_buffer_size, or m_use_ssbo_for_vram_writes is set,
//     the u16 values are copied into m_texture_stream_buffer and drawn with m_vram_write_program;
//   * otherwise the stream buffer is mapped with sizeof(u32) per pixel; the rest of that path is
//     hidden behind a hunk header. An oversized transfer on this path takes a CPU round trip
//     (ReadVRAM, GPU::UpdateVRAM / SoftwareUpdateVRAM, then a full-VRAM UpdateVRAM from
//     m_vram_shadow).
//
// NOTE(review): two signatures and several paired statements follow, one set reading m_GPUSTAT and
// one set reading the `params` argument, and hunk headers hide some statements (including the
// condition guarding the CPU round trip). This looks like a diff with its +/- markers stripped;
// confirm which lines are live.
void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data,
GPUBackendCommandParameters params)
{
const u32 num_pixels = width * height;
if (num_pixels < m_max_texture_buffer_size || m_use_ssbo_for_vram_writes)
{
const Common::Rectangle<u32> bounds = GetVRAMTransferBounds(x, y, width, height);
GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data);
GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data, params);
// Stage the source pixels in the stream buffer; the copy size is num_pixels * sizeof(u16) bytes.
const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16));
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
@ -790,7 +788,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
m_texture_stream_buffer->Unbind();
glDisable(GL_BLEND);
// GL_GEQUAL is used when check_mask_before_draw is set, GL_ALWAYS otherwise.
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
glDepthFunc(params.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
m_vram_write_program.Bind();
if (m_use_ssbo_for_vram_writes)
@ -798,11 +796,11 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
else
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
// map_result.index_aligned is forwarded to the shader uniforms.
const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned);
const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned, params);
UploadUniformBuffer(&uniforms, sizeof(uniforms));
// the viewport should already be set to the full vram, so just adjust the scissor
const Common::Rectangle<u32> scaled_bounds = bounds * m_resolution_scale;
const Common::Rectangle<u32> scaled_bounds(ScaleVRAMRect(bounds));
glScissor(scaled_bounds.left, m_vram_texture.GetHeight() - scaled_bounds.top - scaled_bounds.GetHeight(),
scaled_bounds.GetWidth(), scaled_bounds.GetHeight());
@ -818,12 +816,12 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
// CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
GPU::UpdateVRAM(x, y, width, height, data);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data());
SoftwareUpdateVRAM(x, y, width, height, data, params);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), params);
return;
}
GPU_HW::UpdateVRAM(x, y, width, height, data);
GPU_HW::UpdateVRAM(x, y, width, height, data, params);
// This path maps the stream buffer with sizeof(u32) per pixel instead of sizeof(u16).
const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32));
@ -881,9 +879,10 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
}
}
void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params)
{
if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height))
if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height, params))
{
const Common::Rectangle<u32> src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height);
const Common::Rectangle<u32> dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height);
@ -891,14 +890,14 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
UpdateVRAMReadTexture();
IncludeVRAMDityRectangle(dst_bounds);
VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height);
VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height, params);
uniforms.u_src_y = m_vram_texture.GetHeight() - uniforms.u_src_y - uniforms.u_height;
uniforms.u_dst_y = m_vram_texture.GetHeight() - uniforms.u_dst_y - uniforms.u_height;
UploadUniformBuffer(&uniforms, sizeof(uniforms));
glDisable(GL_SCISSOR_TEST);
glDisable(GL_BLEND);
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
glDepthFunc(params.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale);
glViewport(dst_bounds_scaled.left,
@ -910,13 +909,13 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
RestoreGraphicsAPIState();
if (m_GPUSTAT.check_mask_before_draw)
if (params.check_mask_before_draw)
m_current_depth++;
return;
}
GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height);
GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params);
src_x *= m_resolution_scale;
src_y *= m_resolution_scale;
@ -951,7 +950,7 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
void GPU_HW_OpenGL::UpdateVRAMReadTexture()
{
const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale;
const Common::Rectangle<u32> scaled_rect = ScaleVRAMRect(m_vram_dirty_rect);
const u32 width = scaled_rect.GetWidth();
const u32 height = scaled_rect.GetHeight();
const u32 x = scaled_rect.left;
@ -996,8 +995,3 @@ void GPU_HW_OpenGL::UpdateDepthBufferFromMaskBit()
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glEnable(GL_SCISSOR_TEST);
}
// Factory: constructs a GPU_HW_OpenGL and returns it owned by a std::unique_ptr<GPU>.
// NOTE(review): the enclosing hunk header (-996,8 +995,3) suggests this definition is removed by
// the change being shown — confirm before relying on it.
std::unique_ptr<GPU> GPU::CreateHardwareOpenGLRenderer()
{
return std::make_unique<GPU_HW_OpenGL>();
}

View File

@ -15,7 +15,7 @@ public:
GPU_HW_OpenGL();
~GPU_HW_OpenGL() override;
bool Initialize(HostDisplay* host_display) override;
bool Initialize() override;
void Reset() override;
void ResetGraphicsAPIState() override;
@ -26,9 +26,10 @@ protected:
void ClearDisplay() override;
void UpdateDisplay() override;
void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) override;
void UpdateVRAMReadTexture() override;
void UpdateDepthBufferFromMaskBit() override;
void SetScissorFromDrawingArea() override;

View File

@ -17,7 +17,7 @@ GPU_HW_ShaderGen::~GPU_HW_ShaderGen() = default;
void GPU_HW_ShaderGen::WriteCommonFunctions(std::stringstream& ss)
{
ss << "CONSTANT uint RESOLUTION_SCALE = " << m_resolution_scale << "u;\n";
ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << GPU::VRAM_WIDTH << ", " << GPU::VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
ss << "CONSTANT float2 RCP_VRAM_SIZE = float2(1.0, 1.0) / float2(VRAM_SIZE);\n";
ss << R"(
@ -628,12 +628,11 @@ void FilteredSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits,
}
std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency,
GPU::TextureMode texture_mode, bool dithering,
bool interlacing)
GPUTextureMode texture_mode, bool dithering, bool interlacing)
{
const GPU::TextureMode actual_texture_mode = texture_mode & ~GPU::TextureMode::RawTextureBit;
const bool raw_texture = (texture_mode & GPU::TextureMode::RawTextureBit) == GPU::TextureMode::RawTextureBit;
const bool textured = (texture_mode != GPU::TextureMode::Disabled);
const GPUTextureMode actual_texture_mode = texture_mode & ~GPUTextureMode::RawTextureBit;
const bool raw_texture = (texture_mode & GPUTextureMode::RawTextureBit) == GPUTextureMode::RawTextureBit;
const bool textured = (texture_mode != GPUTextureMode::Disabled);
const bool use_dual_source =
m_supports_dual_source_blend && ((transparency != GPU_HW::BatchRenderMode::TransparencyDisabled &&
transparency != GPU_HW::BatchRenderMode::OnlyOpaque) ||
@ -646,10 +645,9 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENT", transparency == GPU_HW::BatchRenderMode::OnlyTransparent);
DefineMacro(ss, "TEXTURED", textured);
DefineMacro(ss, "PALETTE",
actual_texture_mode == GPU::TextureMode::Palette4Bit ||
actual_texture_mode == GPU::TextureMode::Palette8Bit);
DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPU::TextureMode::Palette4Bit);
DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPU::TextureMode::Palette8Bit);
actual_texture_mode == GPUTextureMode::Palette4Bit || actual_texture_mode == GPUTextureMode::Palette8Bit);
DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPUTextureMode::Palette4Bit);
DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPUTextureMode::Palette8Bit);
DefineMacro(ss, "RAW_TEXTURE", raw_texture);
DefineMacro(ss, "DITHERING", dithering);
DefineMacro(ss, "DITHERING_SCALED", m_scaled_dithering);
@ -671,7 +669,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
{
if (i > 0)
ss << ", ";
ss << GPU::DITHER_MATRIX[i / 4][i % 4];
ss << DITHER_MATRIX[i / 4][i % 4];
}
if (m_glsl)
ss << " );\n";
@ -967,14 +965,13 @@ std::string GPU_HW_ShaderGen::GenerateInterlacedFillFragmentShader()
return ss.str();
}
std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit,
GPU_HW::InterlacedRenderMode interlace_mode)
std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, GPUInterlacedDisplayMode interlace_mode)
{
std::stringstream ss;
WriteHeader(ss);
DefineMacro(ss, "DEPTH_24BIT", depth_24bit);
DefineMacro(ss, "INTERLACED", interlace_mode != GPU_HW::InterlacedRenderMode::None);
DefineMacro(ss, "INTERLEAVED", interlace_mode == GPU_HW::InterlacedRenderMode::InterleavedFields);
DefineMacro(ss, "INTERLACED", interlace_mode != GPUInterlacedDisplayMode::None);
DefineMacro(ss, "INTERLEAVED", interlace_mode == GPUInterlacedDisplayMode::InterleavedFields);
WriteCommonFunctions(ss);
DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_crop_left", "uint u_field_offset"}, true);

View File

@ -10,10 +10,10 @@ public:
~GPU_HW_ShaderGen();
std::string GenerateBatchVertexShader(bool textured);
std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPU::TextureMode texture_mode,
std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPUTextureMode texture_mode,
bool dithering, bool interlacing);
std::string GenerateInterlacedFillFragmentShader();
std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode);
std::string GenerateDisplayFragmentShader(bool depth_24bit, GPUInterlacedDisplayMode interlace_mode);
std::string GenerateVRAMReadFragmentShader();
std::string GenerateVRAMWriteFragmentShader(bool use_ssbo);
std::string GenerateVRAMCopyFragmentShader();

View File

@ -17,17 +17,18 @@ GPU_HW_Vulkan::GPU_HW_Vulkan() = default;
GPU_HW_Vulkan::~GPU_HW_Vulkan()
{
if (m_host_display)
if (g_host_interface->GetDisplay())
{
m_host_display->ClearDisplayTexture();
g_host_interface->GetDisplay()->ClearDisplayTexture();
ResetGraphicsAPIState();
}
DestroyResources();
}
bool GPU_HW_Vulkan::Initialize(HostDisplay* host_display)
bool GPU_HW_Vulkan::Initialize()
{
HostDisplay* host_display = g_host_interface->GetDisplay();
if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::Vulkan)
{
Log_ErrorPrintf("Host render API is incompatible");
@ -37,7 +38,7 @@ bool GPU_HW_Vulkan::Initialize(HostDisplay* host_display)
Assert(g_vulkan_shader_cache);
SetCapabilities();
if (!GPU_HW::Initialize(host_display))
if (!GPU_HW::Initialize())
return false;
if (!CreatePipelineLayouts())
@ -131,7 +132,7 @@ void GPU_HW_Vulkan::UpdateSettings()
if (shaders_changed)
{
// clear it since we draw a loading screen and it's not in the correct state
m_host_display->ClearDisplayTexture();
g_host_interface->GetDisplay()->ClearDisplayTexture();
DestroyPipelines();
CompilePipelines();
}
@ -583,8 +584,8 @@ bool GPU_HW_Vulkan::CompilePipelines()
VkDevice device = g_vulkan_context->GetDevice();
VkPipelineCache pipeline_cache = g_vulkan_shader_cache->GetPipelineCache();
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering,
m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend);
GPU_HW_ShaderGen shadergen(g_host_interface->GetDisplay()->GetRenderAPI(), m_resolution_scale, m_true_color,
m_scaled_dithering, m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend);
Common::Timer compile_time;
const int progress_total = 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + 2 + 2 + 2 + (2 * 3);
@ -629,7 +630,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
for (u8 interlacing = 0; interlacing < 2; interlacing++)
{
const std::string fs = shadergen.GenerateBatchFragmentShader(
static_cast<BatchRenderMode>(render_mode), static_cast<TextureMode>(texture_mode),
static_cast<BatchRenderMode>(render_mode), static_cast<GPUTextureMode>(texture_mode),
ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing));
VkShaderModule shader = g_vulkan_shader_cache->GetFragmentShader(fs);
@ -658,7 +659,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
{
for (u8 interlacing = 0; interlacing < 2; interlacing++)
{
const bool textured = (static_cast<TextureMode>(texture_mode) != TextureMode::Disabled);
const bool textured = (static_cast<GPUTextureMode>(texture_mode) != GPUTextureMode::Disabled);
gpbuilder.SetPipelineLayout(m_batch_pipeline_layout);
gpbuilder.SetRenderPass(m_vram_render_pass, 0);
@ -683,7 +684,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
(depth_test != 0) ? VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS);
gpbuilder.SetNoBlendingState();
if ((static_cast<TransparencyMode>(transparency_mode) != TransparencyMode::Disabled &&
if ((static_cast<GPUTransparencyMode>(transparency_mode) != GPUTransparencyMode::Disabled &&
(static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled &&
static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque)) ||
m_texture_filtering != GPUTextureFilter::Nearest)
@ -691,7 +692,8 @@ bool GPU_HW_Vulkan::CompilePipelines()
gpbuilder.SetBlendAttachment(
0, true, VK_BLEND_FACTOR_ONE,
m_supports_dual_source_blend ? VK_BLEND_FACTOR_SRC1_ALPHA : VK_BLEND_FACTOR_SRC_ALPHA,
(static_cast<TransparencyMode>(transparency_mode) == TransparencyMode::BackgroundMinusForeground &&
(static_cast<GPUTransparencyMode>(transparency_mode) ==
GPUTransparencyMode::BackgroundMinusForeground &&
static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled &&
static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque) ?
VK_BLEND_OP_REVERSE_SUBTRACT :
@ -874,7 +876,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
for (u8 interlace_mode = 0; interlace_mode < 3; interlace_mode++)
{
VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateDisplayFragmentShader(
ConvertToBoolUnchecked(depth_24), static_cast<InterlacedRenderMode>(interlace_mode)));
ConvertToBoolUnchecked(depth_24), static_cast<GPUInterlacedDisplayMode>(interlace_mode)));
if (fs == VK_NULL_HANDLE)
return false;
@ -940,7 +942,6 @@ void GPU_HW_Vulkan::SetScissorFromDrawingArea()
void GPU_HW_Vulkan::ClearDisplay()
{
GPU_HW::ClearDisplay();
EndRenderPass();
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
@ -953,51 +954,51 @@ void GPU_HW_Vulkan::ClearDisplay()
void GPU_HW_Vulkan::UpdateDisplay()
{
GPU_HW::UpdateDisplay();
EndRenderPass();
HostDisplay* display = g_host_interface->GetDisplay();
if (g_settings.debugging.show_vram)
{
m_vram_texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(),
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_host_display->SetDisplayTexture(&m_vram_texture, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 0, 0,
m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
display->SetDisplayTexture(&m_vram_texture, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 0, 0,
m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
}
else
{
const u32 vram_offset_x = m_crtc_state.display_vram_left;
const u32 vram_offset_y = m_crtc_state.display_vram_top;
const u32 vram_offset_x = m_display_vram_left;
const u32 vram_offset_y = m_display_vram_top;
const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale;
const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale;
const u32 display_width = m_crtc_state.display_vram_width;
const u32 display_height = m_crtc_state.display_vram_height;
const u32 display_width = m_display_vram_width;
const u32 display_height = m_display_vram_height;
const u32 scaled_display_width = display_width * m_resolution_scale;
const u32 scaled_display_height = display_height * m_resolution_scale;
const InterlacedRenderMode interlaced = GetInterlacedRenderMode();
const GPUInterlacedDisplayMode interlaced = m_display_interlace;
if (IsDisplayDisabled())
if (!m_display_enabled)
{
m_host_display->ClearDisplayTexture();
display->ClearDisplayTexture();
}
else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None &&
else if (!m_display_24bit && interlaced == GPUInterlacedDisplayMode::None &&
(scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() &&
(scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight())
{
m_vram_texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(),
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_host_display->SetDisplayTexture(&m_vram_texture, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(),
scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width,
scaled_display_height);
display->SetDisplayTexture(&m_vram_texture, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(),
scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width,
scaled_display_height);
}
else
{
EndRenderPass();
const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0;
const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale;
const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * m_resolution_scale;
const u32 reinterpret_field_offset =
(interlaced != GPUInterlacedDisplayMode::None) ? m_display_interlace_field : 0;
const u32 reinterpret_start_x = m_display_vram_start_x * m_resolution_scale;
const u32 reinterpret_crop_left = (m_display_vram_left - m_display_vram_start_x) * m_resolution_scale;
const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset,
reinterpret_crop_left, reinterpret_field_offset};
@ -1007,9 +1008,8 @@ void GPU_HW_Vulkan::UpdateDisplay()
BeginRenderPass(m_display_render_pass, m_display_framebuffer, 0, 0, scaled_display_width, scaled_display_height);
vkCmdBindPipeline(
cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
m_display_pipelines[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast<u8>(interlaced)]);
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
m_display_pipelines[BoolToUInt8(m_display_24bit)][static_cast<u8>(interlaced)]);
vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
uniforms);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1,
@ -1022,16 +1022,14 @@ void GPU_HW_Vulkan::UpdateDisplay()
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
m_display_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_host_display->SetDisplayTexture(&m_display_texture, m_display_texture.GetWidth(), m_display_texture.GetHeight(),
0, 0, scaled_display_width, scaled_display_height);
display->SetDisplayTexture(&m_display_texture, m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0, 0,
scaled_display_width, scaled_display_height);
RestoreGraphicsAPIState();
}
m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height,
m_crtc_state.display_origin_left, m_crtc_state.display_origin_top,
m_crtc_state.display_vram_width, m_crtc_state.display_vram_height,
m_crtc_state.display_aspect_ratio);
display->SetDisplayParameters(m_display_width, m_display_height, m_display_origin_left, m_display_origin_top,
m_display_width, m_display_height, m_display_aspect_ratio);
}
}
@ -1080,19 +1078,19 @@ void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
RestoreGraphicsAPIState();
}
void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params)
{
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
{
// CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
GPU::FillVRAM(x, y, width, height, color);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data());
SoftwareFillVRAM(x, y, width, height, color, params);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), params);
return;
}
GPU_HW::FillVRAM(x, y, width, height, color);
GPU_HW::FillVRAM(x, y, width, height, color, params);
x *= m_resolution_scale;
y *= m_resolution_scale;
@ -1102,21 +1100,22 @@ void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
BeginVRAMRenderPass();
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color);
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color, params);
vkCmdPushConstants(cmdbuf, m_no_samplers_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
&uniforms);
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
m_vram_fill_pipelines[BoolToUInt8(IsInterlacedRenderingEnabled())]);
m_vram_fill_pipelines[BoolToUInt8(params.interlaced_rendering)]);
Vulkan::Util::SetViewportAndScissor(cmdbuf, x, y, width, height);
vkCmdDraw(cmdbuf, 3, 1, 0, 0);
RestoreGraphicsAPIState();
}
void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data,
GPUBackendCommandParameters params)
{
const Common::Rectangle<u32> bounds = GetVRAMTransferBounds(x, y, width, height);
GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data);
GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data, params);
const u32 data_size = width * height * sizeof(u16);
const u32 alignment = std::max<u32>(sizeof(u16), static_cast<u32>(g_vulkan_context->GetTexelBufferAlignment()));
@ -1140,16 +1139,16 @@ void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
BeginVRAMRenderPass();
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, start_index);
const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, start_index, params);
vkCmdPushConstants(cmdbuf, m_vram_write_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
&uniforms);
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
m_vram_write_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw)]);
m_vram_write_pipelines[BoolToUInt8(params.check_mask_before_draw)]);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_write_pipeline_layout, 0, 1,
&m_vram_write_descriptor_set, 0, nullptr);
// the viewport should already be set to the full vram, so just adjust the scissor
const Common::Rectangle<u32> scaled_bounds = bounds * m_resolution_scale;
const Common::Rectangle<u32> scaled_bounds(ScaleVRAMRect(bounds));
Vulkan::Util::SetScissor(cmdbuf, scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(),
scaled_bounds.GetHeight());
vkCmdDraw(cmdbuf, 3, 1, 0, 0);
@ -1157,9 +1156,10 @@ void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
RestoreGraphicsAPIState();
}
void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params)
{
if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height))
if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height, params))
{
const Common::Rectangle<u32> src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height);
const Common::Rectangle<u32> dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height);
@ -1167,14 +1167,14 @@ void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
UpdateVRAMReadTexture();
IncludeVRAMDityRectangle(dst_bounds);
const VRAMCopyUBOData uniforms(GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height));
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale);
const VRAMCopyUBOData uniforms(GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height, params));
const Common::Rectangle<u32> dst_bounds_scaled(ScaleVRAMRect(dst_bounds));
BeginVRAMRenderPass();
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw)]);
m_vram_copy_pipelines[BoolToUInt8(params.check_mask_before_draw)]);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1,
&m_vram_copy_descriptor_set, 0, nullptr);
vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
@ -1184,13 +1184,13 @@ void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
vkCmdDraw(cmdbuf, 3, 1, 0, 0);
RestoreGraphicsAPIState();
if (m_GPUSTAT.check_mask_before_draw)
if (params.check_mask_before_draw)
m_current_depth++;
return;
}
GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height);
GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params);
src_x *= m_resolution_scale;
src_y *= m_resolution_scale;
@ -1224,7 +1224,7 @@ void GPU_HW_Vulkan::UpdateVRAMReadTexture()
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
m_vram_read_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale;
const Common::Rectangle<u32> scaled_rect(ScaleVRAMRect(m_vram_dirty_rect));
const VkImageCopy copy{{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u},
{static_cast<s32>(scaled_rect.left), static_cast<s32>(scaled_rect.top), 0},
{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u},
@ -1262,8 +1262,3 @@ void GPU_HW_Vulkan::UpdateDepthBufferFromMaskBit()
RestoreGraphicsAPIState();
}
// Factory for the Vulkan hardware renderer: default-constructs a GPU_HW_Vulkan
// and hands ownership to the caller as a std::unique_ptr<GPU>.
std::unique_ptr<GPU> GPU::CreateHardwareVulkanRenderer()
{
return std::make_unique<GPU_HW_Vulkan>();
}

View File

@ -14,7 +14,7 @@ public:
GPU_HW_Vulkan();
~GPU_HW_Vulkan() override;
bool Initialize(HostDisplay* host_display) override;
bool Initialize() override;
void Reset() override;
void ResetGraphicsAPIState() override;
@ -25,9 +25,10 @@ protected:
void ClearDisplay() override;
void UpdateDisplay() override;
void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) override;
void UpdateVRAMReadTexture() override;
void UpdateDepthBufferFromMaskBit() override;
void SetScissorFromDrawingArea() override;

View File

@ -6,16 +6,29 @@
#include <algorithm>
Log_SetChannel(GPU_SW);
// Splits a packed 16-bit texture coordinate into two 8-bit components.
// The first tuple element is the low byte, the second is the high byte.
static constexpr std::tuple<u8, u8> UnpackTexcoord(u16 texcoord)
{
  const u8 low_byte = static_cast<u8>(texcoord);
  const u8 high_byte = static_cast<u8>(texcoord >> 8);
  return std::tuple<u8, u8>(low_byte, high_byte);
}
// Splits a packed 24-bit colour into its three 8-bit channels.
// Bits 0-7 become the first element, bits 8-15 the second and bits 16-23 the
// third; any bits above 23 are discarded by the narrowing casts.
static constexpr std::tuple<u8, u8, u8> UnpackColorRGB24(u32 rgb24)
{
  const u8 red = static_cast<u8>(rgb24);
  const u8 green = static_cast<u8>(rgb24 >> 8);
  const u8 blue = static_cast<u8>(rgb24 >> 16);
  return std::tuple<u8, u8, u8>(red, green, blue);
}
// Packs three 8-bit channels into a single 32-bit value: r occupies bits 0-7,
// g bits 8-15 and b bits 16-23. Each channel is widened before shifting so no
// bits are lost.
static constexpr u32 PackColorRGB24(u8 r, u8 g, u8 b)
{
  const u32 red_bits = ZeroExtend32(r);
  const u32 green_bits = ZeroExtend32(g) << 8;
  const u32 blue_bits = ZeroExtend32(b) << 16;
  return red_bits | green_bits | blue_bits;
}
// Constructs the software renderer: points m_vram_ptr at the m_vram storage
// and zero-fills that storage.
GPU_SW::GPU_SW()
{
  // Taking the data pointer does not depend on the buffer contents, so the
  // order of these two statements is interchangeable.
  m_vram_ptr = m_vram.data();
  m_vram.fill(0);
}
GPU_SW::~GPU_SW()
{
if (m_host_display)
m_host_display->ClearDisplayTexture();
if (g_host_interface->GetDisplay())
g_host_interface->GetDisplay()->ClearDisplayTexture();
}
bool GPU_SW::IsHardwareRenderer() const
@ -23,21 +36,22 @@ bool GPU_SW::IsHardwareRenderer() const
return false;
}
bool GPU_SW::Initialize(HostDisplay* host_display)
bool GPU_SW::Initialize()
{
if (!GPU::Initialize(host_display))
if (!GPUBackend::Initialize())
return false;
m_display_texture = host_display->CreateTexture(VRAM_WIDTH, VRAM_HEIGHT, nullptr, 0, true);
m_display_texture = g_host_interface->GetDisplay()->CreateTexture(VRAM_WIDTH, VRAM_HEIGHT, nullptr, 0, true);
if (!m_display_texture)
return false;
m_vram_ptr = m_vram.data();
return true;
}
void GPU_SW::Reset()
{
GPU::Reset();
GPUBackend::Reset();
m_vram.fill(0);
}
@ -151,211 +165,122 @@ void GPU_SW::UpdateDisplay()
// fill display texture
m_display_texture_buffer.resize(VRAM_WIDTH * VRAM_HEIGHT);
HostDisplay* display = g_host_interface->GetDisplay();
if (!g_settings.debugging.show_vram)
{
if (IsDisplayDisabled())
if (!m_display_enabled)
{
m_host_display->ClearDisplayTexture();
display->ClearDisplayTexture();
return;
}
const u32 vram_offset_y = m_crtc_state.display_vram_top;
const u32 display_width = m_crtc_state.display_vram_width;
const u32 display_height = m_crtc_state.display_vram_height;
const u32 texture_offset_x = m_crtc_state.display_vram_left - m_crtc_state.regs.X;
if (IsInterlacedDisplayEnabled())
const u32 vram_offset_x = m_display_vram_left;
const u32 vram_offset_y = m_display_vram_top;
const u32 display_width = m_display_vram_width;
const u32 display_height = m_display_vram_height;
const u32 texture_offset_x = m_display_vram_left - m_display_vram_start_x;
if (m_display_interlace != GPUInterlacedDisplayMode::None)
{
const u32 field = GetInterlacedDisplayField();
if (m_GPUSTAT.display_area_color_depth_24)
const u32 field = m_display_interlace_field;
const bool interleaved = (m_display_interlace == GPUInterlacedDisplayMode::InterleavedFields);
if (m_display_24bit)
{
CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH,
VRAM_WIDTH, display_width + texture_offset_x, display_height, true, m_GPUSTAT.vertical_resolution);
CopyOut24Bit(m_display_vram_start_x, vram_offset_y + field,
m_display_texture_buffer.data() + field * VRAM_WIDTH, VRAM_WIDTH, display_width + texture_offset_x,
display_height, true, interleaved);
}
else
{
CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH,
VRAM_WIDTH, display_width + texture_offset_x, display_height, true, m_GPUSTAT.vertical_resolution);
CopyOut15Bit(m_display_vram_start_x, vram_offset_y + field,
m_display_texture_buffer.data() + field * VRAM_WIDTH, VRAM_WIDTH, display_width + texture_offset_x,
display_height, true, interleaved);
}
}
else
{
if (m_GPUSTAT.display_area_color_depth_24)
if (m_display_24bit)
{
CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH,
CopyOut24Bit(m_display_vram_start_x, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH,
display_width + texture_offset_x, display_height, false, false);
}
else
{
CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH,
CopyOut15Bit(m_display_vram_start_x, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH,
display_width + texture_offset_x, display_height, false, false);
}
}
m_host_display->UpdateTexture(m_display_texture.get(), 0, 0, display_width, display_height,
m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32));
m_host_display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, texture_offset_x, 0,
display_width, display_height);
m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height,
m_crtc_state.display_origin_left, m_crtc_state.display_origin_top,
m_crtc_state.display_vram_width, m_crtc_state.display_vram_height,
m_crtc_state.display_aspect_ratio);
display->UpdateTexture(m_display_texture.get(), 0, 0, display_width, display_height,
m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32));
display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, texture_offset_x, 0,
display_width, display_height);
display->SetDisplayParameters(m_display_width, m_display_height, m_display_origin_left, m_display_origin_top,
m_display_vram_width, m_display_vram_height, m_display_aspect_ratio);
}
else
{
CopyOut15Bit(0, 0, m_display_texture_buffer.data(), VRAM_WIDTH, VRAM_WIDTH, VRAM_HEIGHT, false, false);
m_host_display->UpdateTexture(m_display_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32));
m_host_display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH,
VRAM_HEIGHT);
m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
display->UpdateTexture(m_display_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_display_texture_buffer.data(),
VRAM_WIDTH * sizeof(u32));
display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT);
display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
}
}
void GPU_SW::DispatchRenderCommand()
void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
{
const RenderCommand rc{m_render_command.bits};
const bool dithering_enable = rc.IsDitheringEnabled() && m_GPUSTAT.dither_enable;
// no-op
}
switch (rc.primitive)
{
case Primitive::Polygon:
{
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable;
void GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params)
{
SoftwareFillVRAM(x, y, width, height, color, params);
}
const u32 num_vertices = rc.quad_polygon ? 4 : 3;
std::array<SWVertex, 4> vertices;
for (u32 i = 0; i < num_vertices; i++)
{
SWVertex& vert = vertices[i];
const u32 color_rgb = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
vert.color_r = Truncate8(color_rgb);
vert.color_g = Truncate8(color_rgb >> 8);
vert.color_b = Truncate8(color_rgb >> 16);
void GPU_SW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params)
{
SoftwareUpdateVRAM(x, y, width, height, data, params);
}
const VertexPosition vp{FifoPop()};
vert.x = vp.x;
vert.y = vp.y;
void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params)
{
SoftwareCopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params);
}
if (textured)
{
std::tie(vert.texcoord_x, vert.texcoord_y) = UnpackTexcoord(Truncate16(FifoPop()));
}
else
{
vert.texcoord_x = 0;
vert.texcoord_y = 0;
}
}
void GPU_SW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
{
const GPURenderCommand rc{cmd->rc.bits};
const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable;
if (!IsDrawingAreaIsValid())
return;
const DrawTriangleFunction DrawFunction = GetDrawTriangleFunction(
rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable);
const DrawTriangleFunction DrawFunction = GetDrawTriangleFunction(
rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable);
(this->*DrawFunction)(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]);
if (rc.quad_polygon)
(this->*DrawFunction)(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]);
}
(this->*DrawFunction)(&vertices[0], &vertices[1], &vertices[2]);
if (num_vertices > 3)
(this->*DrawFunction)(&vertices[2], &vertices[1], &vertices[3]);
}
break;
void GPU_SW::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd)
{
const GPURenderCommand rc{cmd->rc.bits};
const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable;
case Primitive::Rectangle:
{
const auto [r, g, b] = UnpackColorRGB24(rc.color_for_first_vertex);
const VertexPosition vp{FifoPop()};
const u32 texcoord_and_palette = rc.texture_enable ? FifoPop() : 0;
const auto [texcoord_x, texcoord_y] = UnpackTexcoord(Truncate16(texcoord_and_palette));
const DrawRectangleFunction DrawFunction =
GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable);
s32 width;
s32 height;
switch (rc.rectangle_size)
{
case DrawRectangleSize::R1x1:
width = 1;
height = 1;
break;
case DrawRectangleSize::R8x8:
width = 8;
height = 8;
break;
case DrawRectangleSize::R16x16:
width = 16;
height = 16;
break;
default:
{
const u32 width_and_height = FifoPop();
width = static_cast<s32>(width_and_height & VRAM_WIDTH_MASK);
height = static_cast<s32>((width_and_height >> 16) & VRAM_HEIGHT_MASK);
(this->*DrawFunction)(cmd);
}
if (width >= MAX_PRIMITIVE_WIDTH || height >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large rectangle: %d,%d %dx%d", vp.x.GetValue(), vp.y.GetValue(), width,
height);
return;
}
}
break;
}
void GPU_SW::DrawLine(const GPUBackendDrawLineCommand* cmd)
{
const DrawLineFunction DrawFunction =
GetDrawLineFunction(cmd->rc.shading_enable, cmd->rc.transparency_enable, cmd->IsDitheringEnabled());
if (!IsDrawingAreaIsValid())
return;
const DrawRectangleFunction DrawFunction =
GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable);
(this->*DrawFunction)(vp.x, vp.y, width, height, r, g, b, texcoord_x, texcoord_y);
}
break;
case Primitive::Line:
{
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
const DrawLineFunction DrawFunction = GetDrawLineFunction(shaded, rc.transparency_enable, dithering_enable);
std::array<SWVertex, 2> vertices = {};
u32 buffer_pos = 0;
// first vertex
SWVertex* p0 = &vertices[0];
SWVertex* p1 = &vertices[1];
p0->SetPosition(VertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : Truncate32(FifoPop())});
p0->SetColorRGB24(first_color);
// remaining vertices in line strip
const u32 num_vertices = rc.polyline ? GetPolyLineVertexCount() : 2;
for (u32 i = 1; i < num_vertices; i++)
{
if (rc.polyline)
{
p1->SetColorRGB24(shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color);
p1->SetPosition(VertexPosition{m_blit_buffer[buffer_pos++]});
}
else
{
p1->SetColorRGB24(shaded ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color);
p1->SetPosition(VertexPosition{Truncate32(FifoPop())});
}
// down here because of the FIFO pops
if (IsDrawingAreaIsValid())
(this->*DrawFunction)(p0, p1);
// swap p0/p1 so that the last vertex is used as the first for the next line
std::swap(p0, p1);
}
}
break;
default:
UnreachableCode();
break;
}
for (u16 i = 1; i < cmd->num_vertices; i++)
(this->*DrawFunction)(cmd, &cmd->vertices[i - 1], &cmd->vertices[i]);
}
enum : u32
@ -383,7 +308,9 @@ constexpr u8 FixedColorToInt(FixedPointColor r)
return Truncate8(r >> 12);
}
bool GPU_SW::IsClockwiseWinding(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2)
bool GPU_SW::IsClockwiseWinding(const GPUBackendDrawPolygonCommand::Vertex* v0,
const GPUBackendDrawPolygonCommand::Vertex* v1,
const GPUBackendDrawPolygonCommand::Vertex* v2)
{
const s32 abx = v1->x - v0->x;
const s32 aby = v1->y - v0->y;
@ -407,7 +334,9 @@ static constexpr u8 Interpolate(u8 v0, u8 v1, u8 v2, s32 w0, s32 w1, s32 w2, s32
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
bool dithering_enable>
void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2)
void GPU_SW::DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0,
const GPUBackendDrawPolygonCommand::Vertex* v1,
const GPUBackendDrawPolygonCommand::Vertex* v2)
{
#define orient2d(ax, ay, bx, by, cx, cy) ((bx - ax) * (cy - ay) - (by - ay) * (cx - ax))
@ -415,12 +344,12 @@ void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex
if (IsClockwiseWinding(v0, v1, v2))
std::swap(v1, v2);
const s32 px0 = v0->x + m_drawing_offset.x;
const s32 py0 = v0->y + m_drawing_offset.y;
const s32 px1 = v1->x + m_drawing_offset.x;
const s32 py1 = v1->y + m_drawing_offset.y;
const s32 px2 = v2->x + m_drawing_offset.x;
const s32 py2 = v2->y + m_drawing_offset.y;
const s32 px0 = v0->x;
const s32 py0 = v0->y;
const s32 px1 = v1->x;
const s32 py1 = v1->y;
const s32 px2 = v2->x;
const s32 py2 = v2->y;
// Barycentric coordinates at minX/minY corner
const s32 ws = orient2d(px0, py0, px1, py1, px2, py2);
@ -434,16 +363,11 @@ void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex
s32 min_y = std::min(py0, std::min(py1, py2));
s32 max_y = std::max(py0, std::max(py1, py2));
// reject triangles which cover the whole vram area
if (static_cast<u32>(max_x - min_x) > MAX_PRIMITIVE_WIDTH || static_cast<u32>(max_y - min_y) > MAX_PRIMITIVE_HEIGHT)
return;
// clip to drawing area
min_x = std::clamp(min_x, static_cast<s32>(m_drawing_area.left), static_cast<s32>(m_drawing_area.right));
max_x = std::clamp(max_x, static_cast<s32>(m_drawing_area.left), static_cast<s32>(m_drawing_area.right));
min_y = std::clamp(min_y, static_cast<s32>(m_drawing_area.top), static_cast<s32>(m_drawing_area.bottom));
max_y = std::clamp(max_y, static_cast<s32>(m_drawing_area.top), static_cast<s32>(m_drawing_area.bottom));
AddDrawTriangleTicks(max_x - min_x + 1, max_y - min_y + 1, shading_enable, texture_enable, transparency_enable);
// compute per-pixel increments
const s32 a01 = py0 - py1, b01 = px1 - px0;
@ -476,17 +400,17 @@ void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex
const s32 b2 = row_w2;
const u8 r =
shading_enable ? Interpolate(v0->color_r, v1->color_r, v2->color_r, b0, b1, b2, ws, half_ws) : v0->color_r;
shading_enable ? Interpolate(v0->GetR(), v1->GetR(), v2->GetR(), b0, b1, b2, ws, half_ws) : v0->GetR();
const u8 g =
shading_enable ? Interpolate(v0->color_g, v1->color_g, v2->color_g, b0, b1, b2, ws, half_ws) : v0->color_g;
shading_enable ? Interpolate(v0->GetG(), v1->GetG(), v2->GetG(), b0, b1, b2, ws, half_ws) : v0->GetG();
const u8 b =
shading_enable ? Interpolate(v0->color_b, v1->color_b, v2->color_b, b0, b1, b2, ws, half_ws) : v0->color_b;
shading_enable ? Interpolate(v0->GetB(), v1->GetB(), v2->GetB(), b0, b1, b2, ws, half_ws) : v0->GetB();
const u8 texcoord_x = Interpolate(v0->texcoord_x, v1->texcoord_x, v2->texcoord_x, b0, b1, b2, ws, half_ws);
const u8 texcoord_y = Interpolate(v0->texcoord_y, v1->texcoord_y, v2->texcoord_y, b0, b1, b2, ws, half_ws);
const u8 u = texture_enable ? Interpolate(v0->GetU(), v1->GetU(), v2->GetU(), b0, b1, b2, ws, half_ws) : 0;
const u8 v = texture_enable ? Interpolate(v0->GetV(), v1->GetV(), v2->GetV(), b0, b1, b2, ws, half_ws) : 0;
ShadePixel<texture_enable, raw_texture_enable, transparency_enable, dithering_enable>(
static_cast<u32>(x), static_cast<u32>(y), r, g, b, texcoord_x, texcoord_y);
cmd, static_cast<u32>(x), static_cast<u32>(y), r, g, b, u, v);
}
row_w0 += a12;
@ -534,42 +458,31 @@ GPU_SW::DrawTriangleFunction GPU_SW::GetDrawTriangleFunction(bool shading_enable
}
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable>
void GPU_SW::DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x,
u8 origin_texcoord_y)
void GPU_SW::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd)
{
const s32 start_x = TruncateVertexPosition(m_drawing_offset.x + origin_x);
const s32 start_y = TruncateVertexPosition(m_drawing_offset.y + origin_y);
const s32 origin_x = cmd->x;
const s32 origin_y = cmd->y;
const auto [r, g, b] = UnpackColorRGB24(cmd->color);
const auto [origin_texcoord_x, origin_texcoord_y] = UnpackTexcoord(cmd->texcoord);
for (u32 offset_y = 0; offset_y < cmd->height; offset_y++)
{
const u32 clip_left = static_cast<u32>(std::clamp<s32>(start_x, m_drawing_area.left, m_drawing_area.right));
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(start_x + static_cast<s32>(width), m_drawing_area.left, m_drawing_area.right)) +
1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(start_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(start_y + static_cast<s32>(height), m_drawing_area.top, m_drawing_area.bottom)) +
1u;
AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, texture_enable, transparency_enable);
}
for (u32 offset_y = 0; offset_y < height; offset_y++)
{
const s32 y = start_y + static_cast<s32>(offset_y);
const s32 y = origin_y + static_cast<s32>(offset_y);
if (y < static_cast<s32>(m_drawing_area.top) || y > static_cast<s32>(m_drawing_area.bottom))
continue;
const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y);
for (u32 offset_x = 0; offset_x < width; offset_x++)
for (u32 offset_x = 0; offset_x < cmd->width; offset_x++)
{
const s32 x = start_x + static_cast<s32>(offset_x);
const s32 x = origin_x + static_cast<s32>(offset_x);
if (x < static_cast<s32>(m_drawing_area.left) || x > static_cast<s32>(m_drawing_area.right))
continue;
const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x);
ShadePixel<texture_enable, raw_texture_enable, transparency_enable, false>(
static_cast<u32>(x), static_cast<u32>(y), r, g, b, texcoord_x, texcoord_y);
cmd, static_cast<u32>(x), static_cast<u32>(y), r, g, b, texcoord_x, texcoord_y);
}
}
}
@ -583,7 +496,7 @@ constexpr GPU_SW::DitherLUT GPU_SW::ComputeDitherLUT()
{
for (s32 value = 0; value < DITHER_LUT_SIZE; value++)
{
const s32 dithered_value = (value + DITHER_MATRIX[i][j]) >> 3;
const s32 dithered_value = (value + GPU::DITHER_MATRIX[i][j]) >> 3;
lut[i][j][value] = static_cast<u8>((dithered_value < 0) ? 0 : ((dithered_value > 31) ? 31 : dithered_value));
}
}
@ -594,7 +507,8 @@ constexpr GPU_SW::DitherLUT GPU_SW::ComputeDitherLUT()
static constexpr GPU_SW::DitherLUT s_dither_lut = GPU_SW::ComputeDitherLUT();
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable>
void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y)
void GPU_SW::ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b,
u8 texcoord_x, u8 texcoord_y)
{
VRAMPixel color;
bool transparent;
@ -602,38 +516,41 @@ void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 tex
{
// Apply texture window
// TODO: Precompute the second half
texcoord_x = (texcoord_x & ~(m_draw_mode.texture_window_mask_x * 8u)) |
((m_draw_mode.texture_window_offset_x & m_draw_mode.texture_window_mask_x) * 8u);
texcoord_y = (texcoord_y & ~(m_draw_mode.texture_window_mask_y * 8u)) |
((m_draw_mode.texture_window_offset_y & m_draw_mode.texture_window_mask_y) * 8u);
texcoord_x = (texcoord_x & ~(cmd->window.mask_x * 8u)) | ((cmd->window.offset_x & cmd->window.mask_x) * 8u);
texcoord_y = (texcoord_y & ~(cmd->window.mask_y * 8u)) | ((cmd->window.offset_y & cmd->window.mask_y) * 8u);
VRAMPixel texture_color;
switch (m_draw_mode.GetTextureMode())
switch (cmd->draw_mode.texture_mode)
{
case GPU::TextureMode::Palette4Bit:
case GPUTextureMode::Palette4Bit:
{
const u16 palette_value = GetPixel((m_draw_mode.texture_page_x + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH,
(m_draw_mode.texture_page_y + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
const u16 palette_value =
GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH,
(cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
const u16 palette_index = (palette_value >> ((texcoord_x % 4) * 4)) & 0x0Fu;
texture_color.bits = GetPixel((m_draw_mode.texture_palette_x + ZeroExtend32(palette_index)) % VRAM_WIDTH,
m_draw_mode.texture_palette_y);
const u32 px = (cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH;
const u32 py = cmd->palette.GetYBase();
texture_color.bits =
GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase());
}
break;
case GPU::TextureMode::Palette8Bit:
case GPUTextureMode::Palette8Bit:
{
const u16 palette_value = GetPixel((m_draw_mode.texture_page_x + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH,
(m_draw_mode.texture_page_y + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
const u16 palette_value =
GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH,
(cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
const u16 palette_index = (palette_value >> ((texcoord_x % 2) * 8)) & 0xFFu;
texture_color.bits = GetPixel((m_draw_mode.texture_palette_x + ZeroExtend32(palette_index)) % VRAM_WIDTH,
m_draw_mode.texture_palette_y);
texture_color.bits =
GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase());
}
break;
default:
{
texture_color.bits = GetPixel((m_draw_mode.texture_page_x + ZeroExtend32(texcoord_x)) % VRAM_WIDTH,
(m_draw_mode.texture_page_y + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
texture_color.bits = GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x)) % VRAM_WIDTH,
(cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
}
break;
}
@ -684,18 +601,18 @@ void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 tex
color.Set(func(bg_color.r.GetValue(), color.r.GetValue()), func(bg_color.g.GetValue(), color.g.GetValue()), \
func(bg_color.b.GetValue(), color.b.GetValue()), color.c.GetValue())
switch (m_draw_mode.GetTransparencyMode())
switch (cmd->draw_mode.transparency_mode)
{
case GPU::TransparencyMode::HalfBackgroundPlusHalfForeground:
case GPUTransparencyMode::HalfBackgroundPlusHalfForeground:
BLEND_RGB(BLEND_AVERAGE);
break;
case GPU::TransparencyMode::BackgroundPlusForeground:
case GPUTransparencyMode::BackgroundPlusForeground:
BLEND_RGB(BLEND_ADD);
break;
case GPU::TransparencyMode::BackgroundMinusForeground:
case GPUTransparencyMode::BackgroundMinusForeground:
BLEND_RGB(BLEND_SUBTRACT);
break;
case GPU::TransparencyMode::BackgroundPlusQuarterForeground:
case GPUTransparencyMode::BackgroundPlusQuarterForeground:
BLEND_RGB(BLEND_QUARTER);
break;
default:
@ -715,14 +632,14 @@ void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 tex
UNREFERENCED_VARIABLE(transparent);
}
const u16 mask_and = m_GPUSTAT.GetMaskAND();
const u16 mask_and = cmd->params.GetMaskAND();
if ((bg_color.bits & mask_and) != 0)
return;
if (IsInterlacedRenderingEnabled() && GetActiveLineLSB() == (static_cast<u32>(y) & 1u))
if (cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (Truncate8(static_cast<u32>(y)) & 1u))
return;
SetPixel(static_cast<u32>(x), static_cast<u32>(y), color.bits | m_GPUSTAT.GetMaskOR());
SetPixel(static_cast<u32>(x), static_cast<u32>(y), color.bits | cmd->params.GetMaskOR());
}
constexpr FixedPointCoord GetLineCoordStep(s32 delta, s32 k)
@ -747,7 +664,8 @@ constexpr FixedPointColor GetLineColorStep(s32 delta, s32 k)
}
template<bool shading_enable, bool transparency_enable, bool dithering_enable>
void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1)
void GPU_SW::DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0,
const GPUBackendDrawLineCommand::Vertex* p1)
{
// Algorithm based on Mednafen.
if (p0->x > p1->x)
@ -757,21 +675,6 @@ void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1)
const s32 dy = p1->y - p0->y;
const s32 k = std::max(std::abs(dx), std::abs(dy));
{
// TODO: Move to base class
const s32 min_x = std::min(p0->x, p1->x);
const s32 max_x = std::max(p0->x, p1->x);
const s32 min_y = std::min(p0->y, p1->y);
const s32 max_y = std::max(p0->y, p1->y);
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left));
const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom = static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, shading_enable);
}
FixedPointCoord step_x, step_y;
FixedPointColor step_r, step_g, step_b;
if (k > 0)
@ -781,9 +684,9 @@ void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1)
if constexpr (shading_enable)
{
step_r = GetLineColorStep(s32(ZeroExtend32(p1->color_r)) - s32(ZeroExtend32(p0->color_r)), k);
step_g = GetLineColorStep(s32(ZeroExtend32(p1->color_g)) - s32(ZeroExtend32(p0->color_g)), k);
step_b = GetLineColorStep(s32(ZeroExtend32(p1->color_b)) - s32(ZeroExtend32(p0->color_b)), k);
step_r = GetLineColorStep(s32(ZeroExtend32(p1->GetR())) - s32(ZeroExtend32(p0->GetR())), k);
step_g = GetLineColorStep(s32(ZeroExtend32(p1->GetG())) - s32(ZeroExtend32(p0->GetG())), k);
step_b = GetLineColorStep(s32(ZeroExtend32(p1->GetB())) - s32(ZeroExtend32(p0->GetB())), k);
}
else
{
@ -803,24 +706,25 @@ void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1)
FixedPointCoord current_x = IntToFixedCoord(p0->x);
FixedPointCoord current_y = IntToFixedCoord(p0->y);
FixedPointColor current_r = IntToFixedColor(p0->color_r);
FixedPointColor current_g = IntToFixedColor(p0->color_g);
FixedPointColor current_b = IntToFixedColor(p0->color_b);
FixedPointColor current_r = IntToFixedColor(p0->GetR());
FixedPointColor current_g = IntToFixedColor(p0->GetG());
FixedPointColor current_b = IntToFixedColor(p0->GetB());
for (s32 i = 0; i <= k; i++)
{
const s32 x = m_drawing_offset.x + FixedToIntCoord(current_x);
const s32 y = m_drawing_offset.y + FixedToIntCoord(current_y);
// FIXME: Draw offset should be applied here
const s32 x = /*m_drawing_offset.x + */ FixedToIntCoord(current_x);
const s32 y = /*m_drawing_offset.y + */ FixedToIntCoord(current_y);
const u8 r = shading_enable ? FixedColorToInt(current_r) : p0->color_r;
const u8 g = shading_enable ? FixedColorToInt(current_g) : p0->color_g;
const u8 b = shading_enable ? FixedColorToInt(current_b) : p0->color_b;
const u8 r = shading_enable ? FixedColorToInt(current_r) : p0->GetR();
const u8 g = shading_enable ? FixedColorToInt(current_g) : p0->GetG();
const u8 b = shading_enable ? FixedColorToInt(current_b) : p0->GetB();
if (x >= static_cast<s32>(m_drawing_area.left) && x <= static_cast<s32>(m_drawing_area.right) &&
y >= static_cast<s32>(m_drawing_area.top) && y <= static_cast<s32>(m_drawing_area.bottom))
{
ShadePixel<false, false, transparency_enable, dithering_enable>(static_cast<u32>(x), static_cast<u32>(y), r, g, b,
0, 0);
ShadePixel<false, false, transparency_enable, dithering_enable>(cmd, static_cast<u32>(x), static_cast<u32>(y), r,
g, b, 0, 0);
}
current_x += step_x;
@ -863,7 +767,7 @@ GPU_SW::DrawRectangleFunction GPU_SW::GetDrawRectangleFunction(bool texture_enab
return funcs[u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)];
}
std::unique_ptr<GPU> GPU::CreateSoftwareRenderer()
void GPU_SW::FlushRender()
{
return std::make_unique<GPU_SW>();
// no-op
}

View File

@ -1,12 +1,13 @@
#pragma once
#include "gpu.h"
#include "gpu_backend.h"
#include <array>
#include <memory>
#include <vector>
class HostDisplayTexture;
class GPU_SW final : public GPU
class GPU_SW final : public GPUBackend
{
public:
GPU_SW();
@ -14,7 +15,7 @@ public:
bool IsHardwareRenderer() const override;
bool Initialize(HostDisplay* host_display) override;
bool Initialize() override;
void Reset() override;
u16 GetPixel(u32 x, u32 y) const { return m_vram[VRAM_WIDTH * y + x]; }
@ -28,20 +29,61 @@ public:
static constexpr DitherLUT ComputeDitherLUT();
protected:
struct SWVertex
{
s32 x, y;
u8 color_r, color_g, color_b;
u8 texcoord_x, texcoord_y;
static constexpr u8 Convert5To8(u8 x5) { return (x5 << 3) | (x5 & 7); }
static constexpr u8 Convert8To5(u8 x8) { return (x8 >> 3); }
ALWAYS_INLINE void SetPosition(VertexPosition p)
union VRAMPixel
{
u16 bits;
BitField<u16, u8, 0, 5> r;
BitField<u16, u8, 5, 5> g;
BitField<u16, u8, 10, 5> b;
BitField<u16, bool, 15, 1> c;
u8 GetR8() const { return Convert5To8(r); }
u8 GetG8() const { return Convert5To8(g); }
u8 GetB8() const { return Convert5To8(b); }
void Set(u8 r_, u8 g_, u8 b_, bool c_ = false)
{
x = p.x;
y = p.y;
bits = (ZeroExtend16(r_)) | (ZeroExtend16(g_) << 5) | (ZeroExtend16(b_) << 10) | (static_cast<u16>(c_) << 15);
}
ALWAYS_INLINE void SetColorRGB24(u32 color) { std::tie(color_r, color_g, color_b) = UnpackColorRGB24(color); }
ALWAYS_INLINE void SetTexcoord(u16 value) { std::tie(texcoord_x, texcoord_y) = UnpackTexcoord(value); }
void ClampAndSet(u8 r_, u8 g_, u8 b_, bool c_ = false)
{
Set(std::min<u8>(r_, 0x1F), std::min<u8>(g_, 0x1F), std::min<u8>(b_, 0x1F), c_);
}
void SetRGB24(u32 rgb24, bool c_ = false)
{
bits = Truncate16(((rgb24 >> 3) & 0x1F) | (((rgb24 >> 11) & 0x1F) << 5) | (((rgb24 >> 19) & 0x1F) << 10)) |
(static_cast<u16>(c_) << 15);
}
void SetRGB24(u8 r8, u8 g8, u8 b8, bool c_ = false)
{
bits = (ZeroExtend16(r8 >> 3)) | (ZeroExtend16(g8 >> 3) << 5) | (ZeroExtend16(b8 >> 3) << 10) |
(static_cast<u16>(c_) << 15);
}
void SetRGB24Dithered(u32 x, u32 y, u8 r8, u8 g8, u8 b8, bool c_ = false)
{
const s32 offset = GPU::DITHER_MATRIX[y & 3][x & 3];
r8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(r8)) + offset, 0, 255));
g8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(g8)) + offset, 0, 255));
b8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(b8)) + offset, 0, 255));
SetRGB24(r8, g8, b8, c_);
}
u32 ToRGB24() const
{
const u32 r_ = ZeroExtend32(r.GetValue());
const u32 g_ = ZeroExtend32(g.GetValue());
const u32 b_ = ZeroExtend32(b.GetValue());
return ((r_ << 3) | (r_ & 7)) | (((g_ << 3) | (g_ & 7)) << 8) | (((b_ << 3) | (b_ & 7)) << 16);
}
};
//////////////////////////////////////////////////////////////////////////
@ -53,39 +95,51 @@ protected:
bool interleaved);
void ClearDisplay() override;
void UpdateDisplay() override;
void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) override;
//////////////////////////////////////////////////////////////////////////
// Rasterization
//////////////////////////////////////////////////////////////////////////
void DispatchRenderCommand() override;
void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override;
void DrawLine(const GPUBackendDrawLineCommand* cmd) override;
void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) override;
void FlushRender() override;
static bool IsClockwiseWinding(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2);
static bool IsClockwiseWinding(const GPUBackendDrawPolygonCommand::Vertex* v0, const GPUBackendDrawPolygonCommand::Vertex* v1,
const GPUBackendDrawPolygonCommand::Vertex* v2);
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable>
void ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y);
void ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x,
u8 texcoord_y);
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
bool dithering_enable>
void DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2);
void DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0,
const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2);
using DrawTriangleFunction = void (GPU_SW::*)(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2);
using DrawTriangleFunction = void (GPU_SW::*)(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0,
const GPUBackendDrawPolygonCommand::Vertex* v1,
const GPUBackendDrawPolygonCommand::Vertex* v2);
DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, bool raw_texture_enable,
bool transparency_enable, bool dithering_enable);
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable>
void DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x,
u8 origin_texcoord_y);
void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd);
using DrawRectangleFunction = void (GPU_SW::*)(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b,
u8 origin_texcoord_x, u8 origin_texcoord_y);
using DrawRectangleFunction = void (GPU_SW::*)(const GPUBackendDrawRectangleCommand* cmd);
DrawRectangleFunction GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable,
bool transparency_enable);
template<bool shading_enable, bool transparency_enable, bool dithering_enable>
void DrawLine(const SWVertex* p0, const SWVertex* p1);
void DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0, const GPUBackendDrawLineCommand::Vertex* p1);
using DrawLineFunction = void (GPU_SW::*)(const SWVertex* p0, const SWVertex* p1);
using DrawLineFunction = void (GPU_SW::*)(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0,
const GPUBackendDrawLineCommand::Vertex* p1);
DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool dithering_enable);
std::vector<u32> m_display_texture_buffer;

455
src/core/gpu_types.h Normal file
View File

@ -0,0 +1,455 @@
#pragma once
#include "common/bitfield.h"
#include "common/rectangle.h"
#include "types.h"
#include <array>
// Compile-time constants describing VRAM geometry, texture page size and primitive size limits.
enum : u32
{
// VRAM dimensions, in 16-bit pixels.
VRAM_WIDTH = 1024,
VRAM_HEIGHT = 512,
// Total VRAM size in bytes (one u16 per pixel).
VRAM_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16),
// Masks for wrapping a coordinate into the VRAM extents.
VRAM_WIDTH_MASK = VRAM_WIDTH - 1,
VRAM_HEIGHT_MASK = VRAM_HEIGHT - 1,
// 10-bit coordinate mask (0..1023).
VRAM_COORD_MASK = 0x3FF,
// Texture page extents; the width is divided by 4 or 2 for the paletted modes (see GPUDrawModeReg).
TEXTURE_PAGE_WIDTH = 256,
TEXTURE_PAGE_HEIGHT = 256,
// Largest primitive extents.
MAX_PRIMITIVE_WIDTH = 1024,
MAX_PRIMITIVE_HEIGHT = 512,
// Edge length of the square DITHER_MATRIX.
DITHER_MATRIX_SIZE = 4
};
/// Primitive type selector, stored in the `primitive` field of GPURenderCommand.
enum class GPUPrimitive : u8
{
Reserved = 0,
Polygon = 1,
Line = 2,
Rectangle = 3
};
/// Rectangle size selector, stored in the `rectangle_size` field of GPURenderCommand.
/// The R*x* names give fixed sizes; Variable presumably means the size is supplied separately (TODO confirm).
enum class GPUDrawRectangleSize : u8
{
Variable = 0,
R1x1 = 1,
R8x8 = 2,
R16x16 = 3
};
/// Texture colour format. Values 0-3 are the two-bit register encoding (see GPUDrawModeReg::texture_mode);
/// the remaining values are flags/combinations used internally and never appear in a register.
enum class GPUTextureMode : u8
{
Palette4Bit = 0,
Palette8Bit = 1,
Direct16Bit = 2,
Reserved_Direct16Bit = 3,
// Not register values.
// RawTextureBit is a flag OR-ed onto one of the register values above.
RawTextureBit = 4,
RawPalette4Bit = RawTextureBit | Palette4Bit,
RawPalette8Bit = RawTextureBit | Palette8Bit,
RawDirect16Bit = RawTextureBit | Direct16Bit,
Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit,
Disabled = 8 // Not a register value
};
// Enables bitwise operators on GPUTextureMode so the flag values above can be combined and tested.
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPUTextureMode);
/// Blend equation used for semi-transparent pixels. Values 0-3 are the two-bit register encoding
/// (see GPUDrawModeReg::transparency_mode).
enum class GPUTransparencyMode : u8
{
HalfBackgroundPlusHalfForeground = 0,
BackgroundPlusForeground = 1,
BackgroundMinusForeground = 2,
BackgroundPlusQuarterForeground = 3,
Disabled = 4 // Not a register value
};
/// Interlaced display mode, carried in GPUBackendUpdateDisplayCommand::display_interlace.
enum class GPUInterlacedDisplayMode : u8
{
None,
InterleavedFields,
SeparateFields
};
/// Packed 32-bit render command word. All members alias `bits`; several fields share bit 27 because its meaning
/// depends on the primitive type.
union GPURenderCommand
{
  u32 bits;

  BitField<u32, u32, 0, 24> color_for_first_vertex;
  BitField<u32, bool, 24, 1> raw_texture_enable; // not valid for lines
  BitField<u32, bool, 25, 1> transparency_enable;
  BitField<u32, bool, 26, 1> texture_enable;
  BitField<u32, GPUDrawRectangleSize, 27, 2> rectangle_size; // only for rectangles
  BitField<u32, bool, 27, 1> quad_polygon;                   // only for polygons
  BitField<u32, bool, 27, 1> polyline;                       // only for lines
  BitField<u32, bool, 28, 1> shading_enable;                 // 0 - flat, 1 = gouraud

  // The primitive type is the top three bits (29..31) of the word. The width was previously written as 21, which
  // extends past the end of the 32-bit backing value.
  BitField<u32, GPUPrimitive, 29, 3> primitive;

  /// Returns true if texturing should be enabled. Depends on the primitive type: lines are never textured.
  ALWAYS_INLINE bool IsTexturingEnabled() const { return (primitive != GPUPrimitive::Line) ? texture_enable : false; }

  /// Returns true if dithering should be enabled. Depends on the primitive type.
  ALWAYS_INLINE bool IsDitheringEnabled() const
  {
    switch (primitive)
    {
      case GPUPrimitive::Polygon:
        // Polygons dither when shaded, or when textured with colour modulation (i.e. not raw).
        return shading_enable || (texture_enable && !raw_texture_enable);

      case GPUPrimitive::Line:
        return true;

      case GPUPrimitive::Rectangle:
      default:
        return false;
    }
  }
};
// Helper/format conversion functions.
/// Expands a 16-bit RGBA5551 pixel into 32-bit RGBA8888 (red in the low byte, alpha in the high byte).
/// Each 5-bit channel abcde becomes the 8-bit value abcdecde (the low three bits are repeated), and the single
/// alpha bit becomes either 0 or 255.
static constexpr u32 RGBA5551ToRGBA8888(u16 color)
{
  const u32 red5 = color & 0x1Fu;
  const u32 green5 = (color >> 5) & 0x1Fu;
  const u32 blue5 = (color >> 10) & 0x1Fu;

  const u32 red8 = (red5 << 3) | (red5 & 7u);
  const u32 green8 = (green5 << 3) | (green5 & 7u);
  const u32 blue8 = (blue5 << 3) | (blue5 & 7u);
  const u32 alpha8 = ((color >> 15) & 1u) ? 0xFFu : 0x00u;

  return red8 | (green8 << 8) | (blue8 << 16) | (alpha8 << 24);
}
/// Packs a 32-bit RGBA8888 colour (red in the low byte) into 16-bit RGBA5551 by keeping the top five bits of each
/// colour channel and the top bit of alpha.
static constexpr u16 RGBA8888ToRGBA5551(u32 color)
{
  const u32 red5 = (color >> 3) & 0x1Fu;
  const u32 green5 = (color >> 11) & 0x1Fu;
  const u32 blue5 = (color >> 19) & 0x1Fu;
  const u32 alpha1 = (color >> 31) & 0x01u;

  return static_cast<u16>(red5 | (green5 << 5) | (blue5 << 10) | (alpha1 << 15));
}
/// Packed vertex position word: x is read from bits 0..11 and y from bits 16..27, both as signed values.
union GPUVertexPosition
{
u32 bits;
BitField<u32, s32, 0, 12> x;
BitField<u32, s32, 16, 12> y;
};
// Sprites/rectangles should be clipped to 12 bits before drawing.
// NOTE(review): the template argument below is 11 while this comment says 12 bits; whether these agree depends on
// how SignExtendN counts its bit argument — confirm against its definition.
static constexpr s32 TruncateGPUVertexPosition(s32 x)
{
return SignExtendN<11, s32>(x);
}
// bits in GP0(E1h) or texpage part of polygon
union GPUDrawModeReg
{
// NOTE(review): MASK covers bits 0..12 (13 bits) while texture_y_flip below is declared at bit 13 — confirm
// whether bit 13 is intentionally excluded.
static constexpr u16 MASK = 0b1111111111111;
// Bits 0..4: texture page X and Y base.
static constexpr u16 TEXTURE_PAGE_MASK = UINT16_C(0b0000000000011111);
// Polygon texpage commands only affect bits 0-8, 11
static constexpr u16 POLYGON_TEXPAGE_MASK = 0b0000100111111111;
// Bits 0..10 are returned in the GPU status register, latched at E1h/polygon draw time.
static constexpr u32 GPUSTAT_MASK = 0b11111111111;
u16 bits;
BitField<u16, u8, 0, 4> texture_page_x_base;
BitField<u16, u8, 4, 1> texture_page_y_base;
BitField<u16, GPUTransparencyMode, 5, 2> transparency_mode;
BitField<u16, GPUTextureMode, 7, 2> texture_mode;
BitField<u16, bool, 9, 1> dither_enable;
BitField<u16, bool, 10, 1> draw_to_displayed_field;
BitField<u16, bool, 11, 1> texture_disable;
BitField<u16, bool, 12, 1> texture_x_flip;
BitField<u16, bool, 13, 1> texture_y_flip;
/// Returns the texture page origin in VRAM: X in steps of 64 pixels, Y in steps of 256 lines.
ALWAYS_INLINE u16 GetTexturePageBaseX() const { return ZeroExtend16(texture_page_x_base.GetValue()) * 64; }
ALWAYS_INLINE u16 GetTexturePageBaseY() const { return ZeroExtend16(texture_page_y_base.GetValue()) * 256; }
/// Returns true if the texture mode requires a palette.
/// Tests bit 8, the high bit of texture_mode, which is clear for Palette4Bit and Palette8Bit.
bool IsUsingPalette() const { return (bits & (2 << 7)) == 0; }
/// Returns a rectangle comprising the texture page area.
/// The width is TEXTURE_PAGE_WIDTH divided by 4 (4-bit), 2 (8-bit) or 1 (direct modes).
Common::Rectangle<u32> GetTexturePageRectangle() const
{
static constexpr std::array<u32, 4> texture_page_widths = {
{TEXTURE_PAGE_WIDTH / 4, TEXTURE_PAGE_WIDTH / 2, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_WIDTH}};
return Common::Rectangle<u32>::FromExtents(GetTexturePageBaseX(), GetTexturePageBaseY(),
texture_page_widths[static_cast<u8>(texture_mode.GetValue())],
TEXTURE_PAGE_HEIGHT);
}
/// Returns a rectangle comprising the texture palette area.
/// The width is 16 entries (4-bit), 256 entries (8-bit) or 0 (direct modes); the height is one line.
/// NOTE(review): the origin used here is the texture *page* base, not a palette position (this register holds
/// no palette coordinates; see GPUTexturePaletteReg) — confirm this is intended.
Common::Rectangle<u32> GetTexturePaletteRectangle() const
{
static constexpr std::array<u32, 4> palette_widths = {{16, 256, 0, 0}};
return Common::Rectangle<u32>::FromExtents(GetTexturePageBaseX(), GetTexturePageBaseY(),
palette_widths[static_cast<u8>(texture_mode.GetValue())], 1);
}
};
/// Packed palette (CLUT) location: X in units of 16 pixels, Y as a VRAM line number.
union GPUTexturePaletteReg
{
  // Only bits 0..14 are meaningful; bit 15 is not part of the palette location.
  static constexpr u16 MASK = UINT16_C(0b0111111111111111);

  u16 bits;

  BitField<u16, u16, 0, 6> x;
  // Nine bits (6..14), consistent with MASK. A ten-bit field would also pick up bit 15 and let GetYBase() return
  // values >= VRAM_HEIGHT (512), which callers use unwrapped as a VRAM row index.
  BitField<u16, u16, 6, 9> y;

  /// Returns the palette's X origin in VRAM pixels (the field is stored in steps of 16).
  ALWAYS_INLINE u32 GetXBase() const { return static_cast<u32>(x) * 16u; }

  /// Returns the palette's Y origin (VRAM line).
  ALWAYS_INLINE u32 GetYBase() const { return static_cast<u32>(y); }
};
/// Packed texture window setting: four 5-bit fields occupying bits 0..19.
/// Consumers scale the mask/offset values by 8 when applying them to texture coordinates.
union GPUTextureWindowReg
{
// Covers bits 0..19, i.e. all four fields below.
static constexpr u32 MASK = 0b11111111111111111111;
u32 bits;
BitField<u32, u8, 0, 5> mask_x;
BitField<u32, u8, 5, 5> mask_y;
BitField<u32, u8, 10, 5> offset_x;
BitField<u32, u8, 15, 5> offset_y;
};
// 4x4 dither matrix, indexed as DITHER_MATRIX[y & 3][x & 3]. The offset is added to each 8-bit colour channel
// before it is reduced to 5 bits. Rows 2 and 3 are rows 0 and 1 shifted by two columns, so the first entry of
// row 3 is +3 (it was previously written as +4, which broke that symmetry).
static constexpr s32 DITHER_MATRIX[DITHER_MATRIX_SIZE][DITHER_MATRIX_SIZE] = {{-4, +0, -3, +1},  // row 0
                                                                              {+2, -2, +3, -1},  // row 1
                                                                              {-3, +1, -4, +0},  // row 2
                                                                              {+3, -1, +2, -2}}; // row 3
/// Identifies the kind of a GPUBackendCommand (stored in GPUBackendCommand::type).
/// Most values have a matching GPUBackend*Command struct declared later in this file.
enum class GPUBackendCommandType : u8
{
Sync,
FrameDone,
Reset,
UpdateSettings,
UpdateResolutionScale,
ReadVRAM,
FillVRAM,
UpdateVRAM,
CopyVRAM,
SetDrawingArea,
DrawPolygon,
DrawRectangle,
DrawLine,
ClearDisplay,
UpdateDisplay,
FlushRender
};
/// Per-command rendering flags packed into a single byte; every member aliases `bits`.
union GPUBackendCommandParameters
{
  u8 bits;

  BitField<u8, bool, 0, 1> interlaced_rendering;

  /// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1.
  BitField<u8, u8, 1, 1> active_line_lsb;

  BitField<u8, bool, 2, 1> set_mask_while_drawing;
  BitField<u8, bool, 3, 1> check_mask_before_draw;

  /// Returns true when either mask flag (bit 2 or bit 3) is set.
  ALWAYS_INLINE bool IsMaskingEnabled() const
  {
    constexpr u32 mask_flag_bits = (1u << 2) | (1u << 3);
    return (bits & mask_flag_bits) != 0u;
  }

  // During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or }

  /// Returns 0x8000 when check_mask_before_draw (bit 3) is set, otherwise 0.
  u16 GetMaskAND() const
  {
    const u32 check_flag = (static_cast<u32>(bits) >> 3) & 1u;
    return static_cast<u16>(check_flag << 15);
  }

  /// Returns 0x8000 when set_mask_while_drawing (bit 2) is set, otherwise 0.
  u16 GetMaskOR() const
  {
    const u32 set_flag = (static_cast<u32>(bits) >> 2) & 1u;
    return static_cast<u16>(set_flag << 15);
  }
};
/// Common header shared by every backend command; the concrete command structs derive from it.
struct GPUBackendCommand
{
// Which concrete command this is.
GPUBackendCommandType type;
// Rendering flags (interlacing and mask bits) that apply to this command.
GPUBackendCommandParameters params;
// presumably the total size of the command in bytes, matching the derived struct's Size() — TODO confirm
u32 size;
};
/// Command with no payload beyond the common header (type GPUBackendCommandType::Sync).
struct GPUBackendSyncCommand : public GPUBackendCommand
{
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendSyncCommand); }
};
/// Command with no payload beyond the common header (type GPUBackendCommandType::FrameDone).
struct GPUBackendFrameDoneCommand : public GPUBackendCommand
{
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendFrameDoneCommand); }
};
/// Command with no payload beyond the common header (type GPUBackendCommandType::Reset).
struct GPUBackendResetCommand : public GPUBackendCommand
{
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendResetCommand); }
};
/// Command with no payload beyond the common header (type GPUBackendCommandType::UpdateSettings).
struct GPUBackendUpdateSettingsCommand : public GPUBackendCommand
{
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendUpdateSettingsCommand); }
};
/// Command with no payload beyond the common header (type GPUBackendCommandType::UpdateResolutionScale).
struct GPUBackendUpdateResolutionScaleCommand : public GPUBackendCommand
{
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendUpdateResolutionScaleCommand); }
};
/// Describes a VRAM rectangle to read: origin (x, y) and extents (width, height).
struct GPUBackendReadVRAMCommand : public GPUBackendCommand
{
u16 x;
u16 y;
u16 width;
u16 height;
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendReadVRAMCommand); }
};
/// Describes a VRAM rectangle fill: origin (x, y), extents (width, height) and the fill colour.
struct GPUBackendFillVRAMCommand : public GPUBackendCommand
{
u16 x;
u16 y;
u16 width;
u16 height;
// presumably a 24-bit RGB value, as with other colours in this file — TODO confirm
u32 color;
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendFillVRAMCommand); }
};
/// Describes a VRAM rectangle upload. The pixel data is stored inline, directly after the fixed fields.
struct GPUBackendUpdateVRAMCommand : public GPUBackendCommand
{
u16 x;
u16 y;
u16 width;
u16 height;
// Zero-length trailing array marking the start of the inline pixel data (width * height u16 values per Size()).
u16 data[0];
/// Size of the fixed part plus the width * height inline pixels.
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendUpdateVRAMCommand) + (sizeof(u16) * width * height); }
};
/// Describes a VRAM-to-VRAM copy: source origin, destination origin and the shared extents.
struct GPUBackendCopyVRAMCommand : public GPUBackendCommand
{
u16 src_x;
u16 src_y;
u16 dst_x;
u16 dst_y;
u16 width;
u16 height;
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendCopyVRAMCommand); }
};
/// Carries a new drawing area rectangle.
struct GPUBackendSetDrawingAreaCommand : public GPUBackendCommand
{
Common::Rectangle<u32> new_area;
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendSetDrawingAreaCommand); }
};
/// State common to all draw commands (polygon, rectangle, line): the render command word plus the draw mode,
/// palette and texture window registers that apply to this draw.
struct GPUBackendDrawCommand : public GPUBackendCommand
{
GPURenderCommand rc;
GPUDrawModeReg draw_mode;
GPUTexturePaletteReg palette;
GPUTextureWindowReg window;
// presumably the bounding rectangle of the primitive — TODO confirm against the producer
Common::Rectangle<u16> bounds;
/// Dithering applies only when both the primitive type allows it and the draw mode enables it.
ALWAYS_INLINE bool IsDitheringEnabled() const { return rc.IsDitheringEnabled() && draw_mode.dither_enable; }
};
/// Polygon draw command. The vertices are stored inline, directly after the fixed fields.
struct GPUBackendDrawPolygonCommand : public GPUBackendDrawCommand
{
// Number of entries in the trailing `vertices` array.
u16 num_vertices;
struct Vertex
{
// presumably sub-pixel precise coordinates for an enhanced renderer — TODO confirm
float precise_x, precise_y, precise_w;
// Integer position.
s32 x, y;
// Packed colour: R in bits 0..7, G in bits 8..15, B in bits 16..23.
u32 color;
// Packed texture coordinate: U in the low byte, V in the high byte.
u16 texcoord;
ALWAYS_INLINE u8 GetR() const { return Truncate8(color); }
ALWAYS_INLINE u8 GetG() const { return Truncate8(color >> 8); }
ALWAYS_INLINE u8 GetB() const { return Truncate8(color >> 16); }
ALWAYS_INLINE u8 GetU() const { return Truncate8(texcoord); }
ALWAYS_INLINE u8 GetV() const { return Truncate8(texcoord >> 8); }
};
// Zero-length trailing array marking the start of the inline vertex data.
Vertex vertices[0];
/// Size of the fixed part plus num_vertices inline vertices.
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendDrawPolygonCommand) + sizeof(Vertex) * num_vertices; }
};
/// Rectangle draw command: origin, extents, packed texture coordinate of the origin and packed colour.
struct GPUBackendDrawRectangleCommand : public GPUBackendDrawCommand
{
s32 x, y;
u16 width, height;
// Packed texture coordinate, unpacked by the rasterizer with UnpackTexcoord().
u16 texcoord;
// Packed RGB colour, unpacked by the rasterizer with UnpackColorRGB24().
u32 color;
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendDrawRectangleCommand); }
};
/// Line (or line strip) draw command. The vertices are stored inline, directly after the fixed fields;
/// consecutive vertices form the segments.
struct GPUBackendDrawLineCommand : public GPUBackendDrawCommand
{
// Number of entries in the trailing `vertices` array.
u16 num_vertices;
struct Vertex
{
// Integer position.
s32 x, y;
// Packed colour: R in bits 0..7, G in bits 8..15, B in bits 16..23.
u32 color;
ALWAYS_INLINE u8 GetR() const { return Truncate8(color); }
ALWAYS_INLINE u8 GetG() const { return Truncate8(color >> 8); }
ALWAYS_INLINE u8 GetB() const { return Truncate8(color >> 16); }
};
// Zero-length trailing array marking the start of the inline vertex data.
Vertex vertices[0];
/// Size of the fixed part plus num_vertices inline vertices.
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendDrawLineCommand) + sizeof(Vertex) * num_vertices; }
};
/// Backend command with no payload beyond the base command header.
struct GPUBackendClearDisplayCommand : public GPUBackendCommand
{
  /// Size of this fixed-length command in bytes.
  ALWAYS_INLINE u32 Size() const
  {
    return sizeof(GPUBackendClearDisplayCommand);
  }
};
/// Backend command carrying a snapshot of the display parameters: aspect ratio, display size and
/// origin, the VRAM region being displayed, interlacing state and enable/colour-depth flags.
struct GPUBackendUpdateDisplayCommand : public GPUBackendCommand
{
  float display_aspect_ratio;

  u16 display_width;
  u16 display_height;
  u16 display_origin_left;
  u16 display_origin_top;

  u16 display_vram_left;
  u16 display_vram_top;
  u16 display_vram_width;
  u16 display_vram_height;
  u16 display_vram_start_x;
  u16 display_vram_start_y;

  GPUInterlacedDisplayMode display_interlace;

  /// 0 if the currently-displayed field is on odd lines (1,3,5,...), 1 if on even lines (2,4,6,...).
  u8 display_interlace_field;

  bool display_enabled;
  bool display_24bit;

  /// Size of this fixed-length command in bytes.
  ALWAYS_INLINE u32 Size() const
  {
    return sizeof(GPUBackendUpdateDisplayCommand);
  }
};
/// Backend command with no payload beyond the base command header.
struct GPUBackendFlushRenderCommand : public GPUBackendCommand
{
  /// Size of this fixed-length command in bytes.
  ALWAYS_INLINE u32 Size() const
  {
    return sizeof(GPUBackendFlushRenderCommand);
  }
};

View File

@ -12,6 +12,7 @@
#include "cpu_core.h"
#include "dma.h"
#include "gpu.h"
#include "gpu_backend.h"
#include "gte.h"
#include "host_display.h"
#include "pgxp.h"
@ -554,7 +555,7 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
g_settings.display_active_start_offset != old_settings.display_active_start_offset ||
g_settings.display_active_end_offset != old_settings.display_active_end_offset)
{
g_gpu->UpdateSettings();
g_gpu.UpdateSettings();
}
if (g_settings.gpu_pgxp_enable != old_settings.gpu_pgxp_enable ||
@ -726,7 +727,7 @@ void HostInterface::ToggleSoftwareRendering()
if (System::IsShutdown() || g_settings.gpu_renderer == GPURenderer::Software)
return;
const GPURenderer new_renderer = g_gpu->IsHardwareRenderer() ? GPURenderer::Software : g_settings.gpu_renderer;
const GPURenderer new_renderer = g_gpu_backend->IsHardwareRenderer() ? GPURenderer::Software : g_settings.gpu_renderer;
AddFormattedOSDMessage(5.0f, "Switching to %s renderer...", Settings::GetRendererDisplayName(new_renderer));
System::RecreateGPU(new_renderer);
@ -743,9 +744,9 @@ void HostInterface::ModifyResolutionScale(s32 increment)
if (!System::IsShutdown())
{
g_gpu->RestoreGraphicsAPIState();
g_gpu->UpdateSettings();
g_gpu->ResetGraphicsAPIState();
g_gpu_backend->RestoreGraphicsAPIState();
g_gpu_backend->UpdateSettings();
g_gpu_backend->ResetGraphicsAPIState();
}
}

View File

@ -159,7 +159,7 @@ void NamcoGunCon::UpdatePosition()
// are we within the active display area?
u32 tick, line;
if (mouse_x < 0 || mouse_y < 0 || !g_gpu->ConvertScreenCoordinatesToBeamTicksAndLines(mouse_x, mouse_y, &tick, &line))
if (mouse_x < 0 || mouse_y < 0 || !g_gpu.ConvertScreenCoordinatesToBeamTicksAndLines(mouse_x, mouse_y, &tick, &line))
{
Log_DebugPrintf("Lightgun out of range for window coordinates %d,%d", mouse_x, mouse_y);
m_position_x = 0x01;
@ -168,7 +168,7 @@ void NamcoGunCon::UpdatePosition()
}
// 8MHz units for X = 44100*768*11/7 = 53222400 / 8000000 = 6.6528
const double divider = static_cast<double>(g_gpu->GetCRTCFrequency()) / 8000000.0;
const double divider = static_cast<double>(g_gpu.GetCRTCFrequency()) / 8000000.0;
m_position_x = static_cast<u16>(static_cast<float>(tick) / static_cast<float>(divider));
m_position_y = static_cast<u16>(line);
Log_DebugPrintf("Lightgun window coordinates %d,%d -> tick %u line %u 8mhz ticks %u", mouse_x, mouse_y, tick, line,

View File

@ -72,6 +72,7 @@ struct Settings
CPUExecutionMode cpu_execution_mode = CPUExecutionMode::Interpreter;
bool cpu_recompiler_memory_exceptions = false;
bool cpu_recompiler_icache = false;
bool cpu_thread = true;
float emulation_speed = 1.0f;
bool speed_limiter_enabled = true;

View File

@ -14,6 +14,7 @@
#include "cpu_core.h"
#include "dma.h"
#include "gpu.h"
#include "gpu_backend.h"
#include "gte.h"
#include "host_display.h"
#include "host_interface.h"
@ -28,9 +29,12 @@
#include "spu.h"
#include "timers.h"
#include <cctype>
#include <condition_variable>
#include <cstdio>
#include <fstream>
#include <limits>
#include <mutex>
#include <thread>
Log_SetChannel(System);
#ifdef WIN32
@ -59,7 +63,6 @@ static std::unique_ptr<CDImage> OpenCDImage(const char* path, bool force_preload
static bool DoLoadState(ByteStream* stream, bool force_software_renderer);
static bool DoState(StateWrapper& sw);
static bool CreateGPU(GPURenderer renderer);
static bool Initialize(bool force_software_renderer);
@ -95,6 +98,17 @@ static u32 s_last_global_tick_counter = 0;
static Common::Timer s_fps_timer;
static Common::Timer s_frame_timer;
static float s_average_cpu_frame_time_accumulator = 0.0f;
static float s_worst_cpu_frame_time_accumulator = 0.0f;
static float s_worst_cpu_frame_time = 0.0f;
static float s_average_cpu_frame_time = 0.0f;
static Common::Timer s_cpu_frame_timer;
static void StartCPUThread();
static void WakeCPUThread();
static void WaitForCPUThread();
static void StopCPUThread();
// Playlist of disc images.
static std::vector<std::string> s_media_playlist;
static std::string s_media_playlist_filename;
@ -153,12 +167,6 @@ u32 GetInternalFrameNumber()
return s_internal_frame_number;
}
void FrameDone()
{
s_frame_number++;
CPU::g_state.frame_done = true;
}
void IncrementInternalFrameNumber()
{
s_internal_frame_number++;
@ -194,10 +202,18 @@ float GetAverageFrameTime()
{
return s_average_frame_time;
}
/// Returns the most recently published average CPU frame time (s_average_cpu_frame_time).
float GetAverageCPUFrameTime()
{
  return s_average_cpu_frame_time;
}
/// Returns the most recently published worst frame time (s_worst_frame_time).
float GetWorstFrameTime()
{
  return s_worst_frame_time;
}
/// Returns the most recently published worst CPU frame time (s_worst_cpu_frame_time).
float GetWorstCPUFrameTime()
{
  return s_worst_cpu_frame_time;
}
float GetThrottleFrequency()
{
return s_throttle_frequency;
@ -469,22 +485,22 @@ std::optional<DiscRegion> GetRegionForPath(const char* image_path)
bool RecreateGPU(GPURenderer renderer)
{
g_gpu->RestoreGraphicsAPIState();
g_gpu_backend->RestoreGraphicsAPIState();
// save current state
std::unique_ptr<ByteStream> state_stream = ByteStream_CreateGrowableMemoryStream();
StateWrapper sw(state_stream.get(), StateWrapper::Mode::Write);
const bool state_valid = g_gpu->DoState(sw) && TimingEvents::DoState(sw);
const bool state_valid = g_gpu_backend->DoState(sw);
if (!state_valid)
Log_ErrorPrintf("Failed to save old GPU state when switching renderers");
g_gpu->ResetGraphicsAPIState();
g_gpu_backend->ResetGraphicsAPIState();
g_gpu_backend.reset();
// create new renderer
g_gpu.reset();
if (!CreateGPU(renderer))
if (!GPUBackend::Create(renderer))
{
Panic("Failed to recreate GPU");
Panic("Failed to recreate GPU backend");
return false;
}
@ -492,10 +508,9 @@ bool RecreateGPU(GPURenderer renderer)
{
state_stream->SeekAbsolute(0);
sw.SetMode(StateWrapper::Mode::Read);
g_gpu->RestoreGraphicsAPIState();
g_gpu->DoState(sw);
TimingEvents::DoState(sw);
g_gpu->ResetGraphicsAPIState();
g_gpu_backend->RestoreGraphicsAPIState();
g_gpu_backend->DoState(sw);
g_gpu_backend->ResetGraphicsAPIState();
}
return true;
@ -526,6 +541,7 @@ bool Boot(const SystemBootParameters& params)
Assert(s_media_playlist.empty());
s_state = State::Starting;
s_region = g_settings.region;
// g_settings.cpu_thread = false;
if (params.state_stream)
{
@ -705,15 +721,16 @@ bool Initialize(bool force_software_renderer)
s_fps_timer.Reset();
s_frame_timer.Reset();
if (!GPUBackend::Create(force_software_renderer ? GPURenderer::Software : g_settings.gpu_renderer))
return false;
TimingEvents::Initialize();
CPU::Initialize();
CPU::CodeCache::Initialize(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
Bus::Initialize();
if (!CreateGPU(force_software_renderer ? GPURenderer::Software : g_settings.gpu_renderer))
return false;
g_gpu.Initialize();
g_dma.Initialize();
g_interrupt_controller.Initialize();
@ -726,6 +743,10 @@ bool Initialize(bool force_software_renderer)
g_sio.Initialize();
UpdateThrottlePeriod();
if (g_settings.cpu_thread)
StartCPUThread();
return true;
}
@ -734,13 +755,19 @@ void Shutdown()
if (s_state == State::Shutdown)
return;
s_state = State::Stopping;
if (g_settings.cpu_thread)
StopCPUThread();
g_sio.Shutdown();
g_mdec.Shutdown();
g_spu.Shutdown();
g_timers.Shutdown();
g_pad.Shutdown();
g_cdrom.Shutdown();
g_gpu.reset();
g_gpu_backend.reset();
g_gpu.Shutdown();
g_interrupt_controller.Shutdown();
g_dma.Shutdown();
CPU::CodeCache::Shutdown();
@ -756,44 +783,6 @@ void Shutdown()
s_state = State::Shutdown;
}
/// Creates the global GPU instance for the requested renderer and initializes it against the host
/// display. If creation or initialization fails, a software renderer is created instead.
/// @param renderer Renderer to instantiate; unrecognised values select the software renderer.
/// @return false only if the software fallback also fails to initialize; true otherwise.
bool CreateGPU(GPURenderer renderer)
{
// Instantiate the renderer matching the requested type.
switch (renderer)
{
case GPURenderer::HardwareOpenGL:
g_gpu = GPU::CreateHardwareOpenGLRenderer();
break;
case GPURenderer::HardwareVulkan:
g_gpu = GPU::CreateHardwareVulkanRenderer();
break;
// The D3D11 renderer is only compiled in for Windows builds.
#ifdef WIN32
case GPURenderer::HardwareD3D11:
g_gpu = GPU::CreateHardwareD3D11Renderer();
break;
#endif
case GPURenderer::Software:
default:
g_gpu = GPU::CreateSoftwareRenderer();
break;
}
// Creation may have returned null, or initialization against the display may fail; in either case
// discard whatever was created and retry with the software renderer.
if (!g_gpu || !g_gpu->Initialize(g_host_interface->GetDisplay()))
{
Log_ErrorPrintf("Failed to initialize GPU, falling back to software");
g_gpu.reset();
g_gpu = GPU::CreateSoftwareRenderer();
if (!g_gpu->Initialize(g_host_interface->GetDisplay()))
return false;
}
// we put this here rather than in Initialize() because of the virtual calls
g_gpu->Reset();
return true;
}
bool DoState(StateWrapper& sw)
{
if (!sw.DoMarker("System"))
@ -818,12 +807,15 @@ bool DoState(StateWrapper& sw)
if (!sw.DoMarker("InterruptController") || !g_interrupt_controller.DoState(sw))
return false;
g_gpu->RestoreGraphicsAPIState();
const bool gpu_result = sw.DoMarker("GPU") && g_gpu->DoState(sw);
g_gpu->ResetGraphicsAPIState();
g_gpu_backend->RestoreGraphicsAPIState();
const bool gpu_result = sw.DoMarker("GPUBackend") && g_gpu_backend->DoState(sw);
g_gpu_backend->ResetGraphicsAPIState();
if (!gpu_result)
return false;
if (!sw.DoMarker("GPU") || !g_gpu.DoState(sw))
return false;
if (!sw.DoMarker("CDROM") || !g_cdrom.DoState(sw))
return false;
@ -853,14 +845,14 @@ void Reset()
if (IsShutdown())
return;
g_gpu->RestoreGraphicsAPIState();
g_gpu_backend->RestoreGraphicsAPIState();
CPU::Reset();
CPU::CodeCache::Flush();
Bus::Reset();
g_dma.Reset();
g_interrupt_controller.Reset();
g_gpu->Reset();
g_gpu.Reset();
g_cdrom.Reset();
g_pad.Reset();
g_timers.Reset();
@ -872,7 +864,7 @@ void Reset()
TimingEvents::Reset();
ResetPerformanceCounters();
g_gpu->ResetGraphicsAPIState();
g_gpu_backend->ResetGraphicsAPIState();
}
bool LoadState(ByteStream* state)
@ -1052,12 +1044,12 @@ bool SaveState(ByteStream* state, u32 screenshot_size /* = 128 */)
{
header.offset_to_data = static_cast<u32>(state->GetPosition());
g_gpu->RestoreGraphicsAPIState();
g_gpu_backend->RestoreGraphicsAPIState();
StateWrapper sw(state, StateWrapper::Mode::Write);
const bool result = DoState(sw);
g_gpu->ResetGraphicsAPIState();
g_gpu_backend->ResetGraphicsAPIState();
if (!result)
return false;
@ -1077,12 +1069,14 @@ bool SaveState(ByteStream* state, u32 screenshot_size /* = 128 */)
return true;
}
void RunFrame()
static std::thread s_cpu_thread;
static std::atomic_bool s_cpu_thread_running{false};
static std::atomic_bool s_cpu_thread_sleeping{false};
static std::mutex s_cpu_thread_wake_mutex;
static std::condition_variable s_cpu_thread_wake_cv;
static void ExecuteCPUFrame()
{
s_frame_timer.Reset();
g_gpu->RestoreGraphicsAPIState();
switch (g_settings.cpu_execution_mode)
{
case CPUExecutionMode::Recompiler:
@ -1105,11 +1099,100 @@ void RunFrame()
// Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns.
g_spu.GeneratePendingSamples();
}
/// Entry point of the dedicated CPU emulation thread.
///
/// Each iteration sleeps on s_cpu_thread_wake_cv until either a new frame has been requested
/// (CPU::g_state.frame_done has been cleared, see WakeCPUThread()) or the system has entered
/// State::Stopping. On a frame request it executes one CPU frame and folds the time taken into the
/// CPU frame-time accumulators. On State::Stopping it leaves the loop and clears
/// s_cpu_thread_running so StopCPUThread() can finish.
static void CPUThreadFunction()
{
  for (;;)
  {
    {
      // s_cpu_thread_sleeping is set while holding the wake mutex, so WakeCPUThread() (which takes
      // the same mutex) can only observe it as true once this thread is actually inside wait().
      std::unique_lock<std::mutex> lock(s_cpu_thread_wake_mutex);
      s_cpu_thread_sleeping.store(true);
      s_cpu_thread_wake_cv.wait(lock, []() { return !CPU::g_state.frame_done || s_state == State::Stopping; });
      s_cpu_thread_sleeping.store(false);

      if (s_state == State::Stopping)
        break;
    }

    s_cpu_frame_timer.Reset();
    ExecuteCPUFrame();

    {
      // Read the CPU-side timer that was reset just before ExecuteCPUFrame(). Using s_frame_timer
      // here would report the time since RunFrame() last reset it rather than the CPU frame
      // duration, making the "CPU" statistics meaningless.
      const float frame_time = static_cast<float>(s_cpu_frame_timer.GetTimeMilliseconds());
      s_average_cpu_frame_time_accumulator += frame_time;
      s_worst_cpu_frame_time_accumulator = std::max(s_worst_cpu_frame_time_accumulator, frame_time);
    }
  }

  s_cpu_thread_running.store(false);
}
/// Launches the CPU emulation thread. Must not be called while a previous CPU thread is still
/// joinable or marked as running (asserted).
void StartCPUThread()
{
  Assert(!s_cpu_thread.joinable() && !s_cpu_thread_running.load());

  // Mark the thread as running before it starts, so StopCPUThread() never sees a stale "false".
  s_cpu_thread_running.store(true);
  s_cpu_thread = std::thread(&CPUThreadFunction);
}
/// Stops and joins the CPU emulation thread. The caller must already have moved the system into
/// State::Stopping (asserted), which is what makes the thread leave its loop once woken.
void StopCPUThread()
{
  Assert(s_state == State::Stopping);

  // Keep waking the thread and processing GPU backend commands until the thread reports that it
  // has left its loop.
  for (;;)
  {
    if (!s_cpu_thread_running.load())
      break;

    WakeCPUThread();
    g_gpu_backend->ProcessGPUCommands();
  }

  if (!s_cpu_thread.joinable())
    return;

  s_cpu_thread.join();
}
/// Requests a new frame from the CPU thread: if the thread is currently sleeping, clears
/// CPU::g_state.frame_done and signals the wake condition variable. Does nothing if the thread is
/// not sleeping.
void WakeCPUThread()
{
  std::lock_guard<std::mutex> guard(s_cpu_thread_wake_mutex);
  if (s_cpu_thread_sleeping.load())
  {
    CPU::g_state.frame_done = false;
    s_cpu_thread_wake_cv.notify_one();
  }
}
/// Busy-waits (without yielding or sleeping) until the CPU thread reports that it is sleeping.
void WaitForCPUThread()
{
  for (;;)
  {
    if (s_cpu_thread_sleeping.load())
      return;
  }
}
/// Runs one frame. With the CPU thread disabled the CPU frame is executed inline on the calling
/// thread; otherwise the CPU thread is woken and the calling thread runs the GPU backend's frame.
/// Cheats (if a cheat list is loaded) are applied afterwards.
void RunFrame()
{
// Restart the per-frame timer that the frame-time statistics are derived from.
s_frame_timer.Reset();
g_gpu_backend->RestoreGraphicsAPIState();
if (!g_settings.cpu_thread)
{
// Single-threaded path: clear the completion flag and execute the CPU frame right here.
CPU::g_state.frame_done = false;
ExecuteCPUFrame();
}
else
{
// Threaded path: hand the CPU frame to the CPU thread and run the GPU side on this thread.
WakeCPUThread();
g_gpu_backend->RunGPUFrame();
}
if (s_cheat_list)
s_cheat_list->Apply();
// NOTE(review): g_gpu is accessed with '.' elsewhere in this change (e.g. g_gpu.Reset()), so the
// '->' call below looks like the pre-change line left over next to its replacement - confirm.
g_gpu->ResetGraphicsAPIState();
g_gpu_backend->ResetGraphicsAPIState();
}
/// Marks the current frame as finished: bumps the frame counter, sets CPU::g_state.frame_done and
/// then notifies the GPU backend via CPUFrameDone().
void FrameDone()
{
  ++s_frame_number;
  CPU::g_state.frame_done = true;

  g_gpu_backend->CPUFrameDone();
}
void SetThrottleFrequency(float frequency)
@ -1163,8 +1246,15 @@ void Throttle()
s_last_throttle_time += s_throttle_period;
}
void UpdatePerformanceCounters()
void EndFrame()
{
if (g_settings.cpu_thread)
{
// finish up anything the CPU pushed to the GPU after vblank
WaitForCPUThread();
g_gpu_backend->EndGPUFrame();
}
const float frame_time = static_cast<float>(s_frame_timer.GetTimeMilliseconds());
s_average_frame_time_accumulator += frame_time;
s_worst_frame_time_accumulator = std::max(s_worst_frame_time_accumulator, frame_time);
@ -1179,8 +1269,12 @@ void UpdatePerformanceCounters()
s_worst_frame_time = s_worst_frame_time_accumulator;
s_worst_frame_time_accumulator = 0.0f;
s_worst_cpu_frame_time = s_worst_cpu_frame_time_accumulator;
s_worst_cpu_frame_time_accumulator = 0.0f;
s_average_frame_time = s_average_frame_time_accumulator / frames_presented;
s_average_frame_time_accumulator = 0.0f;
s_average_cpu_frame_time = s_average_cpu_frame_time_accumulator / frames_presented;
s_average_cpu_frame_time_accumulator = 0.0f;
s_vps = static_cast<float>(frames_presented / time);
s_last_frame_number = s_frame_number;
s_fps = static_cast<float>(s_internal_frame_number - s_last_internal_frame_number) / time;
@ -1191,6 +1285,11 @@ void UpdatePerformanceCounters()
s_last_global_tick_counter = global_tick_counter;
s_fps_timer.Reset();
#ifndef WIN32
Log_InfoPrintf("FPS: %.2f VPS: %.2f Average: %.2fms (%.2fms CPU) Worst: %.2fms (%.2fms CPU)", s_fps, s_vps,
s_average_frame_time, s_average_cpu_frame_time, s_worst_frame_time, s_worst_cpu_frame_time);
#endif
g_host_interface->OnSystemPerformanceCountersUpdated();
}

View File

@ -45,7 +45,8 @@ enum class State
Shutdown,
Starting,
Running,
Paused
Paused,
Stopping
};
/// Returns true if the filename is a PlayStation executable we can inject.
@ -93,7 +94,9 @@ float GetFPS();
float GetVPS();
float GetEmulationSpeed();
float GetAverageFrameTime();
float GetAverageCPUFrameTime();
float GetWorstFrameTime();
float GetWorstCPUFrameTime();
float GetThrottleFrequency();
bool Boot(const SystemBootParameters& params);
@ -117,7 +120,7 @@ void UpdateThrottlePeriod();
/// Throttles the system, i.e. sleeps until it's time to execute the next frame.
void Throttle();
void UpdatePerformanceCounters();
void EndFrame();
void ResetPerformanceCounters();
// Access controllers for simulating input.

View File

@ -194,8 +194,8 @@ u32 Timers::ReadRegister(u32 offset)
if (timer_index < 2 && cs.external_counting_enabled)
{
// timers 0/1 depend on the GPU
if (timer_index == 0 || g_gpu->IsCRTCScanlinePending())
g_gpu->SynchronizeCRTC();
if (timer_index == 0 || g_gpu.IsCRTCScanlinePending())
g_gpu.SynchronizeCRTC();
}
m_sysclk_event->InvokeEarly();
@ -208,8 +208,8 @@ u32 Timers::ReadRegister(u32 offset)
if (timer_index < 2 && cs.external_counting_enabled)
{
// timers 0/1 depend on the GPU
if (timer_index == 0 || g_gpu->IsCRTCScanlinePending())
g_gpu->SynchronizeCRTC();
if (timer_index == 0 || g_gpu.IsCRTCScanlinePending())
g_gpu.SynchronizeCRTC();
}
m_sysclk_event->InvokeEarly();
@ -244,8 +244,8 @@ void Timers::WriteRegister(u32 offset, u32 value)
if (timer_index < 2 && cs.external_counting_enabled)
{
// timers 0/1 depend on the GPU
if (timer_index == 0 || g_gpu->IsCRTCScanlinePending())
g_gpu->SynchronizeCRTC();
if (timer_index == 0 || g_gpu.IsCRTCScanlinePending())
g_gpu.SynchronizeCRTC();
}
m_sysclk_event->InvokeEarly();

View File

@ -8,6 +8,7 @@
#include "core/cheats.h"
#include "core/controller.h"
#include "core/gpu.h"
#include "core/gpu_backend.h"
#include "core/system.h"
#include "frontend-common/game_list.h"
#include "frontend-common/imgui_styles.h"
@ -432,7 +433,7 @@ void QtHostInterface::onHostDisplayWindowResized(int width, int height)
// re-render the display, since otherwise it will be out of date and stretched if paused
if (!System::IsShutdown())
{
g_gpu->UpdateResolutionScale();
g_gpu_backend->UpdateResolutionScale();
renderDisplay();
}
}
@ -550,7 +551,7 @@ void QtHostInterface::updateDisplayState()
if (!System::IsShutdown())
{
g_gpu->UpdateResolutionScale();
g_gpu_backend->UpdateResolutionScale();
redrawDisplayWindow();
}
UpdateSpeedLimiterState();
@ -1247,7 +1248,7 @@ void QtHostInterface::threadEntryPoint()
renderDisplay();
System::UpdatePerformanceCounters();
System::EndFrame();
if (m_speed_limiter_enabled)
System::Throttle();

View File

@ -7,6 +7,7 @@
#include "core/cheats.h"
#include "core/controller.h"
#include "core/gpu.h"
#include "core/gpu_backend.h"
#include "core/host_display.h"
#include "core/system.h"
#include "frontend-common/icon.h"
@ -376,7 +377,7 @@ bool SDLHostInterface::SetFullscreen(bool enabled)
m_display->ResizeRenderWindow(window_width, window_height);
if (!System::IsShutdown())
g_gpu->UpdateResolutionScale();
g_gpu_backend->UpdateResolutionScale();
m_fullscreen = enabled;
return true;
@ -534,7 +535,7 @@ void SDLHostInterface::HandleSDLEvent(const SDL_Event* event)
UpdateFramebufferScale();
if (!System::IsShutdown())
g_gpu->UpdateResolutionScale();
g_gpu_backend->UpdateResolutionScale();
}
else if (event->window.event == SDL_WINDOWEVENT_MOVED)
{
@ -824,11 +825,11 @@ void SDLHostInterface::DrawMainMenuBar()
}
else
{
ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (420.0f * framebuffer_scale));
ImGui::Text("Average: %.2fms", System::GetAverageFrameTime());
ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (500.0f * framebuffer_scale));
ImGui::Text("Average: %.2fms / %.2fms", System::GetAverageFrameTime(), System::GetAverageCPUFrameTime());
ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (310.0f * framebuffer_scale));
ImGui::Text("Worst: %.2fms", System::GetWorstFrameTime());
ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (350.0f * framebuffer_scale));
ImGui::Text("Worst: %.2fms / %.2fms", System::GetWorstFrameTime(), System::GetWorstCPUFrameTime());
ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (210.0f * framebuffer_scale));
@ -907,7 +908,7 @@ void SDLHostInterface::DrawQuickSettingsMenu()
for (u32 scale = 1; scale <= GPU::MAX_RESOLUTION_SCALE; scale++)
{
char buf[32];
std::snprintf(buf, sizeof(buf), "%ux (%ux%u)", scale, scale * GPU::VRAM_WIDTH, scale * GPU::VRAM_HEIGHT);
std::snprintf(buf, sizeof(buf), "%ux (%ux%u)", scale, scale * VRAM_WIDTH, scale * VRAM_HEIGHT);
if (ImGui::MenuItem(buf, nullptr, current_internal_resolution == scale))
{
@ -1653,7 +1654,7 @@ void SDLHostInterface::Run()
if (System::IsRunning())
{
System::UpdatePerformanceCounters();
System::EndFrame();
if (m_speed_limiter_enabled)
System::Throttle();

View File

@ -11,6 +11,7 @@
#include "core/cpu_code_cache.h"
#include "core/dma.h"
#include "core/gpu.h"
#include "core/gpu_backend.h"
#include "core/host_display.h"
#include "core/mdec.h"
#include "core/pgxp.h"
@ -825,8 +826,8 @@ void CommonHostInterface::DrawFPSWindow()
if (g_settings.display_show_resolution)
{
const auto [effective_width, effective_height] = g_gpu->GetEffectiveDisplayResolution();
const bool interlaced = g_gpu->IsInterlacedDisplayEnabled();
const auto [effective_width, effective_height] = g_gpu_backend->GetEffectiveDisplayResolution();
const bool interlaced = g_gpu.IsInterlacedDisplayEnabled();
ImGui::Text("%ux%u (%s)", effective_width, effective_height, interlaced ? "interlaced" : "progressive");
}
@ -906,7 +907,7 @@ void CommonHostInterface::DrawOSDMessages()
void CommonHostInterface::DrawDebugWindows()
{
if (g_settings.debugging.show_gpu_state)
g_gpu->DrawDebugStateWindow();
g_gpu.DrawDebugStateWindow();
if (g_settings.debugging.show_cdrom_state)
g_cdrom.DrawDebugWindow();
if (g_settings.debugging.show_timers_state)
@ -1419,7 +1420,7 @@ void CommonHostInterface::RegisterGraphicsHotkeys()
if (!pressed)
{
g_settings.gpu_pgxp_enable = !g_settings.gpu_pgxp_enable;
g_gpu->UpdateSettings();
g_gpu.UpdateSettings();
AddFormattedOSDMessage(5.0f, "PGXP is now %s.",
g_settings.gpu_pgxp_enable ? "enabled" : "disabled");