This commit is contained in:
Connor McLaughlin 2020-09-05 00:23:52 +10:00
parent fa3307e5f1
commit 5a2e4ed08f
36 changed files with 2709 additions and 1829 deletions

View File

@ -1,5 +1,6 @@
#pragma once #pragma once
#include <algorithm> #include <algorithm>
#include <cstring>
#include <limits> #include <limits>
#include <tuple> #include <tuple>
#include <type_traits> #include <type_traits>
@ -63,10 +64,7 @@ struct Rectangle
/// Assignment operator. /// Assignment operator.
constexpr Rectangle& operator=(const Rectangle& rhs) constexpr Rectangle& operator=(const Rectangle& rhs)
{ {
left = rhs.left; std::memcpy(this, &rhs, sizeof(Rectangle));
top = rhs.top;
right = rhs.right;
bottom = rhs.bottom;
return *this; return *this;
} }

View File

@ -28,6 +28,8 @@ add_library(core
dma.h dma.h
gpu.cpp gpu.cpp
gpu.h gpu.h
gpu_backend.cpp
gpu_backend.h
gpu_commands.cpp gpu_commands.cpp
gpu_hw.cpp gpu_hw.cpp
gpu_hw.h gpu_hw.h

View File

@ -579,14 +579,14 @@ ALWAYS_INLINE static TickCount DoGPUAccess(u32 offset, u32& value)
{ {
if constexpr (type == MemoryAccessType::Read) if constexpr (type == MemoryAccessType::Read)
{ {
value = g_gpu->ReadRegister(offset); value = g_gpu.ReadRegister(offset);
FixupUnalignedWordAccessW32(offset, value); FixupUnalignedWordAccessW32(offset, value);
return 2; return 2;
} }
else else
{ {
FixupUnalignedWordAccessW32(offset, value); FixupUnalignedWordAccessW32(offset, value);
g_gpu->WriteRegister(offset, value); g_gpu.WriteRegister(offset, value);
return 0; return 0;
} }
} }

View File

@ -60,7 +60,9 @@
<ClCompile Include="cpu_recompiler_register_cache.cpp" /> <ClCompile Include="cpu_recompiler_register_cache.cpp" />
<ClCompile Include="cpu_types.cpp" /> <ClCompile Include="cpu_types.cpp" />
<ClCompile Include="digital_controller.cpp" /> <ClCompile Include="digital_controller.cpp" />
<ClCompile Include="gpu_backend.cpp" />
<ClCompile Include="gpu_commands.cpp" /> <ClCompile Include="gpu_commands.cpp" />
<ClCompile Include="gpu_hw.cpp" />
<ClCompile Include="gpu_hw_d3d11.cpp" /> <ClCompile Include="gpu_hw_d3d11.cpp" />
<ClCompile Include="gpu_hw_shadergen.cpp" /> <ClCompile Include="gpu_hw_shadergen.cpp" />
<ClCompile Include="gpu_hw_vulkan.cpp" /> <ClCompile Include="gpu_hw_vulkan.cpp" />
@ -68,7 +70,6 @@
<ClCompile Include="gte.cpp" /> <ClCompile Include="gte.cpp" />
<ClCompile Include="dma.cpp" /> <ClCompile Include="dma.cpp" />
<ClCompile Include="gpu.cpp" /> <ClCompile Include="gpu.cpp" />
<ClCompile Include="gpu_hw.cpp" />
<ClCompile Include="gpu_hw_opengl.cpp" /> <ClCompile Include="gpu_hw_opengl.cpp" />
<ClCompile Include="host_display.cpp" /> <ClCompile Include="host_display.cpp" />
<ClCompile Include="host_interface.cpp" /> <ClCompile Include="host_interface.cpp" />
@ -109,6 +110,9 @@
<ClInclude Include="cpu_recompiler_thunks.h" /> <ClInclude Include="cpu_recompiler_thunks.h" />
<ClInclude Include="cpu_recompiler_types.h" /> <ClInclude Include="cpu_recompiler_types.h" />
<ClInclude Include="digital_controller.h" /> <ClInclude Include="digital_controller.h" />
<ClInclude Include="gpu_backend.h" />
<ClInclude Include="gpu_types.h" />
<ClInclude Include="gpu_hw.h" />
<ClInclude Include="gpu_hw_d3d11.h" /> <ClInclude Include="gpu_hw_d3d11.h" />
<ClInclude Include="gpu_hw_shadergen.h" /> <ClInclude Include="gpu_hw_shadergen.h" />
<ClInclude Include="gpu_hw_vulkan.h" /> <ClInclude Include="gpu_hw_vulkan.h" />
@ -117,7 +121,6 @@
<ClInclude Include="cpu_types.h" /> <ClInclude Include="cpu_types.h" />
<ClInclude Include="dma.h" /> <ClInclude Include="dma.h" />
<ClInclude Include="gpu.h" /> <ClInclude Include="gpu.h" />
<ClInclude Include="gpu_hw.h" />
<ClInclude Include="gpu_hw_opengl.h" /> <ClInclude Include="gpu_hw_opengl.h" />
<ClInclude Include="gte_types.h" /> <ClInclude Include="gte_types.h" />
<ClInclude Include="host_display.h" /> <ClInclude Include="host_display.h" />

View File

@ -8,7 +8,6 @@
<ClCompile Include="dma.cpp" /> <ClCompile Include="dma.cpp" />
<ClCompile Include="gpu.cpp" /> <ClCompile Include="gpu.cpp" />
<ClCompile Include="gpu_hw_opengl.cpp" /> <ClCompile Include="gpu_hw_opengl.cpp" />
<ClCompile Include="gpu_hw.cpp" />
<ClCompile Include="host_interface.cpp" /> <ClCompile Include="host_interface.cpp" />
<ClCompile Include="interrupt_controller.cpp" /> <ClCompile Include="interrupt_controller.cpp" />
<ClCompile Include="cdrom.cpp" /> <ClCompile Include="cdrom.cpp" />
@ -49,6 +48,8 @@
<ClCompile Include="cheats.cpp" /> <ClCompile Include="cheats.cpp" />
<ClCompile Include="shadergen.cpp" /> <ClCompile Include="shadergen.cpp" />
<ClCompile Include="memory_card_image.cpp" /> <ClCompile Include="memory_card_image.cpp" />
<ClCompile Include="gpu_backend.cpp" />
<ClCompile Include="gpu_hw.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="types.h" /> <ClInclude Include="types.h" />
@ -61,7 +62,6 @@
<ClInclude Include="dma.h" /> <ClInclude Include="dma.h" />
<ClInclude Include="gpu.h" /> <ClInclude Include="gpu.h" />
<ClInclude Include="gpu_hw_opengl.h" /> <ClInclude Include="gpu_hw_opengl.h" />
<ClInclude Include="gpu_hw.h" />
<ClInclude Include="host_interface.h" /> <ClInclude Include="host_interface.h" />
<ClInclude Include="interrupt_controller.h" /> <ClInclude Include="interrupt_controller.h" />
<ClInclude Include="cdrom.h" /> <ClInclude Include="cdrom.h" />
@ -101,5 +101,8 @@
<ClInclude Include="cheats.h" /> <ClInclude Include="cheats.h" />
<ClInclude Include="shadergen.h" /> <ClInclude Include="shadergen.h" />
<ClInclude Include="memory_card_image.h" /> <ClInclude Include="memory_card_image.h" />
<ClInclude Include="gpu_backend.h" />
<ClInclude Include="gpu_hw.h" />
<ClInclude Include="gpu_types.h" />
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -129,7 +129,6 @@ static void ExecuteImpl()
{ {
CodeBlockKey next_block_key; CodeBlockKey next_block_key;
g_state.frame_done = false;
while (!g_state.frame_done) while (!g_state.frame_done)
{ {
TimingEvents::UpdateCPUDowncount(); TimingEvents::UpdateCPUDowncount();
@ -240,7 +239,6 @@ void Execute()
void ExecuteRecompiler() void ExecuteRecompiler()
{ {
g_state.frame_done = false;
while (!g_state.frame_done) while (!g_state.frame_done)
{ {
TimingEvents::UpdateCPUDowncount(); TimingEvents::UpdateCPUDowncount();

View File

@ -66,6 +66,8 @@ void Shutdown()
void Reset() void Reset()
{ {
g_state.frame_done = true;
g_state.pending_ticks = 0; g_state.pending_ticks = 0;
g_state.downcount = MAX_SLICE_SIZE; g_state.downcount = MAX_SLICE_SIZE;
@ -1368,7 +1370,6 @@ restart_instruction:
template<PGXPMode pgxp_mode> template<PGXPMode pgxp_mode>
static void ExecuteImpl() static void ExecuteImpl()
{ {
g_state.frame_done = false;
while (!g_state.frame_done) while (!g_state.frame_done)
{ {
TimingEvents::UpdateCPUDowncount(); TimingEvents::UpdateCPUDowncount();

View File

@ -66,7 +66,7 @@ struct State
bool branch_was_taken = false; bool branch_was_taken = false;
bool exception_raised = false; bool exception_raised = false;
bool interrupt_delay = false; bool interrupt_delay = false;
bool frame_done = false; bool frame_done = true;
// load delays // load delays
Reg load_delay_reg = Reg::count; Reg load_delay_reg = Reg::count;

View File

@ -449,17 +449,17 @@ TickCount DMA::TransferMemoryToDevice(Channel channel, u32 address, u32 incremen
{ {
case Channel::GPU: case Channel::GPU:
{ {
if (g_gpu->BeginDMAWrite()) if (g_gpu.BeginDMAWrite())
{ {
u8* ram_pointer = Bus::g_ram; u8* ram_pointer = Bus::g_ram;
for (u32 i = 0; i < word_count; i++) for (u32 i = 0; i < word_count; i++)
{ {
u32 value; u32 value;
std::memcpy(&value, &ram_pointer[address], sizeof(u32)); std::memcpy(&value, &ram_pointer[address], sizeof(u32));
g_gpu->DMAWrite(address, value); g_gpu.DMAWrite(address, value);
address = (address + increment) & ADDRESS_MASK; address = (address + increment) & ADDRESS_MASK;
} }
g_gpu->EndDMAWrite(); g_gpu.EndDMAWrite();
} }
} }
break; break;
@ -516,7 +516,7 @@ TickCount DMA::TransferDeviceToMemory(Channel channel, u32 address, u32 incremen
switch (channel) switch (channel)
{ {
case Channel::GPU: case Channel::GPU:
g_gpu->DMARead(dest_pointer, word_count); g_gpu.DMARead(dest_pointer, word_count);
break; break;
case Channel::CDROM: case Channel::CDROM:

View File

@ -4,6 +4,7 @@
#include "common/log.h" #include "common/log.h"
#include "common/state_wrapper.h" #include "common/state_wrapper.h"
#include "dma.h" #include "dma.h"
#include "gpu_backend.h"
#include "host_display.h" #include "host_display.h"
#include "host_interface.h" #include "host_interface.h"
#include "interrupt_controller.h" #include "interrupt_controller.h"
@ -16,7 +17,7 @@
#endif #endif
Log_SetChannel(GPU); Log_SetChannel(GPU);
std::unique_ptr<GPU> g_gpu; GPU g_gpu;
const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable(); const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable();
@ -24,9 +25,8 @@ GPU::GPU() = default;
GPU::~GPU() = default; GPU::~GPU() = default;
bool GPU::Initialize(HostDisplay* host_display) void GPU::Initialize()
{ {
m_host_display = host_display;
m_force_progressive_scan = g_settings.gpu_disable_interlacing; m_force_progressive_scan = g_settings.gpu_disable_interlacing;
m_force_ntsc_timings = g_settings.gpu_force_ntsc_timings; m_force_ntsc_timings = g_settings.gpu_force_ntsc_timings;
m_crtc_state.display_aspect_ratio = Settings::GetDisplayAspectRatioValue(g_settings.display_aspect_ratio); m_crtc_state.display_aspect_ratio = Settings::GetDisplayAspectRatioValue(g_settings.display_aspect_ratio);
@ -38,7 +38,14 @@ bool GPU::Initialize(HostDisplay* host_display)
m_max_run_ahead = g_settings.gpu_max_run_ahead; m_max_run_ahead = g_settings.gpu_max_run_ahead;
m_console_is_pal = System::IsPALRegion(); m_console_is_pal = System::IsPALRegion();
UpdateCRTCConfig(); UpdateCRTCConfig();
return true; }
void GPU::Shutdown()
{
m_command_tick_event.reset();
m_crtc_tick_event.reset();
m_fifo.Clear();
std::vector<u32>().swap(m_blit_buffer);
} }
void GPU::UpdateSettings() void GPU::UpdateSettings()
@ -58,13 +65,8 @@ void GPU::UpdateSettings()
// Crop mode calls this, so recalculate the display area // Crop mode calls this, so recalculate the display area
UpdateCRTCDisplayParameters(); UpdateCRTCDisplayParameters();
}
void GPU::UpdateResolutionScale() {} g_gpu_backend->PushCommand(g_gpu_backend->NewUpdateSettingsCommand());
std::tuple<u32, u32> GPU::GetEffectiveDisplayResolution()
{
return std::tie(m_crtc_state.display_vram_width, m_crtc_state.display_vram_height);
} }
void GPU::Reset() void GPU::Reset()
@ -72,6 +74,8 @@ void GPU::Reset()
SoftReset(); SoftReset();
m_set_texture_disable_mask = false; m_set_texture_disable_mask = false;
m_GPUREAD_latch = 0; m_GPUREAD_latch = 0;
g_gpu_backend->PushCommand(g_gpu_backend->NewResetCommand());
} }
void GPU::SoftReset() void GPU::SoftReset()
@ -81,7 +85,7 @@ void GPU::SoftReset()
m_GPUSTAT.bits = 0x14802000; m_GPUSTAT.bits = 0x14802000;
m_GPUSTAT.pal_mode = System::IsPALRegion(); m_GPUSTAT.pal_mode = System::IsPALRegion();
m_drawing_area.Set(0, 0, 0, 0); m_drawing_area.Set(0, 0, 0, 0);
m_drawing_area_changed = true; UpdateDrawingArea();
m_drawing_offset = {}; m_drawing_offset = {};
std::memset(&m_crtc_state.regs, 0, sizeof(m_crtc_state.regs)); std::memset(&m_crtc_state.regs, 0, sizeof(m_crtc_state.regs));
m_crtc_state.regs.horizontal_display_range = 0xC60260; m_crtc_state.regs.horizontal_display_range = 0xC60260;
@ -98,9 +102,8 @@ void GPU::SoftReset()
m_fifo.Clear(); m_fifo.Clear();
m_blit_buffer.clear(); m_blit_buffer.clear();
m_blit_remaining_words = 0; m_blit_remaining_words = 0;
SetDrawMode(0); m_draw_mode.bits = 0;
SetTexturePalette(0); m_texture_window.bits = 0;
SetTextureWindow(0);
UpdateDMARequest(); UpdateDMARequest();
UpdateCRTCConfig(); UpdateCRTCConfig();
UpdateCRTCTickEvent(); UpdateCRTCTickEvent();
@ -117,27 +120,14 @@ bool GPU::DoState(StateWrapper& sw)
sw.Do(&m_GPUSTAT.bits); sw.Do(&m_GPUSTAT.bits);
sw.Do(&m_draw_mode.mode_reg.bits); sw.Do(&m_drawing_offset.x);
sw.Do(&m_draw_mode.palette_reg); sw.Do(&m_drawing_offset.y);
sw.Do(&m_draw_mode.texture_window_value);
sw.Do(&m_draw_mode.texture_page_x);
sw.Do(&m_draw_mode.texture_page_y);
sw.Do(&m_draw_mode.texture_palette_x);
sw.Do(&m_draw_mode.texture_palette_y);
sw.Do(&m_draw_mode.texture_window_mask_x);
sw.Do(&m_draw_mode.texture_window_mask_y);
sw.Do(&m_draw_mode.texture_window_offset_x);
sw.Do(&m_draw_mode.texture_window_offset_y);
sw.Do(&m_draw_mode.texture_x_flip);
sw.Do(&m_draw_mode.texture_y_flip);
sw.Do(&m_drawing_area.left); sw.Do(&m_drawing_area.left);
sw.Do(&m_drawing_area.top); sw.Do(&m_drawing_area.top);
sw.Do(&m_drawing_area.right); sw.Do(&m_drawing_area.right);
sw.Do(&m_drawing_area.bottom); sw.Do(&m_drawing_area.bottom);
sw.Do(&m_drawing_offset.x); sw.Do(&m_draw_mode.bits);
sw.Do(&m_drawing_offset.y); sw.Do(&m_texture_window.bits);
sw.Do(&m_drawing_offset.x);
sw.Do(&m_console_is_pal); sw.Do(&m_console_is_pal);
sw.Do(&m_set_texture_disable_mask); sw.Do(&m_set_texture_disable_mask);
@ -195,9 +185,7 @@ bool GPU::DoState(StateWrapper& sw)
if (sw.IsReading()) if (sw.IsReading())
{ {
m_draw_mode.texture_page_changed = true; UpdateDrawingArea();
m_draw_mode.texture_window_changed = true;
m_drawing_area_changed = true;
UpdateDMARequest(); UpdateDMARequest();
} }
@ -206,37 +194,15 @@ bool GPU::DoState(StateWrapper& sw)
if (sw.IsReading()) if (sw.IsReading())
{ {
// Need to clear the mask bits since we want to pull it in from the copy.
const u32 old_GPUSTAT = m_GPUSTAT.bits;
m_GPUSTAT.check_mask_before_draw = false;
m_GPUSTAT.set_mask_while_drawing = false;
// Still need a temporary here.
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> temp;
sw.DoBytes(temp.data(), VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, temp.data());
// Restore mask setting.
m_GPUSTAT.bits = old_GPUSTAT;
UpdateCRTCConfig(); UpdateCRTCConfig();
UpdateDisplay(); UpdateDisplay();
UpdateCRTCTickEvent(); UpdateCRTCTickEvent();
UpdateCommandTickEvent(); UpdateCommandTickEvent();
} }
else
{
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
sw.DoBytes(m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
}
return !sw.HasError(); return !sw.HasError();
} }
void GPU::ResetGraphicsAPIState() {}
void GPU::RestoreGraphicsAPIState() {}
void GPU::UpdateDMARequest() void GPU::UpdateDMARequest()
{ {
switch (m_blitter_state) switch (m_blitter_state)
@ -818,9 +784,9 @@ void GPU::UpdateCommandTickEvent()
bool GPU::ConvertScreenCoordinatesToBeamTicksAndLines(s32 window_x, s32 window_y, u32* out_tick, u32* out_line) const bool GPU::ConvertScreenCoordinatesToBeamTicksAndLines(s32 window_x, s32 window_y, u32* out_tick, u32* out_line) const
{ {
const auto [display_x, display_y] = m_host_display->ConvertWindowCoordinatesToDisplayCoordinates( HostDisplay* display = g_host_interface->GetDisplay();
window_x, window_y, m_host_display->GetWindowWidth(), m_host_display->GetWindowHeight(), const auto [display_x, display_y] = display->ConvertWindowCoordinatesToDisplayCoordinates(
m_host_display->GetDisplayTopMargin()); window_x, window_y, display->GetWindowWidth(), display->GetWindowHeight(), display->GetDisplayTopMargin());
Log_DebugPrintf("win %d,%d -> disp %d,%d (size %u,%u frac %f,%f)", window_x, window_y, display_x, display_y, Log_DebugPrintf("win %d,%d -> disp %d,%d (size %u,%u frac %f,%f)", window_x, window_y, display_x, display_y,
m_crtc_state.display_width, m_crtc_state.display_height, m_crtc_state.display_width, m_crtc_state.display_height,
static_cast<float>(display_x) / static_cast<float>(m_crtc_state.display_width), static_cast<float>(display_x) / static_cast<float>(m_crtc_state.display_width),
@ -850,7 +816,7 @@ u32 GPU::ReadGPUREAD()
// Read with correct wrap-around behavior. // Read with correct wrap-around behavior.
const u16 read_x = (m_vram_transfer.x + m_vram_transfer.col) % VRAM_WIDTH; const u16 read_x = (m_vram_transfer.x + m_vram_transfer.col) % VRAM_WIDTH;
const u16 read_y = (m_vram_transfer.y + m_vram_transfer.row) % VRAM_HEIGHT; const u16 read_y = (m_vram_transfer.y + m_vram_transfer.row) % VRAM_HEIGHT;
value |= ZeroExtend32(m_vram_ptr[read_y * VRAM_WIDTH + read_x]) << (i * 16); value |= ZeroExtend32(g_gpu_backend->GetVRAM()[read_y * VRAM_WIDTH + read_x]) << (i * 16);
if (++m_vram_transfer.col == m_vram_transfer.width) if (++m_vram_transfer.col == m_vram_transfer.width)
{ {
@ -1064,7 +1030,7 @@ void GPU::HandleGetGPUInfoCommand(u32 value)
case 0x02: // Get Texture Window case 0x02: // Get Texture Window
{ {
Log_DebugPrintf("Get texture window"); Log_DebugPrintf("Get texture window");
m_GPUREAD_latch = m_draw_mode.texture_window_value; m_GPUREAD_latch = m_texture_window.bits;
} }
break; break;
@ -1098,227 +1064,6 @@ void GPU::HandleGetGPUInfoCommand(u32 value)
} }
} }
// The three functions below have empty bodies here, so calling them on this class does nothing.
// NOTE(review): presumably these are hooks meant to be overridden by concrete renderers - confirm
// against the class declaration.

// Clears the displayed image. No-op in this implementation.
void GPU::ClearDisplay() {}
// Refreshes the displayed image. No-op in this implementation.
void GPU::UpdateDisplay() {}
// Reads back the given VRAM rectangle. No-op in this implementation; all parameters are unused.
void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height) {}
/// Fills a width x height rectangle of VRAM starting at (x, y) with a single colour.
/// The 32-bit colour is converted to the 16-bit VRAM format once, up front. Rows always wrap
/// vertically; columns wrap horizontally whenever the rectangle crosses the right edge.
/// When interlaced rendering is enabled, rows belonging to the currently active field are skipped.
void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
{
  const u16 fill_value = RGBA8888ToRGBA5551(color);

  // Writes one VRAM row pixel-by-pixel, wrapping each column index around the VRAM width.
  const auto fill_row_wrapped = [this, x, width, fill_value](u32 line) {
    u16* const line_base = &m_vram_ptr[line * VRAM_WIDTH];
    for (u32 i = 0; i < width; i++)
      line_base[(x + i) % VRAM_WIDTH] = fill_value;
  };

  if (IsInterlacedRenderingEnabled())
  {
    // Hardware tests show that fills seem to break on the first two lines when the offset matches
    // the displayed field, so bring the CRTC up to date before sampling the active field.
    if (IsCRTCScanlinePending())
      SynchronizeCRTC();

    const u32 displayed_field = GetActiveLineLSB();
    for (u32 i = 0; i < height; i++)
    {
      const u32 line = (y + i) % VRAM_HEIGHT;
      if ((line & u32(1)) != displayed_field)
        fill_row_wrapped(line);
    }
    return;
  }

  if ((x + width) <= VRAM_WIDTH)
  {
    // The rectangle does not cross the right edge, so each row is one contiguous run.
    for (u32 i = 0; i < height; i++)
    {
      const u32 line = (y + i) % VRAM_HEIGHT;
      std::fill_n(&m_vram_ptr[line * VRAM_WIDTH + x], width, fill_value);
    }
    return;
  }

  // Non-interlaced fill that crosses the right edge: wrap every column.
  for (u32 i = 0; i < height; i++)
    fill_row_wrapped((y + i) % VRAM_HEIGHT);
}
/// Writes a width x height block of 16-bit pixels from `data` into VRAM at (x, y).
/// `data` is read as tightly packed rows of `width` pixels.
/// A contiguous row-by-row copy is used when the rectangle lies entirely inside VRAM and masking
/// is disabled. Otherwise each pixel is written individually with horizontal/vertical wrap-around
/// and the mask test from m_GPUSTAT applied.
void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data)
{
  const u16* src_ptr = static_cast<const u16*>(data);

  // Fast path when the copy is not oversized.
  if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !m_GPUSTAT.IsMaskingEnabled())
  {
    u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x];
    for (u32 yoffs = 0; yoffs < height; yoffs++)
    {
      std::copy_n(src_ptr, width, dst_ptr);
      src_ptr += width;
      dst_ptr += VRAM_WIDTH;
    }
    return;
  }

  // Slow path when we need to handle wrap-around and/or masking.
  const u16 mask_and = m_GPUSTAT.GetMaskAND();
  const u16 mask_or = m_GPUSTAT.GetMaskOR();

  for (u32 row = 0; row < height; row++)
  {
    u16* dst_row_ptr = &m_vram_ptr[((y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
    for (u32 col = 0; col < width; col++)
    {
      // TODO: Handle unaligned reads...
      // Consume one source pixel per destination pixel even when the destination is
      // mask-protected; otherwise every pixel after a protected one would be fed the wrong
      // source value.
      const u16 src_pixel = *(src_ptr++);
      u16* pixel_ptr = &dst_row_ptr[(x + col) % VRAM_WIDTH];
      if (((*pixel_ptr) & mask_and) == 0)
        *pixel_ptr = src_pixel | mask_or;
    }
  }
}
// Copies a width x height rectangle of 16-bit pixels inside VRAM (m_vram_ptr) from
// (src_x, src_y) to (dst_x, dst_y). A destination pixel is only overwritten when
// (dst & mask_and) == 0, and mask_or is OR'd into every pixel that is written; both values
// come from m_GPUSTAT.
void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height)
{
// Break up oversized copies. This behavior has not been verified on console.
if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH)
{
u32 remaining_rows = height;
u32 current_src_y = src_y;
u32 current_dst_y = dst_y;
while (remaining_rows > 0)
{
// Limit this band so neither the source nor the destination crosses the bottom edge.
const u32 rows_to_copy =
std::min<u32>(remaining_rows, std::min<u32>(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y));
u32 remaining_columns = width;
u32 current_src_x = src_x;
u32 current_dst_x = dst_x;
while (remaining_columns > 0)
{
// Limit this piece so neither rectangle crosses the right edge; the recursive call therefore
// fails the oversize test above and goes straight to the per-pixel copy below.
const u32 columns_to_copy =
std::min<u32>(remaining_columns, std::min<u32>(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x));
CopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy);
current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH;
current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH;
remaining_columns -= columns_to_copy;
}
current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT;
current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT;
remaining_rows -= rows_to_copy;
}
return;
}
// This doesn't have a fast path, but do we really need one? It's not common.
const u16 mask_and = m_GPUSTAT.GetMaskAND();
const u16 mask_or = m_GPUSTAT.GetMaskOR();
// Copy in reverse when src_x < dst_x, this is verified on console.
if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH))
{
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
// Walk columns right-to-left; the index is signed so the loop can terminate below zero.
for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
{
const u16 src_pixel = src_row_ptr[(src_x + static_cast<u32>(col)) % VRAM_WIDTH];
u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast<u32>(col)) % VRAM_WIDTH];
if ((*dst_pixel_ptr & mask_and) == 0)
*dst_pixel_ptr = src_pixel | mask_or;
}
}
}
else
{
// Forward (left-to-right) copy.
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
for (u32 col = 0; col < width; col++)
{
const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH];
u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH];
if ((*dst_pixel_ptr & mask_and) == 0)
*dst_pixel_ptr = src_pixel | mask_or;
}
}
}
}
// Both functions have empty bodies here, so calling them on this class does nothing.
// NOTE(review): presumably hooks for concrete renderers to override - confirm against the
// class declaration.

// Submits the current render command for drawing. No-op in this implementation.
void GPU::DispatchRenderCommand() {}
// Flushes any batched drawing. No-op in this implementation.
void GPU::FlushRender() {}
/// Applies a new draw-mode register value.
/// The value is masked to the valid register bits, and texture_disable is forced off unless
/// m_set_texture_disable_mask is set. If the resulting register differs from the current one,
/// the cached texture page base is refreshed when the page bits changed, FlushRender() is called
/// when draw_to_displayed_field changes, and the draw-mode bits mirrored in GPUSTAT are updated.
void GPU::SetDrawMode(u16 value)
{
  DrawMode::Reg requested{static_cast<u16>(value & DrawMode::Reg::MASK)};
  if (!m_set_texture_disable_mask)
    requested.texture_disable = false;

  // Nothing to do when the register is unchanged.
  if (requested.bits == m_draw_mode.mode_reg.bits)
    return;

  // Only refresh the cached page base when one of the texture-page bits actually differs.
  const bool page_bits_differ = (requested.bits & DrawMode::Reg::TEXTURE_PAGE_MASK) !=
                                (m_draw_mode.mode_reg.bits & DrawMode::Reg::TEXTURE_PAGE_MASK);
  if (page_bits_differ)
  {
    m_draw_mode.texture_page_x = requested.GetTexturePageXBase();
    m_draw_mode.texture_page_y = requested.GetTexturePageYBase();
    m_draw_mode.texture_page_changed = true;
  }

  m_draw_mode.mode_reg.bits = requested.bits;

  // GPUSTAT still holds the previous value at this point, so this compares old against new.
  if (m_GPUSTAT.draw_to_displayed_field != requested.draw_to_displayed_field)
    FlushRender();

  // Bits 0..10 are returned in the GPU status register.
  const auto preserved_status = m_GPUSTAT.bits & ~(DrawMode::Reg::GPUSTAT_MASK);
  const auto mirrored_mode = ZeroExtend32(requested.bits) & DrawMode::Reg::GPUSTAT_MASK;
  m_GPUSTAT.bits = preserved_status | mirrored_mode;
  m_GPUSTAT.texture_disable = m_draw_mode.mode_reg.texture_disable;
}
/// Applies a new texture palette (CLUT) register value.
/// The value is masked with DrawMode::PALETTE_MASK. When it differs from the stored register,
/// the palette X coordinate (low six bits times 16) and Y coordinate (remaining bits) are
/// recomputed and the texture page is flagged as changed.
void GPU::SetTexturePalette(u16 value)
{
  const u16 palette_bits = static_cast<u16>(value & DrawMode::PALETTE_MASK);
  if (m_draw_mode.palette_reg != palette_bits)
  {
    m_draw_mode.texture_palette_x = ZeroExtend32(palette_bits & 0x3F) * 16;
    m_draw_mode.texture_palette_y = ZeroExtend32(palette_bits >> 6);
    m_draw_mode.palette_reg = palette_bits;
    m_draw_mode.texture_page_changed = true;
  }
}
/// Applies a new texture window register value.
/// The value is masked with DrawMode::TEXTURE_WINDOW_MASK. When it differs from the stored value,
/// FlushRender() is called first, then the four 5-bit fields (mask x/y, offset x/y) are unpacked
/// and the window is flagged as changed.
void GPU::SetTextureWindow(u32 value)
{
  const u32 window_bits = value & DrawMode::TEXTURE_WINDOW_MASK;
  if (m_draw_mode.texture_window_value != window_bits)
  {
    // Flush before any of the window fields are modified.
    FlushRender();

    // Each field is five bits wide, packed from bit 0 upwards.
    constexpr u32 field_mask = UINT32_C(0x1F);
    m_draw_mode.texture_window_mask_x = window_bits & field_mask;
    m_draw_mode.texture_window_mask_y = (window_bits >> 5) & field_mask;
    m_draw_mode.texture_window_offset_x = (window_bits >> 10) & field_mask;
    m_draw_mode.texture_window_offset_y = (window_bits >> 15) & field_mask;
    m_draw_mode.texture_window_value = window_bits;
    m_draw_mode.texture_window_changed = true;
  }
}
bool GPU::DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha) bool GPU::DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha)
{ {
auto fp = FileSystem::OpenManagedCFile(filename, "wb"); auto fp = FileSystem::OpenManagedCFile(filename, "wb");
@ -1417,7 +1162,7 @@ void GPU::DrawDebugStateWindow()
ImGui::Columns(1); ImGui::Columns(1);
} }
DrawRendererStats(is_idle_frame); g_gpu_backend->DrawRendererStats(is_idle_frame);
if (ImGui::CollapsingHeader("GPU", ImGuiTreeNodeFlags_DefaultOpen)) if (ImGui::CollapsingHeader("GPU", ImGuiTreeNodeFlags_DefaultOpen))
{ {
@ -1468,5 +1213,3 @@ void GPU::DrawDebugStateWindow()
ImGui::End(); ImGui::End();
#endif #endif
} }
// Draws renderer-specific statistics. The body is empty here, so nothing is drawn and
// is_idle_frame is unused.
// NOTE(review): presumably a hook for concrete renderers to override - confirm against the
// class declaration.
void GPU::DrawRendererStats(bool is_idle_frame) {}

View File

@ -2,6 +2,7 @@
#include "common/bitfield.h" #include "common/bitfield.h"
#include "common/fifo_queue.h" #include "common/fifo_queue.h"
#include "common/rectangle.h" #include "common/rectangle.h"
#include "gpu_types.h"
#include "timers.h" #include "timers.h"
#include "types.h" #include "types.h"
#include <algorithm> #include <algorithm>
@ -13,12 +14,12 @@
class StateWrapper; class StateWrapper;
class HostDisplay;
class TimingEvent; class TimingEvent;
class Timers; class Timers;
class GPU class GPUBackend;
class GPU final
{ {
public: public:
enum class BlitterState : u8 enum class BlitterState : u8
@ -37,66 +38,12 @@ public:
GPUREADtoCPU = 3 GPUREADtoCPU = 3
}; };
// Primitive type of a render command; RenderCommand::primitive decodes it from the command word
// starting at bit 29.
enum class Primitive : u8
{
Reserved = 0,
Polygon = 1,
Line = 2,
Rectangle = 3
};
// Size selector for rectangle commands; RenderCommand::rectangle_size decodes it from
// bits 27-28 of the command word.
enum class DrawRectangleSize : u8
{
Variable = 0,
R1x1 = 1,
R8x8 = 2,
R16x16 = 3
};
// Texture colour mode. Values 0-3 are register values; the remaining entries are additional
// flags/combinations that are not register values.
enum class TextureMode : u8
{
Palette4Bit = 0,
Palette8Bit = 1,
Direct16Bit = 2,
Reserved_Direct16Bit = 3,
// Not register values.
// RawTextureBit is OR'd onto a register mode to form the Raw* variants below.
RawTextureBit = 4,
RawPalette4Bit = RawTextureBit | Palette4Bit,
RawPalette8Bit = RawTextureBit | Palette8Bit,
RawDirect16Bit = RawTextureBit | Direct16Bit,
Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit,
Disabled = 8 // Not a register value
};
// Blend equation used for semi-transparent drawing. Values 0-3 are register values.
enum class TransparencyMode : u8
{
HalfBackgroundPlusHalfForeground = 0,
BackgroundPlusForeground = 1,
BackgroundMinusForeground = 2,
BackgroundPlusQuarterForeground = 3,
Disabled = 4 // Not a register value
};
enum : u32 enum : u32
{ {
VRAM_WIDTH = 1024,
VRAM_HEIGHT = 512,
VRAM_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16),
VRAM_WIDTH_MASK = VRAM_WIDTH - 1,
VRAM_HEIGHT_MASK = VRAM_HEIGHT - 1,
VRAM_COORD_MASK = 0x3FF,
MAX_FIFO_SIZE = 4096, MAX_FIFO_SIZE = 4096,
TEXTURE_PAGE_WIDTH = 256,
TEXTURE_PAGE_HEIGHT = 256,
MAX_PRIMITIVE_WIDTH = 1024,
MAX_PRIMITIVE_HEIGHT = 512,
DOT_TIMER_INDEX = 0, DOT_TIMER_INDEX = 0,
HBLANK_TIMER_INDEX = 1, HBLANK_TIMER_INDEX = 1,
MAX_RESOLUTION_SCALE = 16, MAX_RESOLUTION_SCALE = 16
DITHER_MATRIX_SIZE = 4
}; };
enum : u16 enum : u16
@ -117,17 +64,12 @@ public:
// Base class constructor. // Base class constructor.
GPU(); GPU();
virtual ~GPU(); ~GPU();
virtual bool IsHardwareRenderer() const = 0; void Initialize();
void Shutdown();
virtual bool Initialize(HostDisplay* host_display); void Reset();
virtual void Reset(); bool DoState(StateWrapper& sw);
virtual bool DoState(StateWrapper& sw);
// Graphics API state reset/restore - call when drawing the UI etc.
virtual void ResetGraphicsAPIState();
virtual void RestoreGraphicsAPIState();
// Render statistics debug window. // Render statistics debug window.
void DrawDebugStateWindow(); void DrawDebugStateWindow();
@ -164,6 +106,20 @@ public:
return (!m_force_progressive_scan) & m_GPUSTAT.SkipDrawingToActiveField(); return (!m_force_progressive_scan) & m_GPUSTAT.SkipDrawingToActiveField();
} }
/// Selects the interlaced mode used for scan-out/display.
/// Yields None when interlaced display is disabled; otherwise InterleavedFields when the
/// GPUSTAT vertical_resolution bit is set, and SeparateFields when it is clear.
ALWAYS_INLINE GPUInterlacedDisplayMode GetInterlacedDisplayMode() const
{
  if (!IsInterlacedDisplayEnabled())
    return GPUInterlacedDisplayMode::None;

  if (m_GPUSTAT.vertical_resolution)
    return GPUInterlacedDisplayMode::InterleavedFields;

  return GPUInterlacedDisplayMode::SeparateFields;
}
/// Returns the number of pending GPU ticks. /// Returns the number of pending GPU ticks.
TickCount GetPendingCRTCTicks() const; TickCount GetPendingCRTCTicks() const;
TickCount GetPendingCommandTicks() const; TickCount GetPendingCommandTicks() const;
@ -178,25 +134,9 @@ public:
void SynchronizeCRTC(); void SynchronizeCRTC();
/// Recompile shaders/recreate framebuffers when needed. /// Recompile shaders/recreate framebuffers when needed.
virtual void UpdateSettings(); void UpdateSettings();
/// Updates the resolution scale when it's set to automatic.
virtual void UpdateResolutionScale();
/// Returns the effective display resolution of the GPU.
virtual std::tuple<u32, u32> GetEffectiveDisplayResolution();
// gpu_hw_d3d11.cpp
static std::unique_ptr<GPU> CreateHardwareD3D11Renderer();
// gpu_hw_opengl.cpp
static std::unique_ptr<GPU> CreateHardwareOpenGLRenderer();
// gpu_hw_vulkan.cpp
static std::unique_ptr<GPU> CreateHardwareVulkanRenderer();
// gpu_sw.cpp
static std::unique_ptr<GPU> CreateSoftwareRenderer();
// Converts window coordinates into horizontal ticks and scanlines. Returns false if out of range. Used for lightguns. // Converts window coordinates into horizontal ticks and scanlines. Returns false if out of range. Used for lightguns.
bool ConvertScreenCoordinatesToBeamTicksAndLines(s32 window_x, s32 window_y, u32* out_tick, u32* out_line) const; bool ConvertScreenCoordinatesToBeamTicksAndLines(s32 window_x, s32 window_y, u32* out_tick, u32* out_line) const;
@ -204,7 +144,7 @@ public:
// Returns the video clock frequency. // Returns the video clock frequency.
TickCount GetCRTCFrequency() const; TickCount GetCRTCFrequency() const;
protected: private:
TickCount CRTCTicksToSystemTicks(TickCount crtc_ticks, TickCount fractional_ticks) const; TickCount CRTCTicksToSystemTicks(TickCount crtc_ticks, TickCount fractional_ticks) const;
TickCount SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const; TickCount SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const;
@ -215,161 +155,9 @@ protected:
} }
ALWAYS_INLINE static constexpr TickCount SystemTicksToGPUTicks(TickCount sysclk_ticks) { return sysclk_ticks << 1; } ALWAYS_INLINE static constexpr TickCount SystemTicksToGPUTicks(TickCount sysclk_ticks) { return sysclk_ticks << 1; }
// Helper/format conversion functions.
/// Widens a 5-bit channel value to 8 bits: the value is shifted into the upper five bits and
/// its own three low bits are copied into the vacated low bits.
static constexpr u8 Convert5To8(u8 x5)
{
  const auto upper_bits = x5 << 3;
  const auto lower_bits = x5 & 7;
  return upper_bits | lower_bits;
}
/// Narrows an 8-bit channel value to 5 bits by discarding the three low bits.
static constexpr u8 Convert8To5(u8 x8)
{
  const auto top_five = x8 >> 3;
  return top_five;
}
/// Expands a packed 16-bit 5:5:5:1 pixel (red in the low bits, flag bit on top) to 32-bit
/// 8:8:8:8 with red in the low byte and alpha in the high byte.
static constexpr u32 RGBA5551ToRGBA8888(u16 color)
{
  // Pull the three 5-bit channels and the single flag bit out of the packed pixel.
  const u8 r5 = Truncate8(color & 31);
  const u8 g5 = Truncate8((color >> 5) & 31);
  const u8 b5 = Truncate8((color >> 10) & 31);
  const u8 a1 = Truncate8((color >> 15) & 1);

  // Widen each channel: shift into the top five bits and fill the low three bits with the
  // channel's own low three bits.
  const u8 r8 = static_cast<u8>((r5 << 3) | (r5 & 0b111));
  const u8 g8 = static_cast<u8>((g5 << 3) | (g5 & 0b111));
  const u8 b8 = static_cast<u8>((b5 << 3) | (b5 & 0b111));

  // The flag bit becomes fully opaque or fully transparent alpha.
  const u8 a8 = a1 ? 255 : 0;

  return ZeroExtend32(r8) | (ZeroExtend32(g8) << 8) | (ZeroExtend32(b8) << 16) | (ZeroExtend32(a8) << 24);
}
/// Packs a 32-bit 8:8:8:8 colour (red in the low byte) into 16-bit 5:5:5:1 by keeping the top
/// five bits of each colour channel and the top bit of alpha.
static constexpr u16 RGBA8888ToRGBA5551(u32 color)
{
  const u16 red5 = Truncate16((color >> 3) & 0x1Fu);
  const u16 green5 = Truncate16((color >> 11) & 0x1Fu);
  const u16 blue5 = Truncate16((color >> 19) & 0x1Fu);
  const u16 alpha1 = Truncate16((color >> 31) & 0x01u);

  const auto packed = red5 | (green5 << 5) | (blue5 << 10) | (alpha1 << 15);
  return packed;
}
/// Splits a packed 16-bit texture coordinate into its two bytes: (low byte, high byte).
static constexpr std::tuple<u8, u8> UnpackTexcoord(u16 texcoord)
{
  const u8 low_byte = static_cast<u8>(texcoord);
  const u8 high_byte = static_cast<u8>(texcoord >> 8);
  return std::tuple<u8, u8>(low_byte, high_byte);
}
/// Splits a packed 24-bit colour into its three bytes, lowest byte first.
static constexpr std::tuple<u8, u8, u8> UnpackColorRGB24(u32 rgb24)
{
  const u8 byte0 = static_cast<u8>(rgb24);
  const u8 byte1 = static_cast<u8>(rgb24 >> 8);
  const u8 byte2 = static_cast<u8>(rgb24 >> 16);
  return std::tuple<u8, u8, u8>(byte0, byte1, byte2);
}
/// Packs three 8-bit channels into a 24-bit colour with r in bits 0-7, g in bits 8-15 and
/// b in bits 16-23.
static constexpr u32 PackColorRGB24(u8 r, u8 g, u8 b)
{
  const u32 red_part = ZeroExtend32(r);
  const u32 green_part = ZeroExtend32(g) << 8;
  const u32 blue_part = ZeroExtend32(b) << 16;
  return red_part | green_part | blue_part;
}
static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer,
bool remove_alpha); bool remove_alpha);
/// Bit-level view of the first word of a render (draw) command.
/// Several fields share bit 27 because its meaning depends on the primitive type.
union RenderCommand
{
  u32 bits;

  BitField<u32, u32, 0, 24> color_for_first_vertex;
  BitField<u32, bool, 24, 1> raw_texture_enable; // not valid for lines
  BitField<u32, bool, 25, 1> transparency_enable;
  BitField<u32, bool, 26, 1> texture_enable;
  BitField<u32, DrawRectangleSize, 27, 2> rectangle_size; // only for rectangles
  BitField<u32, bool, 27, 1> quad_polygon;                // only for polygons
  BitField<u32, bool, 27, 1> polyline;                    // only for lines
  BitField<u32, bool, 28, 1> shading_enable;              // 0 = flat, 1 = Gouraud
  // Only bits 29..31 remain in the 32-bit word, so the field is three bits wide. It was
  // previously declared with a width of 21, which overruns the storage.
  BitField<u32, Primitive, 29, 3> primitive;

  /// Returns true if texturing should be enabled. Depends on the primitive type:
  /// lines are never textured, everything else follows texture_enable.
  bool IsTexturingEnabled() const { return (primitive != Primitive::Line) ? texture_enable : false; }

  /// Returns true if dithering should be enabled. Depends on the primitive type:
  /// polygons dither when shaded or when textured without raw-texture mode, lines always
  /// dither, and rectangles (and any other value) never do.
  bool IsDitheringEnabled() const
  {
    switch (primitive)
    {
      case Primitive::Polygon:
        return shading_enable || (texture_enable && !raw_texture_enable);

      case Primitive::Line:
        return true;

      case Primitive::Rectangle:
      default:
        return false;
    }
  }
};
// Packed vertex position word. x is declared at bit 0 and y at bit 16, each 12 bits wide and read as s32.
union VertexPosition
{
u32 bits;
BitField<u32, s32, 0, 12> x;
BitField<u32, s32, 16, 12> y;
};
// Sprites/rectangles should have their coordinates truncated before drawing.
// NOTE(review): this comment used to say "12 bits", but the code calls SignExtendN<11, s32>, which presumably
// sign-extends from the low 11 bits - confirm which width is intended.
static constexpr s32 TruncateVertexPosition(s32 x) { return SignExtendN<11, s32>(x); }
// Plain vertex record: signed 16-bit x/y, a 32-bit color and a packed 16-bit texture coordinate.
struct NativeVertex
{
s16 x;
s16 y;
u32 color;
u16 texcoord; // packed pair of bytes - presumably the format UnpackTexcoord() splits; confirm
};
// One 16-bit pixel: r in bits 0-4, g in bits 5-9, b in bits 10-14 and a one-bit flag (c) in bit 15.
union VRAMPixel
{
u16 bits;
BitField<u16, u8, 0, 5> r;
BitField<u16, u8, 5, 5> g;
BitField<u16, u8, 10, 5> b;
BitField<u16, bool, 15, 1> c;
// Channel getters widened to 8 bits via Convert5To8().
u8 GetR8() const { return Convert5To8(r); }
u8 GetG8() const { return Convert5To8(g); }
u8 GetB8() const { return Convert5To8(b); }
// Stores the channels as given. The values are not range-checked, so anything above 0x1F spills into the
// neighbouring field; use ClampAndSet() when the inputs may exceed five bits.
void Set(u8 r_, u8 g_, u8 b_, bool c_ = false)
{
bits = (ZeroExtend16(r_)) | (ZeroExtend16(g_) << 5) | (ZeroExtend16(b_) << 10) | (static_cast<u16>(c_) << 15);
}
// Same as Set(), but each channel is first limited to 0x1F.
void ClampAndSet(u8 r_, u8 g_, u8 b_, bool c_ = false)
{
Set(std::min<u8>(r_, 0x1F), std::min<u8>(g_, 0x1F), std::min<u8>(b_, 0x1F), c_);
}
// Stores a packed 24-bit color (r in the low byte), keeping the upper five bits of each byte.
void SetRGB24(u32 rgb24, bool c_ = false)
{
bits = Truncate16(((rgb24 >> 3) & 0x1F) | (((rgb24 >> 11) & 0x1F) << 5) | (((rgb24 >> 19) & 0x1F) << 10)) |
(static_cast<u16>(c_) << 15);
}
// Stores three 8-bit channels, keeping the upper five bits of each.
void SetRGB24(u8 r8, u8 g8, u8 b8, bool c_ = false)
{
bits = (ZeroExtend16(r8 >> 3)) | (ZeroExtend16(g8 >> 3) << 5) | (ZeroExtend16(b8 >> 3) << 10) |
(static_cast<u16>(c_) << 15);
}
// Adds the DITHER_MATRIX entry selected by the low two bits of y and x to every channel, clamps each result to
// 0..255, then stores it with SetRGB24().
void SetRGB24Dithered(u32 x, u32 y, u8 r8, u8 g8, u8 b8, bool c_ = false)
{
const s32 offset = DITHER_MATRIX[y & 3][x & 3];
r8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(r8)) + offset, 0, 255));
g8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(g8)) + offset, 0, 255));
b8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(b8)) + offset, 0, 255));
SetRGB24(r8, g8, b8, c_);
}
// Expands the pixel to a packed 24-bit color (r in the low byte). Each 5-bit channel is shifted left by three
// and its own low three bits are copied into the vacated positions. The flag bit is not included.
u32 ToRGB24() const
{
const u32 r_ = ZeroExtend32(r.GetValue());
const u32 g_ = ZeroExtend32(g.GetValue());
const u32 b_ = ZeroExtend32(b.GetValue());
return ((r_ << 3) | (r_ & 7)) | (((g_ << 3) | (g_ & 7)) << 8) | (((b_ << 3) | (b_ & 7)) << 16);
}
};
void SoftReset(); void SoftReset();
// Sets dots per scanline // Sets dots per scanline
@ -390,21 +178,6 @@ protected:
void CRTCTickEvent(TickCount ticks); void CRTCTickEvent(TickCount ticks);
void CommandTickEvent(TickCount ticks); void CommandTickEvent(TickCount ticks);
/// Returns 0 if the currently-displayed field is on odd lines (1,3,5,...) or 1 if even (2,4,6,...).
/// The value is m_crtc_state.interlaced_field widened to 32 bits.
ALWAYS_INLINE u32 GetInterlacedDisplayField() const { return ZeroExtend32(m_crtc_state.interlaced_field); }
/// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1.
/// The value is m_crtc_state.active_line_lsb widened to 32 bits.
ALWAYS_INLINE u32 GetActiveLineLSB() const { return ZeroExtend32(m_crtc_state.active_line_lsb); }
/// Sets/decodes GP0(E1h) (set draw mode).
void SetDrawMode(u16 bits);
/// Sets/decodes polygon/rectangle texture palette value.
void SetTexturePalette(u16 bits);
/// Sets/decodes texture window bits.
void SetTextureWindow(u32 value);
u32 ReadGPUREAD(); u32 ReadGPUREAD();
void FinishVRAMWrite(); void FinishVRAMWrite();
@ -425,17 +198,6 @@ protected:
void ExecuteCommands(); void ExecuteCommands();
void HandleGetGPUInfoCommand(u32 value); void HandleGetGPUInfoCommand(u32 value);
// Rendering in the backend
virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height);
virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color);
virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data);
virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height);
virtual void DispatchRenderCommand();
virtual void FlushRender();
virtual void ClearDisplay();
virtual void UpdateDisplay();
virtual void DrawRendererStats(bool is_idle_frame);
// These are **very** approximate. // These are **very** approximate.
ALWAYS_INLINE void AddDrawTriangleTicks(u32 width, u32 height, bool shaded, bool textured, bool semitransparent) ALWAYS_INLINE void AddDrawTriangleTicks(u32 width, u32 height, bool shaded, bool textured, bool semitransparent)
{ {
@ -470,21 +232,16 @@ protected:
AddCommandTicks(std::max(width, height)); AddCommandTicks(std::max(width, height));
} }
HostDisplay* m_host_display = nullptr;
std::unique_ptr<TimingEvent> m_crtc_tick_event; std::unique_ptr<TimingEvent> m_crtc_tick_event;
std::unique_ptr<TimingEvent> m_command_tick_event; std::unique_ptr<TimingEvent> m_command_tick_event;
// Pointer to VRAM, used for reads/writes. In the hardware backends, this is the shadow buffer.
u16* m_vram_ptr = nullptr;
union GPUSTAT union GPUSTAT
{ {
u32 bits; u32 bits;
BitField<u32, u8, 0, 4> texture_page_x_base; BitField<u32, u8, 0, 4> texture_page_x_base;
BitField<u32, u8, 4, 1> texture_page_y_base; BitField<u32, u8, 4, 1> texture_page_y_base;
BitField<u32, TransparencyMode, 5, 2> semi_transparency_mode; BitField<u32, GPUTransparencyMode, 5, 2> semi_transparency_mode;
BitField<u32, TextureMode, 7, 2> texture_color_mode; BitField<u32, GPUTextureMode, 7, 2> texture_color_mode;
BitField<u32, bool, 9, 1> dither_enable; BitField<u32, bool, 9, 1> dither_enable;
BitField<u32, bool, 10, 1> draw_to_displayed_field; BitField<u32, bool, 10, 1> draw_to_displayed_field;
BitField<u32, bool, 11, 1> set_mask_while_drawing; BitField<u32, bool, 11, 1> set_mask_while_drawing;
@ -537,105 +294,18 @@ protected:
} }
} m_GPUSTAT = {}; } m_GPUSTAT = {};
// Draw-mode state: the raw register values as written, plus values decoded from them and two "changed" flags.
struct DrawMode
{
static constexpr u16 PALETTE_MASK = UINT16_C(0b0111111111111111);
static constexpr u32 TEXTURE_WINDOW_MASK = UINT32_C(0b11111111111111111111);
// bits in GP0(E1h) or texpage part of polygon
union Reg
{
static constexpr u16 MASK = 0b1111111111111;
static constexpr u16 TEXTURE_PAGE_MASK = UINT16_C(0b0000000000011111);
// Polygon texpage commands only affect bits 0-8, 11
static constexpr u16 POLYGON_TEXPAGE_MASK = 0b0000100111111111;
// Bits 0..10 (the eleven bits set in GPUSTAT_MASK) are returned in the GPU status register, latched at
// E1h/polygon draw time.
static constexpr u32 GPUSTAT_MASK = 0b11111111111;
u16 bits;
BitField<u16, u8, 0, 4> texture_page_x_base;
BitField<u16, u8, 4, 1> texture_page_y_base;
BitField<u16, TransparencyMode, 5, 2> transparency_mode;
BitField<u16, TextureMode, 7, 2> texture_mode;
BitField<u16, bool, 9, 1> dither_enable;
BitField<u16, bool, 10, 1> draw_to_displayed_field;
BitField<u16, bool, 11, 1> texture_disable;
BitField<u16, bool, 12, 1> texture_x_flip;
BitField<u16, bool, 13, 1> texture_y_flip;
// The page base fields count in units of 64 (x) and 256 (y); these return the scaled values.
u32 GetTexturePageXBase() const { return ZeroExtend32(texture_page_x_base.GetValue()) * 64; }
u32 GetTexturePageYBase() const { return ZeroExtend32(texture_page_y_base.GetValue()) * 256; }
};
// original values
Reg mode_reg;
u16 palette_reg; // from vertex
u32 texture_window_value;
// decoded values
u32 texture_page_x;
u32 texture_page_y;
u32 texture_palette_x;
u32 texture_palette_y;
u8 texture_window_mask_x; // in 8 pixel steps
u8 texture_window_mask_y; // in 8 pixel steps
u8 texture_window_offset_x; // in 8 pixel steps
u8 texture_window_offset_y; // in 8 pixel steps
bool texture_x_flip;
bool texture_y_flip;
bool texture_page_changed;
bool texture_window_changed;
/// Returns the texture/palette rendering mode.
TextureMode GetTextureMode() const { return mode_reg.texture_mode; }
/// Returns the semi-transparency mode when enabled.
TransparencyMode GetTransparencyMode() const { return mode_reg.transparency_mode; }
/// Returns true if the texture mode requires a palette.
/// This tests bit 8 of the register, the upper bit of the two-bit texture_mode field: clear means paletted.
bool IsUsingPalette() const { return (mode_reg.bits & (2 << 7)) == 0; }
/// Returns a rectangle comprising the texture page area.
/// The width comes from a table indexed by the texture mode value.
Common::Rectangle<u32> GetTexturePageRectangle() const
{
static constexpr std::array<u32, 4> texture_page_widths = {
{TEXTURE_PAGE_WIDTH / 4, TEXTURE_PAGE_WIDTH / 2, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_WIDTH}};
return Common::Rectangle<u32>::FromExtents(texture_page_x, texture_page_y,
texture_page_widths[static_cast<u8>(mode_reg.texture_mode.GetValue())],
TEXTURE_PAGE_HEIGHT);
}
/// Returns a rectangle comprising the texture palette area.
/// The palette is one row high; its width is 16 or 256 for mode values 0 and 1, and 0 for the other two.
Common::Rectangle<u32> GetTexturePaletteRectangle() const
{
static constexpr std::array<u32, 4> palette_widths = {{16, 256, 0, 0}};
return Common::Rectangle<u32>::FromExtents(texture_palette_x, texture_palette_y,
palette_widths[static_cast<u8>(mode_reg.texture_mode.GetValue())], 1);
}
// Accessors for the texture page "changed" flag.
bool IsTexturePageChanged() const { return texture_page_changed; }
void SetTexturePageChanged() { texture_page_changed = true; }
void ClearTexturePageChangedFlag() { texture_page_changed = false; }
// Accessors for the texture window "changed" flag.
bool IsTextureWindowChanged() const { return texture_window_changed; }
void SetTextureWindowChanged() { texture_window_changed = true; }
void ClearTextureWindowChangedFlag() { texture_window_changed = false; }
} m_draw_mode = {};
Common::Rectangle<u32> m_drawing_area{0, 0, VRAM_WIDTH, VRAM_HEIGHT};
struct DrawingOffset struct DrawingOffset
{ {
s32 x; s32 x;
s32 y; s32 y;
} m_drawing_offset = {}; } m_drawing_offset = {};
Common::Rectangle<u32> m_drawing_area{0, 0, VRAM_WIDTH, VRAM_HEIGHT};
GPUDrawModeReg m_draw_mode{};
GPUTextureWindowReg m_texture_window{};
bool m_console_is_pal = false; bool m_console_is_pal = false;
bool m_set_texture_disable_mask = false; bool m_set_texture_disable_mask = false;
bool m_drawing_area_changed = false;
bool m_force_progressive_scan = false; bool m_force_progressive_scan = false;
bool m_force_ntsc_timings = false; bool m_force_ntsc_timings = false;
@ -733,7 +403,7 @@ protected:
HeapFIFOQueue<u64, MAX_FIFO_SIZE> m_fifo; HeapFIFOQueue<u64, MAX_FIFO_SIZE> m_fifo;
std::vector<u32> m_blit_buffer; std::vector<u32> m_blit_buffer;
u32 m_blit_remaining_words; u32 m_blit_remaining_words;
RenderCommand m_render_command{}; GPURenderCommand m_render_command{};
ALWAYS_INLINE u32 FifoPop() { return Truncate32(m_fifo.Pop()); } ALWAYS_INLINE u32 FifoPop() { return Truncate32(m_fifo.Pop()); }
ALWAYS_INLINE u32 FifoPeek() { return Truncate32(m_fifo.Peek()); } ALWAYS_INLINE u32 FifoPeek() { return Truncate32(m_fifo.Peek()); }
@ -754,11 +424,17 @@ protected:
Stats m_stats = {}; Stats m_stats = {};
Stats m_last_stats = {}; Stats m_last_stats = {};
private:
using GP0CommandHandler = bool (GPU::*)(); using GP0CommandHandler = bool (GPU::*)();
using GP0CommandHandlerTable = std::array<GP0CommandHandler, 256>; using GP0CommandHandlerTable = std::array<GP0CommandHandler, 256>;
static GP0CommandHandlerTable GenerateGP0CommandHandlerTable(); static GP0CommandHandlerTable GenerateGP0CommandHandlerTable();
void ClearDisplay();
void UpdateDisplay();
void FillBackendCommandParameters(GPUBackendCommand* cmd) const;
void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const;
void UpdateDrawingArea();
void FlushRender();
// Rendering commands, returns false if not enough data is provided // Rendering commands, returns false if not enough data is provided
bool HandleUnknownGP0Command(); bool HandleUnknownGP0Command();
bool HandleNOPCommand(); bool HandleNOPCommand();
@ -774,6 +450,7 @@ private:
bool HandleRenderRectangleCommand(); bool HandleRenderRectangleCommand();
bool HandleRenderLineCommand(); bool HandleRenderLineCommand();
bool HandleRenderPolyLineCommand(); bool HandleRenderPolyLineCommand();
void FinishPolyLineRenderCommand();
bool HandleFillRectangleCommand(); bool HandleFillRectangleCommand();
bool HandleCopyRectangleCPUToVRAMCommand(); bool HandleCopyRectangleCPUToVRAMCommand();
bool HandleCopyRectangleVRAMToCPUCommand(); bool HandleCopyRectangleVRAMToCPUCommand();
@ -782,6 +459,4 @@ private:
static const GP0CommandHandlerTable s_GP0_command_handler_table; static const GP0CommandHandlerTable s_GP0_command_handler_table;
}; };
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPU::TextureMode); extern GPU g_gpu;
extern std::unique_ptr<GPU> g_gpu;

679
src/core/gpu_backend.cpp Normal file
View File

@ -0,0 +1,679 @@
#include "gpu_backend.h"
#include "common/log.h"
#include "common/state_wrapper.h"
#include "settings.h"
#include "gpu_hw_opengl.h"
#include "gpu_hw_vulkan.h"
#include "gpu_sw.h"
#ifdef WIN32
#include "gpu_hw_d3d11.h"
#endif
Log_SetChannel(GPUBackend);
std::unique_ptr<GPUBackend> g_gpu_backend;
// Construction and destruction need no custom logic, so both use the compiler-generated definitions.
GPUBackend::GPUBackend() = default;
GPUBackend::~GPUBackend() = default;
// Instantiates the backend class matching the requested renderer. The object is only constructed here;
// Initialize() is left to the caller.
static std::unique_ptr<GPUBackend> CreateBackend(GPURenderer backend)
{
switch (backend)
{
// The D3D11 backend is only compiled in when WIN32 is defined. Without it, a D3D11 request falls through to
// the default (software) case below.
#ifdef WIN32
case GPURenderer::HardwareD3D11:
return std::make_unique<GPU_HW_D3D11>();
#endif
case GPURenderer::HardwareOpenGL:
return std::make_unique<GPU_HW_OpenGL>();
case GPURenderer::HardwareVulkan:
return std::make_unique<GPU_HW_Vulkan>();
// Any value without its own case gets the software renderer.
case GPURenderer::Software:
default:
return std::make_unique<GPU_SW>();
}
}
/// Creates the global backend (g_gpu_backend) for the requested renderer and initializes it.
/// If that fails, the software renderer is tried once as a fallback.
/// Returns false, leaving g_gpu_backend empty, only when the fallback fails to initialize as well.
bool GPUBackend::Create(GPURenderer backend)
{
  g_gpu_backend = CreateBackend(backend);
  if (g_gpu_backend && g_gpu_backend->Initialize())
    return true;

  Log_ErrorPrintf("Failed to initialize GPU backend, falling back to software");

  // Destroy the failed backend before constructing its replacement.
  g_gpu_backend.reset();
  g_gpu_backend = CreateBackend(GPURenderer::Software);
  if (g_gpu_backend->Initialize())
    return true;

  g_gpu_backend.reset();
  return false;
}
// Base implementation of the virtual initialization hook: nothing to set up, always reports success.
bool GPUBackend::Initialize()
{
return true;
}
// Clears the drawing area and returns every cached display parameter to its default: zero sizes and origins,
// an aspect ratio of 1.0, no interlacing, display disabled, 24-bit mode off.
void GPUBackend::Reset()
{
m_drawing_area = {};
m_display_aspect_ratio = 1.0f;
m_display_width = 0;
m_display_height = 0;
m_display_origin_left = 0;
m_display_origin_top = 0;
m_display_vram_left = 0;
m_display_vram_top = 0;
m_display_vram_width = 0;
m_display_vram_height = 0;
m_display_vram_start_x = 0;
m_display_vram_start_y = 0;
m_display_interlace = GPUInterlacedDisplayMode::None;
m_display_interlace_field = 0;
m_display_enabled = false;
m_display_24bit = false;
}
// Base implementation of the virtual settings hook: does nothing.
void GPUBackend::UpdateSettings() {}
// Base implementation of the virtual graphics-API state reset hook: does nothing.
void GPUBackend::ResetGraphicsAPIState() {}
// Base implementation of the virtual graphics-API state restore hook: does nothing.
void GPUBackend::RestoreGraphicsAPIState() {}
// Base implementation reports "not a hardware renderer"; the function is virtual so a backend can override it.
bool GPUBackend::IsHardwareRenderer() const
{
return false;
}
// Base implementation of the virtual resolution-scale hook: does nothing.
void GPUBackend::UpdateResolutionScale() {}
std::tuple<u32, u32> GPUBackend::GetEffectiveDisplayResolution()
{
return std::tie(m_display_vram_width, m_display_vram_height);
}
// Base implementation of the virtual stats hook: draws nothing and ignores is_idle_frame.
void GPUBackend::DrawRendererStats(bool is_idle_frame) {}
// Saves or loads the backend state through the state wrapper: the full VRAM image first, then the drawing area
// and the cached display parameters. Returns false if the wrapper reports an error.
// The order of the sw.Do*() calls defines the serialized layout, so it must not be rearranged.
bool GPUBackend::DoState(StateWrapper& sw)
{
if (sw.IsReading())
{
// Still need a temporary here.
// The image is read into the temporary and then handed to UpdateVRAM() with default-constructed parameters.
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> temp;
sw.DoBytes(temp.data(), VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, temp.data(), {});
}
else
{
// Saving: flush pending rendering and call ReadVRAM() for the whole area before writing out m_vram_ptr.
FlushRender();
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
sw.DoBytes(m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16));
}
sw.Do(&m_drawing_area.left);
sw.Do(&m_drawing_area.top);
sw.Do(&m_drawing_area.right);
sw.Do(&m_drawing_area.bottom);
sw.Do(&m_display_aspect_ratio);
sw.Do(&m_display_width);
sw.Do(&m_display_height);
sw.Do(&m_display_origin_left);
sw.Do(&m_display_origin_top);
sw.Do(&m_display_vram_left);
sw.Do(&m_display_vram_top);
sw.Do(&m_display_vram_width);
sw.Do(&m_display_vram_height);
sw.Do(&m_display_vram_start_x);
sw.Do(&m_display_vram_start_y);
sw.Do(&m_display_interlace);
sw.Do(&m_display_interlace_field);
sw.Do(&m_display_enabled);
sw.Do(&m_display_24bit);
return !sw.HasError();
}
/// Reserves space for a reset command and fills in its type and size. The caller submits it with PushCommand().
GPUBackendResetCommand* GPUBackend::NewResetCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendResetCommand));
  auto* const reset_cmd = static_cast<GPUBackendResetCommand*>(storage);
  reset_cmd->type = GPUBackendCommandType::Reset;
  reset_cmd->size = reset_cmd->Size();
  return reset_cmd;
}
/// Reserves space for an update-settings command and fills in its type and size.
GPUBackendUpdateSettingsCommand* GPUBackend::NewUpdateSettingsCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendUpdateSettingsCommand));
  auto* const settings_cmd = static_cast<GPUBackendUpdateSettingsCommand*>(storage);
  settings_cmd->type = GPUBackendCommandType::UpdateSettings;
  settings_cmd->size = settings_cmd->Size();
  return settings_cmd;
}
/// Reserves space for an update-resolution-scale command and fills in its type and size.
GPUBackendUpdateResolutionScaleCommand* GPUBackend::NewUpdateResolutionScaleCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendUpdateResolutionScaleCommand));
  auto* const scale_cmd = static_cast<GPUBackendUpdateResolutionScaleCommand*>(storage);
  scale_cmd->type = GPUBackendCommandType::UpdateResolutionScale;
  scale_cmd->size = scale_cmd->Size();
  return scale_cmd;
}
/// Reserves space for a VRAM read command and fills in its type and size. The caller sets the rectangle.
GPUBackendReadVRAMCommand* GPUBackend::NewReadVRAMCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendReadVRAMCommand));
  auto* const read_cmd = static_cast<GPUBackendReadVRAMCommand*>(storage);
  read_cmd->type = GPUBackendCommandType::ReadVRAM;
  read_cmd->size = read_cmd->Size();
  return read_cmd;
}
/// Reserves space for a VRAM fill command and fills in its type and size. The caller sets the remaining fields.
GPUBackendFillVRAMCommand* GPUBackend::NewFillVRAMCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendFillVRAMCommand));
  auto* const fill_cmd = static_cast<GPUBackendFillVRAMCommand*>(storage);
  fill_cmd->type = GPUBackendCommandType::FillVRAM;
  fill_cmd->size = fill_cmd->Size();
  return fill_cmd;
}
/// Reserves space for a VRAM update command followed by num_words 16-bit values, and fills in the command's
/// type and total size. The caller copies the pixel data and sets the rectangle.
GPUBackendUpdateVRAMCommand* GPUBackend::NewUpdateVRAMCommand(u32 num_words)
{
  const u32 payload_bytes = num_words * sizeof(u16);
  const u32 total_bytes = sizeof(GPUBackendUpdateVRAMCommand) + payload_bytes;

  auto* const update_cmd = static_cast<GPUBackendUpdateVRAMCommand*>(AllocateCommand(total_bytes));
  update_cmd->type = GPUBackendCommandType::UpdateVRAM;
  update_cmd->size = total_bytes;
  return update_cmd;
}
/// Reserves space for a VRAM copy command and fills in its type and size. The caller sets the remaining fields.
GPUBackendCopyVRAMCommand* GPUBackend::NewCopyVRAMCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendCopyVRAMCommand));
  auto* const copy_cmd = static_cast<GPUBackendCopyVRAMCommand*>(storage);
  copy_cmd->type = GPUBackendCommandType::CopyVRAM;
  copy_cmd->size = copy_cmd->Size();
  return copy_cmd;
}
/// Reserves space for a set-drawing-area command and fills in its type and size. The caller sets new_area.
GPUBackendSetDrawingAreaCommand* GPUBackend::NewSetDrawingAreaCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendSetDrawingAreaCommand));
  auto* const area_cmd = static_cast<GPUBackendSetDrawingAreaCommand*>(storage);
  area_cmd->type = GPUBackendCommandType::SetDrawingArea;
  area_cmd->size = area_cmd->Size();
  return area_cmd;
}
/// Reserves space for a polygon draw command followed by num_vertices vertex records, and fills in the command's
/// type, total size and vertex count. The caller fills in the vertices.
GPUBackendDrawPolygonCommand* GPUBackend::NewDrawPolygonCommand(u32 num_vertices)
{
  const u32 vertex_bytes = num_vertices * sizeof(GPUBackendDrawPolygonCommand::Vertex);
  const u32 total_bytes = sizeof(GPUBackendDrawPolygonCommand) + vertex_bytes;

  auto* const polygon_cmd = static_cast<GPUBackendDrawPolygonCommand*>(AllocateCommand(total_bytes));
  polygon_cmd->type = GPUBackendCommandType::DrawPolygon;
  polygon_cmd->size = total_bytes;
  polygon_cmd->num_vertices = Truncate16(num_vertices);
  return polygon_cmd;
}
/// Reserves space for a rectangle draw command and fills in its type and size.
GPUBackendDrawRectangleCommand* GPUBackend::NewDrawRectangleCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendDrawRectangleCommand));
  auto* const rect_cmd = static_cast<GPUBackendDrawRectangleCommand*>(storage);
  rect_cmd->type = GPUBackendCommandType::DrawRectangle;
  rect_cmd->size = rect_cmd->Size();
  return rect_cmd;
}
/// Reserves space for a line draw command followed by num_vertices vertex records, and fills in the command's
/// type, total size and vertex count. The caller fills in the vertices.
GPUBackendDrawLineCommand* GPUBackend::NewDrawLineCommand(u32 num_vertices)
{
  const u32 size = sizeof(GPUBackendDrawLineCommand) + (num_vertices * sizeof(GPUBackendDrawLineCommand::Vertex));
  GPUBackendDrawLineCommand* cmd = static_cast<GPUBackendDrawLineCommand*>(AllocateCommand(size));
  cmd->type = GPUBackendCommandType::DrawLine;

  // Record the size that was actually reserved, as NewDrawPolygonCommand() does. cmd->Size() must not be used
  // here: the command is raw queue memory and num_vertices has not been written yet.
  cmd->size = size;
  cmd->num_vertices = Truncate16(num_vertices);
  return cmd;
}
/// Reserves space for a clear-display command and fills in its type and size.
GPUBackendClearDisplayCommand* GPUBackend::NewClearDisplayCommand()
{
  // Reserve space for the command type being created; the original used the size of an unrelated command type.
  GPUBackendClearDisplayCommand* cmd =
    static_cast<GPUBackendClearDisplayCommand*>(AllocateCommand(sizeof(GPUBackendClearDisplayCommand)));
  cmd->type = GPUBackendCommandType::ClearDisplay;
  cmd->size = cmd->Size();
  return cmd;
}
/// Reserves space for an update-display command and fills in its type and size. The caller sets the display
/// fields.
GPUBackendUpdateDisplayCommand* GPUBackend::NewUpdateDisplayCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendUpdateDisplayCommand));
  auto* const display_cmd = static_cast<GPUBackendUpdateDisplayCommand*>(storage);
  display_cmd->type = GPUBackendCommandType::UpdateDisplay;
  display_cmd->size = display_cmd->Size();
  return display_cmd;
}
/// Reserves space for a flush-render command and fills in its type and size.
GPUBackendFlushRenderCommand* GPUBackend::NewFlushRenderCommand()
{
  void* const storage = AllocateCommand(sizeof(GPUBackendFlushRenderCommand));
  auto* const flush_cmd = static_cast<GPUBackendFlushRenderCommand*>(storage);
  flush_cmd->type = GPUBackendCommandType::FlushRender;
  flush_cmd->size = flush_cmd->Size();
  return flush_cmd;
}
/// Returns a pointer to `size` bytes at the current write position of the command queue.
/// The write position itself is not advanced here; PushCommand() does that in threaded mode.
/// Room for one sync command is always kept in reserve, because Sync() writes its command directly at the
/// write position. When the request does not fit in what is left, Sync() is called and the check is retried.
void* GPUBackend::AllocateCommand(u32 size)
{
  // A request that cannot fit even in an empty queue would make the loop below spin forever, so fail loudly.
  Assert((size + sizeof(GPUBackendSyncCommand)) <= COMMAND_QUEUE_SIZE);

  for (;;)
  {
    const u32 write_ptr = m_command_fifo_write_ptr.load();
    const u32 available_size = COMMAND_QUEUE_SIZE - write_ptr;
    if ((size + sizeof(GPUBackendSyncCommand)) > available_size)
    {
      Sync();
      continue;
    }

    return &m_command_fifo_data[write_ptr];
  }
}
// Returns the number of bytes between the queue's read and write positions, i.e. bytes written but not yet
// consumed.
u32 GPUBackend::GetPendingCommandSize() const
{
// The read position is sampled before the write position.
const u32 read_ptr = m_command_fifo_read_ptr.load();
const u32 write_ptr = m_command_fifo_write_ptr.load();
return (write_ptr - read_ptr);
}
/// Submits a command previously obtained from one of the New*Command() functions.
/// With g_settings.cpu_thread off, the command is executed immediately (sync commands are skipped).
/// With it on, the queue's write position is advanced past the command, and the GPU thread is woken for sync and
/// frame-done commands or once enough bytes are pending.
void GPUBackend::PushCommand(GPUBackendCommand* cmd)
{
  if (!g_settings.cpu_thread)
  {
    // single-thread mode
    if (cmd->type != GPUBackendCommandType::Sync)
      HandleCommand(cmd);

    return;
  }

  const u32 new_write_ptr = m_command_fifo_write_ptr.fetch_add(cmd->size) + cmd->size;
  DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE);

  const bool always_wakes =
    (cmd->type == GPUBackendCommandType::Sync || cmd->type == GPUBackendCommandType::FrameDone);
  if (always_wakes || (new_write_ptr - m_command_fifo_read_ptr.load()) >= THRESHOLD_TO_WAKE_GPU)
    WakeGPUThread();
}
/// Signals the wake condition variable, but only when the sleeping flag is set.
/// The flag is checked while holding m_sync_mutex.
void GPUBackend::WakeGPUThread()
{
  std::unique_lock<std::mutex> guard(m_sync_mutex);
  if (m_gpu_thread_sleeping.load())
    m_wake_gpu_thread_cv.notify_one();
}
// Pushes a sync command and waits on m_sync_event until it is signalled. Does nothing when
// g_settings.cpu_thread is off.
void GPUBackend::Sync()
{
if (!g_settings.cpu_thread)
return;
// since we do this on wrap-around, it can't go through the regular path
// The command is written straight at the write position; the assertion checks that it fits.
const u32 write_ptr = m_command_fifo_write_ptr.load();
Assert((COMMAND_QUEUE_SIZE - write_ptr) >= sizeof(GPUBackendSyncCommand));
GPUBackendSyncCommand* cmd = reinterpret_cast<GPUBackendSyncCommand*>(&m_command_fifo_data[write_ptr]);
cmd->type = GPUBackendCommandType::Sync;
cmd->size = cmd->Size();
PushCommand(cmd);
// ProcessGPUCommands() signals this event when it reaches the sync command. The event is then reset so the
// next Sync() waits again.
m_sync_event.Wait();
m_sync_event.Reset();
}
/// Queues a frame-done command. Does nothing when g_settings.cpu_thread is off.
void GPUBackend::CPUFrameDone()
{
  if (g_settings.cpu_thread)
  {
    void* const storage = AllocateCommand(sizeof(GPUBackendFrameDoneCommand));
    auto* const frame_done_cmd = reinterpret_cast<GPUBackendFrameDoneCommand*>(storage);
    frame_done_cmd->type = GPUBackendCommandType::FrameDone;
    frame_done_cmd->size = frame_done_cmd->Size();
    PushCommand(frame_done_cmd);
  }
}
// Drains the command queue. Commands are executed in order until the queue is empty, a sync command is
// reached, or a frame-done command is reached.
void GPUBackend::ProcessGPUCommands()
{
for (;;)
{
// Take a snapshot of the write position; anything written later is picked up by the next outer iteration.
const u32 write_ptr = m_command_fifo_write_ptr.load();
u32 read_ptr = m_command_fifo_read_ptr.load();
if (read_ptr == write_ptr)
return;
while (read_ptr < write_ptr)
{
const GPUBackendCommand* cmd = reinterpret_cast<const GPUBackendCommand*>(&m_command_fifo_data[read_ptr]);
// Step over the command using the size stored in its header.
read_ptr += cmd->size;
if (cmd->type == GPUBackendCommandType::Sync)
{
// A sync command must be the last thing in the queue. Both positions are rewound to the start of the
// buffer before the waiter in Sync() is released.
Assert(read_ptr == m_command_fifo_write_ptr.load());
m_command_fifo_read_ptr.store(0);
m_command_fifo_write_ptr.store(0);
m_sync_event.Signal();
return;
}
else if (cmd->type == GPUBackendCommandType::FrameDone)
{
// Publish progress and stop, so RunGPUFrame() can see m_frame_done.
m_frame_done = true;
m_command_fifo_read_ptr.store(read_ptr);
return;
}
else
{
HandleCommand(cmd);
}
}
// The read position is only published once the whole snapshot has been executed.
m_command_fifo_read_ptr.store(read_ptr);
}
}
void GPUBackend::RunGPUFrame()
{
m_frame_done = false;
for (;;)
{
g_gpu_backend->ProcessGPUCommands();
if (m_frame_done)
break;
std::unique_lock<std::mutex> lock(m_sync_mutex);
m_gpu_thread_sleeping.store(true);
m_wake_gpu_thread_cv.wait(lock);
m_gpu_thread_sleeping.store(false);
}
}
// Processes whatever is still queued, asserts that the queue is then empty, and rewinds both queue positions
// to the start of the buffer.
void GPUBackend::EndGPUFrame()
{
g_gpu_backend->ProcessGPUCommands();
Assert(m_command_fifo_read_ptr.load() == m_command_fifo_write_ptr.load());
m_command_fifo_read_ptr.store(0);
m_command_fifo_write_ptr.store(0);
}
// Base implementation of the virtual hook called after the drawing area changes: does nothing.
void GPUBackend::SetScissorFromDrawingArea() {}
void GPUBackend::HandleCommand(const GPUBackendCommand* cmd)
{
switch (cmd->type)
{
case GPUBackendCommandType::ReadVRAM:
{
FlushRender();
const GPUBackendReadVRAMCommand* ccmd = static_cast<const GPUBackendReadVRAMCommand*>(cmd);
ReadVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height));
}
break;
case GPUBackendCommandType::FillVRAM:
{
FlushRender();
const GPUBackendFillVRAMCommand* ccmd = static_cast<const GPUBackendFillVRAMCommand*>(cmd);
FillVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height),
ccmd->color, ccmd->params);
}
break;
case GPUBackendCommandType::UpdateVRAM:
{
FlushRender();
const GPUBackendUpdateVRAMCommand* ccmd = static_cast<const GPUBackendUpdateVRAMCommand*>(cmd);
UpdateVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height),
ccmd->data, ccmd->params);
}
break;
case GPUBackendCommandType::CopyVRAM:
{
FlushRender();
const GPUBackendCopyVRAMCommand* ccmd = static_cast<const GPUBackendCopyVRAMCommand*>(cmd);
CopyVRAM(ZeroExtend32(ccmd->src_x), ZeroExtend32(ccmd->src_y), ZeroExtend32(ccmd->dst_x),
ZeroExtend32(ccmd->dst_y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->params);
}
break;
case GPUBackendCommandType::SetDrawingArea:
{
FlushRender();
m_drawing_area = static_cast<const GPUBackendSetDrawingAreaCommand*>(cmd)->new_area;
SetScissorFromDrawingArea();
}
break;
case GPUBackendCommandType::DrawPolygon:
{
DrawPolygon(static_cast<const GPUBackendDrawPolygonCommand*>(cmd));
}
break;
case GPUBackendCommandType::DrawRectangle:
{
DrawRectangle(static_cast<const GPUBackendDrawRectangleCommand*>(cmd));
}
break;
case GPUBackendCommandType::DrawLine:
{
DrawLine(static_cast<const GPUBackendDrawLineCommand*>(cmd));
}
break;
case GPUBackendCommandType::ClearDisplay:
{
ClearDisplay();
}
break;
case GPUBackendCommandType::UpdateDisplay:
{
const GPUBackendUpdateDisplayCommand* ccmd = static_cast<const GPUBackendUpdateDisplayCommand*>(cmd);
m_display_aspect_ratio = ccmd->display_aspect_ratio;
m_display_width = ccmd->display_width;
m_display_height = ccmd->display_height;
m_display_origin_left = ccmd->display_origin_left;
m_display_origin_top = ccmd->display_origin_top;
m_display_vram_left = ccmd->display_vram_left;
m_display_vram_top = ccmd->display_vram_top;
m_display_vram_width = ccmd->display_vram_width;
m_display_vram_height = ccmd->display_vram_height;
m_display_vram_start_x = ccmd->display_vram_start_x;
m_display_vram_start_y = ccmd->display_vram_start_y;
m_display_interlace = ccmd->display_interlace;
m_display_interlace_field = ccmd->display_interlace_field;
m_display_enabled = ccmd->display_enabled;
m_display_24bit = ccmd->display_24bit;
UpdateDisplay();
}
break;
case GPUBackendCommandType::FlushRender:
{
FlushRender();
}
break;
default:
break;
}
}
/// Fills a rectangle of the VRAM buffer (m_vram_ptr) with one color, converted to 16 bits.
/// Rows always wrap around VRAM_HEIGHT and columns wrap around VRAM_WIDTH. With interlaced rendering enabled,
/// rows whose lowest bit equals params.active_line_lsb are left untouched.
void GPUBackend::SoftwareFillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params)
{
  const u16 fill_value = RGBA8888ToRGBA5551(color);

  // Writes one row pixel by pixel, wrapping the column index at the right edge.
  const auto fill_row_wrapped = [this, x, width, fill_value](u32 row) {
    u16* const row_start = &m_vram_ptr[row * VRAM_WIDTH];
    for (u32 i = 0; i < width; i++)
      row_start[(x + i) % VRAM_WIDTH] = fill_value;
  };

  if (params.interlaced_rendering)
  {
    // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field.
    const u32 skipped_lsb = params.active_line_lsb;
    for (u32 i = 0; i < height; i++)
    {
      const u32 row = (y + i) % VRAM_HEIGHT;
      if ((row & u32(1)) != skipped_lsb)
        fill_row_wrapped(row);
    }
  }
  else if ((x + width) <= VRAM_WIDTH)
  {
    // No horizontal wrap: each row is one contiguous run.
    for (u32 i = 0; i < height; i++)
    {
      const u32 row = (y + i) % VRAM_HEIGHT;
      std::fill_n(&m_vram_ptr[row * VRAM_WIDTH + x], width, fill_value);
    }
  }
  else
  {
    for (u32 i = 0; i < height; i++)
      fill_row_wrapped((y + i) % VRAM_HEIGHT);
  }
}
/// Copies a width x height image of 16-bit pixels from `data` (tightly packed, row by row) into the VRAM buffer
/// (m_vram_ptr) at (x, y).
/// When the rectangle fits inside VRAM and masking is disabled, whole rows are copied. Otherwise pixels are
/// written one at a time: coordinates wrap around the VRAM size, a destination pixel that has any bit of
/// params.GetMaskAND() set is left untouched, and params.GetMaskOR() is OR-ed into every written pixel.
void GPUBackend::SoftwareUpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data,
                                    GPUBackendCommandParameters params)
{
  // Fast path when the copy is not oversized.
  if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !params.IsMaskingEnabled())
  {
    const u16* src_ptr = static_cast<const u16*>(data);
    u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x];
    for (u32 yoffs = 0; yoffs < height; yoffs++)
    {
      std::copy_n(src_ptr, width, dst_ptr);
      src_ptr += width;
      dst_ptr += VRAM_WIDTH;
    }
  }
  else
  {
    // Slow path when we need to handle wrap-around.
    const u16* src_ptr = static_cast<const u16*>(data);
    const u16 mask_and = params.GetMaskAND();
    const u16 mask_or = params.GetMaskOR();

    for (u32 row = 0; row < height;)
    {
      u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
      for (u32 col = 0; col < width;)
      {
        // TODO: Handle unaligned reads...
        // Consume the source pixel even when the destination is protected. Otherwise every masked pixel would
        // shift the rest of the image by one source position.
        const u16 src_pixel = *(src_ptr++);
        u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH];
        if (((*pixel_ptr) & mask_and) == 0)
          *pixel_ptr = src_pixel | mask_or;
      }
    }
  }
}
// Copies a width x height rectangle inside the VRAM buffer (m_vram_ptr) from (src_x, src_y) to (dst_x, dst_y).
// A destination pixel that has any bit of params.GetMaskAND() set is left untouched, and params.GetMaskOR() is
// OR-ed into every written pixel.
void GPUBackend::SoftwareCopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params)
{
// Break up oversized copies. This behavior has not been verified on console.
// A copy whose source or destination crosses the right edge is split into pieces that stay inside VRAM, and
// each piece is copied by a recursive call.
if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH)
{
u32 remaining_rows = height;
u32 current_src_y = src_y;
u32 current_dst_y = dst_y;
while (remaining_rows > 0)
{
// Take as many rows as fit before either the source or the destination reaches the bottom edge.
const u32 rows_to_copy =
std::min<u32>(remaining_rows, std::min<u32>(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y));
u32 remaining_columns = width;
u32 current_src_x = src_x;
u32 current_dst_x = dst_x;
while (remaining_columns > 0)
{
// Likewise for columns and the right edge.
const u32 columns_to_copy =
std::min<u32>(remaining_columns, std::min<u32>(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x));
SoftwareCopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy,
params);
current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH;
current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH;
remaining_columns -= columns_to_copy;
}
current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT;
current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT;
remaining_rows -= rows_to_copy;
}
return;
}
// This doesn't have a fast path, but do we really need one? It's not common.
const u16 mask_and = params.GetMaskAND();
const u16 mask_or = params.GetMaskOR();
// Copy in reverse when src_x < dst_x, this is verified on console.
// The two loops differ only in the order the columns of a row are visited, which decides the result when the
// source and destination ranges overlap.
if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH))
{
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
// Signed index so the loop can count down past zero and stop.
for (s32 col = static_cast<s32>(width - 1); col >= 0; col--)
{
const u16 src_pixel = src_row_ptr[(src_x + static_cast<u32>(col)) % VRAM_WIDTH];
u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast<u32>(col)) % VRAM_WIDTH];
if ((*dst_pixel_ptr & mask_and) == 0)
*dst_pixel_ptr = src_pixel | mask_or;
}
}
}
else
{
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH];
for (u32 col = 0; col < width; col++)
{
const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH];
u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH];
if ((*dst_pixel_ptr & mask_and) == 0)
*dst_pixel_ptr = src_pixel | mask_or;
}
}
}
}

143
src/core/gpu_backend.h Normal file
View File

@ -0,0 +1,143 @@
#pragma once
#include "common/heap_array.h"
#include "common/event.h"
#include "gpu_types.h"
#include <atomic>
#include <condition_variable>
#include <memory>
#include <mutex>
class StateWrapper;
/// Base class for GPU rendering backends.
///
/// Callers obtain a command object from one of the New*Command() methods, fill in its fields, and submit it
/// with PushCommand(). Concrete backends implement the pure-virtual VRAM/drawing/display hooks declared in the
/// protected section.
///
/// NOTE(review): the member names (WakeGPUThread, m_gpu_thread_sleeping, m_command_fifo_*) suggest commands are
/// produced on one thread and consumed on a separate GPU thread; the implementation is not visible in this
/// header - confirm against gpu_backend.cpp before relying on any threading guarantee.
class GPUBackend
{
public:
GPUBackend();
virtual ~GPUBackend();
/// Returns the pointer stored in m_vram_ptr. It defaults to nullptr until a backend assigns it.
ALWAYS_INLINE u16* GetVRAM() const { return m_vram_ptr; }
/// Static factory taking the renderer type to create; returns false on failure.
/// NOTE(review): presumably stores the new instance in g_gpu_backend - confirm.
static bool Create(GPURenderer backend);
virtual bool Initialize();
// Graphics API state reset/restore - call when drawing the UI etc.
virtual void ResetGraphicsAPIState();
virtual void RestoreGraphicsAPIState();
virtual bool IsHardwareRenderer() const;
/// Recompile shaders/recreate framebuffers when needed.
virtual void UpdateSettings();
/// Updates the resolution scale when it's set to automatic.
virtual void UpdateResolutionScale();
/// Returns the effective display resolution of the GPU.
virtual std::tuple<u32, u32> GetEffectiveDisplayResolution();
virtual void DrawRendererStats(bool is_idle_frame);
bool DoState(StateWrapper& sw);
// Command allocation helpers. Each returns a command object for the caller to fill in and then pass to
// PushCommand(). The variants taking a count are for commands that carry a variable number of elements
// (data words or vertices).
GPUBackendResetCommand* NewResetCommand();
GPUBackendUpdateSettingsCommand* NewUpdateSettingsCommand();
GPUBackendUpdateResolutionScaleCommand* NewUpdateResolutionScaleCommand();
GPUBackendReadVRAMCommand* NewReadVRAMCommand();
GPUBackendFillVRAMCommand* NewFillVRAMCommand();
GPUBackendUpdateVRAMCommand* NewUpdateVRAMCommand(u32 num_words);
GPUBackendCopyVRAMCommand* NewCopyVRAMCommand();
GPUBackendSetDrawingAreaCommand* NewSetDrawingAreaCommand();
GPUBackendDrawPolygonCommand* NewDrawPolygonCommand(u32 num_vertices);
GPUBackendDrawRectangleCommand* NewDrawRectangleCommand();
GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices);
GPUBackendClearDisplayCommand* NewClearDisplayCommand();
GPUBackendUpdateDisplayCommand* NewUpdateDisplayCommand();
GPUBackendFlushRenderCommand* NewFlushRenderCommand();
/// Submits a command previously obtained from one of the New*Command() helpers.
void PushCommand(GPUBackendCommand* cmd);
/// NOTE(review): presumably blocks the caller until all pushed commands have been processed (the VRAM-to-CPU
/// copy path calls this after pushing a read command and before reading GetVRAM()) - confirm.
void Sync();
/// Processes all pending GPU commands.
void ProcessGPUCommands();
// Frame pacing entry points. NOTE(review): semantics are not visible in this header - see gpu_backend.cpp.
void CPUFrameDone();
void RunGPUFrame();
void EndGPUFrame();
protected:
/// NOTE(review): presumably reserves `size` bytes of storage in m_command_fifo_data for a new command - confirm.
void* AllocateCommand(u32 size);
u32 GetPendingCommandSize() const;
void WakeGPUThread();
virtual void Reset();
// Backend-specific operations. Subclasses must implement every pure-virtual hook below.
virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height) = 0;
virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) = 0;
virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data,
GPUBackendCommandParameters params) = 0;
virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) = 0;
virtual void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) = 0;
virtual void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) = 0;
virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0;
virtual void SetScissorFromDrawingArea();
virtual void ClearDisplay() = 0;
virtual void UpdateDisplay() = 0;
virtual void FlushRender() = 0;
/// NOTE(review): presumably dispatches a queued command to the matching hook above - confirm.
void HandleCommand(const GPUBackendCommand* cmd);
// Non-virtual counterparts of FillVRAM/UpdateVRAM/CopyVRAM with identical parameter lists.
// NOTE(review): by name these are CPU-side implementations, presumably operating on m_vram_ptr - confirm.
void SoftwareFillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params);
void SoftwareUpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params);
void SoftwareCopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params);
// VRAM storage exposed through GetVRAM(); nullptr until a backend points it at its own buffer.
u16* m_vram_ptr = nullptr;
Common::Rectangle<u32> m_drawing_area{};
// Display state. NOTE(review): the names match the fields of GPUBackendUpdateDisplayCommand, so these are
// presumably the backend's copy of the most recent display update - confirm.
float m_display_aspect_ratio = 1.0f;
// Size of the simulated screen in pixels. Depending on crop mode, this may include overscan area.
u16 m_display_width = 0;
u16 m_display_height = 0;
// Top-left corner where the VRAM is displayed. Depending on the CRTC config, this may indicate padding.
u16 m_display_origin_left = 0;
u16 m_display_origin_top = 0;
// Rectangle describing the displayed area of VRAM, in coordinates.
u16 m_display_vram_left = 0;
u16 m_display_vram_top = 0;
u16 m_display_vram_width = 0;
u16 m_display_vram_height = 0;
u16 m_display_vram_start_x = 0;
u16 m_display_vram_start_y = 0;
GPUInterlacedDisplayMode m_display_interlace = GPUInterlacedDisplayMode::None;
u8 m_display_interlace_field = 0;
bool m_display_enabled = false;
bool m_display_24bit = false;
bool m_frame_done = false;
// Synchronisation primitives. NOTE(review): which thread waits on which primitive is not visible here - confirm
// in gpu_backend.cpp.
Common::Event m_sync_event;
std::atomic_bool m_gpu_thread_sleeping{ false };
std::mutex m_sync_mutex;
std::condition_variable m_sync_cpu_thread_cv;
std::condition_variable m_wake_gpu_thread_cv;
bool m_sync_done = false;
enum : u32
{
// Capacity in bytes of m_command_fifo_data (8 MiB).
COMMAND_QUEUE_SIZE = 8 * 1024 * 1024,
// NOTE(review): presumably the amount of pending command data at which the GPU thread is woken - confirm.
THRESHOLD_TO_WAKE_GPU = 256
};
// Raw byte storage for queued commands, plus its read and write positions.
HeapArray<u8, COMMAND_QUEUE_SIZE> m_command_fifo_data;
// Each position is aligned to 64 bytes, presumably so the two atomics do not share a cache line.
alignas(64) std::atomic<u32> m_command_fifo_read_ptr{0};
alignas(64) std::atomic<u32> m_command_fifo_write_ptr{0};
};
extern std::unique_ptr<GPUBackend> g_gpu_backend;

View File

@ -2,7 +2,9 @@
#include "common/log.h" #include "common/log.h"
#include "common/string_util.h" #include "common/string_util.h"
#include "gpu.h" #include "gpu.h"
#include "gpu_backend.h"
#include "interrupt_controller.h" #include "interrupt_controller.h"
#include "pgxp.h"
#include "system.h" #include "system.h"
Log_SetChannel(GPU); Log_SetChannel(GPU);
@ -21,6 +23,15 @@ static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero)
return value == 0 ? value_for_zero : value; return value == 0 ? value_for_zero : value;
} }
/// Returns both arguments as a tuple ordered (smaller, larger).
/// When the values compare equal the original argument order is kept.
template<typename T>
ALWAYS_INLINE static constexpr std::tuple<T, T> MinMax(T v1, T v2)
{
  return (v1 > v2) ? std::tuple<T, T>(v2, v1) : std::tuple<T, T>(v1, v2);
}
void GPU::ExecuteCommands() void GPU::ExecuteCommands()
{ {
m_syncing = true; m_syncing = true;
@ -91,7 +102,7 @@ void GPU::ExecuteCommands()
// drop terminator // drop terminator
m_fifo.RemoveOne(); m_fifo.RemoveOne();
Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount()); Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount());
DispatchRenderCommand(); FinishPolyLineRenderCommand();
m_blit_buffer.clear(); m_blit_buffer.clear();
EndCommand(); EndCommand();
continue; continue;
@ -132,16 +143,16 @@ GPU::GP0CommandHandlerTable GPU::GenerateGP0CommandHandlerTable()
table[0x1F] = &GPU::HandleInterruptRequestCommand; table[0x1F] = &GPU::HandleInterruptRequestCommand;
for (u32 i = 0x20; i <= 0x7F; i++) for (u32 i = 0x20; i <= 0x7F; i++)
{ {
const RenderCommand rc{i << 24}; const GPURenderCommand rc{i << 24};
switch (rc.primitive) switch (rc.primitive)
{ {
case Primitive::Polygon: case GPUPrimitive::Polygon:
table[i] = &GPU::HandleRenderPolygonCommand; table[i] = &GPU::HandleRenderPolygonCommand;
break; break;
case Primitive::Line: case GPUPrimitive::Line:
table[i] = rc.polyline ? &GPU::HandleRenderPolyLineCommand : &GPU::HandleRenderLineCommand; table[i] = rc.polyline ? &GPU::HandleRenderPolyLineCommand : &GPU::HandleRenderLineCommand;
break; break;
case Primitive::Rectangle: case GPUPrimitive::Rectangle:
table[i] = &GPU::HandleRenderRectangleCommand; table[i] = &GPU::HandleRenderRectangleCommand;
break; break;
default: default:
@ -218,7 +229,17 @@ bool GPU::HandleSetDrawModeCommand()
{ {
const u32 param = FifoPop() & 0x00FFFFFFu; const u32 param = FifoPop() & 0x00FFFFFFu;
Log_DebugPrintf("Set draw mode %08X", param); Log_DebugPrintf("Set draw mode %08X", param);
SetDrawMode(Truncate16(param));
GPUDrawModeReg new_mode_reg{static_cast<u16>(param & GPUDrawModeReg::MASK)};
if (!m_set_texture_disable_mask)
new_mode_reg.texture_disable = false;
// Bits 0..10 are returned in the GPU status register.
m_GPUSTAT.bits = (m_GPUSTAT.bits & ~(GPUDrawModeReg::GPUSTAT_MASK)) |
(ZeroExtend32(new_mode_reg.bits) & GPUDrawModeReg::GPUSTAT_MASK);
m_GPUSTAT.texture_disable = new_mode_reg.texture_disable;
m_draw_mode.bits = new_mode_reg.bits;
AddCommandTicks(1); AddCommandTicks(1);
EndCommand(); EndCommand();
return true; return true;
@ -227,10 +248,10 @@ bool GPU::HandleSetDrawModeCommand()
bool GPU::HandleSetTextureWindowCommand() bool GPU::HandleSetTextureWindowCommand()
{ {
const u32 param = FifoPop() & 0x00FFFFFFu; const u32 param = FifoPop() & 0x00FFFFFFu;
SetTextureWindow(param);
Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_draw_mode.texture_window_mask_x, m_texture_window.bits = param;
m_draw_mode.texture_window_mask_y, m_draw_mode.texture_window_offset_x, Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_texture_window.mask_x, m_texture_window.mask_y,
m_draw_mode.texture_window_offset_y); m_texture_window.offset_x, m_texture_window.offset_y);
AddCommandTicks(1); AddCommandTicks(1);
EndCommand(); EndCommand();
@ -245,11 +266,9 @@ bool GPU::HandleSetDrawingAreaTopLeftCommand()
Log_DebugPrintf("Set drawing area top-left: (%u, %u)", left, top); Log_DebugPrintf("Set drawing area top-left: (%u, %u)", left, top);
if (m_drawing_area.left != left || m_drawing_area.top != top) if (m_drawing_area.left != left || m_drawing_area.top != top)
{ {
FlushRender();
m_drawing_area.left = left; m_drawing_area.left = left;
m_drawing_area.top = top; m_drawing_area.top = top;
m_drawing_area_changed = true; UpdateDrawingArea();
} }
AddCommandTicks(1); AddCommandTicks(1);
@ -266,11 +285,9 @@ bool GPU::HandleSetDrawingAreaBottomRightCommand()
Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.right, m_drawing_area.bottom); Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.right, m_drawing_area.bottom);
if (m_drawing_area.right != right || m_drawing_area.bottom != bottom) if (m_drawing_area.right != right || m_drawing_area.bottom != bottom)
{ {
FlushRender();
m_drawing_area.right = right; m_drawing_area.right = right;
m_drawing_area.bottom = bottom; m_drawing_area.bottom = bottom;
m_drawing_area_changed = true; UpdateDrawingArea();
} }
AddCommandTicks(1); AddCommandTicks(1);
@ -304,10 +321,8 @@ bool GPU::HandleSetMaskBitCommand()
constexpr u32 gpustat_mask = (1 << 11) | (1 << 12); constexpr u32 gpustat_mask = (1 << 11) | (1 << 12);
const u32 gpustat_bits = (param & 0x03) << 11; const u32 gpustat_bits = (param & 0x03) << 11;
if ((m_GPUSTAT.bits & gpustat_mask) != gpustat_bits) if ((m_GPUSTAT.bits & gpustat_mask) != gpustat_bits)
{
FlushRender();
m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits; m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits;
}
Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing), Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing),
BoolToUInt32(m_GPUSTAT.check_mask_before_draw)); BoolToUInt32(m_GPUSTAT.check_mask_before_draw));
@ -316,9 +331,64 @@ bool GPU::HandleSetMaskBitCommand()
return true; return true;
} }
/// Fills the parameter block of a backend command from the current GPU state.
/// @param cmd Command whose `params` member is overwritten.
void GPU::FillBackendCommandParameters(GPUBackendCommand* cmd) const
{
  auto& params = cmd->params;

  // Zero the whole word first so any field not assigned below starts cleared.
  params.bits = 0;

  // Mask-bit behaviour comes from GPUSTAT.
  params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw;
  params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing;

  // Interlacing information comes from the CRTC state.
  params.active_line_lsb = m_crtc_state.active_line_lsb;
  params.interlaced_rendering = IsInterlacedRenderingEnabled();
}
void GPU::ClearDisplay()
{
g_gpu_backend->PushCommand(g_gpu_backend->NewClearDisplayCommand());
}
void GPU::UpdateDisplay()
{
GPUBackendUpdateDisplayCommand* cmd = g_gpu_backend->NewUpdateDisplayCommand();
cmd->display_aspect_ratio = m_crtc_state.display_aspect_ratio;
cmd->display_width = m_crtc_state.display_width;
cmd->display_height = m_crtc_state.display_height;
cmd->display_origin_left = m_crtc_state.display_origin_left;
cmd->display_origin_top = m_crtc_state.display_origin_top;
cmd->display_vram_left = m_crtc_state.display_vram_left;
cmd->display_vram_top = m_crtc_state.display_vram_top;
cmd->display_vram_width = m_crtc_state.display_vram_width;
cmd->display_vram_height = m_crtc_state.display_vram_height;
cmd->display_vram_start_x = m_crtc_state.regs.X;
cmd->display_vram_start_y = m_crtc_state.regs.Y;
cmd->display_interlace = GetInterlacedDisplayMode();
cmd->display_interlace_field = m_crtc_state.interlaced_display_field;
cmd->display_enabled = !m_GPUSTAT.display_disable;
cmd->display_24bit = m_GPUSTAT.display_area_color_depth_24;
g_gpu_backend->PushCommand(cmd);
}
/// Initialises the fields shared by all draw commands.
/// @param cmd Draw command to populate.
/// @param rc  Render command word whose bits are copied into the draw command.
void GPU::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const
{
  // Common parameter block (mask bits, interlacing) first.
  FillBackendCommandParameters(cmd);

  // Snapshot the register state this draw should be rendered with.
  cmd->window.bits = m_texture_window.bits;
  cmd->draw_mode.bits = m_draw_mode.bits;
  cmd->rc.bits = rc.bits;
}
void GPU::UpdateDrawingArea()
{
GPUBackendSetDrawingAreaCommand* cmd = g_gpu_backend->NewSetDrawingAreaCommand();
cmd->new_area = m_drawing_area;
g_gpu_backend->PushCommand(cmd);
}
void GPU::FlushRender()
{
g_gpu_backend->PushCommand(g_gpu_backend->NewFlushRenderCommand());
}
bool GPU::HandleRenderPolygonCommand() bool GPU::HandleRenderPolygonCommand()
{ {
const RenderCommand rc{FifoPeek(0)}; const GPURenderCommand rc{FifoPeek(0)};
// shaded vertices use the colour from the first word for the first vertex // shaded vertices use the colour from the first word for the first vertex
const u32 words_per_vertex = 1 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.shading_enable); const u32 words_per_vertex = 1 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.shading_enable);
@ -341,39 +411,155 @@ bool GPU::HandleRenderPolygonCommand()
rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome", rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome",
ZeroExtend32(num_vertices), ZeroExtend32(words_per_vertex), setup_ticks); ZeroExtend32(num_vertices), ZeroExtend32(words_per_vertex), setup_ticks);
GPUBackendDrawPolygonCommand* cmd = g_gpu_backend->NewDrawPolygonCommand(num_vertices);
FillDrawCommand(cmd, rc);
// set draw state up // set draw state up
if (rc.texture_enable) if (rc.texture_enable)
{ {
const u16 texpage_attribute = Truncate16((rc.shading_enable ? FifoPeek(5) : FifoPeek(4)) >> 16); const u16 texpage_attribute = Truncate16((rc.shading_enable ? FifoPeek(5) : FifoPeek(4)) >> 16);
SetDrawMode((texpage_attribute & DrawMode::Reg::POLYGON_TEXPAGE_MASK) |
(m_draw_mode.mode_reg.bits & ~DrawMode::Reg::POLYGON_TEXPAGE_MASK)); m_GPUSTAT.bits = (m_GPUSTAT.bits & ~(GPUDrawModeReg::GPUSTAT_MASK)) |
SetTexturePalette(Truncate16(FifoPeek(2) >> 16)); (ZeroExtend32(texpage_attribute) & GPUDrawModeReg::GPUSTAT_MASK);
cmd->draw_mode.bits = ((texpage_attribute & GPUDrawModeReg::POLYGON_TEXPAGE_MASK) |
(m_draw_mode.bits & ~GPUDrawModeReg::POLYGON_TEXPAGE_MASK));
cmd->palette.bits = Truncate16(FifoPeek(2) >> 16);
}
else
{
cmd->palette.bits = 0;
} }
m_stats.num_vertices += num_vertices; m_stats.num_vertices += num_vertices;
m_stats.num_polygons++; m_stats.num_polygons++;
m_render_command.bits = rc.bits;
m_fifo.RemoveOne(); m_fifo.RemoveOne();
DispatchRenderCommand(); const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable;
const bool pgxp = g_settings.gpu_pgxp_enable;
bool valid_w = g_settings.gpu_pgxp_texture_correction;
for (u32 i = 0; i < num_vertices; i++)
{
GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i];
vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
const u64 maddr_and_pos = m_fifo.Pop();
const GPUVertexPosition vp{Truncate32(maddr_and_pos)};
vert->x = m_drawing_offset.x + vp.x;
vert->y = m_drawing_offset.y + vp.y;
vert->precise_x = static_cast<float>(vert->x);
vert->precise_y = static_cast<float>(vert->y);
vert->precise_w = 1.0f;
vert->texcoord = textured ? Truncate16(FifoPop()) : 0;
const s32 native_x = m_drawing_offset.x + vp.x;
if (pgxp)
{
valid_w &= PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, vert->x, vert->y, m_drawing_offset.x,
m_drawing_offset.y, &vert->precise_x, &vert->precise_y, &vert->precise_w);
}
}
if (pgxp && !valid_w)
{
for (u32 i = 0; i < num_vertices; i++)
cmd->vertices[i].precise_w = 1.0f;
}
if (!IsDrawingAreaIsValid())
{
EndCommand();
return true;
}
// Cull polygons which are too large.
const auto [min_x_12, max_x_12] = MinMax(cmd->vertices[1].x, cmd->vertices[2].x);
const auto [min_y_12, max_y_12] = MinMax(cmd->vertices[1].y, cmd->vertices[2].y);
const s32 min_x = std::min(min_x_12, cmd->vertices[0].x);
const s32 max_x = std::max(max_x_12, cmd->vertices[0].x);
const s32 min_y = std::min(min_y_12, cmd->vertices[0].y);
const s32 max_y = std::max(max_y_12, cmd->vertices[0].y);
if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large polygon: %d,%d %d,%d %d,%d", cmd->vertices[0].x, cmd->vertices[0].y,
cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y);
if (!rc.quad_polygon)
{
EndCommand();
return true;
}
// turn it into a degenerate triangle
std::memcpy(&cmd->vertices[0], &cmd->vertices[1], sizeof(GPUBackendDrawPolygonCommand::Vertex));
cmd->bounds.SetInvalid();
}
else
{
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.right));
const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom = static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
cmd->bounds.Set(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), Truncate16(clip_bottom));
AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable, rc.texture_enable,
rc.transparency_enable);
}
// quads
if (rc.quad_polygon)
{
const s32 min_x_123 = std::min(min_x_12, cmd->vertices[3].x);
const s32 max_x_123 = std::max(max_x_12, cmd->vertices[3].x);
const s32 min_y_123 = std::min(min_y_12, cmd->vertices[3].y);
const s32 max_y_123 = std::max(max_y_12, cmd->vertices[3].y);
// Cull polygons which are too large.
if ((max_x_123 - min_x_123) >= MAX_PRIMITIVE_WIDTH || (max_y_123 - min_y_123) >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large polygon (quad second half): %d,%d %d,%d %d,%d", cmd->vertices[2].x,
cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[0].x,
cmd->vertices[0].y);
// turn it into a degenerate triangle
std::memcpy(&cmd->vertices[3], &cmd->vertices[2], sizeof(GPUBackendDrawPolygonCommand::Vertex));
cmd->bounds.SetInvalid();
}
else
{
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x_123, m_drawing_area.left, m_drawing_area.right));
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(max_x_123, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y_123, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(max_y_123, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
cmd->bounds.Include(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), Truncate16(clip_bottom));
AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable, rc.texture_enable,
rc.transparency_enable);
}
}
g_gpu_backend->PushCommand(cmd);
EndCommand(); EndCommand();
return true; return true;
} }
bool GPU::HandleRenderRectangleCommand() bool GPU::HandleRenderRectangleCommand()
{ {
const RenderCommand rc{FifoPeek(0)}; const GPURenderCommand rc{FifoPeek(0)};
const u32 total_words = const u32 total_words =
2 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.rectangle_size == DrawRectangleSize::Variable); 2 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.rectangle_size == GPUDrawRectangleSize::Variable);
CHECK_COMMAND_SIZE(total_words); CHECK_COMMAND_SIZE(total_words);
if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending())
SynchronizeCRTC(); SynchronizeCRTC();
if (rc.texture_enable)
SetTexturePalette(Truncate16(FifoPeek(2) >> 16));
const TickCount setup_ticks = 16; const TickCount setup_ticks = 16;
AddCommandTicks(setup_ticks); AddCommandTicks(setup_ticks);
@ -384,17 +570,84 @@ bool GPU::HandleRenderRectangleCommand()
m_stats.num_vertices++; m_stats.num_vertices++;
m_stats.num_polygons++; m_stats.num_polygons++;
m_render_command.bits = rc.bits;
m_fifo.RemoveOne(); m_fifo.RemoveOne();
DispatchRenderCommand(); GPUBackendDrawRectangleCommand* cmd = g_gpu_backend->NewDrawRectangleCommand();
FillDrawCommand(cmd, rc);
cmd->color = rc.color_for_first_vertex;
cmd->draw_mode.bits = m_draw_mode.bits;
cmd->window.bits = m_texture_window.bits;
const GPUVertexPosition vp{FifoPop()};
cmd->x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x);
cmd->y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y);
if (rc.texture_enable)
{
const u32 texcoord_and_palette = FifoPop();
cmd->palette.bits = Truncate16(texcoord_and_palette >> 16);
cmd->texcoord = Truncate16(texcoord_and_palette);
}
else
{
cmd->palette.bits = 0;
cmd->texcoord = 0;
}
switch (rc.rectangle_size)
{
case GPUDrawRectangleSize::R1x1:
cmd->width = 1;
cmd->height = 1;
break;
case GPUDrawRectangleSize::R8x8:
cmd->width = 8;
cmd->height = 8;
break;
case GPUDrawRectangleSize::R16x16:
cmd->width = 16;
cmd->height = 16;
break;
default:
{
const u32 width_and_height = FifoPop();
cmd->width = static_cast<u16>(width_and_height & VRAM_WIDTH_MASK);
cmd->height = static_cast<u16>((width_and_height >> 16) & VRAM_HEIGHT_MASK);
if (cmd->width >= MAX_PRIMITIVE_WIDTH || cmd->height >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large rectangle: %d,%d %dx%d", cmd->x, cmd->y, cmd->width, cmd->height);
return true;
}
}
break;
}
if (!IsDrawingAreaIsValid())
{
EndCommand();
return true;
}
const u32 clip_left = static_cast<u32>(std::clamp<s32>(cmd->x, m_drawing_area.left, m_drawing_area.right));
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(cmd->x + cmd->width, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(cmd->y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(cmd->y + cmd->height, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
cmd->bounds.Set(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), Truncate16(clip_bottom));
AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.transparency_enable);
g_gpu_backend->PushCommand(cmd);
EndCommand(); EndCommand();
return true; return true;
} }
bool GPU::HandleRenderLineCommand() bool GPU::HandleRenderLineCommand()
{ {
const RenderCommand rc{FifoPeek(0)}; const GPURenderCommand rc{FifoPeek(0)};
const u32 total_words = rc.shading_enable ? 4 : 3; const u32 total_words = rc.shading_enable ? 4 : 3;
CHECK_COMMAND_SIZE(total_words); CHECK_COMMAND_SIZE(total_words);
@ -409,7 +662,59 @@ bool GPU::HandleRenderLineCommand()
m_render_command.bits = rc.bits; m_render_command.bits = rc.bits;
m_fifo.RemoveOne(); m_fifo.RemoveOne();
DispatchRenderCommand(); GPUBackendDrawLineCommand* cmd = g_gpu_backend->NewDrawLineCommand(2);
FillDrawCommand(cmd, rc);
cmd->palette.bits = 0;
if (rc.shading_enable)
{
cmd->vertices[0].color = rc.color_for_first_vertex;
const GPUVertexPosition start_pos{FifoPop()};
cmd->vertices[0].x = m_drawing_offset.x + start_pos.x;
cmd->vertices[0].y = m_drawing_offset.y + start_pos.y;
cmd->vertices[1].color = FifoPop() & UINT32_C(0x00FFFFFF);
const GPUVertexPosition end_pos{FifoPop()};
cmd->vertices[1].x = m_drawing_offset.x + end_pos.x;
cmd->vertices[1].y = m_drawing_offset.y + end_pos.y;
}
else
{
cmd->vertices[0].color = rc.color_for_first_vertex;
cmd->vertices[1].color = rc.color_for_first_vertex;
const GPUVertexPosition start_pos{FifoPop()};
cmd->vertices[0].x = m_drawing_offset.x + start_pos.x;
cmd->vertices[0].y = m_drawing_offset.y + start_pos.y;
const GPUVertexPosition end_pos{FifoPop()};
cmd->vertices[1].x = m_drawing_offset.x + end_pos.x;
cmd->vertices[1].y = m_drawing_offset.y + end_pos.y;
}
if (!IsDrawingAreaIsValid())
{
EndCommand();
return true;
}
const auto [min_x, max_x] = MinMax(cmd->vertices[0].x, cmd->vertices[1].x);
const auto [min_y, max_y] = MinMax(cmd->vertices[0].y, cmd->vertices[1].y);
if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", cmd->vertices[0].y, cmd->vertices[0].y, cmd->vertices[1].x,
cmd->vertices[1].y);
EndCommand();
return true;
}
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left));
const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom = static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
cmd->bounds.Set(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), Truncate16(clip_bottom));
AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable);
EndCommand(); EndCommand();
return true; return true;
} }
@ -417,7 +722,7 @@ bool GPU::HandleRenderLineCommand()
bool GPU::HandleRenderPolyLineCommand() bool GPU::HandleRenderPolyLineCommand()
{ {
// always read the first two vertices, we test for the terminator after that // always read the first two vertices, we test for the terminator after that
const RenderCommand rc{FifoPeek(0)}; const GPURenderCommand rc{FifoPeek(0)};
const u32 min_words = rc.shading_enable ? 3 : 4; const u32 min_words = rc.shading_enable ? 3 : 4;
CHECK_COMMAND_SIZE(min_words); CHECK_COMMAND_SIZE(min_words);
@ -446,6 +751,52 @@ bool GPU::HandleRenderPolyLineCommand()
return true; return true;
} }
/// Builds the backend draw command for a completed poly-line and submits it.
///
/// Vertex data is read from m_blit_buffer: the first word is the starting position, and each following
/// vertex is one position word, preceded by a colour word when the render command is shaded. The first
/// vertex always uses the colour embedded in the render command.
///
/// For every segment (vertex i-1 -> vertex i) the extent is checked against the maximum primitive size.
/// Segments within the limit extend the command's bounds (clipped to the drawing area) and are charged
/// draw ticks; oversized segments are logged and contribute neither bounds nor ticks. Their vertices
/// still remain in the command's vertex list.
///
/// Nothing is allocated or submitted when the drawing area is invalid.
void GPU::FinishPolyLineRenderCommand()
{
  const u32 num_vertices = GetPolyLineVertexCount();
  if (!IsDrawingAreaIsValid())
    return;

  GPUBackendDrawLineCommand* cmd = g_gpu_backend->NewDrawLineCommand(num_vertices);
  FillDrawCommand(cmd, m_render_command);

  // FillDrawCommand() does not touch the palette; zero it as the single-line path does.
  cmd->palette.bits = 0;

  u32 buffer_pos = 0;
  const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]};
  cmd->vertices[0].x = start_vp.x + m_drawing_offset.x;
  cmd->vertices[0].y = start_vp.y + m_drawing_offset.y;
  cmd->vertices[0].color = m_render_command.color_for_first_vertex;

  // Start with empty bounds; each non-culled segment grows them below.
  cmd->bounds.SetInvalid();

  const bool shaded = m_render_command.shading_enable;
  for (u32 i = 1; i < num_vertices; i++)
  {
    cmd->vertices[i].color =
      shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : m_render_command.color_for_first_vertex;

    const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]};
    cmd->vertices[i].x = m_drawing_offset.x + vp.x;
    cmd->vertices[i].y = m_drawing_offset.y + vp.y;

    // Extent of this segment: X compared with X, Y compared with Y.
    const auto [min_x, max_x] = MinMax(cmd->vertices[i - 1].x, cmd->vertices[i].x);
    const auto [min_y, max_y] = MinMax(cmd->vertices[i - 1].y, cmd->vertices[i].y);
    if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
    {
      Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", cmd->vertices[i - 1].x, cmd->vertices[i - 1].y,
                      cmd->vertices[i].x, cmd->vertices[i].y);
      continue;
    }

    // Clip the segment's extent to the drawing area. Right/bottom are exclusive, hence the +1.
    const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.right));
    const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
    const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
    const u32 clip_bottom = static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;

    // Argument order is (left, top, right, bottom), matching the other Set()/Include() call sites.
    cmd->bounds.Include(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), Truncate16(clip_bottom));
    AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, m_render_command.shading_enable);
  }

  // Hand the finished command to the backend; without this the poly-line would never be drawn.
  g_gpu_backend->PushCommand(cmd);
}
bool GPU::HandleFillRectangleCommand() bool GPU::HandleFillRectangleCommand()
{ {
CHECK_COMMAND_SIZE(3); CHECK_COMMAND_SIZE(3);
@ -453,19 +804,22 @@ bool GPU::HandleFillRectangleCommand()
if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending())
SynchronizeCRTC(); SynchronizeCRTC();
FlushRender(); GPUBackendFillVRAMCommand* cmd = g_gpu_backend->NewFillVRAMCommand();
FillBackendCommandParameters(cmd);
const u32 color = FifoPop() & 0x00FFFFFF; cmd->color = FifoPop() & 0x00FFFFFF;
const u32 dst_x = FifoPeek() & 0x3F0; cmd->x = Truncate16(FifoPeek() & 0x3F0);
const u32 dst_y = (FifoPop() >> 16) & VRAM_COORD_MASK; cmd->y = Truncate16((FifoPop() >> 16) & VRAM_COORD_MASK);
const u32 width = ((FifoPeek() & VRAM_WIDTH_MASK) + 0xF) & ~0xF; cmd->width = Truncate16(((FifoPeek() & VRAM_WIDTH_MASK) + 0xF) & ~0xF);
const u32 height = (FifoPop() >> 16) & VRAM_HEIGHT_MASK; cmd->height = Truncate16((FifoPop() >> 16) & VRAM_HEIGHT_MASK);
Log_DebugPrintf("Fill VRAM rectangle offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, width, height); Log_DebugPrintf("Fill VRAM rectangle offset=(%u,%u), size=(%u,%u)", cmd->x, cmd->y, cmd->width, cmd->height);
FillVRAM(dst_x, dst_y, width, height, color); AddCommandTicks(46 + ((cmd->width / 8) + 9) * cmd->height);
g_gpu_backend->PushCommand(cmd);
m_stats.num_vram_fills++; m_stats.num_vram_fills++;
AddCommandTicks(46 + ((width / 8) + 9) * height);
EndCommand(); EndCommand();
return true; return true;
} }
@ -509,9 +863,17 @@ void GPU::FinishVRAMWrite()
if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending())
SynchronizeCRTC(); SynchronizeCRTC();
FlushRender(); // TODO: skip this copy
const u32 num_words = static_cast<u32>(m_blit_buffer.size()) * 2u;
GPUBackendUpdateVRAMCommand* cmd = g_gpu_backend->NewUpdateVRAMCommand(num_words);
FillBackendCommandParameters(cmd);
cmd->x = m_vram_transfer.x;
cmd->y = m_vram_transfer.y;
cmd->width = m_vram_transfer.width;
cmd->height = m_vram_transfer.height;
std::memcpy(cmd->data, m_blit_buffer.data(), sizeof(u16) * num_words);
g_gpu_backend->PushCommand(cmd);
UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height, m_blit_buffer.data());
m_blit_buffer.clear(); m_blit_buffer.clear();
m_vram_transfer = {}; m_vram_transfer = {};
m_blitter_state = BlitterState::Idle; m_blitter_state = BlitterState::Idle;
@ -532,17 +894,20 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand()
m_vram_transfer.width, m_vram_transfer.height); m_vram_transfer.width, m_vram_transfer.height);
DebugAssert(m_vram_transfer.col == 0 && m_vram_transfer.row == 0); DebugAssert(m_vram_transfer.col == 0 && m_vram_transfer.row == 0);
// all rendering should be done first...
FlushRender();
// ensure VRAM shadow is up to date // ensure VRAM shadow is up to date
ReadVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height); GPUBackendReadVRAMCommand* cmd = g_gpu_backend->NewReadVRAMCommand();
cmd->x = m_vram_transfer.x;
cmd->y = m_vram_transfer.y;
cmd->width = m_vram_transfer.width;
cmd->height = m_vram_transfer.height;
g_gpu_backend->PushCommand(cmd);
g_gpu_backend->Sync();
if (g_settings.debugging.dump_vram_to_cpu_copies) if (g_settings.debugging.dump_vram_to_cpu_copies)
{ {
DumpVRAMToFile(StringUtil::StdStringFromFormat("vram_to_cpu_copy_%u.png", s_vram_to_cpu_dump_id++).c_str(), DumpVRAMToFile(StringUtil::StdStringFromFormat("vram_to_cpu_copy_%u.png", s_vram_to_cpu_dump_id++).c_str(),
m_vram_transfer.width, m_vram_transfer.height, sizeof(u16) * VRAM_WIDTH, m_vram_transfer.width, m_vram_transfer.height, sizeof(u16) * VRAM_WIDTH,
&m_vram_ptr[m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x], true); g_gpu_backend->GetVRAM() + (m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x), true);
} }
// switch to pixel-by-pixel read state // switch to pixel-by-pixel read state
@ -557,20 +922,22 @@ bool GPU::HandleCopyRectangleVRAMToVRAMCommand()
CHECK_COMMAND_SIZE(4); CHECK_COMMAND_SIZE(4);
m_fifo.RemoveOne(); m_fifo.RemoveOne();
const u32 src_x = FifoPeek() & VRAM_COORD_MASK; GPUBackendCopyVRAMCommand* cmd = g_gpu_backend->NewCopyVRAMCommand();
const u32 src_y = (FifoPop() >> 16) & VRAM_COORD_MASK; cmd->src_x = Truncate16(FifoPeek() & VRAM_COORD_MASK);
const u32 dst_x = FifoPeek() & VRAM_COORD_MASK; cmd->src_y = Truncate16((FifoPop() >> 16) & VRAM_COORD_MASK);
const u32 dst_y = (FifoPop() >> 16) & VRAM_COORD_MASK; cmd->dst_x = Truncate16(FifoPeek() & VRAM_COORD_MASK);
const u32 width = ReplaceZero(FifoPeek() & VRAM_WIDTH_MASK, 0x400); cmd->dst_y = Truncate16((FifoPop() >> 16) & VRAM_COORD_MASK);
const u32 height = ReplaceZero((FifoPop() >> 16) & VRAM_HEIGHT_MASK, 0x200); cmd->width = Truncate16(ReplaceZero(FifoPeek() & VRAM_WIDTH_MASK, 0x400));
cmd->height = Truncate16(ReplaceZero((FifoPop() >> 16) & VRAM_HEIGHT_MASK, 0x200));
Log_DebugPrintf("Copy rectangle from VRAM to VRAM src=(%u,%u), dst=(%u,%u), size=(%u,%u)", src_x, src_y, dst_x, dst_y, Log_DebugPrintf("Copy rectangle from VRAM to VRAM src=(%u,%u), dst=(%u,%u), size=(%u,%u)", cmd->src_x, cmd->src_y,
width, height); cmd->dst_x, cmd->dst_y, cmd->width, cmd->height);
AddCommandTicks(ZeroExtend32(cmd->width) * ZeroExtend32(cmd->height) * 2);
g_gpu_backend->PushCommand(cmd);
FlushRender();
CopyVRAM(src_x, src_y, dst_x, dst_y, width, height);
m_stats.num_vram_copies++; m_stats.num_vram_copies++;
AddCommandTicks(width * height * 2);
EndCommand(); EndCommand();
return true; return true;
} }

View File

@ -14,25 +14,13 @@
#endif #endif
Log_SetChannel(GPU_HW); Log_SetChannel(GPU_HW);
template<typename T>
ALWAYS_INLINE static constexpr std::tuple<T, T> MinMax(T v1, T v2)
{
if (v1 > v2)
return std::tie(v2, v1);
else
return std::tie(v1, v2);
}
ALWAYS_INLINE static bool ShouldUseUVLimits() ALWAYS_INLINE static bool ShouldUseUVLimits()
{ {
// We only need UV limits if PGXP is enabled, or texture filtering is enabled. // We only need UV limits if PGXP is enabled, or texture filtering is enabled.
return g_settings.gpu_pgxp_enable || g_settings.gpu_texture_filter != GPUTextureFilter::Nearest; return g_settings.gpu_pgxp_enable || g_settings.gpu_texture_filter != GPUTextureFilter::Nearest;
} }
GPU_HW::GPU_HW() : GPU() GPU_HW::GPU_HW() : GPUBackend() {}
{
m_vram_ptr = m_vram_shadow.data();
}
GPU_HW::~GPU_HW() = default; GPU_HW::~GPU_HW() = default;
@ -41,13 +29,14 @@ bool GPU_HW::IsHardwareRenderer() const
return true; return true;
} }
bool GPU_HW::Initialize(HostDisplay* host_display) bool GPU_HW::Initialize()
{ {
if (!GPU::Initialize(host_display)) if (!GPUBackend::Initialize())
return false; return false;
m_vram_ptr = m_vram_shadow.data();
m_resolution_scale = CalculateResolutionScale(); m_resolution_scale = CalculateResolutionScale();
m_render_api = host_display->GetRenderAPI(); m_render_api = g_host_interface->GetDisplay()->GetRenderAPI();
m_true_color = g_settings.gpu_true_color; m_true_color = g_settings.gpu_true_color;
m_scaled_dithering = g_settings.gpu_scaled_dithering; m_scaled_dithering = g_settings.gpu_scaled_dithering;
m_texture_filtering = g_settings.gpu_texture_filter; m_texture_filtering = g_settings.gpu_texture_filter;
@ -58,7 +47,7 @@ bool GPU_HW::Initialize(HostDisplay* host_display)
void GPU_HW::Reset() void GPU_HW::Reset()
{ {
GPU::Reset(); GPUBackend::Reset();
m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
@ -72,22 +61,6 @@ void GPU_HW::Reset()
SetFullVRAMDirtyRectangle(); SetFullVRAMDirtyRectangle();
} }
// Saves or loads renderer state through the base GPU implementation.
// Returns the base implementation's result. After a successful load, the batch write pointer is
// rewound to the start of the batch, the whole of VRAM is marked dirty so the VRAM read texture
// is invalidated, and the batch vertex depth is reset.
bool GPU_HW::DoState(StateWrapper& sw)
{
  const bool base_ok = GPU::DoState(sw);

  // Only a successful *load* needs the hardware-side bookkeeping to be thrown away.
  if (base_ok && sw.IsReading())
  {
    m_batch_current_vertex_ptr = m_batch_start_vertex_ptr;
    SetFullVRAMDirtyRectangle();
    ResetBatchVertexDepth();
  }

  return base_ok;
}
void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed) void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed)
{ {
const u32 resolution_scale = CalculateResolutionScale(); const u32 resolution_scale = CalculateResolutionScale();
@ -100,10 +73,12 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed)
if (m_resolution_scale != resolution_scale) if (m_resolution_scale != resolution_scale)
{ {
#if FIXME
g_host_interface->AddFormattedOSDMessage(10.0f, "Resolution scale set to %ux (display %ux%u, VRAM %ux%u)", g_host_interface->AddFormattedOSDMessage(10.0f, "Resolution scale set to %ux (display %ux%u, VRAM %ux%u)",
resolution_scale, m_crtc_state.display_vram_width * resolution_scale, resolution_scale, m_crtc_state.display_vram_width * resolution_scale,
resolution_scale * m_crtc_state.display_vram_height, resolution_scale * m_crtc_state.display_vram_height,
VRAM_WIDTH * resolution_scale, VRAM_HEIGHT * resolution_scale); VRAM_WIDTH * resolution_scale, VRAM_HEIGHT * resolution_scale);
#endif
} }
m_resolution_scale = resolution_scale; m_resolution_scale = resolution_scale;
@ -119,6 +94,7 @@ u32 GPU_HW::CalculateResolutionScale() const
if (g_settings.gpu_resolution_scale != 0) if (g_settings.gpu_resolution_scale != 0)
return std::clamp<u32>(g_settings.gpu_resolution_scale, 1, m_max_resolution_scale); return std::clamp<u32>(g_settings.gpu_resolution_scale, 1, m_max_resolution_scale);
#if FIXME
// auto scaling // auto scaling
const s32 height = (m_crtc_state.display_height != 0) ? static_cast<s32>(m_crtc_state.display_height) : 480; const s32 height = (m_crtc_state.display_height != 0) ? static_cast<s32>(m_crtc_state.display_height) : 480;
const s32 preferred_scale = const s32 preferred_scale =
@ -126,11 +102,14 @@ u32 GPU_HW::CalculateResolutionScale() const
Log_InfoPrintf("Height = %d, preferred scale = %d", height, preferred_scale); Log_InfoPrintf("Height = %d, preferred scale = %d", height, preferred_scale);
return static_cast<u32>(std::clamp<s32>(preferred_scale, 1, m_max_resolution_scale)); return static_cast<u32>(std::clamp<s32>(preferred_scale, 1, m_max_resolution_scale));
#else
return 1;
#endif
} }
void GPU_HW::UpdateResolutionScale() void GPU_HW::UpdateResolutionScale()
{ {
GPU::UpdateResolutionScale(); GPUBackend::UpdateResolutionScale();
if (CalculateResolutionScale() != m_resolution_scale) if (CalculateResolutionScale() != m_resolution_scale)
UpdateSettings(); UpdateSettings();
@ -138,8 +117,7 @@ void GPU_HW::UpdateResolutionScale()
std::tuple<u32, u32> GPU_HW::GetEffectiveDisplayResolution() std::tuple<u32, u32> GPU_HW::GetEffectiveDisplayResolution()
{ {
return std::make_tuple(m_crtc_state.display_vram_width * m_resolution_scale, return std::make_tuple(m_display_vram_width * m_resolution_scale, m_resolution_scale * m_display_vram_height);
m_resolution_scale * m_crtc_state.display_vram_height);
} }
void GPU_HW::PrintSettingsToLog() void GPU_HW::PrintSettingsToLog()
@ -358,334 +336,119 @@ void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1
AddVertex(output[1]); AddVertex(output[1]);
} }
void GPU_HW::LoadVertices() void GPU_HW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
{ {
if (m_GPUSTAT.check_mask_before_draw) SetupDraw(cmd);
if (cmd->params.check_mask_before_draw)
m_current_depth++; m_current_depth++;
const RenderCommand rc{m_render_command.bits}; const GPURenderCommand rc{cmd->rc.bits};
const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg) << 16); const u32 texpage = ZeroExtend32(cmd->draw_mode.bits) | (ZeroExtend32(cmd->palette.bits) << 16);
const float depth = GetCurrentNormalizedVertexDepth(); const float depth = GetCurrentNormalizedVertexDepth();
switch (rc.primitive) DebugAssert(GetBatchVertexSpace() >= (rc.quad_polygon ? 6u : 3u));
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable;
const bool pgxp = g_settings.gpu_pgxp_enable;
std::array<BatchVertex, 4> vertices;
for (u32 i = 0; i < cmd->num_vertices; i++)
{ {
case Primitive::Polygon: const GPUBackendDrawPolygonCommand::Vertex& v = cmd->vertices[i];
{ vertices[i].Set(v.precise_x, v.precise_y, depth, v.precise_w, v.color, texpage, v.texcoord, 0xFFFF0000u);
DebugAssert(GetBatchVertexSpace() >= (rc.quad_polygon ? 6u : 3u));
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable;
const bool pgxp = g_settings.gpu_pgxp_enable;
const u32 num_vertices = rc.quad_polygon ? 4 : 3;
std::array<BatchVertex, 4> vertices;
std::array<std::array<s32, 2>, 4> native_vertex_positions;
bool valid_w = g_settings.gpu_pgxp_texture_correction;
for (u32 i = 0; i < num_vertices; i++)
{
const u32 color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
const u64 maddr_and_pos = m_fifo.Pop();
const VertexPosition vp{Truncate32(maddr_and_pos)};
const u16 texcoord = textured ? Truncate16(FifoPop()) : 0;
const s32 native_x = m_drawing_offset.x + vp.x;
const s32 native_y = m_drawing_offset.y + vp.y;
native_vertex_positions[i][0] = native_x;
native_vertex_positions[i][1] = native_y;
vertices[i].Set(static_cast<float>(native_x), static_cast<float>(native_y), depth, 1.0f, color, texpage,
texcoord, 0xFFFF0000u);
if (pgxp)
{
valid_w &=
PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, native_x, native_y, m_drawing_offset.x,
m_drawing_offset.y, &vertices[i].x, &vertices[i].y, &vertices[i].w);
}
}
if (!valid_w)
{
for (BatchVertex& v : vertices)
v.w = 1.0f;
}
if (rc.quad_polygon && m_resolution_scale > 1)
HandleFlippedQuadTextureCoordinates(vertices.data());
if (m_using_uv_limits && textured)
ComputePolygonUVLimits(vertices.data(), num_vertices);
if (!IsDrawingAreaIsValid())
return;
// Cull polygons which are too large.
const auto [min_x_12, max_x_12] = MinMax(native_vertex_positions[1][0], native_vertex_positions[2][0]);
const auto [min_y_12, max_y_12] = MinMax(native_vertex_positions[1][1], native_vertex_positions[2][1]);
const s32 min_x = std::min(min_x_12, native_vertex_positions[0][0]);
const s32 max_x = std::max(max_x_12, native_vertex_positions[0][0]);
const s32 min_y = std::min(min_y_12, native_vertex_positions[0][1]);
const s32 max_y = std::max(max_y_12, native_vertex_positions[0][1]);
if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large polygon: %d,%d %d,%d %d,%d", native_vertex_positions[0][0],
native_vertex_positions[0][1], native_vertex_positions[1][0], native_vertex_positions[1][1],
native_vertex_positions[2][0], native_vertex_positions[2][1]);
}
else
{
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.right));
const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable, rc.texture_enable,
rc.transparency_enable);
std::memcpy(m_batch_current_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3);
m_batch_current_vertex_ptr += 3;
}
// quads
if (rc.quad_polygon)
{
const s32 min_x_123 = std::min(min_x_12, native_vertex_positions[3][0]);
const s32 max_x_123 = std::max(max_x_12, native_vertex_positions[3][0]);
const s32 min_y_123 = std::min(min_y_12, native_vertex_positions[3][1]);
const s32 max_y_123 = std::max(max_y_12, native_vertex_positions[3][1]);
// Cull polygons which are too large.
if ((max_x_123 - min_x_123) >= MAX_PRIMITIVE_WIDTH || (max_y_123 - min_y_123) >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large polygon (quad second half): %d,%d %d,%d %d,%d",
native_vertex_positions[2][0], native_vertex_positions[2][1], native_vertex_positions[1][0],
native_vertex_positions[1][1], native_vertex_positions[0][0], native_vertex_positions[0][1]);
}
else
{
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x_123, m_drawing_area.left, m_drawing_area.right));
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(max_x_123, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y_123, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(max_y_123, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable, rc.texture_enable,
rc.transparency_enable);
AddVertex(vertices[2]);
AddVertex(vertices[1]);
AddVertex(vertices[3]);
}
}
}
break;
case Primitive::Rectangle:
{
const u32 color = rc.color_for_first_vertex;
const VertexPosition vp{FifoPop()};
const s32 pos_x = TruncateVertexPosition(m_drawing_offset.x + vp.x);
const s32 pos_y = TruncateVertexPosition(m_drawing_offset.y + vp.y);
const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? Truncate16(FifoPop()) : 0);
u16 orig_tex_left = ZeroExtend16(texcoord_x);
u16 orig_tex_top = ZeroExtend16(texcoord_y);
s32 rectangle_width;
s32 rectangle_height;
switch (rc.rectangle_size)
{
case DrawRectangleSize::R1x1:
rectangle_width = 1;
rectangle_height = 1;
break;
case DrawRectangleSize::R8x8:
rectangle_width = 8;
rectangle_height = 8;
break;
case DrawRectangleSize::R16x16:
rectangle_width = 16;
rectangle_height = 16;
break;
default:
{
const u32 width_and_height = FifoPop();
rectangle_width = static_cast<s32>(width_and_height & VRAM_WIDTH_MASK);
rectangle_height = static_cast<s32>((width_and_height >> 16) & VRAM_HEIGHT_MASK);
if (rectangle_width >= MAX_PRIMITIVE_WIDTH || rectangle_height >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large rectangle: %d,%d %dx%d", pos_x, pos_y, rectangle_width,
rectangle_height);
return;
}
}
break;
}
// we can split the rectangle up into potentially 8 quads
DebugAssert(GetBatchVertexSpace() >= MAX_VERTICES_FOR_RECTANGLE);
if (!IsDrawingAreaIsValid())
return;
// Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat.
u16 tex_top = orig_tex_top;
for (s32 y_offset = 0; y_offset < rectangle_height;)
{
const s32 quad_height = std::min<s32>(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top);
const float quad_start_y = static_cast<float>(pos_y + y_offset);
const float quad_end_y = quad_start_y + static_cast<float>(quad_height);
const u16 tex_bottom = tex_top + static_cast<u16>(quad_height);
u16 tex_left = orig_tex_left;
for (s32 x_offset = 0; x_offset < rectangle_width;)
{
const s32 quad_width = std::min<s32>(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left);
const float quad_start_x = static_cast<float>(pos_x + x_offset);
const float quad_end_x = quad_start_x + static_cast<float>(quad_width);
const u16 tex_right = tex_left + static_cast<u16>(quad_width);
const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1);
AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits);
AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom, uv_limits);
x_offset += quad_width;
tex_left = 0;
}
y_offset += quad_height;
tex_top = 0;
}
const u32 clip_left = static_cast<u32>(std::clamp<s32>(pos_x, m_drawing_area.left, m_drawing_area.right));
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(pos_x + rectangle_width, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(pos_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(pos_y + rectangle_height, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.transparency_enable);
}
break;
case Primitive::Line:
{
if (!rc.polyline)
{
DebugAssert(GetBatchVertexSpace() >= 2);
u32 start_color, end_color;
VertexPosition start_pos, end_pos;
if (rc.shading_enable)
{
start_color = rc.color_for_first_vertex;
start_pos.bits = FifoPop();
end_color = FifoPop() & UINT32_C(0x00FFFFFF);
end_pos.bits = FifoPop();
}
else
{
start_color = end_color = rc.color_for_first_vertex;
start_pos.bits = FifoPop();
end_pos.bits = FifoPop();
}
if (!IsDrawingAreaIsValid())
return;
s32 start_x = start_pos.x + m_drawing_offset.x;
s32 start_y = start_pos.y + m_drawing_offset.y;
s32 end_x = end_pos.x + m_drawing_offset.x;
s32 end_y = end_pos.y + m_drawing_offset.y;
const auto [min_x, max_x] = MinMax(start_x, end_x);
const auto [min_y, max_y] = MinMax(start_y, end_y);
if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", start_x, start_y, end_x, end_y);
return;
}
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left));
const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable);
// TODO: Should we do a PGXP lookup here? Most lines are 2D.
DrawLine(static_cast<float>(start_x), static_cast<float>(start_y), start_color, static_cast<float>(end_x),
static_cast<float>(end_y), end_color, depth);
}
else
{
// Multiply by two because we don't use line strips.
const u32 num_vertices = GetPolyLineVertexCount();
DebugAssert(GetBatchVertexSpace() >= (num_vertices * 2));
if (!IsDrawingAreaIsValid())
return;
const bool shaded = rc.shading_enable;
u32 buffer_pos = 0;
const VertexPosition start_vp{m_blit_buffer[buffer_pos++]};
s32 start_x = start_vp.x + m_drawing_offset.x;
s32 start_y = start_vp.y + m_drawing_offset.y;
u32 start_color = rc.color_for_first_vertex;
for (u32 i = 1; i < num_vertices; i++)
{
const u32 end_color = shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : start_color;
const VertexPosition vp{m_blit_buffer[buffer_pos++]};
const s32 end_x = m_drawing_offset.x + vp.x;
const s32 end_y = m_drawing_offset.y + vp.y;
const auto [min_x, max_x] = MinMax(start_x, end_x);
const auto [min_y, max_y] = MinMax(start_y, end_y);
if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT)
{
Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", start_x, start_y, end_x, end_y);
}
else
{
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left));
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom);
AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable);
// TODO: Should we do a PGXP lookup here? Most lines are 2D.
DrawLine(static_cast<float>(start_x), static_cast<float>(start_y), start_color, static_cast<float>(end_x),
static_cast<float>(end_y), end_color, depth);
}
start_x = end_x;
start_y = end_y;
start_color = end_color;
}
}
}
break;
default:
UnreachableCode();
break;
} }
if (rc.quad_polygon && m_resolution_scale > 1)
HandleFlippedQuadTextureCoordinates(vertices.data());
if (m_using_uv_limits && textured)
ComputePolygonUVLimits(vertices.data(), cmd->num_vertices);
std::memcpy(m_batch_current_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3);
m_batch_current_vertex_ptr += 3;
// quads
if (rc.quad_polygon)
{
AddVertex(vertices[2]);
AddVertex(vertices[1]);
AddVertex(vertices[3]);
}
IncludeVRAMDityRectangle(cmd->bounds);
}
// Adds a rectangle draw command to the current batch.
//
// The rectangle is emitted as two triangles (six vertices) per quad. Whenever the texture
// coordinate would run past the edge of the texture page, the rectangle is cut at that point and
// the next quad restarts at texture coordinate zero, so the page repeats across large rectangles.
// SetupDraw() is called first, and the bounds carried by the command are merged into the VRAM
// dirty rectangle once all vertices have been added.
void GPU_HW::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd)
{
  SetupDraw(cmd);

  // Mask-checked draws advance the depth counter before the depth value is sampled below.
  if (cmd->params.check_mask_before_draw)
    m_current_depth++;

  const u32 color = cmd->color;
  const u32 texpage = ZeroExtend32(cmd->draw_mode.bits) | (ZeroExtend32(cmd->palette.bits) << 16);
  const float depth = GetCurrentNormalizedVertexDepth();

  // The starting texture coordinate is unpacked as U from the low byte and V from the bits above it.
  const u16 orig_tex_left = cmd->texcoord & 0xFFu;
  const u16 orig_tex_top = cmd->texcoord >> 8;

  // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat.
  u16 tex_top = orig_tex_top;
  for (u16 y_offset = 0; y_offset < cmd->height;)
  {
    // Rows: stop at whichever comes first, the end of the rectangle or the bottom of the page.
    const u16 quad_height = std::min<u16>(cmd->height - y_offset, TEXTURE_PAGE_HEIGHT - tex_top);
    const float quad_start_y = static_cast<float>(cmd->y + y_offset);
    const float quad_end_y = quad_start_y + static_cast<float>(quad_height);
    const u16 tex_bottom = tex_top + static_cast<u16>(quad_height);

    u16 tex_left = orig_tex_left;
    for (u16 x_offset = 0; x_offset < cmd->width;)
    {
      // Columns: stop at whichever comes first, the end of the rectangle or the right edge of the page.
      const u16 quad_width = std::min<u16>(cmd->width - x_offset, TEXTURE_PAGE_WIDTH - tex_left);
      const float quad_start_x = static_cast<float>(cmd->x + x_offset);
      const float quad_end_x = quad_start_x + static_cast<float>(quad_width);
      const u16 tex_right = tex_left + static_cast<u16>(quad_width);

      // The limits are inclusive, hence the -1 on the right/bottom texel.
      const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1);

      // First triangle: top-left, top-right, bottom-left.
      AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits);
      AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
      AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);

      // Second triangle: bottom-left, top-right, bottom-right.
      AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits);
      AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits);
      AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom, uv_limits);

      // Every quad after the first in a row starts at the left edge of the page.
      x_offset += quad_width;
      tex_left = 0;
    }

    // Every row after the first starts at the top of the page.
    y_offset += quad_height;
    tex_top = 0;
  }

  IncludeVRAMDityRectangle(cmd->bounds);
}
void GPU_HW::DrawLine(const GPUBackendDrawLineCommand* cmd)
{
SetupDraw(cmd);
if (cmd->params.check_mask_before_draw)
m_current_depth++;
const GPURenderCommand rc{cmd->rc.bits};
const float depth = GetCurrentNormalizedVertexDepth();
for (u32 i = 1; i < cmd->num_vertices; i++)
{
const GPUBackendDrawLineCommand::Vertex& start = cmd->vertices[i - 1u];
const GPUBackendDrawLineCommand::Vertex& end = cmd->vertices[i];
DrawLine(static_cast<float>(start.x), static_cast<float>(start.y), start.color, static_cast<float>(end.x),
static_cast<float>(end.y), end.color, depth);
}
IncludeVRAMDityRectangle(cmd->bounds);
} }
void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom) void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
@ -696,7 +459,8 @@ void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom)
*bottom = std::max<u32>((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1); *bottom = std::max<u32>((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1);
} }
GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color,
GPUBackendCommandParameters params) const
{ {
// drop precision unless true colour is enabled // drop precision unless true colour is enabled
if (!m_true_color) if (!m_true_color)
@ -705,7 +469,7 @@ GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32
VRAMFillUBOData uniforms; VRAMFillUBOData uniforms;
std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) = std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) =
RGBA8ToFloat(color); RGBA8ToFloat(color);
uniforms.u_interlaced_displayed_field = GetActiveLineLSB(); uniforms.u_interlaced_displayed_field = params.active_line_lsb;
return uniforms; return uniforms;
} }
@ -725,7 +489,8 @@ Common::Rectangle<u32> GPU_HW::GetVRAMTransferBounds(u32 x, u32 y, u32 width, u3
return out_rc; return out_rc;
} }
GPU_HW::VRAMWriteUBOData GPU_HW::GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset) const GPU_HW::VRAMWriteUBOData GPU_HW::GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset,
GPUBackendCommandParameters params) const
{ {
const VRAMWriteUBOData uniforms = {(x % VRAM_WIDTH), const VRAMWriteUBOData uniforms = {(x % VRAM_WIDTH),
(y % VRAM_HEIGHT), (y % VRAM_HEIGHT),
@ -734,23 +499,24 @@ GPU_HW::VRAMWriteUBOData GPU_HW::GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u3
width, width,
height, height,
buffer_offset, buffer_offset,
m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00, params.set_mask_while_drawing ? 0x8000u : 0x00,
GetCurrentNormalizedVertexDepth()}; GetCurrentNormalizedVertexDepth()};
return uniforms; return uniforms;
} }
bool GPU_HW::UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const bool GPU_HW::UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) const
{ {
// masking enabled, oversized, or overlapping // masking enabled, oversized, or overlapping
return (m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || return (params.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH ||
((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH || ((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH ||
((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT ||
Common::Rectangle<u32>::FromExtents(src_x, src_y, width, height) Common::Rectangle<u32>::FromExtents(src_x, src_y, width, height)
.Intersects(Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height))); .Intersects(Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height)));
} }
GPU_HW::VRAMCopyUBOData GPU_HW::GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, GPU_HW::VRAMCopyUBOData GPU_HW::GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
u32 height) const GPUBackendCommandParameters params) const
{ {
const VRAMCopyUBOData uniforms = {(src_x % VRAM_WIDTH) * m_resolution_scale, const VRAMCopyUBOData uniforms = {(src_x % VRAM_WIDTH) * m_resolution_scale,
(src_y % VRAM_HEIGHT) * m_resolution_scale, (src_y % VRAM_HEIGHT) * m_resolution_scale,
@ -760,7 +526,7 @@ GPU_HW::VRAMCopyUBOData GPU_HW::GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst
((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale, ((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale,
width * m_resolution_scale, width * m_resolution_scale,
height * m_resolution_scale, height * m_resolution_scale,
m_GPUSTAT.set_mask_while_drawing ? 1u : 0u, params.set_mask_while_drawing ? 1u : 0u,
GetCurrentNormalizedVertexDepth()}; GetCurrentNormalizedVertexDepth()};
return uniforms; return uniforms;
@ -770,6 +536,7 @@ void GPU_HW::IncludeVRAMDityRectangle(const Common::Rectangle<u32>& rect)
{ {
m_vram_dirty_rect.Include(rect); m_vram_dirty_rect.Include(rect);
#if FIXME
// the vram area can include the texture page, but the game can leave it as-is. in this case, set it as dirty so the // the vram area can include the texture page, but the game can leave it as-is. in this case, set it as dirty so the
// shadow texture is updated // shadow texture is updated
if (!m_draw_mode.IsTexturePageChanged() && if (!m_draw_mode.IsTexturePageChanged() &&
@ -778,6 +545,13 @@ void GPU_HW::IncludeVRAMDityRectangle(const Common::Rectangle<u32>& rect)
{ {
m_draw_mode.SetTexturePageChanged(); m_draw_mode.SetTexturePageChanged();
} }
#endif
}
void GPU_HW::IncludeVRAMDityRectangle(const Common::Rectangle<u16>& rect)
{
IncludeVRAMDityRectangle(Common::Rectangle<u32>(ZeroExtend32(rect.left), ZeroExtend32(rect.top),
ZeroExtend32(rect.right), ZeroExtend32(rect.bottom)));
} }
void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices) void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices)
@ -793,20 +567,20 @@ void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices)
MapBatchVertexPointer(required_vertices); MapBatchVertexPointer(required_vertices);
} }
void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand() void GPU_HW::EnsureVertexBufferSpace(const GPUBackendDrawCommand* cmd)
{ {
u32 required_vertices; u32 required_vertices;
switch (m_render_command.primitive) switch (cmd->type)
{ {
case Primitive::Polygon: case GPUBackendCommandType::DrawPolygon:
required_vertices = m_render_command.quad_polygon ? 6 : 3; required_vertices = cmd->rc.quad_polygon ? 6 : 3;
break; break;
case Primitive::Rectangle: case GPUBackendCommandType::DrawRectangle:
required_vertices = MAX_VERTICES_FOR_RECTANGLE; required_vertices = MAX_VERTICES_FOR_RECTANGLE;
break; break;
case Primitive::Line: case GPUBackendCommandType::DrawLine:
default: default:
required_vertices = m_render_command.polyline ? (GetPolyLineVertexCount() * 6u) : 6u; required_vertices = static_cast<const GPUBackendDrawLineCommand*>(cmd)->num_vertices * 3u;
break; break;
} }
@ -836,50 +610,55 @@ void GPU_HW::ResetBatchVertexDepth()
m_current_depth = 1; m_current_depth = 1;
} }
void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params)
{ {
IncludeVRAMDityRectangle( IncludeVRAMDityRectangle(
Common::Rectangle<u32>::FromExtents(x, y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT)); Common::Rectangle<u32>::FromExtents(x, y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT));
} }
void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params)
{ {
DebugAssert((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT); DebugAssert((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT);
IncludeVRAMDityRectangle(Common::Rectangle<u32>::FromExtents(x, y, width, height)); IncludeVRAMDityRectangle(Common::Rectangle<u32>::FromExtents(x, y, width, height));
if (m_GPUSTAT.check_mask_before_draw) if (params.check_mask_before_draw)
{ {
// set new vertex counter since we want this to take into consideration previous masked pixels // set new vertex counter since we want this to take into consideration previous masked pixels
m_current_depth++; m_current_depth++;
} }
} }
void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params)
{ {
IncludeVRAMDityRectangle( IncludeVRAMDityRectangle(
Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT)); Common::Rectangle<u32>::FromExtents(dst_x, dst_y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT));
if (m_GPUSTAT.check_mask_before_draw) if (params.check_mask_before_draw)
{ {
// set new vertex counter since we want this to take into consideration previous masked pixels // set new vertex counter since we want this to take into consideration previous masked pixels
m_current_depth++; m_current_depth++;
} }
} }
void GPU_HW::DispatchRenderCommand() void GPU_HW::SetupDraw(const GPUBackendDrawCommand* cmd)
{ {
const RenderCommand rc{m_render_command.bits}; const GPURenderCommand rc{cmd->rc.bits};
TextureMode texture_mode; GPUTextureMode texture_mode;
if (rc.IsTexturingEnabled()) if (rc.IsTexturingEnabled())
{ {
// texture page changed - check that the new page doesn't intersect the drawing area // texture page changed - check that the new page doesn't intersect the drawing area
if (m_draw_mode.IsTexturePageChanged()) if ((cmd->draw_mode.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK) !=
(m_last_texture_page_bits.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK) ||
true)
{ {
m_draw_mode.ClearTexturePageChangedFlag(); m_last_texture_page_bits.bits = cmd->draw_mode.bits;
if (m_vram_dirty_rect.Valid() && if (m_vram_dirty_rect.Valid() &&
(m_draw_mode.GetTexturePageRectangle().Intersects(m_vram_dirty_rect) || (m_last_texture_page_bits.GetTexturePageRectangle().Intersects(m_vram_dirty_rect) ||
(m_draw_mode.IsUsingPalette() && m_draw_mode.GetTexturePaletteRectangle().Intersects(m_vram_dirty_rect)))) (m_last_texture_page_bits.IsUsingPalette() &&
m_last_texture_page_bits.GetTexturePaletteRectangle().Intersects(m_vram_dirty_rect))))
{ {
// Log_DevPrintf("Invalidating VRAM read cache due to drawing area overlap"); // Log_DevPrintf("Invalidating VRAM read cache due to drawing area overlap");
if (!IsFlushed()) if (!IsFlushed())
@ -889,32 +668,32 @@ void GPU_HW::DispatchRenderCommand()
} }
} }
texture_mode = m_draw_mode.GetTextureMode(); texture_mode = cmd->draw_mode.texture_mode;
if (rc.raw_texture_enable) if (rc.raw_texture_enable)
{ {
texture_mode = texture_mode =
static_cast<TextureMode>(static_cast<u8>(texture_mode) | static_cast<u8>(TextureMode::RawTextureBit)); static_cast<GPUTextureMode>(static_cast<u8>(texture_mode) | static_cast<u8>(GPUTextureMode::RawTextureBit));
} }
} }
else else
{ {
texture_mode = TextureMode::Disabled; texture_mode = GPUTextureMode::Disabled;
} }
// has any state changed which requires a new batch? // has any state changed which requires a new batch?
const TransparencyMode transparency_mode = const GPUTransparencyMode transparency_mode =
rc.transparency_enable ? m_draw_mode.GetTransparencyMode() : TransparencyMode::Disabled; rc.transparency_enable ? cmd->draw_mode.transparency_mode : GPUTransparencyMode::Disabled;
const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false; const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? cmd->draw_mode.dither_enable : false;
if (m_batch.texture_mode != texture_mode || m_batch.transparency_mode != transparency_mode || if (m_batch.texture_mode != texture_mode || m_batch.transparency_mode != transparency_mode ||
dithering_enable != m_batch.dithering) dithering_enable != m_batch.dithering)
{ {
FlushRender(); FlushRender();
} }
EnsureVertexBufferSpaceForCurrentCommand(); EnsureVertexBufferSpace(cmd);
// transparency mode change // transparency mode change
if (m_batch.transparency_mode != transparency_mode && transparency_mode != TransparencyMode::Disabled) if (m_batch.transparency_mode != transparency_mode && transparency_mode != GPUTransparencyMode::Disabled)
{ {
static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}}; static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}};
m_batch_ubo_data.u_src_alpha_factor = transparent_alpha[static_cast<u32>(transparency_mode)][0]; m_batch_ubo_data.u_src_alpha_factor = transparent_alpha[static_cast<u32>(transparency_mode)][0];
@ -922,19 +701,19 @@ void GPU_HW::DispatchRenderCommand()
m_batch_ubo_dirty = true; m_batch_ubo_dirty = true;
} }
if (m_batch.check_mask_before_draw != m_GPUSTAT.check_mask_before_draw || if (m_batch.check_mask_before_draw != cmd->params.check_mask_before_draw ||
m_batch.set_mask_while_drawing != m_GPUSTAT.set_mask_while_drawing) m_batch.set_mask_while_drawing != cmd->params.set_mask_while_drawing)
{ {
m_batch.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; m_batch.check_mask_before_draw = cmd->params.check_mask_before_draw;
m_batch.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; m_batch.set_mask_while_drawing = cmd->params.set_mask_while_drawing;
m_batch_ubo_data.u_set_mask_while_drawing = BoolToUInt32(m_batch.set_mask_while_drawing); m_batch_ubo_data.u_set_mask_while_drawing = BoolToUInt32(m_batch.set_mask_while_drawing);
m_batch_ubo_dirty = true; m_batch_ubo_dirty = true;
} }
m_batch.interlacing = IsInterlacedRenderingEnabled(); m_batch.interlacing = cmd->params.interlaced_rendering;
if (m_batch.interlacing) if (m_batch.interlacing)
{ {
const u32 displayed_field = GetActiveLineLSB(); const u32 displayed_field = cmd->params.active_line_lsb;
m_batch_ubo_dirty |= (m_batch_ubo_data.u_interlaced_displayed_field != displayed_field); m_batch_ubo_dirty |= (m_batch_ubo_data.u_interlaced_displayed_field != displayed_field);
m_batch_ubo_data.u_interlaced_displayed_field = displayed_field; m_batch_ubo_data.u_interlaced_displayed_field = displayed_field;
} }
@ -944,18 +723,16 @@ void GPU_HW::DispatchRenderCommand()
m_batch.transparency_mode = transparency_mode; m_batch.transparency_mode = transparency_mode;
m_batch.dithering = dithering_enable; m_batch.dithering = dithering_enable;
if (m_draw_mode.IsTextureWindowChanged()) if (m_last_texture_window_reg.bits != cmd->window.bits)
{ {
m_draw_mode.ClearTextureWindowChangedFlag(); m_last_texture_window_reg.bits = cmd->window.bits;
m_batch_ubo_data.u_texture_window_mask[0] = ZeroExtend32(m_draw_mode.texture_window_mask_x); m_batch_ubo_data.u_texture_window_mask[0] = ZeroExtend32(cmd->window.mask_x.GetValue());
m_batch_ubo_data.u_texture_window_mask[1] = ZeroExtend32(m_draw_mode.texture_window_mask_y); m_batch_ubo_data.u_texture_window_mask[1] = ZeroExtend32(cmd->window.mask_y.GetValue());
m_batch_ubo_data.u_texture_window_offset[0] = ZeroExtend32(m_draw_mode.texture_window_offset_x); m_batch_ubo_data.u_texture_window_offset[0] = ZeroExtend32(cmd->window.offset_x.GetValue());
m_batch_ubo_data.u_texture_window_offset[1] = ZeroExtend32(m_draw_mode.texture_window_offset_y); m_batch_ubo_data.u_texture_window_offset[1] = ZeroExtend32(cmd->window.offset_y.GetValue());
m_batch_ubo_dirty = true; m_batch_ubo_dirty = true;
} }
LoadVertices();
} }
void GPU_HW::FlushRender() void GPU_HW::FlushRender()
@ -1020,8 +797,10 @@ void GPU_HW::DrawRendererStats(bool is_idle_frame)
ImGui::TextUnformatted("Effective Display Resolution:"); ImGui::TextUnformatted("Effective Display Resolution:");
ImGui::NextColumn(); ImGui::NextColumn();
#if FIXME
ImGui::Text("%ux%u", m_crtc_state.display_vram_width * m_resolution_scale, ImGui::Text("%ux%u", m_crtc_state.display_vram_width * m_resolution_scale,
m_crtc_state.display_vram_height * m_resolution_scale); m_crtc_state.display_vram_height * m_resolution_scale);
#endif
ImGui::NextColumn(); ImGui::NextColumn();
ImGui::TextUnformatted("True Color:"); ImGui::TextUnformatted("True Color:");

View File

@ -1,6 +1,6 @@
#pragma once #pragma once
#include "common/heap_array.h" #include "common/heap_array.h"
#include "gpu.h" #include "gpu_backend.h"
#include "host_display.h" #include "host_display.h"
#include <sstream> #include <sstream>
#include <string> #include <string>
@ -8,7 +8,7 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
class GPU_HW : public GPU class GPU_HW : public GPUBackend
{ {
public: public:
enum class BatchRenderMode : u8 enum class BatchRenderMode : u8
@ -19,22 +19,14 @@ public:
OnlyTransparent OnlyTransparent
}; };
enum class InterlacedRenderMode : u8
{
None,
InterleavedFields,
SeparateFields
};
GPU_HW(); GPU_HW();
virtual ~GPU_HW(); virtual ~GPU_HW();
virtual bool IsHardwareRenderer() const override; virtual bool IsHardwareRenderer() const override;
virtual bool Initialize(HostDisplay* host_display) override; virtual bool Initialize() override;
virtual void Reset() override; virtual void Reset() override;
virtual bool DoState(StateWrapper& sw) override;
void UpdateResolutionScale() override final; void UpdateResolutionScale() override final;
std::tuple<u32, u32> GetEffectiveDisplayResolution() override final; std::tuple<u32, u32> GetEffectiveDisplayResolution() override final;
@ -94,8 +86,8 @@ protected:
struct BatchConfig struct BatchConfig
{ {
TextureMode texture_mode; GPUTextureMode texture_mode;
TransparencyMode transparency_mode; GPUTransparencyMode transparency_mode;
bool dithering; bool dithering;
bool interlacing; bool interlacing;
bool set_mask_while_drawing; bool set_mask_while_drawing;
@ -105,15 +97,15 @@ protected:
// on a per-pixel basis, and the opaque pixels shouldn't be blended at all. // on a per-pixel basis, and the opaque pixels shouldn't be blended at all.
bool NeedsTwoPassRendering() const bool NeedsTwoPassRendering() const
{ {
return transparency_mode == GPU::TransparencyMode::BackgroundMinusForeground && return transparency_mode == GPUTransparencyMode::BackgroundMinusForeground &&
texture_mode != TextureMode::Disabled; texture_mode != GPUTextureMode::Disabled;
} }
// Returns the render mode for this batch. // Returns the render mode for this batch.
BatchRenderMode GetRenderMode() const BatchRenderMode GetRenderMode() const
{ {
return transparency_mode == TransparencyMode::Disabled ? BatchRenderMode::TransparencyDisabled : return transparency_mode == GPUTransparencyMode::Disabled ? BatchRenderMode::TransparencyDisabled :
BatchRenderMode::TransparentAndOpaque; BatchRenderMode::TransparentAndOpaque;
} }
}; };
@ -179,7 +171,6 @@ protected:
virtual void UpdateVRAMReadTexture(); virtual void UpdateVRAMReadTexture();
virtual void UpdateDepthBufferFromMaskBit() = 0; virtual void UpdateDepthBufferFromMaskBit() = 0;
virtual void SetScissorFromDrawingArea() = 0;
virtual void MapBatchVertexPointer(u32 required_vertices) = 0; virtual void MapBatchVertexPointer(u32 required_vertices) = 0;
virtual void UnmapBatchVertexPointer(u32 used_vertices) = 0; virtual void UnmapBatchVertexPointer(u32 used_vertices) = 0;
virtual void UploadUniformBuffer(const void* uniforms, u32 uniforms_size) = 0; virtual void UploadUniformBuffer(const void* uniforms, u32 uniforms_size) = 0;
@ -187,12 +178,9 @@ protected:
u32 CalculateResolutionScale() const; u32 CalculateResolutionScale() const;
void SetFullVRAMDirtyRectangle() void SetFullVRAMDirtyRectangle() { m_vram_dirty_rect.Set(0, 0, VRAM_WIDTH, VRAM_HEIGHT); }
{
m_vram_dirty_rect.Set(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
m_draw_mode.SetTexturePageChanged();
}
void ClearVRAMDirtyRectangle() { m_vram_dirty_rect.SetInvalid(); } void ClearVRAMDirtyRectangle() { m_vram_dirty_rect.SetInvalid(); }
void IncludeVRAMDityRectangle(const Common::Rectangle<u16>& rect);
void IncludeVRAMDityRectangle(const Common::Rectangle<u32>& rect); void IncludeVRAMDityRectangle(const Common::Rectangle<u32>& rect);
bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; } bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; }
@ -200,7 +188,7 @@ protected:
u32 GetBatchVertexSpace() const { return static_cast<u32>(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr); } u32 GetBatchVertexSpace() const { return static_cast<u32>(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr); }
u32 GetBatchVertexCount() const { return static_cast<u32>(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr); } u32 GetBatchVertexCount() const { return static_cast<u32>(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr); }
void EnsureVertexBufferSpace(u32 required_vertices); void EnsureVertexBufferSpace(u32 required_vertices);
void EnsureVertexBufferSpaceForCurrentCommand(); void EnsureVertexBufferSpace(const GPUBackendDrawCommand* cmd);
void ResetBatchVertexDepth(); void ResetBatchVertexDepth();
/// Returns the value to be written to the depth buffer for the current operation for mask bit emulation. /// Returns the value to be written to the depth buffer for the current operation for mask bit emulation.
@ -209,43 +197,41 @@ protected:
return 1.0f - (static_cast<float>(m_current_depth) / 65535.0f); return 1.0f - (static_cast<float>(m_current_depth) / 65535.0f);
} }
/// Returns the interlaced mode to use when scanning out/displaying. void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override;
ALWAYS_INLINE InterlacedRenderMode GetInterlacedRenderMode() const void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override;
{ void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
if (IsInterlacedDisplayEnabled()) GPUBackendCommandParameters params) override;
{ void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override;
return m_GPUSTAT.vertical_resolution ? InterlacedRenderMode::InterleavedFields : void DrawLine(const GPUBackendDrawLineCommand* cmd) override;
InterlacedRenderMode::SeparateFields; void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) override;
}
else
{
return InterlacedRenderMode::None;
}
}
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;
void DispatchRenderCommand() override;
void FlushRender() override; void FlushRender() override;
void DrawRendererStats(bool is_idle_frame) override; void DrawRendererStats(bool is_idle_frame) override;
void CalcScissorRect(int* left, int* top, int* right, int* bottom); void CalcScissorRect(int* left, int* top, int* right, int* bottom);
std::tuple<s32, s32> ScaleVRAMCoordinates(s32 x, s32 y) const ALWAYS_INLINE std::tuple<s32, s32> ScaleVRAMCoordinates(s32 x, s32 y) const
{ {
return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale)); return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale));
} }
ALWAYS_INLINE Common::Rectangle<u32> ScaleVRAMRect(const Common::Rectangle<u32>& rect)
{
return rect * m_resolution_scale;
}
/// Computes the area affected by a VRAM transfer, including wrap-around of X. /// Computes the area affected by a VRAM transfer, including wrap-around of X.
Common::Rectangle<u32> GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height) const; Common::Rectangle<u32> GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height) const;
/// Returns true if the VRAM copy shader should be used (oversized copies, masking). /// Returns true if the VRAM copy shader should be used (oversized copies, masking).
bool UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const; bool UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) const;
VRAMFillUBOData GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const; VRAMFillUBOData GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color,
VRAMWriteUBOData GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset) const; GPUBackendCommandParameters params) const;
VRAMCopyUBOData GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const; VRAMWriteUBOData GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset,
GPUBackendCommandParameters params) const;
VRAMCopyUBOData GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) const;
/// Expands a line into two triangles. /// Expands a line into two triangles.
void DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth); void DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth);
@ -257,6 +243,8 @@ protected:
static void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices); static void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices);
static bool AreUVLimitsNeeded(); static bool AreUVLimitsNeeded();
void SetupDraw(const GPUBackendDrawCommand* cmd);
HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow; HeapArray<u16, VRAM_WIDTH * VRAM_HEIGHT> m_vram_shadow;
BatchVertex* m_batch_start_vertex_ptr = nullptr; BatchVertex* m_batch_start_vertex_ptr = nullptr;
@ -280,12 +268,16 @@ protected:
// Bounding box of VRAM area that the GPU has drawn into. // Bounding box of VRAM area that the GPU has drawn into.
Common::Rectangle<u32> m_vram_dirty_rect; Common::Rectangle<u32> m_vram_dirty_rect;
GPUDrawModeReg m_last_texture_page_bits{};
GPUTextureWindowReg m_last_texture_window_reg{};
// Statistics // Statistics
RendererStats m_renderer_stats = {}; RendererStats m_renderer_stats = {};
RendererStats m_last_renderer_stats = {}; RendererStats m_last_renderer_stats = {};
// Changed state // Changed state
bool m_batch_ubo_dirty = true; bool m_batch_ubo_dirty = true;
bool m_drawing_area_changed = false;
private: private:
enum : u32 enum : u32
@ -294,8 +286,6 @@ private:
MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(BatchVertex) MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(BatchVertex)
}; };
void LoadVertices();
ALWAYS_INLINE void AddVertex(const BatchVertex& v) ALWAYS_INLINE void AddVertex(const BatchVertex& v)
{ {
std::memcpy(m_batch_current_vertex_ptr, &v, sizeof(BatchVertex)); std::memcpy(m_batch_current_vertex_ptr, &v, sizeof(BatchVertex));

View File

@ -13,8 +13,8 @@ GPU_HW_D3D11::GPU_HW_D3D11() = default;
GPU_HW_D3D11::~GPU_HW_D3D11() GPU_HW_D3D11::~GPU_HW_D3D11()
{ {
if (m_host_display) if (g_host_interface->GetDisplay())
m_host_display->ClearDisplayTexture(); g_host_interface->GetDisplay()->ClearDisplayTexture();
m_context->ClearState(); m_context->ClearState();
@ -22,8 +22,9 @@ GPU_HW_D3D11::~GPU_HW_D3D11()
DestroyStateObjects(); DestroyStateObjects();
} }
bool GPU_HW_D3D11::Initialize(HostDisplay* host_display) bool GPU_HW_D3D11::Initialize()
{ {
HostDisplay* host_display = g_host_interface->GetDisplay();
if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::D3D11) if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::D3D11)
{ {
Log_ErrorPrintf("Host render API is incompatible"); Log_ErrorPrintf("Host render API is incompatible");
@ -32,7 +33,7 @@ bool GPU_HW_D3D11::Initialize(HostDisplay* host_display)
SetCapabilities(); SetCapabilities();
if (!GPU_HW::Initialize(host_display)) if (!GPU_HW::Initialize())
return false; return false;
m_device = static_cast<ID3D11Device*>(host_display->GetRenderDevice()); m_device = static_cast<ID3D11Device*>(host_display->GetRenderDevice());
@ -92,8 +93,6 @@ void GPU_HW_D3D11::Reset()
void GPU_HW_D3D11::ResetGraphicsAPIState() void GPU_HW_D3D11::ResetGraphicsAPIState()
{ {
GPU_HW::ResetGraphicsAPIState();
m_context->GSSetShader(nullptr, nullptr, 0); m_context->GSSetShader(nullptr, nullptr, 0);
// In D3D11 we can't leave a buffer mapped across a Present() call. // In D3D11 we can't leave a buffer mapped across a Present() call.
@ -126,7 +125,7 @@ void GPU_HW_D3D11::UpdateSettings()
if (framebuffer_changed) if (framebuffer_changed)
{ {
m_host_display->ClearDisplayTexture(); g_host_interface->GetDisplay()->ClearDisplayTexture();
CreateFramebuffer(); CreateFramebuffer();
} }
@ -333,8 +332,7 @@ bool GPU_HW_D3D11::CreateStateObjects()
for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++)
{ {
bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT()); bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT());
if (transparency_mode != static_cast<u8>(TransparencyMode::Disabled) || if (transparency_mode != static_cast<u8>(GPUTransparencyMode::Disabled) || m_texture_filtering != GPUTextureFilter::Nearest)
m_texture_filtering != GPUTextureFilter::Nearest)
{ {
bl_desc.RenderTarget[0].BlendEnable = TRUE; bl_desc.RenderTarget[0].BlendEnable = TRUE;
bl_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE; bl_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE;
@ -342,7 +340,7 @@ bool GPU_HW_D3D11::CreateStateObjects()
bl_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; bl_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE;
bl_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; bl_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO;
bl_desc.RenderTarget[0].BlendOp = bl_desc.RenderTarget[0].BlendOp =
(transparency_mode == static_cast<u8>(TransparencyMode::BackgroundMinusForeground)) ? (transparency_mode == static_cast<u8>(GPUTransparencyMode::BackgroundMinusForeground)) ?
D3D11_BLEND_OP_REV_SUBTRACT : D3D11_BLEND_OP_REV_SUBTRACT :
D3D11_BLEND_OP_ADD; D3D11_BLEND_OP_ADD;
bl_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; bl_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD;
@ -371,8 +369,8 @@ void GPU_HW_D3D11::DestroyStateObjects()
bool GPU_HW_D3D11::CompileShaders() bool GPU_HW_D3D11::CompileShaders()
{ {
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering, GPU_HW_ShaderGen shadergen(g_host_interface->GetDisplay()->GetRenderAPI(), m_resolution_scale, m_true_color,
m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend); m_scaled_dithering, m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend);
Common::Timer compile_time; Common::Timer compile_time;
const int progress_total = 1 + 1 + 2 + (4 * 9 * 2 * 2) + 7 + (2 * 3); const int progress_total = 1 + 1 + 2 + (4 * 9 * 2 * 2) + 7 + (2 * 3);
@ -442,7 +440,7 @@ bool GPU_HW_D3D11::CompileShaders()
for (u8 interlacing = 0; interlacing < 2; interlacing++) for (u8 interlacing = 0; interlacing < 2; interlacing++)
{ {
const std::string ps = shadergen.GenerateBatchFragmentShader( const std::string ps = shadergen.GenerateBatchFragmentShader(
static_cast<BatchRenderMode>(render_mode), static_cast<TextureMode>(texture_mode), static_cast<BatchRenderMode>(render_mode), static_cast<GPUTextureMode>(texture_mode),
ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing));
m_batch_pixel_shaders[render_mode][texture_mode][dithering][interlacing] = m_batch_pixel_shaders[render_mode][texture_mode][dithering][interlacing] =
@ -505,8 +503,8 @@ bool GPU_HW_D3D11::CompileShaders()
{ {
for (u8 interlacing = 0; interlacing < 3; interlacing++) for (u8 interlacing = 0; interlacing < 3; interlacing++)
{ {
const std::string ps = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), const std::string ps = shadergen.GenerateDisplayFragmentShader(
static_cast<InterlacedRenderMode>(interlacing)); ConvertToBoolUnchecked(depth_24bit), static_cast<GPUInterlacedDisplayMode>(interlacing));
m_display_pixel_shaders[depth_24bit][interlacing] = m_shader_cache.GetPixelShader(m_device.Get(), ps); m_display_pixel_shaders[depth_24bit][interlacing] = m_shader_cache.GetPixelShader(m_device.Get(), ps);
if (!m_display_pixel_shaders[depth_24bit][interlacing]) if (!m_display_pixel_shaders[depth_24bit][interlacing])
return false; return false;
@ -608,7 +606,7 @@ void GPU_HW_D3D11::DrawUtilityShader(ID3D11PixelShader* shader, const void* unif
void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices)
{ {
const bool textured = (m_batch.texture_mode != TextureMode::Disabled); const bool textured = (m_batch.texture_mode != GPUTextureMode::Disabled);
m_context->VSSetShader(m_batch_vertex_shaders[BoolToUInt8(textured)].Get(), nullptr, 0); m_context->VSSetShader(m_batch_vertex_shaders[BoolToUInt8(textured)].Get(), nullptr, 0);
@ -617,8 +615,8 @@ void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_verte
.Get(), .Get(),
nullptr, 0); nullptr, 0);
const TransparencyMode transparency_mode = const GPUTransparencyMode transparency_mode =
(render_mode == BatchRenderMode::OnlyOpaque) ? TransparencyMode::Disabled : m_batch.transparency_mode; (render_mode == BatchRenderMode::OnlyOpaque) ? GPUTransparencyMode::Disabled : m_batch.transparency_mode;
m_context->OMSetBlendState(m_batch_blend_states[static_cast<u8>(transparency_mode)].Get(), nullptr, 0xFFFFFFFFu); m_context->OMSetBlendState(m_batch_blend_states[static_cast<u8>(transparency_mode)].Get(), nullptr, 0xFFFFFFFFu);
m_context->OMSetDepthStencilState( m_context->OMSetDepthStencilState(
m_batch.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0); m_batch.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
@ -637,46 +635,44 @@ void GPU_HW_D3D11::SetScissorFromDrawingArea()
void GPU_HW_D3D11::ClearDisplay() void GPU_HW_D3D11::ClearDisplay()
{ {
GPU_HW::ClearDisplay();
static constexpr std::array<float, 4> clear_color = {0.0f, 0.0f, 0.0f, 1.0f}; static constexpr std::array<float, 4> clear_color = {0.0f, 0.0f, 0.0f, 1.0f};
m_context->ClearRenderTargetView(m_display_texture.GetD3DRTV(), clear_color.data()); m_context->ClearRenderTargetView(m_display_texture.GetD3DRTV(), clear_color.data());
} }
void GPU_HW_D3D11::UpdateDisplay() void GPU_HW_D3D11::UpdateDisplay()
{ {
GPU_HW::UpdateDisplay(); HostDisplay* display = g_host_interface->GetDisplay();
if (g_settings.debugging.show_vram) if (g_settings.debugging.show_vram)
{ {
m_host_display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 0, 0,
0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT)); static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
} }
else else
{ {
const u32 vram_offset_x = m_crtc_state.display_vram_left; const u32 vram_offset_x = m_display_vram_left;
const u32 vram_offset_y = m_crtc_state.display_vram_top; const u32 vram_offset_y = m_display_vram_top;
const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale; const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale;
const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale; const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale;
const u32 display_width = m_crtc_state.display_vram_width; const u32 display_width = m_display_vram_width;
const u32 display_height = m_crtc_state.display_vram_height; const u32 display_height = m_display_vram_height;
const u32 scaled_display_width = display_width * m_resolution_scale; const u32 scaled_display_width = display_width * m_resolution_scale;
const u32 scaled_display_height = display_height * m_resolution_scale; const u32 scaled_display_height = display_height * m_resolution_scale;
const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); const GPUInterlacedDisplayMode interlaced = m_display_interlace;
if (IsDisplayDisabled()) if (!m_display_enabled)
{ {
m_host_display->ClearDisplayTexture(); display->ClearDisplayTexture();
} }
else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && else if (!m_display_24bit && interlaced == GPUInterlacedDisplayMode::None &&
(scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() &&
(scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight())
{ {
m_host_display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(), display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(),
m_vram_texture.GetHeight(), scaled_vram_offset_x, scaled_vram_offset_y, scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width,
scaled_display_width, scaled_display_height); scaled_display_height);
} }
else else
{ {
@ -684,28 +680,26 @@ void GPU_HW_D3D11::UpdateDisplay()
m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray()); m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray());
const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0; const u32 reinterpret_field_offset =
const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale; (interlaced != GPUInterlacedDisplayMode::None) ? m_display_interlace_field : 0;
const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * m_resolution_scale; const u32 reinterpret_start_x = m_display_vram_start_x * m_resolution_scale;
const u32 reinterpret_crop_left = (m_display_vram_left - m_display_vram_start_x) * m_resolution_scale;
const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset,
reinterpret_crop_left, reinterpret_field_offset}; reinterpret_crop_left, reinterpret_field_offset};
ID3D11PixelShader* display_pixel_shader = ID3D11PixelShader* display_pixel_shader =
m_display_pixel_shaders[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast<u8>(interlaced)].Get(); m_display_pixel_shaders[BoolToUInt8(m_display_24bit)][static_cast<u8>(interlaced)].Get();
SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height); SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height);
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms)); DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(), display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(),
m_display_texture.GetHeight(), 0, 0, scaled_display_width, m_display_texture.GetHeight(), 0, 0, scaled_display_width, scaled_display_height);
scaled_display_height);
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
} }
m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, display->SetDisplayParameters(m_display_width, m_display_height, m_display_origin_left, m_display_origin_top,
m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, m_display_width, m_display_height, m_display_aspect_ratio);
m_crtc_state.display_vram_width, m_crtc_state.display_vram_height,
m_crtc_state.display_aspect_ratio);
} }
} }
@ -742,50 +736,50 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
} }
void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params)
{ {
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
{ {
// CPU round trip if oversized for now. // CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
GPU::FillVRAM(x, y, width, height, color); SoftwareFillVRAM(x, y, width, height, color, params);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), params);
return; return;
} }
GPU_HW::FillVRAM(x, y, width, height, color); GPU_HW::FillVRAM(x, y, width, height, color, params);
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color, params);
m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0); m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0);
SetViewportAndScissor(x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale, SetViewportAndScissor(x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale,
height * m_resolution_scale); height * m_resolution_scale);
DrawUtilityShader(IsInterlacedRenderingEnabled() ? m_vram_interlaced_fill_pixel_shader.Get() : DrawUtilityShader(params.interlaced_rendering ? m_vram_interlaced_fill_pixel_shader.Get() :
m_vram_fill_pixel_shader.Get(), m_vram_fill_pixel_shader.Get(),
&uniforms, sizeof(uniforms)); &uniforms, sizeof(uniforms));
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
} }
void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params)
{ {
const Common::Rectangle<u32> bounds = GetVRAMTransferBounds(x, y, width, height); const Common::Rectangle<u32> bounds = GetVRAMTransferBounds(x, y, width, height);
GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data); GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data, params);
const u32 num_pixels = width * height; const u32 num_pixels = width * height;
const auto map_result = m_texture_stream_buffer.Map(m_context.Get(), sizeof(u16), num_pixels * sizeof(u16)); const auto map_result = m_texture_stream_buffer.Map(m_context.Get(), sizeof(u16), num_pixels * sizeof(u16));
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16)); std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
m_texture_stream_buffer.Unmap(m_context.Get(), num_pixels * sizeof(u16)); m_texture_stream_buffer.Unmap(m_context.Get(), num_pixels * sizeof(u16));
const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned); const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned, params);
m_context->OMSetDepthStencilState( m_context->OMSetDepthStencilState(
m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0); params.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf()); m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf());
// the viewport should already be set to the full vram, so just adjust the scissor // the viewport should already be set to the full vram, so just adjust the scissor
const Common::Rectangle<u32> scaled_bounds = bounds * m_resolution_scale; const Common::Rectangle<u32> scaled_bounds(ScaleVRAMRect(bounds));
SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), scaled_bounds.GetHeight()); SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), scaled_bounds.GetHeight());
DrawUtilityShader(m_vram_write_pixel_shader.Get(), &uniforms, sizeof(uniforms)); DrawUtilityShader(m_vram_write_pixel_shader.Get(), &uniforms, sizeof(uniforms));
@ -793,9 +787,10 @@ void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* d
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
} }
void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params)
{ {
if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height)) if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height, params))
{ {
const Common::Rectangle<u32> src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); const Common::Rectangle<u32> src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height);
const Common::Rectangle<u32> dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); const Common::Rectangle<u32> dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height);
@ -803,18 +798,18 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
UpdateVRAMReadTexture(); UpdateVRAMReadTexture();
IncludeVRAMDityRectangle(dst_bounds); IncludeVRAMDityRectangle(dst_bounds);
const VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height); const VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height, params);
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale); const Common::Rectangle<u32> dst_bounds_scaled(ScaleVRAMRect(dst_bounds));
SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(), SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(),
dst_bounds_scaled.GetHeight()); dst_bounds_scaled.GetHeight());
m_context->OMSetDepthStencilState( m_context->OMSetDepthStencilState(
m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0); params.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0);
m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray()); m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray());
DrawUtilityShader(m_vram_copy_pixel_shader.Get(), &uniforms, sizeof(uniforms)); DrawUtilityShader(m_vram_copy_pixel_shader.Get(), &uniforms, sizeof(uniforms));
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
if (m_GPUSTAT.check_mask_before_draw) if (params.check_mask_before_draw)
m_current_depth++; m_current_depth++;
return; return;
@ -826,7 +821,7 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
if (m_vram_dirty_rect.Intersects(Common::Rectangle<u32>::FromExtents(src_x, src_y, width, height))) if (m_vram_dirty_rect.Intersects(Common::Rectangle<u32>::FromExtents(src_x, src_y, width, height)))
UpdateVRAMReadTexture(); UpdateVRAMReadTexture();
GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params);
src_x *= m_resolution_scale; src_x *= m_resolution_scale;
src_y *= m_resolution_scale; src_y *= m_resolution_scale;
@ -841,7 +836,7 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt
void GPU_HW_D3D11::UpdateVRAMReadTexture() void GPU_HW_D3D11::UpdateVRAMReadTexture()
{ {
const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; const Common::Rectangle<u32> scaled_rect(ScaleVRAMRect(m_vram_dirty_rect));
const CD3D11_BOX src_box(scaled_rect.left, scaled_rect.top, 0, scaled_rect.right, scaled_rect.bottom, 1); const CD3D11_BOX src_box(scaled_rect.left, scaled_rect.top, 0, scaled_rect.right, scaled_rect.bottom, 1);
m_context->CopySubresourceRegion(m_vram_read_texture, 0, scaled_rect.left, scaled_rect.top, 0, m_vram_texture, 0, m_context->CopySubresourceRegion(m_vram_read_texture, 0, scaled_rect.left, scaled_rect.top, 0, m_vram_texture, 0,
&src_box); &src_box);
@ -864,7 +859,3 @@ void GPU_HW_D3D11::UpdateDepthBufferFromMaskBit()
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
} }
std::unique_ptr<GPU> GPU::CreateHardwareD3D11Renderer()
{
return std::make_unique<GPU_HW_D3D11>();
}

View File

@ -19,7 +19,7 @@ public:
GPU_HW_D3D11(); GPU_HW_D3D11();
~GPU_HW_D3D11() override; ~GPU_HW_D3D11() override;
bool Initialize(HostDisplay* host_display) override; bool Initialize() override;
void Reset() override; void Reset() override;
void ResetGraphicsAPIState() override; void ResetGraphicsAPIState() override;
@ -30,9 +30,9 @@ protected:
void ClearDisplay() override; void ClearDisplay() override;
void UpdateDisplay() override; void UpdateDisplay() override;
void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, GPUBackendCommandParameters params) override;
void UpdateVRAMReadTexture() override; void UpdateVRAMReadTexture() override;
void UpdateDepthBufferFromMaskBit() override; void UpdateDepthBufferFromMaskBit() override;
void SetScissorFromDrawingArea() override; void SetScissorFromDrawingArea() override;

View File

@ -21,9 +21,9 @@ GPU_HW_OpenGL::~GPU_HW_OpenGL()
if (m_texture_buffer_r16ui_texture != 0) if (m_texture_buffer_r16ui_texture != 0)
glDeleteTextures(1, &m_texture_buffer_r16ui_texture); glDeleteTextures(1, &m_texture_buffer_r16ui_texture);
if (m_host_display) if (g_host_interface->GetDisplay())
{ {
m_host_display->ClearDisplayTexture(); g_host_interface->GetDisplay()->ClearDisplayTexture();
ResetGraphicsAPIState(); ResetGraphicsAPIState();
} }
@ -32,8 +32,9 @@ GPU_HW_OpenGL::~GPU_HW_OpenGL()
glUseProgram(0); glUseProgram(0);
} }
bool GPU_HW_OpenGL::Initialize(HostDisplay* host_display) bool GPU_HW_OpenGL::Initialize()
{ {
HostDisplay* host_display = g_host_interface->GetDisplay();
if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::OpenGL && if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::OpenGL &&
host_display->GetRenderAPI() != HostDisplay::RenderAPI::OpenGLES) host_display->GetRenderAPI() != HostDisplay::RenderAPI::OpenGLES)
{ {
@ -45,7 +46,7 @@ bool GPU_HW_OpenGL::Initialize(HostDisplay* host_display)
m_shader_cache.Open(IsGLES(), g_host_interface->GetShaderCacheBasePath()); m_shader_cache.Open(IsGLES(), g_host_interface->GetShaderCacheBasePath());
if (!GPU_HW::Initialize(host_display)) if (!GPU_HW::Initialize())
return false; return false;
if (!CreateFramebuffer()) if (!CreateFramebuffer())
@ -130,7 +131,7 @@ void GPU_HW_OpenGL::UpdateSettings()
if (framebuffer_changed) if (framebuffer_changed)
{ {
m_host_display->ClearDisplayTexture(); g_host_interface->GetDisplay()->ClearDisplayTexture();
CreateFramebuffer(); CreateFramebuffer();
} }
if (shaders_changed) if (shaders_changed)
@ -358,8 +359,8 @@ bool GPU_HW_OpenGL::CreateTextureBuffer()
bool GPU_HW_OpenGL::CompilePrograms() bool GPU_HW_OpenGL::CompilePrograms()
{ {
const bool use_binding_layout = GPU_HW_ShaderGen::UseGLSLBindingLayout(); const bool use_binding_layout = GPU_HW_ShaderGen::UseGLSLBindingLayout();
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering, GPU_HW_ShaderGen shadergen(g_host_interface->GetDisplay()->GetRenderAPI(), m_resolution_scale, m_true_color,
m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend); m_scaled_dithering, m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend);
Common::Timer compile_time; Common::Timer compile_time;
const int progress_total = (4 * 9 * 2 * 2) + (2 * 3) + 5; const int progress_total = (4 * 9 * 2 * 2) + (2 * 3) + 5;
@ -383,10 +384,10 @@ bool GPU_HW_OpenGL::CompilePrograms()
{ {
for (u8 interlacing = 0; interlacing < 2; interlacing++) for (u8 interlacing = 0; interlacing < 2; interlacing++)
{ {
const bool textured = (static_cast<TextureMode>(texture_mode) != TextureMode::Disabled); const bool textured = (static_cast<GPUTextureMode>(texture_mode) != GPUTextureMode::Disabled);
const std::string batch_vs = shadergen.GenerateBatchVertexShader(textured); const std::string batch_vs = shadergen.GenerateBatchVertexShader(textured);
const std::string fs = shadergen.GenerateBatchFragmentShader( const std::string fs = shadergen.GenerateBatchFragmentShader(
static_cast<BatchRenderMode>(render_mode), static_cast<TextureMode>(texture_mode), static_cast<BatchRenderMode>(render_mode), static_cast<GPUTextureMode>(texture_mode),
ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing));
const auto link_callback = [this, textured, use_binding_layout](GL::Program& prog) { const auto link_callback = [this, textured, use_binding_layout](GL::Program& prog) {
@ -444,7 +445,7 @@ bool GPU_HW_OpenGL::CompilePrograms()
{ {
const std::string vs = shadergen.GenerateScreenQuadVertexShader(); const std::string vs = shadergen.GenerateScreenQuadVertexShader();
const std::string fs = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), const std::string fs = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit),
static_cast<InterlacedRenderMode>(interlaced)); static_cast<GPUInterlacedDisplayMode>(interlaced));
std::optional<GL::Program> prog = std::optional<GL::Program> prog =
m_shader_cache.GetProgram(vs, {}, fs, [this, use_binding_layout](GL::Program& prog) { m_shader_cache.GetProgram(vs, {}, fs, [this, use_binding_layout](GL::Program& prog) {
@ -558,23 +559,24 @@ void GPU_HW_OpenGL::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vert
[BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]; [BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)];
prog.Bind(); prog.Bind();
if (m_batch.texture_mode != TextureMode::Disabled) if (m_batch.texture_mode != GPUTextureMode::Disabled)
m_vram_read_texture.Bind(); m_vram_read_texture.Bind();
if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == BatchRenderMode::OnlyOpaque) if (m_batch.transparency_mode == GPUTransparencyMode::Disabled || render_mode == BatchRenderMode::OnlyOpaque)
{ {
glDisable(GL_BLEND); glDisable(GL_BLEND);
} }
else else
{ {
glEnable(GL_BLEND); glEnable(GL_BLEND);
glBlendEquationSeparate( glBlendEquationSeparate(m_batch.transparency_mode == GPUTransparencyMode::BackgroundMinusForeground ?
m_batch.transparency_mode == TransparencyMode::BackgroundMinusForeground ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD, GL_FUNC_REVERSE_SUBTRACT :
GL_FUNC_ADD); GL_FUNC_ADD,
GL_FUNC_ADD);
glBlendFuncSeparate(GL_ONE, m_supports_dual_source_blend ? GL_SRC1_ALPHA : GL_SRC_ALPHA, GL_ONE, GL_ZERO); glBlendFuncSeparate(GL_ONE, m_supports_dual_source_blend ? GL_SRC1_ALPHA : GL_SRC_ALPHA, GL_ONE, GL_ZERO);
} }
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS); glDepthFunc(m_batch.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
glDrawArrays(GL_TRIANGLES, m_batch_base_vertex, num_vertices); glDrawArrays(GL_TRIANGLES, m_batch_base_vertex, num_vertices);
} }
@ -606,8 +608,6 @@ void GPU_HW_OpenGL::UploadUniformBuffer(const void* data, u32 data_size)
void GPU_HW_OpenGL::ClearDisplay() void GPU_HW_OpenGL::ClearDisplay()
{ {
GPU_HW::ClearDisplay();
m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER); m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
glDisable(GL_SCISSOR_TEST); glDisable(GL_SCISSOR_TEST);
glClearColor(0.0f, 0.0f, 0.0f, 1.0f); glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
@ -618,41 +618,40 @@ void GPU_HW_OpenGL::ClearDisplay()
void GPU_HW_OpenGL::UpdateDisplay() void GPU_HW_OpenGL::UpdateDisplay()
{ {
GPU_HW::UpdateDisplay(); HostDisplay* display = g_host_interface->GetDisplay();
if (g_settings.debugging.show_vram) if (g_settings.debugging.show_vram)
{ {
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_texture.GetGLId())), display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_texture.GetGLId())),
m_vram_texture.GetWidth(), static_cast<s32>(m_vram_texture.GetHeight()), 0, m_vram_texture.GetWidth(), static_cast<s32>(m_vram_texture.GetHeight()), 0,
m_vram_texture.GetHeight(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), m_vram_texture.GetWidth(),
-static_cast<s32>(m_vram_texture.GetHeight())); -static_cast<s32>(m_vram_texture.GetHeight()));
m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT)); static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
} }
else else
{ {
const u32 vram_offset_x = m_crtc_state.display_vram_left; const u32 vram_offset_x = m_display_vram_left;
const u32 vram_offset_y = m_crtc_state.display_vram_top; const u32 vram_offset_y = m_display_vram_top;
const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale; const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale;
const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale; const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale;
const u32 display_width = m_crtc_state.display_vram_width; const u32 display_width = m_display_vram_width;
const u32 display_height = m_crtc_state.display_vram_height; const u32 display_height = m_display_vram_height;
const u32 scaled_display_width = display_width * m_resolution_scale; const u32 scaled_display_width = m_display_width * m_resolution_scale;
const u32 scaled_display_height = display_height * m_resolution_scale; const u32 scaled_display_height = m_display_height * m_resolution_scale;
const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); const GPUInterlacedDisplayMode interlaced = m_display_interlace;
if (IsDisplayDisabled()) if (!m_display_enabled)
{ {
m_host_display->ClearDisplayTexture(); display->ClearDisplayTexture();
} }
else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == GPU_HW::InterlacedRenderMode::None && else if (!m_display_24bit && interlaced == GPUInterlacedDisplayMode::None &&
(scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() &&
(scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight())
{ {
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_texture.GetGLId())), display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_vram_texture.GetGLId())),
m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), scaled_vram_offset_x, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), scaled_vram_offset_x,
m_vram_texture.GetHeight() - scaled_vram_offset_y, scaled_display_width, m_vram_texture.GetHeight() - scaled_vram_offset_y, scaled_display_width,
-static_cast<s32>(scaled_display_height)); -static_cast<s32>(scaled_display_height));
} }
else else
{ {
@ -660,16 +659,17 @@ void GPU_HW_OpenGL::UpdateDisplay()
glDisable(GL_SCISSOR_TEST); glDisable(GL_SCISSOR_TEST);
glDisable(GL_DEPTH_TEST); glDisable(GL_DEPTH_TEST);
m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast<u8>(interlaced)].Bind(); m_display_programs[BoolToUInt8(m_display_24bit)][static_cast<u8>(interlaced)].Bind();
m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER); m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
m_vram_texture.Bind(); m_vram_texture.Bind();
const u8 height_div2 = BoolToUInt8(interlaced == GPU_HW::InterlacedRenderMode::SeparateFields); const u8 height_div2 = BoolToUInt8(interlaced == GPUInterlacedDisplayMode::SeparateFields);
const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0; const u32 reinterpret_field_offset =
(interlaced != GPUInterlacedDisplayMode::None) ? m_display_interlace_field : 0;
const u32 scaled_flipped_vram_offset_y = m_vram_texture.GetHeight() - scaled_vram_offset_y - const u32 scaled_flipped_vram_offset_y = m_vram_texture.GetHeight() - scaled_vram_offset_y -
reinterpret_field_offset - (scaled_display_height >> height_div2); reinterpret_field_offset - (scaled_display_height >> height_div2);
const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale; const u32 reinterpret_start_x = m_display_vram_start_x * m_resolution_scale;
const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * m_resolution_scale; const u32 reinterpret_crop_left = (m_display_vram_left - m_display_vram_start_x) * m_resolution_scale;
const u32 uniforms[4] = {reinterpret_start_x, scaled_flipped_vram_offset_y, reinterpret_crop_left, const u32 uniforms[4] = {reinterpret_start_x, scaled_flipped_vram_offset_y, reinterpret_crop_left,
reinterpret_field_offset}; reinterpret_field_offset};
UploadUniformBuffer(uniforms, sizeof(uniforms)); UploadUniformBuffer(uniforms, sizeof(uniforms));
@ -679,10 +679,9 @@ void GPU_HW_OpenGL::UpdateDisplay()
glBindVertexArray(m_attributeless_vao_id); glBindVertexArray(m_attributeless_vao_id);
glDrawArrays(GL_TRIANGLES, 0, 3); glDrawArrays(GL_TRIANGLES, 0, 3);
m_host_display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())), display->SetDisplayTexture(reinterpret_cast<void*>(static_cast<uintptr_t>(m_display_texture.GetGLId())),
m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0, m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0, scaled_display_height,
scaled_display_height, scaled_display_width, scaled_display_width, -static_cast<s32>(scaled_display_height));
-static_cast<s32>(scaled_display_height));
// restore state // restore state
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id);
@ -692,10 +691,8 @@ void GPU_HW_OpenGL::UpdateDisplay()
glEnable(GL_SCISSOR_TEST); glEnable(GL_SCISSOR_TEST);
} }
m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, display->SetDisplayParameters(m_display_width, m_display_height, m_display_origin_left, m_display_origin_top,
m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, m_display_vram_width, m_display_vram_height, m_display_aspect_ratio);
m_crtc_state.display_vram_width, m_crtc_state.display_vram_height,
m_crtc_state.display_aspect_ratio);
} }
} }
@ -730,19 +727,19 @@ void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
} }
void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params)
{ {
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
{ {
// CPU round trip if oversized for now. // CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
GPU::FillVRAM(x, y, width, height, color); SoftwareFillVRAM(x, y, width, height, color, params);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), params);
return; return;
} }
GPU_HW::FillVRAM(x, y, width, height, color); GPU_HW::FillVRAM(x, y, width, height, color, params);
// scale coordinates // scale coordinates
x *= m_resolution_scale; x *= m_resolution_scale;
@ -753,7 +750,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
glScissor(x, m_vram_texture.GetHeight() - y - height, width, height); glScissor(x, m_vram_texture.GetHeight() - y - height, width, height);
// fast path when not using interlaced rendering // fast path when not using interlaced rendering
if (!IsInterlacedRenderingEnabled()) if (!params.interlaced_rendering)
{ {
const auto [r, g, b, a] = RGBA8ToFloat(m_true_color ? color : RGBA5551ToRGBA8888(RGBA8888ToRGBA5551(color))); const auto [r, g, b, a] = RGBA8ToFloat(m_true_color ? color : RGBA5551ToRGBA8888(RGBA8888ToRGBA5551(color)));
glClearColor(r, g, b, a); glClearColor(r, g, b, a);
@ -763,7 +760,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
} }
else else
{ {
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color, params);
m_vram_interlaced_fill_program.Bind(); m_vram_interlaced_fill_program.Bind();
UploadUniformBuffer(&uniforms, sizeof(uniforms)); UploadUniformBuffer(&uniforms, sizeof(uniforms));
@ -776,13 +773,14 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
} }
} }
void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data,
GPUBackendCommandParameters params)
{ {
const u32 num_pixels = width * height; const u32 num_pixels = width * height;
if (num_pixels < m_max_texture_buffer_size || m_use_ssbo_for_vram_writes) if (num_pixels < m_max_texture_buffer_size || m_use_ssbo_for_vram_writes)
{ {
const Common::Rectangle<u32> bounds = GetVRAMTransferBounds(x, y, width, height); const Common::Rectangle<u32> bounds = GetVRAMTransferBounds(x, y, width, height);
GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data); GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data, params);
const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16)); const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16));
std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16)); std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16));
@ -790,7 +788,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
m_texture_stream_buffer->Unbind(); m_texture_stream_buffer->Unbind();
glDisable(GL_BLEND); glDisable(GL_BLEND);
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS); glDepthFunc(params.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
m_vram_write_program.Bind(); m_vram_write_program.Bind();
if (m_use_ssbo_for_vram_writes) if (m_use_ssbo_for_vram_writes)
@ -798,11 +796,11 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
else else
glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture); glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture);
const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned); const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned, params);
UploadUniformBuffer(&uniforms, sizeof(uniforms)); UploadUniformBuffer(&uniforms, sizeof(uniforms));
// the viewport should already be set to the full vram, so just adjust the scissor // the viewport should already be set to the full vram, so just adjust the scissor
const Common::Rectangle<u32> scaled_bounds = bounds * m_resolution_scale; const Common::Rectangle<u32> scaled_bounds(ScaleVRAMRect(bounds));
glScissor(scaled_bounds.left, m_vram_texture.GetHeight() - scaled_bounds.top - scaled_bounds.GetHeight(), glScissor(scaled_bounds.left, m_vram_texture.GetHeight() - scaled_bounds.top - scaled_bounds.GetHeight(),
scaled_bounds.GetWidth(), scaled_bounds.GetHeight()); scaled_bounds.GetWidth(), scaled_bounds.GetHeight());
@ -818,12 +816,12 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
// CPU round trip if oversized for now. // CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
GPU::UpdateVRAM(x, y, width, height, data); SoftwareUpdateVRAM(x, y, width, height, data, params);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), params);
return; return;
} }
GPU_HW::UpdateVRAM(x, y, width, height, data); GPU_HW::UpdateVRAM(x, y, width, height, data, params);
const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32)); const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32));
@ -881,9 +879,10 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
} }
} }
void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params)
{ {
if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height)) if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height, params))
{ {
const Common::Rectangle<u32> src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); const Common::Rectangle<u32> src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height);
const Common::Rectangle<u32> dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); const Common::Rectangle<u32> dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height);
@ -891,14 +890,14 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
UpdateVRAMReadTexture(); UpdateVRAMReadTexture();
IncludeVRAMDityRectangle(dst_bounds); IncludeVRAMDityRectangle(dst_bounds);
VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height); VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height, params);
uniforms.u_src_y = m_vram_texture.GetHeight() - uniforms.u_src_y - uniforms.u_height; uniforms.u_src_y = m_vram_texture.GetHeight() - uniforms.u_src_y - uniforms.u_height;
uniforms.u_dst_y = m_vram_texture.GetHeight() - uniforms.u_dst_y - uniforms.u_height; uniforms.u_dst_y = m_vram_texture.GetHeight() - uniforms.u_dst_y - uniforms.u_height;
UploadUniformBuffer(&uniforms, sizeof(uniforms)); UploadUniformBuffer(&uniforms, sizeof(uniforms));
glDisable(GL_SCISSOR_TEST); glDisable(GL_SCISSOR_TEST);
glDisable(GL_BLEND); glDisable(GL_BLEND);
glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS); glDepthFunc(params.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS);
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale); const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale);
glViewport(dst_bounds_scaled.left, glViewport(dst_bounds_scaled.left,
@ -910,13 +909,13 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
if (m_GPUSTAT.check_mask_before_draw) if (params.check_mask_before_draw)
m_current_depth++; m_current_depth++;
return; return;
} }
GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params);
src_x *= m_resolution_scale; src_x *= m_resolution_scale;
src_y *= m_resolution_scale; src_y *= m_resolution_scale;
@ -951,7 +950,7 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
void GPU_HW_OpenGL::UpdateVRAMReadTexture() void GPU_HW_OpenGL::UpdateVRAMReadTexture()
{ {
const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; const Common::Rectangle<u32> scaled_rect = ScaleVRAMRect(m_vram_dirty_rect);
const u32 width = scaled_rect.GetWidth(); const u32 width = scaled_rect.GetWidth();
const u32 height = scaled_rect.GetHeight(); const u32 height = scaled_rect.GetHeight();
const u32 x = scaled_rect.left; const u32 x = scaled_rect.left;
@ -996,8 +995,3 @@ void GPU_HW_OpenGL::UpdateDepthBufferFromMaskBit()
glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
glEnable(GL_SCISSOR_TEST); glEnable(GL_SCISSOR_TEST);
} }
std::unique_ptr<GPU> GPU::CreateHardwareOpenGLRenderer()
{
return std::make_unique<GPU_HW_OpenGL>();
}

View File

@ -15,7 +15,7 @@ public:
GPU_HW_OpenGL(); GPU_HW_OpenGL();
~GPU_HW_OpenGL() override; ~GPU_HW_OpenGL() override;
bool Initialize(HostDisplay* host_display) override; bool Initialize() override;
void Reset() override; void Reset() override;
void ResetGraphicsAPIState() override; void ResetGraphicsAPIState() override;
@ -26,9 +26,10 @@ protected:
void ClearDisplay() override; void ClearDisplay() override;
void UpdateDisplay() override; void UpdateDisplay() override;
void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) override;
void UpdateVRAMReadTexture() override; void UpdateVRAMReadTexture() override;
void UpdateDepthBufferFromMaskBit() override; void UpdateDepthBufferFromMaskBit() override;
void SetScissorFromDrawingArea() override; void SetScissorFromDrawingArea() override;

View File

@ -17,7 +17,7 @@ GPU_HW_ShaderGen::~GPU_HW_ShaderGen() = default;
void GPU_HW_ShaderGen::WriteCommonFunctions(std::stringstream& ss) void GPU_HW_ShaderGen::WriteCommonFunctions(std::stringstream& ss)
{ {
ss << "CONSTANT uint RESOLUTION_SCALE = " << m_resolution_scale << "u;\n"; ss << "CONSTANT uint RESOLUTION_SCALE = " << m_resolution_scale << "u;\n";
ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << GPU::VRAM_WIDTH << ", " << GPU::VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n"; ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n";
ss << "CONSTANT float2 RCP_VRAM_SIZE = float2(1.0, 1.0) / float2(VRAM_SIZE);\n"; ss << "CONSTANT float2 RCP_VRAM_SIZE = float2(1.0, 1.0) / float2(VRAM_SIZE);\n";
ss << R"( ss << R"(
@ -628,12 +628,11 @@ void FilteredSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits,
} }
std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency,
GPU::TextureMode texture_mode, bool dithering, GPUTextureMode texture_mode, bool dithering, bool interlacing)
bool interlacing)
{ {
const GPU::TextureMode actual_texture_mode = texture_mode & ~GPU::TextureMode::RawTextureBit; const GPUTextureMode actual_texture_mode = texture_mode & ~GPUTextureMode::RawTextureBit;
const bool raw_texture = (texture_mode & GPU::TextureMode::RawTextureBit) == GPU::TextureMode::RawTextureBit; const bool raw_texture = (texture_mode & GPUTextureMode::RawTextureBit) == GPUTextureMode::RawTextureBit;
const bool textured = (texture_mode != GPU::TextureMode::Disabled); const bool textured = (texture_mode != GPUTextureMode::Disabled);
const bool use_dual_source = const bool use_dual_source =
m_supports_dual_source_blend && ((transparency != GPU_HW::BatchRenderMode::TransparencyDisabled && m_supports_dual_source_blend && ((transparency != GPU_HW::BatchRenderMode::TransparencyDisabled &&
transparency != GPU_HW::BatchRenderMode::OnlyOpaque) || transparency != GPU_HW::BatchRenderMode::OnlyOpaque) ||
@ -646,10 +645,9 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENT", transparency == GPU_HW::BatchRenderMode::OnlyTransparent); DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENT", transparency == GPU_HW::BatchRenderMode::OnlyTransparent);
DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "TEXTURED", textured);
DefineMacro(ss, "PALETTE", DefineMacro(ss, "PALETTE",
actual_texture_mode == GPU::TextureMode::Palette4Bit || actual_texture_mode == GPUTextureMode::Palette4Bit || actual_texture_mode == GPUTextureMode::Palette8Bit);
actual_texture_mode == GPU::TextureMode::Palette8Bit); DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPUTextureMode::Palette4Bit);
DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPU::TextureMode::Palette4Bit); DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPUTextureMode::Palette8Bit);
DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPU::TextureMode::Palette8Bit);
DefineMacro(ss, "RAW_TEXTURE", raw_texture); DefineMacro(ss, "RAW_TEXTURE", raw_texture);
DefineMacro(ss, "DITHERING", dithering); DefineMacro(ss, "DITHERING", dithering);
DefineMacro(ss, "DITHERING_SCALED", m_scaled_dithering); DefineMacro(ss, "DITHERING_SCALED", m_scaled_dithering);
@ -671,7 +669,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod
{ {
if (i > 0) if (i > 0)
ss << ", "; ss << ", ";
ss << GPU::DITHER_MATRIX[i / 4][i % 4]; ss << DITHER_MATRIX[i / 4][i % 4];
} }
if (m_glsl) if (m_glsl)
ss << " );\n"; ss << " );\n";
@ -967,14 +965,13 @@ std::string GPU_HW_ShaderGen::GenerateInterlacedFillFragmentShader()
return ss.str(); return ss.str();
} }
std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, GPUInterlacedDisplayMode interlace_mode)
GPU_HW::InterlacedRenderMode interlace_mode)
{ {
std::stringstream ss; std::stringstream ss;
WriteHeader(ss); WriteHeader(ss);
DefineMacro(ss, "DEPTH_24BIT", depth_24bit); DefineMacro(ss, "DEPTH_24BIT", depth_24bit);
DefineMacro(ss, "INTERLACED", interlace_mode != GPU_HW::InterlacedRenderMode::None); DefineMacro(ss, "INTERLACED", interlace_mode != GPUInterlacedDisplayMode::None);
DefineMacro(ss, "INTERLEAVED", interlace_mode == GPU_HW::InterlacedRenderMode::InterleavedFields); DefineMacro(ss, "INTERLEAVED", interlace_mode == GPUInterlacedDisplayMode::InterleavedFields);
WriteCommonFunctions(ss); WriteCommonFunctions(ss);
DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_crop_left", "uint u_field_offset"}, true); DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_crop_left", "uint u_field_offset"}, true);

View File

@ -10,10 +10,10 @@ public:
~GPU_HW_ShaderGen(); ~GPU_HW_ShaderGen();
std::string GenerateBatchVertexShader(bool textured); std::string GenerateBatchVertexShader(bool textured);
std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPU::TextureMode texture_mode, std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPUTextureMode texture_mode,
bool dithering, bool interlacing); bool dithering, bool interlacing);
std::string GenerateInterlacedFillFragmentShader(); std::string GenerateInterlacedFillFragmentShader();
std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode); std::string GenerateDisplayFragmentShader(bool depth_24bit, GPUInterlacedDisplayMode interlace_mode);
std::string GenerateVRAMReadFragmentShader(); std::string GenerateVRAMReadFragmentShader();
std::string GenerateVRAMWriteFragmentShader(bool use_ssbo); std::string GenerateVRAMWriteFragmentShader(bool use_ssbo);
std::string GenerateVRAMCopyFragmentShader(); std::string GenerateVRAMCopyFragmentShader();

View File

@ -17,17 +17,18 @@ GPU_HW_Vulkan::GPU_HW_Vulkan() = default;
GPU_HW_Vulkan::~GPU_HW_Vulkan() GPU_HW_Vulkan::~GPU_HW_Vulkan()
{ {
if (m_host_display) if (g_host_interface->GetDisplay())
{ {
m_host_display->ClearDisplayTexture(); g_host_interface->GetDisplay()->ClearDisplayTexture();
ResetGraphicsAPIState(); ResetGraphicsAPIState();
} }
DestroyResources(); DestroyResources();
} }
bool GPU_HW_Vulkan::Initialize(HostDisplay* host_display) bool GPU_HW_Vulkan::Initialize()
{ {
HostDisplay* host_display = g_host_interface->GetDisplay();
if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::Vulkan) if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::Vulkan)
{ {
Log_ErrorPrintf("Host render API is incompatible"); Log_ErrorPrintf("Host render API is incompatible");
@ -37,7 +38,7 @@ bool GPU_HW_Vulkan::Initialize(HostDisplay* host_display)
Assert(g_vulkan_shader_cache); Assert(g_vulkan_shader_cache);
SetCapabilities(); SetCapabilities();
if (!GPU_HW::Initialize(host_display)) if (!GPU_HW::Initialize())
return false; return false;
if (!CreatePipelineLayouts()) if (!CreatePipelineLayouts())
@ -131,7 +132,7 @@ void GPU_HW_Vulkan::UpdateSettings()
if (shaders_changed) if (shaders_changed)
{ {
// clear it since we draw a loading screen and it's not in the correct state // clear it since we draw a loading screen and it's not in the correct state
m_host_display->ClearDisplayTexture(); g_host_interface->GetDisplay()->ClearDisplayTexture();
DestroyPipelines(); DestroyPipelines();
CompilePipelines(); CompilePipelines();
} }
@ -583,8 +584,8 @@ bool GPU_HW_Vulkan::CompilePipelines()
VkDevice device = g_vulkan_context->GetDevice(); VkDevice device = g_vulkan_context->GetDevice();
VkPipelineCache pipeline_cache = g_vulkan_shader_cache->GetPipelineCache(); VkPipelineCache pipeline_cache = g_vulkan_shader_cache->GetPipelineCache();
GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering, GPU_HW_ShaderGen shadergen(g_host_interface->GetDisplay()->GetRenderAPI(), m_resolution_scale, m_true_color,
m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend); m_scaled_dithering, m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend);
Common::Timer compile_time; Common::Timer compile_time;
const int progress_total = 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + 2 + 2 + 2 + (2 * 3); const int progress_total = 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + 2 + 2 + 2 + (2 * 3);
@ -629,7 +630,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
for (u8 interlacing = 0; interlacing < 2; interlacing++) for (u8 interlacing = 0; interlacing < 2; interlacing++)
{ {
const std::string fs = shadergen.GenerateBatchFragmentShader( const std::string fs = shadergen.GenerateBatchFragmentShader(
static_cast<BatchRenderMode>(render_mode), static_cast<TextureMode>(texture_mode), static_cast<BatchRenderMode>(render_mode), static_cast<GPUTextureMode>(texture_mode),
ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing));
VkShaderModule shader = g_vulkan_shader_cache->GetFragmentShader(fs); VkShaderModule shader = g_vulkan_shader_cache->GetFragmentShader(fs);
@ -658,7 +659,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
{ {
for (u8 interlacing = 0; interlacing < 2; interlacing++) for (u8 interlacing = 0; interlacing < 2; interlacing++)
{ {
const bool textured = (static_cast<TextureMode>(texture_mode) != TextureMode::Disabled); const bool textured = (static_cast<GPUTextureMode>(texture_mode) != GPUTextureMode::Disabled);
gpbuilder.SetPipelineLayout(m_batch_pipeline_layout); gpbuilder.SetPipelineLayout(m_batch_pipeline_layout);
gpbuilder.SetRenderPass(m_vram_render_pass, 0); gpbuilder.SetRenderPass(m_vram_render_pass, 0);
@ -683,7 +684,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
(depth_test != 0) ? VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS); (depth_test != 0) ? VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS);
gpbuilder.SetNoBlendingState(); gpbuilder.SetNoBlendingState();
if ((static_cast<TransparencyMode>(transparency_mode) != TransparencyMode::Disabled && if ((static_cast<GPUTransparencyMode>(transparency_mode) != GPUTransparencyMode::Disabled &&
(static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled && (static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled &&
static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque)) || static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque)) ||
m_texture_filtering != GPUTextureFilter::Nearest) m_texture_filtering != GPUTextureFilter::Nearest)
@ -691,7 +692,8 @@ bool GPU_HW_Vulkan::CompilePipelines()
gpbuilder.SetBlendAttachment( gpbuilder.SetBlendAttachment(
0, true, VK_BLEND_FACTOR_ONE, 0, true, VK_BLEND_FACTOR_ONE,
m_supports_dual_source_blend ? VK_BLEND_FACTOR_SRC1_ALPHA : VK_BLEND_FACTOR_SRC_ALPHA, m_supports_dual_source_blend ? VK_BLEND_FACTOR_SRC1_ALPHA : VK_BLEND_FACTOR_SRC_ALPHA,
(static_cast<TransparencyMode>(transparency_mode) == TransparencyMode::BackgroundMinusForeground && (static_cast<GPUTransparencyMode>(transparency_mode) ==
GPUTransparencyMode::BackgroundMinusForeground &&
static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled && static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::TransparencyDisabled &&
static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque) ? static_cast<BatchRenderMode>(render_mode) != BatchRenderMode::OnlyOpaque) ?
VK_BLEND_OP_REVERSE_SUBTRACT : VK_BLEND_OP_REVERSE_SUBTRACT :
@ -874,7 +876,7 @@ bool GPU_HW_Vulkan::CompilePipelines()
for (u8 interlace_mode = 0; interlace_mode < 3; interlace_mode++) for (u8 interlace_mode = 0; interlace_mode < 3; interlace_mode++)
{ {
VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateDisplayFragmentShader( VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateDisplayFragmentShader(
ConvertToBoolUnchecked(depth_24), static_cast<InterlacedRenderMode>(interlace_mode))); ConvertToBoolUnchecked(depth_24), static_cast<GPUInterlacedDisplayMode>(interlace_mode)));
if (fs == VK_NULL_HANDLE) if (fs == VK_NULL_HANDLE)
return false; return false;
@ -940,7 +942,6 @@ void GPU_HW_Vulkan::SetScissorFromDrawingArea()
void GPU_HW_Vulkan::ClearDisplay() void GPU_HW_Vulkan::ClearDisplay()
{ {
GPU_HW::ClearDisplay();
EndRenderPass(); EndRenderPass();
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
@ -953,51 +954,51 @@ void GPU_HW_Vulkan::ClearDisplay()
void GPU_HW_Vulkan::UpdateDisplay() void GPU_HW_Vulkan::UpdateDisplay()
{ {
GPU_HW::UpdateDisplay(); HostDisplay* display = g_host_interface->GetDisplay();
EndRenderPass();
if (g_settings.debugging.show_vram) if (g_settings.debugging.show_vram)
{ {
m_vram_texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), m_vram_texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(),
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_host_display->SetDisplayTexture(&m_vram_texture, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 0, 0, display->SetDisplayTexture(&m_vram_texture, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 0, 0,
m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); m_vram_texture.GetWidth(), m_vram_texture.GetHeight());
m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT)); static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
} }
else else
{ {
const u32 vram_offset_x = m_crtc_state.display_vram_left; const u32 vram_offset_x = m_display_vram_left;
const u32 vram_offset_y = m_crtc_state.display_vram_top; const u32 vram_offset_y = m_display_vram_top;
const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale; const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale;
const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale; const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale;
const u32 display_width = m_crtc_state.display_vram_width; const u32 display_width = m_display_vram_width;
const u32 display_height = m_crtc_state.display_vram_height; const u32 display_height = m_display_vram_height;
const u32 scaled_display_width = display_width * m_resolution_scale; const u32 scaled_display_width = display_width * m_resolution_scale;
const u32 scaled_display_height = display_height * m_resolution_scale; const u32 scaled_display_height = display_height * m_resolution_scale;
const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); const GPUInterlacedDisplayMode interlaced = m_display_interlace;
if (IsDisplayDisabled()) if (!m_display_enabled)
{ {
m_host_display->ClearDisplayTexture(); display->ClearDisplayTexture();
} }
else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && else if (!m_display_24bit && interlaced == GPUInterlacedDisplayMode::None &&
(scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() &&
(scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight())
{ {
m_vram_texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), m_vram_texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(),
VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_host_display->SetDisplayTexture(&m_vram_texture, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), display->SetDisplayTexture(&m_vram_texture, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(),
scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width,
scaled_display_height); scaled_display_height);
} }
else else
{ {
EndRenderPass(); EndRenderPass();
const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0; const u32 reinterpret_field_offset =
const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale; (interlaced != GPUInterlacedDisplayMode::None) ? m_display_interlace_field : 0;
const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * m_resolution_scale; const u32 reinterpret_start_x = m_display_vram_start_x * m_resolution_scale;
const u32 reinterpret_crop_left = (m_display_vram_left - m_display_vram_start_x) * m_resolution_scale;
const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset,
reinterpret_crop_left, reinterpret_field_offset}; reinterpret_crop_left, reinterpret_field_offset};
@ -1007,9 +1008,8 @@ void GPU_HW_Vulkan::UpdateDisplay()
BeginRenderPass(m_display_render_pass, m_display_framebuffer, 0, 0, scaled_display_width, scaled_display_height); BeginRenderPass(m_display_render_pass, m_display_framebuffer, 0, 0, scaled_display_width, scaled_display_height);
vkCmdBindPipeline( vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_display_pipelines[BoolToUInt8(m_display_24bit)][static_cast<u8>(interlaced)]);
m_display_pipelines[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast<u8>(interlaced)]);
vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
uniforms); uniforms);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1,
@ -1022,16 +1022,14 @@ void GPU_HW_Vulkan::UpdateDisplay()
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
m_display_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); m_display_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
m_host_display->SetDisplayTexture(&m_display_texture, m_display_texture.GetWidth(), m_display_texture.GetHeight(), display->SetDisplayTexture(&m_display_texture, m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0, 0,
0, 0, scaled_display_width, scaled_display_height); scaled_display_width, scaled_display_height);
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
} }
m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, display->SetDisplayParameters(m_display_width, m_display_height, m_display_origin_left, m_display_origin_top,
m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, m_display_width, m_display_height, m_display_aspect_ratio);
m_crtc_state.display_vram_width, m_crtc_state.display_vram_height,
m_crtc_state.display_aspect_ratio);
} }
} }
@ -1080,19 +1078,19 @@ void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
} }
void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params)
{ {
if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT)
{ {
// CPU round trip if oversized for now. // CPU round trip if oversized for now.
Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height);
ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT);
GPU::FillVRAM(x, y, width, height, color); SoftwareFillVRAM(x, y, width, height, color, params);
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), params);
return; return;
} }
GPU_HW::FillVRAM(x, y, width, height, color); GPU_HW::FillVRAM(x, y, width, height, color, params);
x *= m_resolution_scale; x *= m_resolution_scale;
y *= m_resolution_scale; y *= m_resolution_scale;
@ -1102,21 +1100,22 @@ void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color)
BeginVRAMRenderPass(); BeginVRAMRenderPass();
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color, params);
vkCmdPushConstants(cmdbuf, m_no_samplers_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), vkCmdPushConstants(cmdbuf, m_no_samplers_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
&uniforms); &uniforms);
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
m_vram_fill_pipelines[BoolToUInt8(IsInterlacedRenderingEnabled())]); m_vram_fill_pipelines[BoolToUInt8(params.interlaced_rendering)]);
Vulkan::Util::SetViewportAndScissor(cmdbuf, x, y, width, height); Vulkan::Util::SetViewportAndScissor(cmdbuf, x, y, width, height);
vkCmdDraw(cmdbuf, 3, 1, 0, 0); vkCmdDraw(cmdbuf, 3, 1, 0, 0);
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
} }
void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data,
GPUBackendCommandParameters params)
{ {
const Common::Rectangle<u32> bounds = GetVRAMTransferBounds(x, y, width, height); const Common::Rectangle<u32> bounds = GetVRAMTransferBounds(x, y, width, height);
GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data); GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data, params);
const u32 data_size = width * height * sizeof(u16); const u32 data_size = width * height * sizeof(u16);
const u32 alignment = std::max<u32>(sizeof(u16), static_cast<u32>(g_vulkan_context->GetTexelBufferAlignment())); const u32 alignment = std::max<u32>(sizeof(u16), static_cast<u32>(g_vulkan_context->GetTexelBufferAlignment()));
@ -1140,16 +1139,16 @@ void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
BeginVRAMRenderPass(); BeginVRAMRenderPass();
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, start_index); const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, start_index, params);
vkCmdPushConstants(cmdbuf, m_vram_write_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), vkCmdPushConstants(cmdbuf, m_vram_write_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
&uniforms); &uniforms);
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
m_vram_write_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw)]); m_vram_write_pipelines[BoolToUInt8(params.check_mask_before_draw)]);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_write_pipeline_layout, 0, 1, vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_write_pipeline_layout, 0, 1,
&m_vram_write_descriptor_set, 0, nullptr); &m_vram_write_descriptor_set, 0, nullptr);
// the viewport should already be set to the full vram, so just adjust the scissor // the viewport should already be set to the full vram, so just adjust the scissor
const Common::Rectangle<u32> scaled_bounds = bounds * m_resolution_scale; const Common::Rectangle<u32> scaled_bounds(ScaleVRAMRect(bounds));
Vulkan::Util::SetScissor(cmdbuf, scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), Vulkan::Util::SetScissor(cmdbuf, scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(),
scaled_bounds.GetHeight()); scaled_bounds.GetHeight());
vkCmdDraw(cmdbuf, 3, 1, 0, 0); vkCmdDraw(cmdbuf, 3, 1, 0, 0);
@ -1157,9 +1156,10 @@ void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void*
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
} }
void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params)
{ {
if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height)) if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height, params))
{ {
const Common::Rectangle<u32> src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); const Common::Rectangle<u32> src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height);
const Common::Rectangle<u32> dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); const Common::Rectangle<u32> dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height);
@ -1167,14 +1167,14 @@ void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
UpdateVRAMReadTexture(); UpdateVRAMReadTexture();
IncludeVRAMDityRectangle(dst_bounds); IncludeVRAMDityRectangle(dst_bounds);
const VRAMCopyUBOData uniforms(GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height)); const VRAMCopyUBOData uniforms(GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height, params));
const Common::Rectangle<u32> dst_bounds_scaled(dst_bounds * m_resolution_scale); const Common::Rectangle<u32> dst_bounds_scaled(ScaleVRAMRect(dst_bounds));
BeginVRAMRenderPass(); BeginVRAMRenderPass();
VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer();
vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS,
m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw)]); m_vram_copy_pipelines[BoolToUInt8(params.check_mask_before_draw)]);
vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1,
&m_vram_copy_descriptor_set, 0, nullptr); &m_vram_copy_descriptor_set, 0, nullptr);
vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms),
@ -1184,13 +1184,13 @@ void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid
vkCmdDraw(cmdbuf, 3, 1, 0, 0); vkCmdDraw(cmdbuf, 3, 1, 0, 0);
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
if (m_GPUSTAT.check_mask_before_draw) if (params.check_mask_before_draw)
m_current_depth++; m_current_depth++;
return; return;
} }
GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params);
src_x *= m_resolution_scale; src_x *= m_resolution_scale;
src_y *= m_resolution_scale; src_y *= m_resolution_scale;
@ -1224,7 +1224,7 @@ void GPU_HW_Vulkan::UpdateVRAMReadTexture()
m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
m_vram_read_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); m_vram_read_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; const Common::Rectangle<u32> scaled_rect(ScaleVRAMRect(m_vram_dirty_rect));
const VkImageCopy copy{{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, const VkImageCopy copy{{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u},
{static_cast<s32>(scaled_rect.left), static_cast<s32>(scaled_rect.top), 0}, {static_cast<s32>(scaled_rect.left), static_cast<s32>(scaled_rect.top), 0},
{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u},
@ -1262,8 +1262,3 @@ void GPU_HW_Vulkan::UpdateDepthBufferFromMaskBit()
RestoreGraphicsAPIState(); RestoreGraphicsAPIState();
} }
std::unique_ptr<GPU> GPU::CreateHardwareVulkanRenderer()
{
return std::make_unique<GPU_HW_Vulkan>();
}

View File

@ -14,7 +14,7 @@ public:
GPU_HW_Vulkan(); GPU_HW_Vulkan();
~GPU_HW_Vulkan() override; ~GPU_HW_Vulkan() override;
bool Initialize(HostDisplay* host_display) override; bool Initialize() override;
void Reset() override; void Reset() override;
void ResetGraphicsAPIState() override; void ResetGraphicsAPIState() override;
@ -25,9 +25,10 @@ protected:
void ClearDisplay() override; void ClearDisplay() override;
void UpdateDisplay() override; void UpdateDisplay() override;
void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) override;
void UpdateVRAMReadTexture() override; void UpdateVRAMReadTexture() override;
void UpdateDepthBufferFromMaskBit() override; void UpdateDepthBufferFromMaskBit() override;
void SetScissorFromDrawingArea() override; void SetScissorFromDrawingArea() override;

View File

@ -6,16 +6,29 @@
#include <algorithm> #include <algorithm>
Log_SetChannel(GPU_SW); Log_SetChannel(GPU_SW);
static constexpr std::tuple<u8, u8> UnpackTexcoord(u16 texcoord)
{
return std::make_tuple(static_cast<u8>(texcoord), static_cast<u8>(texcoord >> 8));
}
static constexpr std::tuple<u8, u8, u8> UnpackColorRGB24(u32 rgb24)
{
return std::make_tuple(static_cast<u8>(rgb24), static_cast<u8>(rgb24 >> 8), static_cast<u8>(rgb24 >> 16));
}
static constexpr u32 PackColorRGB24(u8 r, u8 g, u8 b)
{
return ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16);
}
GPU_SW::GPU_SW() GPU_SW::GPU_SW()
{ {
m_vram.fill(0); m_vram.fill(0);
m_vram_ptr = m_vram.data();
} }
GPU_SW::~GPU_SW() GPU_SW::~GPU_SW()
{ {
if (m_host_display) if (g_host_interface->GetDisplay())
m_host_display->ClearDisplayTexture(); g_host_interface->GetDisplay()->ClearDisplayTexture();
} }
bool GPU_SW::IsHardwareRenderer() const bool GPU_SW::IsHardwareRenderer() const
@ -23,21 +36,22 @@ bool GPU_SW::IsHardwareRenderer() const
return false; return false;
} }
bool GPU_SW::Initialize(HostDisplay* host_display) bool GPU_SW::Initialize()
{ {
if (!GPU::Initialize(host_display)) if (!GPUBackend::Initialize())
return false; return false;
m_display_texture = host_display->CreateTexture(VRAM_WIDTH, VRAM_HEIGHT, nullptr, 0, true); m_display_texture = g_host_interface->GetDisplay()->CreateTexture(VRAM_WIDTH, VRAM_HEIGHT, nullptr, 0, true);
if (!m_display_texture) if (!m_display_texture)
return false; return false;
m_vram_ptr = m_vram.data();
return true; return true;
} }
void GPU_SW::Reset() void GPU_SW::Reset()
{ {
GPU::Reset(); GPUBackend::Reset();
m_vram.fill(0); m_vram.fill(0);
} }
@ -151,211 +165,122 @@ void GPU_SW::UpdateDisplay()
// fill display texture // fill display texture
m_display_texture_buffer.resize(VRAM_WIDTH * VRAM_HEIGHT); m_display_texture_buffer.resize(VRAM_WIDTH * VRAM_HEIGHT);
HostDisplay* display = g_host_interface->GetDisplay();
if (!g_settings.debugging.show_vram) if (!g_settings.debugging.show_vram)
{ {
if (IsDisplayDisabled()) if (!m_display_enabled)
{ {
m_host_display->ClearDisplayTexture(); display->ClearDisplayTexture();
return; return;
} }
const u32 vram_offset_y = m_crtc_state.display_vram_top; const u32 vram_offset_x = m_display_vram_left;
const u32 display_width = m_crtc_state.display_vram_width; const u32 vram_offset_y = m_display_vram_top;
const u32 display_height = m_crtc_state.display_vram_height; const u32 display_width = m_display_vram_width;
const u32 texture_offset_x = m_crtc_state.display_vram_left - m_crtc_state.regs.X; const u32 display_height = m_display_vram_height;
if (IsInterlacedDisplayEnabled()) const u32 texture_offset_x = m_display_vram_left - m_display_vram_start_x;
if (m_display_interlace != GPUInterlacedDisplayMode::None)
{ {
const u32 field = GetInterlacedDisplayField(); const u32 field = m_display_interlace_field;
if (m_GPUSTAT.display_area_color_depth_24) const bool interleaved = (m_display_interlace == GPUInterlacedDisplayMode::InterleavedFields);
if (m_display_24bit)
{ {
CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH, CopyOut24Bit(m_display_vram_start_x, vram_offset_y + field,
VRAM_WIDTH, display_width + texture_offset_x, display_height, true, m_GPUSTAT.vertical_resolution); m_display_texture_buffer.data() + field * VRAM_WIDTH, VRAM_WIDTH, display_width + texture_offset_x,
display_height, true, interleaved);
} }
else else
{ {
CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH, CopyOut15Bit(m_display_vram_start_x, vram_offset_y + field,
VRAM_WIDTH, display_width + texture_offset_x, display_height, true, m_GPUSTAT.vertical_resolution); m_display_texture_buffer.data() + field * VRAM_WIDTH, VRAM_WIDTH, display_width + texture_offset_x,
display_height, true, interleaved);
} }
} }
else else
{ {
if (m_GPUSTAT.display_area_color_depth_24) if (m_display_24bit)
{ {
CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH, CopyOut24Bit(m_display_vram_start_x, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH,
display_width + texture_offset_x, display_height, false, false); display_width + texture_offset_x, display_height, false, false);
} }
else else
{ {
CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH, CopyOut15Bit(m_display_vram_start_x, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH,
display_width + texture_offset_x, display_height, false, false); display_width + texture_offset_x, display_height, false, false);
} }
} }
m_host_display->UpdateTexture(m_display_texture.get(), 0, 0, display_width, display_height, display->UpdateTexture(m_display_texture.get(), 0, 0, display_width, display_height,
m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32)); m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32));
m_host_display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, texture_offset_x, 0, display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, texture_offset_x, 0,
display_width, display_height); display_width, display_height);
m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, display->SetDisplayParameters(m_display_width, m_display_height, m_display_origin_left, m_display_origin_top,
m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, m_display_vram_width, m_display_vram_height, m_display_aspect_ratio);
m_crtc_state.display_vram_width, m_crtc_state.display_vram_height,
m_crtc_state.display_aspect_ratio);
} }
else else
{ {
CopyOut15Bit(0, 0, m_display_texture_buffer.data(), VRAM_WIDTH, VRAM_WIDTH, VRAM_HEIGHT, false, false); CopyOut15Bit(0, 0, m_display_texture_buffer.data(), VRAM_WIDTH, VRAM_WIDTH, VRAM_HEIGHT, false, false);
m_host_display->UpdateTexture(m_display_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT, display->UpdateTexture(m_display_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_display_texture_buffer.data(),
m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32)); VRAM_WIDTH * sizeof(u32));
m_host_display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT);
VRAM_HEIGHT); display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
static_cast<float>(VRAM_WIDTH) / static_cast<float>(VRAM_HEIGHT));
} }
} }
void GPU_SW::DispatchRenderCommand() void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height)
{ {
const RenderCommand rc{m_render_command.bits}; // no-op
const bool dithering_enable = rc.IsDitheringEnabled() && m_GPUSTAT.dither_enable; }
switch (rc.primitive) void GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params)
{ {
case Primitive::Polygon: SoftwareFillVRAM(x, y, width, height, color, params);
{ }
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
const bool textured = rc.texture_enable;
const u32 num_vertices = rc.quad_polygon ? 4 : 3; void GPU_SW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params)
std::array<SWVertex, 4> vertices; {
for (u32 i = 0; i < num_vertices; i++) SoftwareUpdateVRAM(x, y, width, height, data, params);
{ }
SWVertex& vert = vertices[i];
const u32 color_rgb = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color;
vert.color_r = Truncate8(color_rgb);
vert.color_g = Truncate8(color_rgb >> 8);
vert.color_b = Truncate8(color_rgb >> 16);
const VertexPosition vp{FifoPop()}; void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
vert.x = vp.x; GPUBackendCommandParameters params)
vert.y = vp.y; {
SoftwareCopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params);
}
if (textured) void GPU_SW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd)
{ {
std::tie(vert.texcoord_x, vert.texcoord_y) = UnpackTexcoord(Truncate16(FifoPop())); const GPURenderCommand rc{cmd->rc.bits};
} const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable;
else
{
vert.texcoord_x = 0;
vert.texcoord_y = 0;
}
}
if (!IsDrawingAreaIsValid()) const DrawTriangleFunction DrawFunction = GetDrawTriangleFunction(
return; rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable);
const DrawTriangleFunction DrawFunction = GetDrawTriangleFunction( (this->*DrawFunction)(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]);
rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable); if (rc.quad_polygon)
(this->*DrawFunction)(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]);
}
(this->*DrawFunction)(&vertices[0], &vertices[1], &vertices[2]); void GPU_SW::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd)
if (num_vertices > 3) {
(this->*DrawFunction)(&vertices[2], &vertices[1], &vertices[3]); const GPURenderCommand rc{cmd->rc.bits};
} const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable;
break;
case Primitive::Rectangle: const DrawRectangleFunction DrawFunction =
{ GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable);
const auto [r, g, b] = UnpackColorRGB24(rc.color_for_first_vertex);
const VertexPosition vp{FifoPop()};
const u32 texcoord_and_palette = rc.texture_enable ? FifoPop() : 0;
const auto [texcoord_x, texcoord_y] = UnpackTexcoord(Truncate16(texcoord_and_palette));
s32 width; (this->*DrawFunction)(cmd);
s32 height; }
switch (rc.rectangle_size)
{
case DrawRectangleSize::R1x1:
width = 1;
height = 1;
break;
case DrawRectangleSize::R8x8:
width = 8;
height = 8;
break;
case DrawRectangleSize::R16x16:
width = 16;
height = 16;
break;
default:
{
const u32 width_and_height = FifoPop();
width = static_cast<s32>(width_and_height & VRAM_WIDTH_MASK);
height = static_cast<s32>((width_and_height >> 16) & VRAM_HEIGHT_MASK);
if (width >= MAX_PRIMITIVE_WIDTH || height >= MAX_PRIMITIVE_HEIGHT) void GPU_SW::DrawLine(const GPUBackendDrawLineCommand* cmd)
{ {
Log_DebugPrintf("Culling too-large rectangle: %d,%d %dx%d", vp.x.GetValue(), vp.y.GetValue(), width, const DrawLineFunction DrawFunction =
height); GetDrawLineFunction(cmd->rc.shading_enable, cmd->rc.transparency_enable, cmd->IsDitheringEnabled());
return;
}
}
break;
}
if (!IsDrawingAreaIsValid()) for (u16 i = 1; i < cmd->num_vertices; i++)
return; (this->*DrawFunction)(cmd, &cmd->vertices[i - 1], &cmd->vertices[i]);
const DrawRectangleFunction DrawFunction =
GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable);
(this->*DrawFunction)(vp.x, vp.y, width, height, r, g, b, texcoord_x, texcoord_y);
}
break;
case Primitive::Line:
{
const u32 first_color = rc.color_for_first_vertex;
const bool shaded = rc.shading_enable;
const DrawLineFunction DrawFunction = GetDrawLineFunction(shaded, rc.transparency_enable, dithering_enable);
std::array<SWVertex, 2> vertices = {};
u32 buffer_pos = 0;
// first vertex
SWVertex* p0 = &vertices[0];
SWVertex* p1 = &vertices[1];
p0->SetPosition(VertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : Truncate32(FifoPop())});
p0->SetColorRGB24(first_color);
// remaining vertices in line strip
const u32 num_vertices = rc.polyline ? GetPolyLineVertexCount() : 2;
for (u32 i = 1; i < num_vertices; i++)
{
if (rc.polyline)
{
p1->SetColorRGB24(shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color);
p1->SetPosition(VertexPosition{m_blit_buffer[buffer_pos++]});
}
else
{
p1->SetColorRGB24(shaded ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color);
p1->SetPosition(VertexPosition{Truncate32(FifoPop())});
}
// down here because of the FIFO pops
if (IsDrawingAreaIsValid())
(this->*DrawFunction)(p0, p1);
// swap p0/p1 so that the last vertex is used as the first for the next line
std::swap(p0, p1);
}
}
break;
default:
UnreachableCode();
break;
}
} }
enum : u32 enum : u32
@ -383,7 +308,9 @@ constexpr u8 FixedColorToInt(FixedPointColor r)
return Truncate8(r >> 12); return Truncate8(r >> 12);
} }
bool GPU_SW::IsClockwiseWinding(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2) bool GPU_SW::IsClockwiseWinding(const GPUBackendDrawPolygonCommand::Vertex* v0,
const GPUBackendDrawPolygonCommand::Vertex* v1,
const GPUBackendDrawPolygonCommand::Vertex* v2)
{ {
const s32 abx = v1->x - v0->x; const s32 abx = v1->x - v0->x;
const s32 aby = v1->y - v0->y; const s32 aby = v1->y - v0->y;
@ -407,7 +334,9 @@ static constexpr u8 Interpolate(u8 v0, u8 v1, u8 v2, s32 w0, s32 w1, s32 w2, s32
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable, template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
bool dithering_enable> bool dithering_enable>
void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2) void GPU_SW::DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0,
const GPUBackendDrawPolygonCommand::Vertex* v1,
const GPUBackendDrawPolygonCommand::Vertex* v2)
{ {
#define orient2d(ax, ay, bx, by, cx, cy) ((bx - ax) * (cy - ay) - (by - ay) * (cx - ax)) #define orient2d(ax, ay, bx, by, cx, cy) ((bx - ax) * (cy - ay) - (by - ay) * (cx - ax))
@ -415,12 +344,12 @@ void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex
if (IsClockwiseWinding(v0, v1, v2)) if (IsClockwiseWinding(v0, v1, v2))
std::swap(v1, v2); std::swap(v1, v2);
const s32 px0 = v0->x + m_drawing_offset.x; const s32 px0 = v0->x;
const s32 py0 = v0->y + m_drawing_offset.y; const s32 py0 = v0->y;
const s32 px1 = v1->x + m_drawing_offset.x; const s32 px1 = v1->x;
const s32 py1 = v1->y + m_drawing_offset.y; const s32 py1 = v1->y;
const s32 px2 = v2->x + m_drawing_offset.x; const s32 px2 = v2->x;
const s32 py2 = v2->y + m_drawing_offset.y; const s32 py2 = v2->y;
// Barycentric coordinates at minX/minY corner // Barycentric coordinates at minX/minY corner
const s32 ws = orient2d(px0, py0, px1, py1, px2, py2); const s32 ws = orient2d(px0, py0, px1, py1, px2, py2);
@ -434,16 +363,11 @@ void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex
s32 min_y = std::min(py0, std::min(py1, py2)); s32 min_y = std::min(py0, std::min(py1, py2));
s32 max_y = std::max(py0, std::max(py1, py2)); s32 max_y = std::max(py0, std::max(py1, py2));
// reject triangles which cover the whole vram area
if (static_cast<u32>(max_x - min_x) > MAX_PRIMITIVE_WIDTH || static_cast<u32>(max_y - min_y) > MAX_PRIMITIVE_HEIGHT)
return;
// clip to drawing area // clip to drawing area
min_x = std::clamp(min_x, static_cast<s32>(m_drawing_area.left), static_cast<s32>(m_drawing_area.right)); min_x = std::clamp(min_x, static_cast<s32>(m_drawing_area.left), static_cast<s32>(m_drawing_area.right));
max_x = std::clamp(max_x, static_cast<s32>(m_drawing_area.left), static_cast<s32>(m_drawing_area.right)); max_x = std::clamp(max_x, static_cast<s32>(m_drawing_area.left), static_cast<s32>(m_drawing_area.right));
min_y = std::clamp(min_y, static_cast<s32>(m_drawing_area.top), static_cast<s32>(m_drawing_area.bottom)); min_y = std::clamp(min_y, static_cast<s32>(m_drawing_area.top), static_cast<s32>(m_drawing_area.bottom));
max_y = std::clamp(max_y, static_cast<s32>(m_drawing_area.top), static_cast<s32>(m_drawing_area.bottom)); max_y = std::clamp(max_y, static_cast<s32>(m_drawing_area.top), static_cast<s32>(m_drawing_area.bottom));
AddDrawTriangleTicks(max_x - min_x + 1, max_y - min_y + 1, shading_enable, texture_enable, transparency_enable);
// compute per-pixel increments // compute per-pixel increments
const s32 a01 = py0 - py1, b01 = px1 - px0; const s32 a01 = py0 - py1, b01 = px1 - px0;
@ -476,17 +400,17 @@ void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex
const s32 b2 = row_w2; const s32 b2 = row_w2;
const u8 r = const u8 r =
shading_enable ? Interpolate(v0->color_r, v1->color_r, v2->color_r, b0, b1, b2, ws, half_ws) : v0->color_r; shading_enable ? Interpolate(v0->GetR(), v1->GetR(), v2->GetR(), b0, b1, b2, ws, half_ws) : v0->GetR();
const u8 g = const u8 g =
shading_enable ? Interpolate(v0->color_g, v1->color_g, v2->color_g, b0, b1, b2, ws, half_ws) : v0->color_g; shading_enable ? Interpolate(v0->GetG(), v1->GetG(), v2->GetG(), b0, b1, b2, ws, half_ws) : v0->GetG();
const u8 b = const u8 b =
shading_enable ? Interpolate(v0->color_b, v1->color_b, v2->color_b, b0, b1, b2, ws, half_ws) : v0->color_b; shading_enable ? Interpolate(v0->GetB(), v1->GetB(), v2->GetB(), b0, b1, b2, ws, half_ws) : v0->GetB();
const u8 texcoord_x = Interpolate(v0->texcoord_x, v1->texcoord_x, v2->texcoord_x, b0, b1, b2, ws, half_ws); const u8 u = texture_enable ? Interpolate(v0->GetU(), v1->GetU(), v2->GetU(), b0, b1, b2, ws, half_ws) : 0;
const u8 texcoord_y = Interpolate(v0->texcoord_y, v1->texcoord_y, v2->texcoord_y, b0, b1, b2, ws, half_ws); const u8 v = texture_enable ? Interpolate(v0->GetV(), v1->GetV(), v2->GetV(), b0, b1, b2, ws, half_ws) : 0;
ShadePixel<texture_enable, raw_texture_enable, transparency_enable, dithering_enable>( ShadePixel<texture_enable, raw_texture_enable, transparency_enable, dithering_enable>(
static_cast<u32>(x), static_cast<u32>(y), r, g, b, texcoord_x, texcoord_y); cmd, static_cast<u32>(x), static_cast<u32>(y), r, g, b, u, v);
} }
row_w0 += a12; row_w0 += a12;
@ -534,42 +458,31 @@ GPU_SW::DrawTriangleFunction GPU_SW::GetDrawTriangleFunction(bool shading_enable
} }
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable> template<bool texture_enable, bool raw_texture_enable, bool transparency_enable>
void GPU_SW::DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x, void GPU_SW::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd)
u8 origin_texcoord_y)
{ {
const s32 start_x = TruncateVertexPosition(m_drawing_offset.x + origin_x); const s32 origin_x = cmd->x;
const s32 start_y = TruncateVertexPosition(m_drawing_offset.y + origin_y); const s32 origin_y = cmd->y;
const auto [r, g, b] = UnpackColorRGB24(cmd->color);
const auto [origin_texcoord_x, origin_texcoord_y] = UnpackTexcoord(cmd->texcoord);
for (u32 offset_y = 0; offset_y < cmd->height; offset_y++)
{ {
const u32 clip_left = static_cast<u32>(std::clamp<s32>(start_x, m_drawing_area.left, m_drawing_area.right)); const s32 y = origin_y + static_cast<s32>(offset_y);
const u32 clip_right =
static_cast<u32>(std::clamp<s32>(start_x + static_cast<s32>(width), m_drawing_area.left, m_drawing_area.right)) +
1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(start_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom =
static_cast<u32>(std::clamp<s32>(start_y + static_cast<s32>(height), m_drawing_area.top, m_drawing_area.bottom)) +
1u;
AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, texture_enable, transparency_enable);
}
for (u32 offset_y = 0; offset_y < height; offset_y++)
{
const s32 y = start_y + static_cast<s32>(offset_y);
if (y < static_cast<s32>(m_drawing_area.top) || y > static_cast<s32>(m_drawing_area.bottom)) if (y < static_cast<s32>(m_drawing_area.top) || y > static_cast<s32>(m_drawing_area.bottom))
continue; continue;
const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y); const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y);
for (u32 offset_x = 0; offset_x < width; offset_x++) for (u32 offset_x = 0; offset_x < cmd->width; offset_x++)
{ {
const s32 x = start_x + static_cast<s32>(offset_x); const s32 x = origin_x + static_cast<s32>(offset_x);
if (x < static_cast<s32>(m_drawing_area.left) || x > static_cast<s32>(m_drawing_area.right)) if (x < static_cast<s32>(m_drawing_area.left) || x > static_cast<s32>(m_drawing_area.right))
continue; continue;
const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x); const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x);
ShadePixel<texture_enable, raw_texture_enable, transparency_enable, false>( ShadePixel<texture_enable, raw_texture_enable, transparency_enable, false>(
static_cast<u32>(x), static_cast<u32>(y), r, g, b, texcoord_x, texcoord_y); cmd, static_cast<u32>(x), static_cast<u32>(y), r, g, b, texcoord_x, texcoord_y);
} }
} }
} }
@ -583,7 +496,7 @@ constexpr GPU_SW::DitherLUT GPU_SW::ComputeDitherLUT()
{ {
for (s32 value = 0; value < DITHER_LUT_SIZE; value++) for (s32 value = 0; value < DITHER_LUT_SIZE; value++)
{ {
const s32 dithered_value = (value + DITHER_MATRIX[i][j]) >> 3; const s32 dithered_value = (value + GPU::DITHER_MATRIX[i][j]) >> 3;
lut[i][j][value] = static_cast<u8>((dithered_value < 0) ? 0 : ((dithered_value > 31) ? 31 : dithered_value)); lut[i][j][value] = static_cast<u8>((dithered_value < 0) ? 0 : ((dithered_value > 31) ? 31 : dithered_value));
} }
} }
@ -594,7 +507,8 @@ constexpr GPU_SW::DitherLUT GPU_SW::ComputeDitherLUT()
static constexpr GPU_SW::DitherLUT s_dither_lut = GPU_SW::ComputeDitherLUT(); static constexpr GPU_SW::DitherLUT s_dither_lut = GPU_SW::ComputeDitherLUT();
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable> template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable>
void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y) void GPU_SW::ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b,
u8 texcoord_x, u8 texcoord_y)
{ {
VRAMPixel color; VRAMPixel color;
bool transparent; bool transparent;
@ -602,38 +516,41 @@ void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 tex
{ {
// Apply texture window // Apply texture window
// TODO: Precompute the second half // TODO: Precompute the second half
texcoord_x = (texcoord_x & ~(m_draw_mode.texture_window_mask_x * 8u)) | texcoord_x = (texcoord_x & ~(cmd->window.mask_x * 8u)) | ((cmd->window.offset_x & cmd->window.mask_x) * 8u);
((m_draw_mode.texture_window_offset_x & m_draw_mode.texture_window_mask_x) * 8u); texcoord_y = (texcoord_y & ~(cmd->window.mask_y * 8u)) | ((cmd->window.offset_y & cmd->window.mask_y) * 8u);
texcoord_y = (texcoord_y & ~(m_draw_mode.texture_window_mask_y * 8u)) |
((m_draw_mode.texture_window_offset_y & m_draw_mode.texture_window_mask_y) * 8u);
VRAMPixel texture_color; VRAMPixel texture_color;
switch (m_draw_mode.GetTextureMode()) switch (cmd->draw_mode.texture_mode)
{ {
case GPU::TextureMode::Palette4Bit: case GPUTextureMode::Palette4Bit:
{ {
const u16 palette_value = GetPixel((m_draw_mode.texture_page_x + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH, const u16 palette_value =
(m_draw_mode.texture_page_y + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH,
(cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
const u16 palette_index = (palette_value >> ((texcoord_x % 4) * 4)) & 0x0Fu; const u16 palette_index = (palette_value >> ((texcoord_x % 4) * 4)) & 0x0Fu;
texture_color.bits = GetPixel((m_draw_mode.texture_palette_x + ZeroExtend32(palette_index)) % VRAM_WIDTH,
m_draw_mode.texture_palette_y); const u32 px = (cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH;
const u32 py = cmd->palette.GetYBase();
texture_color.bits =
GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase());
} }
break; break;
case GPU::TextureMode::Palette8Bit: case GPUTextureMode::Palette8Bit:
{ {
const u16 palette_value = GetPixel((m_draw_mode.texture_page_x + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH, const u16 palette_value =
(m_draw_mode.texture_page_y + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH,
(cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
const u16 palette_index = (palette_value >> ((texcoord_x % 2) * 8)) & 0xFFu; const u16 palette_index = (palette_value >> ((texcoord_x % 2) * 8)) & 0xFFu;
texture_color.bits = GetPixel((m_draw_mode.texture_palette_x + ZeroExtend32(palette_index)) % VRAM_WIDTH, texture_color.bits =
m_draw_mode.texture_palette_y); GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase());
} }
break; break;
default: default:
{ {
texture_color.bits = GetPixel((m_draw_mode.texture_page_x + ZeroExtend32(texcoord_x)) % VRAM_WIDTH, texture_color.bits = GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x)) % VRAM_WIDTH,
(m_draw_mode.texture_page_y + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); (cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT);
} }
break; break;
} }
@ -684,18 +601,18 @@ void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 tex
color.Set(func(bg_color.r.GetValue(), color.r.GetValue()), func(bg_color.g.GetValue(), color.g.GetValue()), \ color.Set(func(bg_color.r.GetValue(), color.r.GetValue()), func(bg_color.g.GetValue(), color.g.GetValue()), \
func(bg_color.b.GetValue(), color.b.GetValue()), color.c.GetValue()) func(bg_color.b.GetValue(), color.b.GetValue()), color.c.GetValue())
switch (m_draw_mode.GetTransparencyMode()) switch (cmd->draw_mode.transparency_mode)
{ {
case GPU::TransparencyMode::HalfBackgroundPlusHalfForeground: case GPUTransparencyMode::HalfBackgroundPlusHalfForeground:
BLEND_RGB(BLEND_AVERAGE); BLEND_RGB(BLEND_AVERAGE);
break; break;
case GPU::TransparencyMode::BackgroundPlusForeground: case GPUTransparencyMode::BackgroundPlusForeground:
BLEND_RGB(BLEND_ADD); BLEND_RGB(BLEND_ADD);
break; break;
case GPU::TransparencyMode::BackgroundMinusForeground: case GPUTransparencyMode::BackgroundMinusForeground:
BLEND_RGB(BLEND_SUBTRACT); BLEND_RGB(BLEND_SUBTRACT);
break; break;
case GPU::TransparencyMode::BackgroundPlusQuarterForeground: case GPUTransparencyMode::BackgroundPlusQuarterForeground:
BLEND_RGB(BLEND_QUARTER); BLEND_RGB(BLEND_QUARTER);
break; break;
default: default:
@ -715,14 +632,14 @@ void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 tex
UNREFERENCED_VARIABLE(transparent); UNREFERENCED_VARIABLE(transparent);
} }
const u16 mask_and = m_GPUSTAT.GetMaskAND(); const u16 mask_and = cmd->params.GetMaskAND();
if ((bg_color.bits & mask_and) != 0) if ((bg_color.bits & mask_and) != 0)
return; return;
if (IsInterlacedRenderingEnabled() && GetActiveLineLSB() == (static_cast<u32>(y) & 1u)) if (cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (Truncate8(static_cast<u32>(y)) & 1u))
return; return;
SetPixel(static_cast<u32>(x), static_cast<u32>(y), color.bits | m_GPUSTAT.GetMaskOR()); SetPixel(static_cast<u32>(x), static_cast<u32>(y), color.bits | cmd->params.GetMaskOR());
} }
constexpr FixedPointCoord GetLineCoordStep(s32 delta, s32 k) constexpr FixedPointCoord GetLineCoordStep(s32 delta, s32 k)
@ -747,7 +664,8 @@ constexpr FixedPointColor GetLineColorStep(s32 delta, s32 k)
} }
template<bool shading_enable, bool transparency_enable, bool dithering_enable> template<bool shading_enable, bool transparency_enable, bool dithering_enable>
void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1) void GPU_SW::DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0,
const GPUBackendDrawLineCommand::Vertex* p1)
{ {
// Algorithm based on Mednafen. // Algorithm based on Mednafen.
if (p0->x > p1->x) if (p0->x > p1->x)
@ -757,21 +675,6 @@ void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1)
const s32 dy = p1->y - p0->y; const s32 dy = p1->y - p0->y;
const s32 k = std::max(std::abs(dx), std::abs(dy)); const s32 k = std::max(std::abs(dx), std::abs(dy));
{
// TODO: Move to base class
const s32 min_x = std::min(p0->x, p1->x);
const s32 max_x = std::max(p0->x, p1->x);
const s32 min_y = std::min(p0->y, p1->y);
const s32 max_y = std::max(p0->y, p1->y);
const u32 clip_left = static_cast<u32>(std::clamp<s32>(min_x, m_drawing_area.left, m_drawing_area.left));
const u32 clip_right = static_cast<u32>(std::clamp<s32>(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u;
const u32 clip_top = static_cast<u32>(std::clamp<s32>(min_y, m_drawing_area.top, m_drawing_area.bottom));
const u32 clip_bottom = static_cast<u32>(std::clamp<s32>(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u;
AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, shading_enable);
}
FixedPointCoord step_x, step_y; FixedPointCoord step_x, step_y;
FixedPointColor step_r, step_g, step_b; FixedPointColor step_r, step_g, step_b;
if (k > 0) if (k > 0)
@ -781,9 +684,9 @@ void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1)
if constexpr (shading_enable) if constexpr (shading_enable)
{ {
step_r = GetLineColorStep(s32(ZeroExtend32(p1->color_r)) - s32(ZeroExtend32(p0->color_r)), k); step_r = GetLineColorStep(s32(ZeroExtend32(p1->GetR())) - s32(ZeroExtend32(p0->GetR())), k);
step_g = GetLineColorStep(s32(ZeroExtend32(p1->color_g)) - s32(ZeroExtend32(p0->color_g)), k); step_g = GetLineColorStep(s32(ZeroExtend32(p1->GetG())) - s32(ZeroExtend32(p0->GetG())), k);
step_b = GetLineColorStep(s32(ZeroExtend32(p1->color_b)) - s32(ZeroExtend32(p0->color_b)), k); step_b = GetLineColorStep(s32(ZeroExtend32(p1->GetB())) - s32(ZeroExtend32(p0->GetB())), k);
} }
else else
{ {
@ -803,24 +706,25 @@ void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1)
FixedPointCoord current_x = IntToFixedCoord(p0->x); FixedPointCoord current_x = IntToFixedCoord(p0->x);
FixedPointCoord current_y = IntToFixedCoord(p0->y); FixedPointCoord current_y = IntToFixedCoord(p0->y);
FixedPointColor current_r = IntToFixedColor(p0->color_r); FixedPointColor current_r = IntToFixedColor(p0->GetR());
FixedPointColor current_g = IntToFixedColor(p0->color_g); FixedPointColor current_g = IntToFixedColor(p0->GetG());
FixedPointColor current_b = IntToFixedColor(p0->color_b); FixedPointColor current_b = IntToFixedColor(p0->GetB());
for (s32 i = 0; i <= k; i++) for (s32 i = 0; i <= k; i++)
{ {
const s32 x = m_drawing_offset.x + FixedToIntCoord(current_x); // FIXME: Draw offset should be applied here
const s32 y = m_drawing_offset.y + FixedToIntCoord(current_y); const s32 x = /*m_drawing_offset.x + */ FixedToIntCoord(current_x);
const s32 y = /*m_drawing_offset.y + */ FixedToIntCoord(current_y);
const u8 r = shading_enable ? FixedColorToInt(current_r) : p0->color_r; const u8 r = shading_enable ? FixedColorToInt(current_r) : p0->GetR();
const u8 g = shading_enable ? FixedColorToInt(current_g) : p0->color_g; const u8 g = shading_enable ? FixedColorToInt(current_g) : p0->GetG();
const u8 b = shading_enable ? FixedColorToInt(current_b) : p0->color_b; const u8 b = shading_enable ? FixedColorToInt(current_b) : p0->GetB();
if (x >= static_cast<s32>(m_drawing_area.left) && x <= static_cast<s32>(m_drawing_area.right) && if (x >= static_cast<s32>(m_drawing_area.left) && x <= static_cast<s32>(m_drawing_area.right) &&
y >= static_cast<s32>(m_drawing_area.top) && y <= static_cast<s32>(m_drawing_area.bottom)) y >= static_cast<s32>(m_drawing_area.top) && y <= static_cast<s32>(m_drawing_area.bottom))
{ {
ShadePixel<false, false, transparency_enable, dithering_enable>(static_cast<u32>(x), static_cast<u32>(y), r, g, b, ShadePixel<false, false, transparency_enable, dithering_enable>(cmd, static_cast<u32>(x), static_cast<u32>(y), r,
0, 0); g, b, 0, 0);
} }
current_x += step_x; current_x += step_x;
@ -863,7 +767,7 @@ GPU_SW::DrawRectangleFunction GPU_SW::GetDrawRectangleFunction(bool texture_enab
return funcs[u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)]; return funcs[u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)];
} }
std::unique_ptr<GPU> GPU::CreateSoftwareRenderer() void GPU_SW::FlushRender()
{ {
return std::make_unique<GPU_SW>(); // no-op
} }

View File

@ -1,12 +1,13 @@
#pragma once #pragma once
#include "gpu.h" #include "gpu.h"
#include "gpu_backend.h"
#include <array> #include <array>
#include <memory> #include <memory>
#include <vector> #include <vector>
class HostDisplayTexture; class HostDisplayTexture;
class GPU_SW final : public GPU class GPU_SW final : public GPUBackend
{ {
public: public:
GPU_SW(); GPU_SW();
@ -14,7 +15,7 @@ public:
bool IsHardwareRenderer() const override; bool IsHardwareRenderer() const override;
bool Initialize(HostDisplay* host_display) override; bool Initialize() override;
void Reset() override; void Reset() override;
u16 GetPixel(u32 x, u32 y) const { return m_vram[VRAM_WIDTH * y + x]; } u16 GetPixel(u32 x, u32 y) const { return m_vram[VRAM_WIDTH * y + x]; }
@ -28,20 +29,61 @@ public:
static constexpr DitherLUT ComputeDitherLUT(); static constexpr DitherLUT ComputeDitherLUT();
protected: protected:
struct SWVertex static constexpr u8 Convert5To8(u8 x5) { return (x5 << 3) | (x5 & 7); }
{ static constexpr u8 Convert8To5(u8 x8) { return (x8 >> 3); }
s32 x, y;
u8 color_r, color_g, color_b;
u8 texcoord_x, texcoord_y;
ALWAYS_INLINE void SetPosition(VertexPosition p) union VRAMPixel
{
u16 bits;
BitField<u16, u8, 0, 5> r;
BitField<u16, u8, 5, 5> g;
BitField<u16, u8, 10, 5> b;
BitField<u16, bool, 15, 1> c;
u8 GetR8() const { return Convert5To8(r); }
u8 GetG8() const { return Convert5To8(g); }
u8 GetB8() const { return Convert5To8(b); }
void Set(u8 r_, u8 g_, u8 b_, bool c_ = false)
{ {
x = p.x; bits = (ZeroExtend16(r_)) | (ZeroExtend16(g_) << 5) | (ZeroExtend16(b_) << 10) | (static_cast<u16>(c_) << 15);
y = p.y;
} }
ALWAYS_INLINE void SetColorRGB24(u32 color) { std::tie(color_r, color_g, color_b) = UnpackColorRGB24(color); } void ClampAndSet(u8 r_, u8 g_, u8 b_, bool c_ = false)
ALWAYS_INLINE void SetTexcoord(u16 value) { std::tie(texcoord_x, texcoord_y) = UnpackTexcoord(value); } {
Set(std::min<u8>(r_, 0x1F), std::min<u8>(g_, 0x1F), std::min<u8>(b_, 0x1F), c_);
}
void SetRGB24(u32 rgb24, bool c_ = false)
{
bits = Truncate16(((rgb24 >> 3) & 0x1F) | (((rgb24 >> 11) & 0x1F) << 5) | (((rgb24 >> 19) & 0x1F) << 10)) |
(static_cast<u16>(c_) << 15);
}
void SetRGB24(u8 r8, u8 g8, u8 b8, bool c_ = false)
{
bits = (ZeroExtend16(r8 >> 3)) | (ZeroExtend16(g8 >> 3) << 5) | (ZeroExtend16(b8 >> 3) << 10) |
(static_cast<u16>(c_) << 15);
}
void SetRGB24Dithered(u32 x, u32 y, u8 r8, u8 g8, u8 b8, bool c_ = false)
{
const s32 offset = GPU::DITHER_MATRIX[y & 3][x & 3];
r8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(r8)) + offset, 0, 255));
g8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(g8)) + offset, 0, 255));
b8 = static_cast<u8>(std::clamp<s32>(static_cast<s32>(ZeroExtend32(b8)) + offset, 0, 255));
SetRGB24(r8, g8, b8, c_);
}
u32 ToRGB24() const
{
const u32 r_ = ZeroExtend32(r.GetValue());
const u32 g_ = ZeroExtend32(g.GetValue());
const u32 b_ = ZeroExtend32(b.GetValue());
return ((r_ << 3) | (r_ & 7)) | (((g_ << 3) | (g_ & 7)) << 8) | (((b_ << 3) | (b_ & 7)) << 16);
}
}; };
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
@ -53,39 +95,51 @@ protected:
bool interleaved); bool interleaved);
void ClearDisplay() override; void ClearDisplay() override;
void UpdateDisplay() override; void UpdateDisplay() override;
void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override;
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
GPUBackendCommandParameters params) override;
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// Rasterization // Rasterization
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
void DispatchRenderCommand() override; void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override;
void DrawLine(const GPUBackendDrawLineCommand* cmd) override;
void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) override;
void FlushRender() override;
static bool IsClockwiseWinding(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2); static bool IsClockwiseWinding(const GPUBackendDrawPolygonCommand::Vertex* v0, const GPUBackendDrawPolygonCommand::Vertex* v1,
const GPUBackendDrawPolygonCommand::Vertex* v2);
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable> template<bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable>
void ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y); void ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x,
u8 texcoord_y);
template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable, template<bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable,
bool dithering_enable> bool dithering_enable>
void DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2); void DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0,
const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2);
using DrawTriangleFunction = void (GPU_SW::*)(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2); using DrawTriangleFunction = void (GPU_SW::*)(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0,
const GPUBackendDrawPolygonCommand::Vertex* v1,
const GPUBackendDrawPolygonCommand::Vertex* v2);
DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, bool raw_texture_enable, DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, bool raw_texture_enable,
bool transparency_enable, bool dithering_enable); bool transparency_enable, bool dithering_enable);
template<bool texture_enable, bool raw_texture_enable, bool transparency_enable> template<bool texture_enable, bool raw_texture_enable, bool transparency_enable>
void DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x, void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd);
u8 origin_texcoord_y);
using DrawRectangleFunction = void (GPU_SW::*)(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, using DrawRectangleFunction = void (GPU_SW::*)(const GPUBackendDrawRectangleCommand* cmd);
u8 origin_texcoord_x, u8 origin_texcoord_y);
DrawRectangleFunction GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable, DrawRectangleFunction GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable,
bool transparency_enable); bool transparency_enable);
template<bool shading_enable, bool transparency_enable, bool dithering_enable> template<bool shading_enable, bool transparency_enable, bool dithering_enable>
void DrawLine(const SWVertex* p0, const SWVertex* p1); void DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0, const GPUBackendDrawLineCommand::Vertex* p1);
using DrawLineFunction = void (GPU_SW::*)(const SWVertex* p0, const SWVertex* p1); using DrawLineFunction = void (GPU_SW::*)(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0,
const GPUBackendDrawLineCommand::Vertex* p1);
DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool dithering_enable); DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool dithering_enable);
std::vector<u32> m_display_texture_buffer; std::vector<u32> m_display_texture_buffer;

455
src/core/gpu_types.h Normal file
View File

@ -0,0 +1,455 @@
#pragma once
#include "common/bitfield.h"
#include "common/rectangle.h"
#include "types.h"
#include <array>
// GPU-wide size constants (all u32).
enum : u32
{
// VRAM extent in 16-bit pixels; VRAM_SIZE is the same area expressed in bytes (width * height * sizeof(u16)).
VRAM_WIDTH = 1024,
VRAM_HEIGHT = 512,
VRAM_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16),
// (dimension - 1) masks; both dimensions are powers of two, so AND-ing wraps a coordinate into range.
VRAM_WIDTH_MASK = VRAM_WIDTH - 1,
VRAM_HEIGHT_MASK = VRAM_HEIGHT - 1,
// 10-bit coordinate mask (0..1023).
VRAM_COORD_MASK = 0x3FF,
// Texture page extent; GPUDrawModeReg::GetTexturePageRectangle() divides the width by 4 or 2 for paletted modes.
TEXTURE_PAGE_WIDTH = 256,
TEXTURE_PAGE_HEIGHT = 256,
// Upper bounds on primitive extent (same values as the VRAM dimensions).
MAX_PRIMITIVE_WIDTH = 1024,
MAX_PRIMITIVE_HEIGHT = 512,
// Side length of the square DITHER_MATRIX table.
DITHER_MATRIX_SIZE = 4
};
// Primitive type stored in GPURenderCommand::primitive.
enum class GPUPrimitive : u8
{
Reserved = 0,
Polygon = 1,
Line = 2,
Rectangle = 3
};
// Size selector stored in the 2-bit GPURenderCommand::rectangle_size field (rectangles only).
// The R*x* names presumably denote fixed pixel sizes, with Variable meaning the size is supplied separately - TODO confirm.
enum class GPUDrawRectangleSize : u8
{
Variable = 0,
R1x1 = 1,
R8x8 = 2,
R16x16 = 3
};
// Texture sampling mode. Values 0..3 fit the 2-bit GPUDrawModeReg::texture_mode field; the remaining
// enumerators are synthetic flags/combinations that never appear in the register.
enum class GPUTextureMode : u8
{
Palette4Bit = 0,
Palette8Bit = 1,
Direct16Bit = 2,
Reserved_Direct16Bit = 3,
// Not register values: RawTextureBit is a flag OR-ed onto one of the four register modes above.
RawTextureBit = 4,
RawPalette4Bit = RawTextureBit | Palette4Bit,
RawPalette8Bit = RawTextureBit | Palette8Bit,
RawDirect16Bit = RawTextureBit | Direct16Bit,
Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit,
Disabled = 8 // Not a register value
};
// Enables bitwise operators on the enum so the flag combinations above can also be formed at runtime.
IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPUTextureMode);
// Blend equation selector. Values 0..3 fit the 2-bit GPUDrawModeReg::transparency_mode field; the enumerator
// names spell out how the background (existing) and foreground (new) pixel are combined.
enum class GPUTransparencyMode : u8
{
HalfBackgroundPlusHalfForeground = 0,
BackgroundPlusForeground = 1,
BackgroundMinusForeground = 2,
BackgroundPlusQuarterForeground = 3,
Disabled = 4 // Not a register value
};
// Interlacing mode carried in GPUBackendUpdateDisplayCommand::display_interlace.
enum class GPUInterlacedDisplayMode : u8
{
None,
InterleavedFields,
SeparateFields
};
/// Bit layout of a 32-bit render command word. Several fields alias bit 27; which one is meaningful depends
/// on the primitive type held in the top bits.
union GPURenderCommand
{
  u32 bits;

  BitField<u32, u32, 0, 24> color_for_first_vertex;
  BitField<u32, bool, 24, 1> raw_texture_enable; // not valid for lines
  BitField<u32, bool, 25, 1> transparency_enable;
  BitField<u32, bool, 26, 1> texture_enable;
  BitField<u32, GPUDrawRectangleSize, 27, 2> rectangle_size; // only for rectangles
  BitField<u32, bool, 27, 1> quad_polygon;                   // only for polygons
  BitField<u32, bool, 27, 1> polyline;                       // only for lines
  BitField<u32, bool, 28, 1> shading_enable;                 // 0 = flat, 1 = gouraud

  // The primitive type occupies the top three bits (29..31). A wider declared length would extend past
  // bit 31 of the 32-bit storage word, so the width is stated exactly.
  BitField<u32, GPUPrimitive, 29, 3> primitive;

  /// Returns true if texturing should be enabled. Depends on the primitive type: lines are never textured.
  ALWAYS_INLINE bool IsTexturingEnabled() const { return (primitive != GPUPrimitive::Line) ? texture_enable : false; }

  /// Returns true if dithering should be enabled. Depends on the primitive type.
  ALWAYS_INLINE bool IsDitheringEnabled() const
  {
    switch (primitive)
    {
      case GPUPrimitive::Polygon:
        // Polygons dither when shaded, or when textured with colour modulation (i.e. not raw texture).
        return shading_enable || (texture_enable && !raw_texture_enable);

      case GPUPrimitive::Line:
        return true;

      case GPUPrimitive::Rectangle:
      default:
        return false;
    }
  }
};
// Helper/format conversion functions.

/// Expands a packed 5:5:5:1 pixel (red in the low bits, flag in bit 15) to 8:8:8:8, red in the low byte.
static constexpr u32 RGBA5551ToRGBA8888(u16 color)
{
  // Isolate the three 5-bit channels and the single flag bit.
  const u8 red5 = Truncate8(color & 31);
  const u8 green5 = Truncate8((color >> 5) & 31);
  const u8 blue5 = Truncate8((color >> 10) & 31);
  const u8 flag = Truncate8((color >> 15) & 1);

  // Widen 5 -> 8 bits by re-using the low three source bits as the new low bits: 00012345 -> 12345345.
  const u8 red8 = static_cast<u8>((red5 << 3) | (red5 & 0b111));
  const u8 green8 = static_cast<u8>((green5 << 3) | (green5 & 0b111));
  const u8 blue8 = static_cast<u8>((blue5 << 3) | (blue5 & 0b111));

  // The one-bit flag becomes a fully opaque or fully transparent alpha byte.
  const u8 alpha8 = flag ? 255 : 0;

  return ZeroExtend32(red8) | (ZeroExtend32(green8) << 8) | (ZeroExtend32(blue8) << 16) |
         (ZeroExtend32(alpha8) << 24);
}
/// Packs an 8:8:8:8 colour (red in the low byte) into 5:5:5:1 by keeping the top five bits of each colour
/// channel and the top bit of alpha.
static constexpr u16 RGBA8888ToRGBA5551(u32 color)
{
  const u32 red5 = (color >> 3) & 0x1Fu;
  const u32 green5 = (color >> 11) & 0x1Fu;
  const u32 blue5 = (color >> 19) & 0x1Fu;
  const u32 alpha1 = (color >> 31) & 0x01u;

  // Every component has been masked, so the combined value always fits in 16 bits.
  return Truncate16(red5 | (green5 << 5) | (blue5 << 10) | (alpha1 << 15));
}
// Packed vertex position word: x is read from bits 0..11 and y from bits 16..27, both as s32 fields.
union GPUVertexPosition
{
u32 bits;
BitField<u32, s32, 0, 12> x;
BitField<u32, s32, 16, 12> y;
};
// Sprites/rectangles should be clipped to 12 bits before drawing.
// NOTE(review): the text above says 12 bits while the template argument below is 11 - confirm which width
// SignExtendN<11, s32> actually produces (helper is defined outside this file).
static constexpr s32 TruncateGPUVertexPosition(s32 x)
{
return SignExtendN<11, s32>(x);
}
// bits in GP0(E1h) or texpage part of polygon
union GPUDrawModeReg
{
// NOTE(review): MASK has 13 bits set (0..12) while texture_y_flip below sits at bit 13 - confirm this is intended.
static constexpr u16 MASK = 0b1111111111111;
// Bits 0..4: texture_page_x_base and texture_page_y_base.
static constexpr u16 TEXTURE_PAGE_MASK = UINT16_C(0b0000000000011111);
// Polygon texpage commands only affect bits 0-8, 11
static constexpr u16 POLYGON_TEXPAGE_MASK = 0b0000100111111111;
// Bits 0..10 are returned in the GPU status register, latched at E1h/polygon draw time.
static constexpr u32 GPUSTAT_MASK = 0b11111111111;
u16 bits;
BitField<u16, u8, 0, 4> texture_page_x_base;
BitField<u16, u8, 4, 1> texture_page_y_base;
BitField<u16, GPUTransparencyMode, 5, 2> transparency_mode;
BitField<u16, GPUTextureMode, 7, 2> texture_mode;
BitField<u16, bool, 9, 1> dither_enable;
BitField<u16, bool, 10, 1> draw_to_displayed_field;
BitField<u16, bool, 11, 1> texture_disable;
BitField<u16, bool, 12, 1> texture_x_flip;
BitField<u16, bool, 13, 1> texture_y_flip;
/// Texture page origin in VRAM: the x base counts in steps of 64, the y base in steps of 256.
ALWAYS_INLINE u16 GetTexturePageBaseX() const { return ZeroExtend16(texture_page_x_base.GetValue()) * 64; }
ALWAYS_INLINE u16 GetTexturePageBaseY() const { return ZeroExtend16(texture_page_y_base.GetValue()) * 256; }
/// Returns true if the texture mode requires a palette.
/// Tests bit 8, the high bit of texture_mode: clear for Palette4Bit/Palette8Bit, set for the direct modes.
bool IsUsingPalette() const { return (bits & (2 << 7)) == 0; }
/// Returns a rectangle comprising the texture page area.
/// The width is TEXTURE_PAGE_WIDTH / 4 for 4-bit, / 2 for 8-bit and the full width for both direct modes.
Common::Rectangle<u32> GetTexturePageRectangle() const
{
static constexpr std::array<u32, 4> texture_page_widths = {
{TEXTURE_PAGE_WIDTH / 4, TEXTURE_PAGE_WIDTH / 2, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_WIDTH}};
return Common::Rectangle<u32>::FromExtents(GetTexturePageBaseX(), GetTexturePageBaseY(),
texture_page_widths[static_cast<u8>(texture_mode.GetValue())],
TEXTURE_PAGE_HEIGHT);
}
/// Returns a rectangle comprising the texture palette area.
/// One row high; 16 entries wide for 4-bit, 256 for 8-bit, empty for the direct modes.
/// NOTE(review): the origin used here is the texture *page* base, not the GPUTexturePaletteReg
/// coordinates - confirm this is intended.
Common::Rectangle<u32> GetTexturePaletteRectangle() const
{
static constexpr std::array<u32, 4> palette_widths = {{16, 256, 0, 0}};
return Common::Rectangle<u32>::FromExtents(GetTexturePageBaseX(), GetTexturePageBaseY(),
palette_widths[static_cast<u8>(texture_mode.GetValue())], 1);
}
};
// Packed palette location: x in bits 0..5, y declared over bits 6..15.
union GPUTexturePaletteReg
{
// 15 bits set (0..14). NOTE(review): the y field below is declared 10 bits wide and so also covers bit 15 - confirm.
static constexpr u16 MASK = UINT16_C(0b0111111111111111);
u16 bits;
BitField<u16, u16, 0, 6> x;
BitField<u16, u16, 6, 10> y;
/// Palette origin: x is stored in units of 16, y is used as-is.
ALWAYS_INLINE u32 GetXBase() const { return static_cast<u32>(x) * 16u; }
ALWAYS_INLINE u32 GetYBase() const { return static_cast<u32>(y); }
};
// Packed texture window setting: four consecutive 5-bit fields (20 bits in total, matching MASK).
union GPUTextureWindowReg
{
static constexpr u32 MASK = 0b11111111111111111111;
u32 bits;
BitField<u32, u8, 0, 5> mask_x;
BitField<u32, u8, 5, 5> mask_y;
BitField<u32, u8, 10, 5> offset_x;
BitField<u32, u8, 15, 5> offset_y;
};
// 4x4 dither matrix of signed offsets added to each 8-bit colour channel before it is reduced to 5 bits.
// Rows 2 and 3 are rows 0 and 1 rotated by two columns, so the first entry of row 3 is +3 (row 1, column 2).
static constexpr s32 DITHER_MATRIX[DITHER_MATRIX_SIZE][DITHER_MATRIX_SIZE] = {{-4, +0, -3, +1},  // row 0
                                                                              {+2, -2, +3, -1},  // row 1
                                                                              {-3, +1, -4, +0},  // row 2
                                                                              {+3, -1, +2, -2}}; // row 3
// Discriminator stored in GPUBackendCommand::type. Each enumerator has a matching GPUBackend*Command struct
// declared further down in this header.
enum class GPUBackendCommandType : u8
{
Sync,
FrameDone,
Reset,
UpdateSettings,
UpdateResolutionScale,
ReadVRAM,
FillVRAM,
UpdateVRAM,
CopyVRAM,
SetDrawingArea,
DrawPolygon,
DrawRectangle,
DrawLine,
ClearDisplay,
UpdateDisplay,
FlushRender
};
// Per-command flags packed into one byte and carried in every GPUBackendCommand header.
union GPUBackendCommandParameters
{
u8 bits;
BitField<u8, bool, 0, 1> interlaced_rendering;
/// 0 if the currently-displayed field is on an even line in VRAM, otherwise 1.
BitField<u8, u8, 1, 1> active_line_lsb;
BitField<u8, bool, 2, 1> set_mask_while_drawing;
BitField<u8, bool, 3, 1> check_mask_before_draw;
/// True if either mask flag (bit 2 or bit 3; 12 == 0b1100) is set.
ALWAYS_INLINE bool IsMaskingEnabled() const { return (bits & 12u) != 0u; }
// During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or }
/// Returns 0x8000 when check_mask_before_draw is set, otherwise 0.
u16 GetMaskAND() const
{
// return check_mask_before_draw ? 0x8000 : 0x0000;
// Branch-free form: shifting by 12 moves bit 3 up to bit 15.
return Truncate16((bits << 12) & 0x8000);
}
/// Returns 0x8000 when set_mask_while_drawing is set, otherwise 0.
u16 GetMaskOR() const
{
// return set_mask_while_drawing ? 0x8000 : 0x0000;
// Branch-free form: shifting by 13 moves bit 2 up to bit 15.
return Truncate16((bits << 13) & 0x8000);
}
};
// Common header that every backend command struct derives from.
struct GPUBackendCommand
{
// Identifies which derived command struct this header belongs to.
GPUBackendCommandType type;
// Masking/interlacing flags for the command.
GPUBackendCommandParameters params;
// Presumably the total size of the command in bytes, as computed by the derived Size() - TODO confirm against the producer.
u32 size;
};
// Sync command: header only, no payload. Size() is the fixed struct size.
struct GPUBackendSyncCommand : public GPUBackendCommand
{
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendSyncCommand); }
};
// FrameDone command: header only, no payload. Size() is the fixed struct size.
struct GPUBackendFrameDoneCommand : public GPUBackendCommand
{
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendFrameDoneCommand); }
};
// Reset command: header only, no payload. Size() is the fixed struct size.
struct GPUBackendResetCommand : public GPUBackendCommand
{
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendResetCommand); }
};
// UpdateSettings command: header only, no payload. Size() is the fixed struct size.
struct GPUBackendUpdateSettingsCommand : public GPUBackendCommand
{
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendUpdateSettingsCommand); }
};
// UpdateResolutionScale command: header only, no payload. Size() is the fixed struct size.
struct GPUBackendUpdateResolutionScaleCommand : public GPUBackendCommand
{
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendUpdateResolutionScaleCommand); }
};
// ReadVRAM command: identifies a VRAM rectangle by origin (x, y) and extent (width, height).
struct GPUBackendReadVRAMCommand : public GPUBackendCommand
{
u16 x;
u16 y;
u16 width;
u16 height;
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendReadVRAMCommand); }
};
// FillVRAM command: a VRAM rectangle (origin + extent) together with a 32-bit fill colour.
struct GPUBackendFillVRAMCommand : public GPUBackendCommand
{
u16 x;
u16 y;
u16 width;
u16 height;
u32 color;
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendFillVRAMCommand); }
};
// UpdateVRAM command: a VRAM rectangle followed in memory by width * height 16-bit pixels.
struct GPUBackendUpdateVRAMCommand : public GPUBackendCommand
{
u16 x;
u16 y;
u16 width;
u16 height;
// Zero-length trailing array (compiler extension): the pixel payload starts immediately after the fixed fields.
u16 data[0];
/// Fixed struct size plus the variable-length pixel payload.
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendUpdateVRAMCommand) + (sizeof(u16) * width * height); }
};
// CopyVRAM command: source origin, destination origin and the shared extent of the copied rectangle.
struct GPUBackendCopyVRAMCommand : public GPUBackendCommand
{
u16 src_x;
u16 src_y;
u16 dst_x;
u16 dst_y;
u16 width;
u16 height;
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendCopyVRAMCommand); }
};
// SetDrawingArea command: carries the new drawing area rectangle.
struct GPUBackendSetDrawingAreaCommand : public GPUBackendCommand
{
Common::Rectangle<u32> new_area;
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendSetDrawingAreaCommand); }
};
// State shared by all draw commands (polygon, rectangle, line): a copy of the render command word and the
// draw mode, palette and texture window registers, plus a bounding rectangle.
struct GPUBackendDrawCommand : public GPUBackendCommand
{
GPURenderCommand rc;
GPUDrawModeReg draw_mode;
GPUTexturePaletteReg palette;
GPUTextureWindowReg window;
Common::Rectangle<u16> bounds;
/// Dithering applies only when the primitive type allows it AND the draw mode has dithering switched on.
ALWAYS_INLINE bool IsDitheringEnabled() const { return rc.IsDitheringEnabled() && draw_mode.dither_enable; }
};
// DrawPolygon command: the shared draw state followed in memory by num_vertices Vertex records.
struct GPUBackendDrawPolygonCommand : public GPUBackendDrawCommand
{
u16 num_vertices;
struct Vertex
{
// Floating-point position; presumably the higher-precision counterpart of x/y below - TODO confirm against the producer.
float precise_x, precise_y, precise_w;
// Integer position.
s32 x, y;
// Packed colour: R in bits 0..7, G in bits 8..15, B in bits 16..23 (see the getters below).
u32 color;
// Packed texture coordinate: U in the low byte, V in the high byte.
u16 texcoord;
ALWAYS_INLINE u8 GetR() const { return Truncate8(color); }
ALWAYS_INLINE u8 GetG() const { return Truncate8(color >> 8); }
ALWAYS_INLINE u8 GetB() const { return Truncate8(color >> 16); }
ALWAYS_INLINE u8 GetU() const { return Truncate8(texcoord); }
ALWAYS_INLINE u8 GetV() const { return Truncate8(texcoord >> 8); }
};
// Zero-length trailing array (compiler extension): the vertices start immediately after the fixed fields.
Vertex vertices[0];
/// Fixed struct size plus the variable-length vertex payload.
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendDrawPolygonCommand) + sizeof(Vertex) * num_vertices; }
};
// DrawRectangle command: the shared draw state plus the rectangle's origin, extent, packed texture
// coordinate and packed colour.
struct GPUBackendDrawRectangleCommand : public GPUBackendDrawCommand
{
s32 x, y;
u16 width, height;
u16 texcoord;
u32 color;
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendDrawRectangleCommand); }
};
// DrawLine command: the shared draw state followed in memory by num_vertices Vertex records.
struct GPUBackendDrawLineCommand : public GPUBackendDrawCommand
{
u16 num_vertices;
struct Vertex
{
// Integer position.
s32 x, y;
// Packed colour: R in bits 0..7, G in bits 8..15, B in bits 16..23 (see the getters below).
u32 color;
ALWAYS_INLINE u8 GetR() const { return Truncate8(color); }
ALWAYS_INLINE u8 GetG() const { return Truncate8(color >> 8); }
ALWAYS_INLINE u8 GetB() const { return Truncate8(color >> 16); }
};
// Zero-length trailing array (compiler extension): the vertices start immediately after the fixed fields.
Vertex vertices[0];
/// Fixed struct size plus the variable-length vertex payload.
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendDrawLineCommand) + sizeof(Vertex) * num_vertices; }
};
// ClearDisplay command: header only, no payload. Size() is the fixed struct size.
struct GPUBackendClearDisplayCommand : public GPUBackendCommand
{
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendClearDisplayCommand); }
};
// UpdateDisplay command: a snapshot of the display configuration (aspect ratio, display size and origin,
// the VRAM region being shown, interlacing state and output flags).
struct GPUBackendUpdateDisplayCommand : public GPUBackendCommand
{
float display_aspect_ratio;
u16 display_width;
u16 display_height;
u16 display_origin_left;
u16 display_origin_top;
u16 display_vram_left;
u16 display_vram_top;
u16 display_vram_width;
u16 display_vram_height;
u16 display_vram_start_x;
u16 display_vram_start_y;
GPUInterlacedDisplayMode display_interlace;
/// 0 if the currently-displayed field is on odd lines (1,3,5,...) or 1 if even (2,4,6,...).
u8 display_interlace_field;
bool display_enabled;
bool display_24bit;
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendUpdateDisplayCommand); }
};
// FlushRender command: header only, no payload. Size() is the fixed struct size.
struct GPUBackendFlushRenderCommand : public GPUBackendCommand
{
ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendFlushRenderCommand); }
};

View File

@ -12,6 +12,7 @@
#include "cpu_core.h" #include "cpu_core.h"
#include "dma.h" #include "dma.h"
#include "gpu.h" #include "gpu.h"
#include "gpu_backend.h"
#include "gte.h" #include "gte.h"
#include "host_display.h" #include "host_display.h"
#include "pgxp.h" #include "pgxp.h"
@ -554,7 +555,7 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings)
g_settings.display_active_start_offset != old_settings.display_active_start_offset || g_settings.display_active_start_offset != old_settings.display_active_start_offset ||
g_settings.display_active_end_offset != old_settings.display_active_end_offset) g_settings.display_active_end_offset != old_settings.display_active_end_offset)
{ {
g_gpu->UpdateSettings(); g_gpu.UpdateSettings();
} }
if (g_settings.gpu_pgxp_enable != old_settings.gpu_pgxp_enable || if (g_settings.gpu_pgxp_enable != old_settings.gpu_pgxp_enable ||
@ -726,7 +727,7 @@ void HostInterface::ToggleSoftwareRendering()
if (System::IsShutdown() || g_settings.gpu_renderer == GPURenderer::Software) if (System::IsShutdown() || g_settings.gpu_renderer == GPURenderer::Software)
return; return;
const GPURenderer new_renderer = g_gpu->IsHardwareRenderer() ? GPURenderer::Software : g_settings.gpu_renderer; const GPURenderer new_renderer = g_gpu_backend->IsHardwareRenderer() ? GPURenderer::Software : g_settings.gpu_renderer;
AddFormattedOSDMessage(5.0f, "Switching to %s renderer...", Settings::GetRendererDisplayName(new_renderer)); AddFormattedOSDMessage(5.0f, "Switching to %s renderer...", Settings::GetRendererDisplayName(new_renderer));
System::RecreateGPU(new_renderer); System::RecreateGPU(new_renderer);
@ -743,9 +744,9 @@ void HostInterface::ModifyResolutionScale(s32 increment)
if (!System::IsShutdown()) if (!System::IsShutdown())
{ {
g_gpu->RestoreGraphicsAPIState(); g_gpu_backend->RestoreGraphicsAPIState();
g_gpu->UpdateSettings(); g_gpu_backend->UpdateSettings();
g_gpu->ResetGraphicsAPIState(); g_gpu_backend->ResetGraphicsAPIState();
} }
} }

View File

@ -159,7 +159,7 @@ void NamcoGunCon::UpdatePosition()
// are we within the active display area? // are we within the active display area?
u32 tick, line; u32 tick, line;
if (mouse_x < 0 || mouse_y < 0 || !g_gpu->ConvertScreenCoordinatesToBeamTicksAndLines(mouse_x, mouse_y, &tick, &line)) if (mouse_x < 0 || mouse_y < 0 || !g_gpu.ConvertScreenCoordinatesToBeamTicksAndLines(mouse_x, mouse_y, &tick, &line))
{ {
Log_DebugPrintf("Lightgun out of range for window coordinates %d,%d", mouse_x, mouse_y); Log_DebugPrintf("Lightgun out of range for window coordinates %d,%d", mouse_x, mouse_y);
m_position_x = 0x01; m_position_x = 0x01;
@ -168,7 +168,7 @@ void NamcoGunCon::UpdatePosition()
} }
// 8MHz units for X = 44100*768*11/7 = 53222400 / 8000000 = 6.6528 // 8MHz units for X = 44100*768*11/7 = 53222400 / 8000000 = 6.6528
const double divider = static_cast<double>(g_gpu->GetCRTCFrequency()) / 8000000.0; const double divider = static_cast<double>(g_gpu.GetCRTCFrequency()) / 8000000.0;
m_position_x = static_cast<u16>(static_cast<float>(tick) / static_cast<float>(divider)); m_position_x = static_cast<u16>(static_cast<float>(tick) / static_cast<float>(divider));
m_position_y = static_cast<u16>(line); m_position_y = static_cast<u16>(line);
Log_DebugPrintf("Lightgun window coordinates %d,%d -> tick %u line %u 8mhz ticks %u", mouse_x, mouse_y, tick, line, Log_DebugPrintf("Lightgun window coordinates %d,%d -> tick %u line %u 8mhz ticks %u", mouse_x, mouse_y, tick, line,

View File

@ -72,6 +72,7 @@ struct Settings
CPUExecutionMode cpu_execution_mode = CPUExecutionMode::Interpreter; CPUExecutionMode cpu_execution_mode = CPUExecutionMode::Interpreter;
bool cpu_recompiler_memory_exceptions = false; bool cpu_recompiler_memory_exceptions = false;
bool cpu_recompiler_icache = false; bool cpu_recompiler_icache = false;
bool cpu_thread = true;
float emulation_speed = 1.0f; float emulation_speed = 1.0f;
bool speed_limiter_enabled = true; bool speed_limiter_enabled = true;

View File

@ -14,6 +14,7 @@
#include "cpu_core.h" #include "cpu_core.h"
#include "dma.h" #include "dma.h"
#include "gpu.h" #include "gpu.h"
#include "gpu_backend.h"
#include "gte.h" #include "gte.h"
#include "host_display.h" #include "host_display.h"
#include "host_interface.h" #include "host_interface.h"
@ -28,9 +29,12 @@
#include "spu.h" #include "spu.h"
#include "timers.h" #include "timers.h"
#include <cctype> #include <cctype>
#include <condition_variable>
#include <cstdio> #include <cstdio>
#include <fstream> #include <fstream>
#include <limits> #include <limits>
#include <mutex>
#include <thread>
Log_SetChannel(System); Log_SetChannel(System);
#ifdef WIN32 #ifdef WIN32
@ -59,7 +63,6 @@ static std::unique_ptr<CDImage> OpenCDImage(const char* path, bool force_preload
static bool DoLoadState(ByteStream* stream, bool force_software_renderer); static bool DoLoadState(ByteStream* stream, bool force_software_renderer);
static bool DoState(StateWrapper& sw); static bool DoState(StateWrapper& sw);
static bool CreateGPU(GPURenderer renderer);
static bool Initialize(bool force_software_renderer); static bool Initialize(bool force_software_renderer);
@ -95,6 +98,17 @@ static u32 s_last_global_tick_counter = 0;
static Common::Timer s_fps_timer; static Common::Timer s_fps_timer;
static Common::Timer s_frame_timer; static Common::Timer s_frame_timer;
static float s_average_cpu_frame_time_accumulator = 0.0f;
static float s_worst_cpu_frame_time_accumulator = 0.0f;
static float s_worst_cpu_frame_time = 0.0f;
static float s_average_cpu_frame_time = 0.0f;
static Common::Timer s_cpu_frame_timer;
static void StartCPUThread();
static void WakeCPUThread();
static void WaitForCPUThread();
static void StopCPUThread();
// Playlist of disc images. // Playlist of disc images.
static std::vector<std::string> s_media_playlist; static std::vector<std::string> s_media_playlist;
static std::string s_media_playlist_filename; static std::string s_media_playlist_filename;
@ -153,12 +167,6 @@ u32 GetInternalFrameNumber()
return s_internal_frame_number; return s_internal_frame_number;
} }
void FrameDone()
{
s_frame_number++;
CPU::g_state.frame_done = true;
}
void IncrementInternalFrameNumber() void IncrementInternalFrameNumber()
{ {
s_internal_frame_number++; s_internal_frame_number++;
@ -194,10 +202,18 @@ float GetAverageFrameTime()
{ {
return s_average_frame_time; return s_average_frame_time;
} }
// Returns the current value of s_average_cpu_frame_time.
// Presumably milliseconds, since the CPU thread accumulates GetTimeMilliseconds() readings - TODO confirm where the average is published.
float GetAverageCPUFrameTime()
{
return s_average_cpu_frame_time;
}
float GetWorstFrameTime() float GetWorstFrameTime()
{ {
return s_worst_frame_time; return s_worst_frame_time;
} }
// Returns the current value of s_worst_cpu_frame_time.
// Presumably milliseconds, since the CPU thread accumulates GetTimeMilliseconds() readings - TODO confirm where the value is published.
float GetWorstCPUFrameTime()
{
return s_worst_cpu_frame_time;
}
float GetThrottleFrequency() float GetThrottleFrequency()
{ {
return s_throttle_frequency; return s_throttle_frequency;
@ -469,22 +485,22 @@ std::optional<DiscRegion> GetRegionForPath(const char* image_path)
bool RecreateGPU(GPURenderer renderer) bool RecreateGPU(GPURenderer renderer)
{ {
g_gpu->RestoreGraphicsAPIState(); g_gpu_backend->RestoreGraphicsAPIState();
// save current state // save current state
std::unique_ptr<ByteStream> state_stream = ByteStream_CreateGrowableMemoryStream(); std::unique_ptr<ByteStream> state_stream = ByteStream_CreateGrowableMemoryStream();
StateWrapper sw(state_stream.get(), StateWrapper::Mode::Write); StateWrapper sw(state_stream.get(), StateWrapper::Mode::Write);
const bool state_valid = g_gpu->DoState(sw) && TimingEvents::DoState(sw); const bool state_valid = g_gpu_backend->DoState(sw);
if (!state_valid) if (!state_valid)
Log_ErrorPrintf("Failed to save old GPU state when switching renderers"); Log_ErrorPrintf("Failed to save old GPU state when switching renderers");
g_gpu->ResetGraphicsAPIState(); g_gpu_backend->ResetGraphicsAPIState();
g_gpu_backend.reset();
// create new renderer // create new renderer
g_gpu.reset(); if (!GPUBackend::Create(renderer))
if (!CreateGPU(renderer))
{ {
Panic("Failed to recreate GPU"); Panic("Failed to recreate GPU backend");
return false; return false;
} }
@ -492,10 +508,9 @@ bool RecreateGPU(GPURenderer renderer)
{ {
state_stream->SeekAbsolute(0); state_stream->SeekAbsolute(0);
sw.SetMode(StateWrapper::Mode::Read); sw.SetMode(StateWrapper::Mode::Read);
g_gpu->RestoreGraphicsAPIState(); g_gpu_backend->RestoreGraphicsAPIState();
g_gpu->DoState(sw); g_gpu_backend->DoState(sw);
TimingEvents::DoState(sw); g_gpu_backend->ResetGraphicsAPIState();
g_gpu->ResetGraphicsAPIState();
} }
return true; return true;
@ -526,6 +541,7 @@ bool Boot(const SystemBootParameters& params)
Assert(s_media_playlist.empty()); Assert(s_media_playlist.empty());
s_state = State::Starting; s_state = State::Starting;
s_region = g_settings.region; s_region = g_settings.region;
// g_settings.cpu_thread = false;
if (params.state_stream) if (params.state_stream)
{ {
@ -705,15 +721,16 @@ bool Initialize(bool force_software_renderer)
s_fps_timer.Reset(); s_fps_timer.Reset();
s_frame_timer.Reset(); s_frame_timer.Reset();
if (!GPUBackend::Create(force_software_renderer ? GPURenderer::Software : g_settings.gpu_renderer))
return false;
TimingEvents::Initialize(); TimingEvents::Initialize();
CPU::Initialize(); CPU::Initialize();
CPU::CodeCache::Initialize(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler); CPU::CodeCache::Initialize(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
Bus::Initialize(); Bus::Initialize();
if (!CreateGPU(force_software_renderer ? GPURenderer::Software : g_settings.gpu_renderer)) g_gpu.Initialize();
return false;
g_dma.Initialize(); g_dma.Initialize();
g_interrupt_controller.Initialize(); g_interrupt_controller.Initialize();
@ -726,6 +743,10 @@ bool Initialize(bool force_software_renderer)
g_sio.Initialize(); g_sio.Initialize();
UpdateThrottlePeriod(); UpdateThrottlePeriod();
if (g_settings.cpu_thread)
StartCPUThread();
return true; return true;
} }
@ -734,13 +755,19 @@ void Shutdown()
if (s_state == State::Shutdown) if (s_state == State::Shutdown)
return; return;
s_state = State::Stopping;
if (g_settings.cpu_thread)
StopCPUThread();
g_sio.Shutdown(); g_sio.Shutdown();
g_mdec.Shutdown(); g_mdec.Shutdown();
g_spu.Shutdown(); g_spu.Shutdown();
g_timers.Shutdown(); g_timers.Shutdown();
g_pad.Shutdown(); g_pad.Shutdown();
g_cdrom.Shutdown(); g_cdrom.Shutdown();
g_gpu.reset(); g_gpu_backend.reset();
g_gpu.Shutdown();
g_interrupt_controller.Shutdown(); g_interrupt_controller.Shutdown();
g_dma.Shutdown(); g_dma.Shutdown();
CPU::CodeCache::Shutdown(); CPU::CodeCache::Shutdown();
@ -756,44 +783,6 @@ void Shutdown()
s_state = State::Shutdown; s_state = State::Shutdown;
} }
bool CreateGPU(GPURenderer renderer)
{
switch (renderer)
{
case GPURenderer::HardwareOpenGL:
g_gpu = GPU::CreateHardwareOpenGLRenderer();
break;
case GPURenderer::HardwareVulkan:
g_gpu = GPU::CreateHardwareVulkanRenderer();
break;
#ifdef WIN32
case GPURenderer::HardwareD3D11:
g_gpu = GPU::CreateHardwareD3D11Renderer();
break;
#endif
case GPURenderer::Software:
default:
g_gpu = GPU::CreateSoftwareRenderer();
break;
}
if (!g_gpu || !g_gpu->Initialize(g_host_interface->GetDisplay()))
{
Log_ErrorPrintf("Failed to initialize GPU, falling back to software");
g_gpu.reset();
g_gpu = GPU::CreateSoftwareRenderer();
if (!g_gpu->Initialize(g_host_interface->GetDisplay()))
return false;
}
// we put this here rather than in Initialize() because of the virtual calls
g_gpu->Reset();
return true;
}
bool DoState(StateWrapper& sw) bool DoState(StateWrapper& sw)
{ {
if (!sw.DoMarker("System")) if (!sw.DoMarker("System"))
@ -818,12 +807,15 @@ bool DoState(StateWrapper& sw)
if (!sw.DoMarker("InterruptController") || !g_interrupt_controller.DoState(sw)) if (!sw.DoMarker("InterruptController") || !g_interrupt_controller.DoState(sw))
return false; return false;
g_gpu->RestoreGraphicsAPIState(); g_gpu_backend->RestoreGraphicsAPIState();
const bool gpu_result = sw.DoMarker("GPU") && g_gpu->DoState(sw); const bool gpu_result = sw.DoMarker("GPUBackend") && g_gpu_backend->DoState(sw);
g_gpu->ResetGraphicsAPIState(); g_gpu_backend->ResetGraphicsAPIState();
if (!gpu_result) if (!gpu_result)
return false; return false;
if (!sw.DoMarker("GPU") || !g_gpu.DoState(sw))
return false;
if (!sw.DoMarker("CDROM") || !g_cdrom.DoState(sw)) if (!sw.DoMarker("CDROM") || !g_cdrom.DoState(sw))
return false; return false;
@ -853,14 +845,14 @@ void Reset()
if (IsShutdown()) if (IsShutdown())
return; return;
g_gpu->RestoreGraphicsAPIState(); g_gpu_backend->RestoreGraphicsAPIState();
CPU::Reset(); CPU::Reset();
CPU::CodeCache::Flush(); CPU::CodeCache::Flush();
Bus::Reset(); Bus::Reset();
g_dma.Reset(); g_dma.Reset();
g_interrupt_controller.Reset(); g_interrupt_controller.Reset();
g_gpu->Reset(); g_gpu.Reset();
g_cdrom.Reset(); g_cdrom.Reset();
g_pad.Reset(); g_pad.Reset();
g_timers.Reset(); g_timers.Reset();
@ -872,7 +864,7 @@ void Reset()
TimingEvents::Reset(); TimingEvents::Reset();
ResetPerformanceCounters(); ResetPerformanceCounters();
g_gpu->ResetGraphicsAPIState(); g_gpu_backend->ResetGraphicsAPIState();
} }
bool LoadState(ByteStream* state) bool LoadState(ByteStream* state)
@ -1052,12 +1044,12 @@ bool SaveState(ByteStream* state, u32 screenshot_size /* = 128 */)
{ {
header.offset_to_data = static_cast<u32>(state->GetPosition()); header.offset_to_data = static_cast<u32>(state->GetPosition());
g_gpu->RestoreGraphicsAPIState(); g_gpu_backend->RestoreGraphicsAPIState();
StateWrapper sw(state, StateWrapper::Mode::Write); StateWrapper sw(state, StateWrapper::Mode::Write);
const bool result = DoState(sw); const bool result = DoState(sw);
g_gpu->ResetGraphicsAPIState(); g_gpu_backend->ResetGraphicsAPIState();
if (!result) if (!result)
return false; return false;
@ -1077,12 +1069,14 @@ bool SaveState(ByteStream* state, u32 screenshot_size /* = 128 */)
return true; return true;
} }
void RunFrame() static std::thread s_cpu_thread;
static std::atomic_bool s_cpu_thread_running{false};
static std::atomic_bool s_cpu_thread_sleeping{false};
static std::mutex s_cpu_thread_wake_mutex;
static std::condition_variable s_cpu_thread_wake_cv;
static void ExecuteCPUFrame()
{ {
s_frame_timer.Reset();
g_gpu->RestoreGraphicsAPIState();
switch (g_settings.cpu_execution_mode) switch (g_settings.cpu_execution_mode)
{ {
case CPUExecutionMode::Recompiler: case CPUExecutionMode::Recompiler:
@ -1105,11 +1099,100 @@ void RunFrame()
// Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns. // Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns.
g_spu.GeneratePendingSamples(); g_spu.GeneratePendingSamples();
}
// Entry point of the dedicated CPU emulation thread.
//
// Each iteration parks on s_cpu_thread_wake_cv until either a frame has been requested (CPU::g_state.frame_done
// cleared, see WakeCPUThread()) or the system has entered State::Stopping. It then runs one frame through
// ExecuteCPUFrame() and folds that frame's duration into the CPU frame-time accumulators.
// s_cpu_thread_running is cleared on exit, which is what StopCPUThread() polls for.
static void CPUThreadFunction()
{
  for (;;)
  {
    {
      std::unique_lock<std::mutex> lock(s_cpu_thread_wake_mutex);

      // The sleeping flag is raised while the wake mutex is held; WakeCPUThread() reads it under the same mutex.
      s_cpu_thread_sleeping.store(true);
      s_cpu_thread_wake_cv.wait(lock, []() { return !CPU::g_state.frame_done || s_state == State::Stopping; });
      s_cpu_thread_sleeping.store(false);

      if (s_state == State::Stopping)
        break;
    }

    s_cpu_frame_timer.Reset();
    ExecuteCPUFrame();

    // Read the timer that was reset just above. The previous code read s_frame_timer here, which is reset by
    // RunFrame() rather than by this function, so the value did not describe ExecuteCPUFrame() alone and
    // s_cpu_frame_timer was never consulted.
    const float frame_time = static_cast<float>(s_cpu_frame_timer.GetTimeMilliseconds());
    s_average_cpu_frame_time_accumulator += frame_time;
    s_worst_cpu_frame_time_accumulator = std::max(s_worst_cpu_frame_time_accumulator, frame_time);
  }

  s_cpu_thread_running.store(false);
}
// Launches the CPU emulation thread on CPUThreadFunction().
// Asserts that no earlier thread object is still joinable and that the running flag is clear.
void StartCPUThread()
{
  Assert(!s_cpu_thread.joinable() && !s_cpu_thread_running.load());

  // Raise the flag before the thread object exists, so the flag is already set when the thread starts.
  s_cpu_thread_running = true;
  s_cpu_thread = std::thread(&CPUThreadFunction);
}
// Shuts the CPU thread down. The caller must already have moved the system to State::Stopping.
// Until the thread reports that it has left its loop, this keeps waking it and keeps processing GPU backend
// commands; afterwards the thread is joined if it is joinable.
void StopCPUThread()
{
  Assert(s_state == State::Stopping);

  for (;;)
  {
    if (!s_cpu_thread_running.load())
      break;

    WakeCPUThread();
    g_gpu_backend->ProcessGPUCommands();
  }

  if (!s_cpu_thread.joinable())
    return;

  s_cpu_thread.join();
}
// Requests another frame from the CPU thread, but only if that thread is currently parked.
// When it is, frame_done is cleared (the condition the sleeper waits on) and the condition variable is signalled,
// all while the wake mutex is held. When the thread is not sleeping this is a no-op.
void WakeCPUThread()
{
  std::lock_guard<std::mutex> guard(s_cpu_thread_wake_mutex);

  if (s_cpu_thread_sleeping.load())
  {
    CPU::g_state.frame_done = false;
    s_cpu_thread_wake_cv.notify_one();
  }
}
// Busy-waits until the CPU thread has flagged itself as sleeping.
void WaitForCPUThread()
{
  for (;;)
  {
    if (s_cpu_thread_sleeping.load())
      return;
  }
}
// Runs one frame. Resets s_frame_timer and restores the graphics API state, then either executes the CPU frame
// inline (cpu_thread setting off) or wakes the CPU thread and runs the GPU frame on the calling thread
// (cpu_thread setting on). Afterwards applies the cheat list, if one is present, and resets the graphics API state.
void RunFrame()
{
s_frame_timer.Reset();
g_gpu_backend->RestoreGraphicsAPIState();
if (!g_settings.cpu_thread)
{
// Single-threaded path: clear the done flag ourselves and run the CPU here.
CPU::g_state.frame_done = false;
ExecuteCPUFrame();
}
else
{
// Threaded path: WakeCPUThread() clears the done flag and signals the CPU thread.
WakeCPUThread();
g_gpu_backend->RunGPUFrame();
}
// NOTE(review): the three lines below appear to carry both columns of a side-by-side diff (old text, then new
// text) - confirm against the real system.cpp before relying on them.
if (s_cheat_list) if (s_cheat_list)
s_cheat_list->Apply(); s_cheat_list->Apply();
g_gpu->ResetGraphicsAPIState(); g_gpu_backend->ResetGraphicsAPIState();
}
// Marks the current frame as finished: increments s_frame_number, sets CPU::g_state.frame_done (the flag the
// CPU thread's wait predicate tests) and then calls CPUFrameDone() on the GPU backend.
void FrameDone()
{
s_frame_number++;
CPU::g_state.frame_done = true;
g_gpu_backend->CPUFrameDone();
} }
void SetThrottleFrequency(float frequency) void SetThrottleFrequency(float frequency)
@ -1163,8 +1246,15 @@ void Throttle()
s_last_throttle_time += s_throttle_period; s_last_throttle_time += s_throttle_period;
} }
void UpdatePerformanceCounters() void EndFrame()
{ {
if (g_settings.cpu_thread)
{
// finish up anything the CPU pushed to the GPU after vblank
WaitForCPUThread();
g_gpu_backend->EndGPUFrame();
}
const float frame_time = static_cast<float>(s_frame_timer.GetTimeMilliseconds()); const float frame_time = static_cast<float>(s_frame_timer.GetTimeMilliseconds());
s_average_frame_time_accumulator += frame_time; s_average_frame_time_accumulator += frame_time;
s_worst_frame_time_accumulator = std::max(s_worst_frame_time_accumulator, frame_time); s_worst_frame_time_accumulator = std::max(s_worst_frame_time_accumulator, frame_time);
@ -1179,8 +1269,12 @@ void UpdatePerformanceCounters()
s_worst_frame_time = s_worst_frame_time_accumulator; s_worst_frame_time = s_worst_frame_time_accumulator;
s_worst_frame_time_accumulator = 0.0f; s_worst_frame_time_accumulator = 0.0f;
s_worst_cpu_frame_time = s_worst_cpu_frame_time_accumulator;
s_worst_cpu_frame_time_accumulator = 0.0f;
s_average_frame_time = s_average_frame_time_accumulator / frames_presented; s_average_frame_time = s_average_frame_time_accumulator / frames_presented;
s_average_frame_time_accumulator = 0.0f; s_average_frame_time_accumulator = 0.0f;
s_average_cpu_frame_time = s_average_cpu_frame_time_accumulator / frames_presented;
s_average_cpu_frame_time_accumulator = 0.0f;
s_vps = static_cast<float>(frames_presented / time); s_vps = static_cast<float>(frames_presented / time);
s_last_frame_number = s_frame_number; s_last_frame_number = s_frame_number;
s_fps = static_cast<float>(s_internal_frame_number - s_last_internal_frame_number) / time; s_fps = static_cast<float>(s_internal_frame_number - s_last_internal_frame_number) / time;
@ -1191,6 +1285,11 @@ void UpdatePerformanceCounters()
s_last_global_tick_counter = global_tick_counter; s_last_global_tick_counter = global_tick_counter;
s_fps_timer.Reset(); s_fps_timer.Reset();
#ifndef WIN32
Log_InfoPrintf("FPS: %.2f VPS: %.2f Average: %.2fms (%.2fms CPU) Worst: %.2fms (%.2fms CPU)", s_fps, s_vps,
s_average_frame_time, s_average_cpu_frame_time, s_worst_frame_time, s_worst_cpu_frame_time);
#endif
g_host_interface->OnSystemPerformanceCountersUpdated(); g_host_interface->OnSystemPerformanceCountersUpdated();
} }

View File

@ -45,7 +45,8 @@ enum class State
Shutdown, Shutdown,
Starting, Starting,
Running, Running,
Paused Paused,
Stopping
}; };
/// Returns true if the filename is a PlayStation executable we can inject. /// Returns true if the filename is a PlayStation executable we can inject.
@ -93,7 +94,9 @@ float GetFPS();
float GetVPS(); float GetVPS();
float GetEmulationSpeed(); float GetEmulationSpeed();
float GetAverageFrameTime(); float GetAverageFrameTime();
float GetAverageCPUFrameTime();
float GetWorstFrameTime(); float GetWorstFrameTime();
float GetWorstCPUFrameTime();
float GetThrottleFrequency(); float GetThrottleFrequency();
bool Boot(const SystemBootParameters& params); bool Boot(const SystemBootParameters& params);
@ -117,7 +120,7 @@ void UpdateThrottlePeriod();
/// Throttles the system, i.e. sleeps until it's time to execute the next frame. /// Throttles the system, i.e. sleeps until it's time to execute the next frame.
void Throttle(); void Throttle();
void UpdatePerformanceCounters(); void EndFrame();
void ResetPerformanceCounters(); void ResetPerformanceCounters();
// Access controllers for simulating input. // Access controllers for simulating input.

View File

@ -194,8 +194,8 @@ u32 Timers::ReadRegister(u32 offset)
if (timer_index < 2 && cs.external_counting_enabled) if (timer_index < 2 && cs.external_counting_enabled)
{ {
// timers 0/1 depend on the GPU // timers 0/1 depend on the GPU
if (timer_index == 0 || g_gpu->IsCRTCScanlinePending()) if (timer_index == 0 || g_gpu.IsCRTCScanlinePending())
g_gpu->SynchronizeCRTC(); g_gpu.SynchronizeCRTC();
} }
m_sysclk_event->InvokeEarly(); m_sysclk_event->InvokeEarly();
@ -208,8 +208,8 @@ u32 Timers::ReadRegister(u32 offset)
if (timer_index < 2 && cs.external_counting_enabled) if (timer_index < 2 && cs.external_counting_enabled)
{ {
// timers 0/1 depend on the GPU // timers 0/1 depend on the GPU
if (timer_index == 0 || g_gpu->IsCRTCScanlinePending()) if (timer_index == 0 || g_gpu.IsCRTCScanlinePending())
g_gpu->SynchronizeCRTC(); g_gpu.SynchronizeCRTC();
} }
m_sysclk_event->InvokeEarly(); m_sysclk_event->InvokeEarly();
@ -244,8 +244,8 @@ void Timers::WriteRegister(u32 offset, u32 value)
if (timer_index < 2 && cs.external_counting_enabled) if (timer_index < 2 && cs.external_counting_enabled)
{ {
// timers 0/1 depend on the GPU // timers 0/1 depend on the GPU
if (timer_index == 0 || g_gpu->IsCRTCScanlinePending()) if (timer_index == 0 || g_gpu.IsCRTCScanlinePending())
g_gpu->SynchronizeCRTC(); g_gpu.SynchronizeCRTC();
} }
m_sysclk_event->InvokeEarly(); m_sysclk_event->InvokeEarly();

View File

@ -8,6 +8,7 @@
#include "core/cheats.h" #include "core/cheats.h"
#include "core/controller.h" #include "core/controller.h"
#include "core/gpu.h" #include "core/gpu.h"
#include "core/gpu_backend.h"
#include "core/system.h" #include "core/system.h"
#include "frontend-common/game_list.h" #include "frontend-common/game_list.h"
#include "frontend-common/imgui_styles.h" #include "frontend-common/imgui_styles.h"
@ -432,7 +433,7 @@ void QtHostInterface::onHostDisplayWindowResized(int width, int height)
// re-render the display, since otherwise it will be out of date and stretched if paused // re-render the display, since otherwise it will be out of date and stretched if paused
if (!System::IsShutdown()) if (!System::IsShutdown())
{ {
g_gpu->UpdateResolutionScale(); g_gpu_backend->UpdateResolutionScale();
renderDisplay(); renderDisplay();
} }
} }
@ -550,7 +551,7 @@ void QtHostInterface::updateDisplayState()
if (!System::IsShutdown()) if (!System::IsShutdown())
{ {
g_gpu->UpdateResolutionScale(); g_gpu_backend->UpdateResolutionScale();
redrawDisplayWindow(); redrawDisplayWindow();
} }
UpdateSpeedLimiterState(); UpdateSpeedLimiterState();
@ -1247,7 +1248,7 @@ void QtHostInterface::threadEntryPoint()
renderDisplay(); renderDisplay();
System::UpdatePerformanceCounters(); System::EndFrame();
if (m_speed_limiter_enabled) if (m_speed_limiter_enabled)
System::Throttle(); System::Throttle();

View File

@ -7,6 +7,7 @@
#include "core/cheats.h" #include "core/cheats.h"
#include "core/controller.h" #include "core/controller.h"
#include "core/gpu.h" #include "core/gpu.h"
#include "core/gpu_backend.h"
#include "core/host_display.h" #include "core/host_display.h"
#include "core/system.h" #include "core/system.h"
#include "frontend-common/icon.h" #include "frontend-common/icon.h"
@ -376,7 +377,7 @@ bool SDLHostInterface::SetFullscreen(bool enabled)
m_display->ResizeRenderWindow(window_width, window_height); m_display->ResizeRenderWindow(window_width, window_height);
if (!System::IsShutdown()) if (!System::IsShutdown())
g_gpu->UpdateResolutionScale(); g_gpu_backend->UpdateResolutionScale();
m_fullscreen = enabled; m_fullscreen = enabled;
return true; return true;
@ -534,7 +535,7 @@ void SDLHostInterface::HandleSDLEvent(const SDL_Event* event)
UpdateFramebufferScale(); UpdateFramebufferScale();
if (!System::IsShutdown()) if (!System::IsShutdown())
g_gpu->UpdateResolutionScale(); g_gpu_backend->UpdateResolutionScale();
} }
else if (event->window.event == SDL_WINDOWEVENT_MOVED) else if (event->window.event == SDL_WINDOWEVENT_MOVED)
{ {
@ -824,11 +825,11 @@ void SDLHostInterface::DrawMainMenuBar()
} }
else else
{ {
ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (420.0f * framebuffer_scale)); ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (500.0f * framebuffer_scale));
ImGui::Text("Average: %.2fms", System::GetAverageFrameTime()); ImGui::Text("Average: %.2fms / %.2fms", System::GetAverageFrameTime(), System::GetAverageCPUFrameTime());
ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (310.0f * framebuffer_scale)); ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (350.0f * framebuffer_scale));
ImGui::Text("Worst: %.2fms", System::GetWorstFrameTime()); ImGui::Text("Worst: %.2fms / %.2fms", System::GetWorstFrameTime(), System::GetWorstCPUFrameTime());
ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (210.0f * framebuffer_scale)); ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (210.0f * framebuffer_scale));
@ -907,7 +908,7 @@ void SDLHostInterface::DrawQuickSettingsMenu()
for (u32 scale = 1; scale <= GPU::MAX_RESOLUTION_SCALE; scale++) for (u32 scale = 1; scale <= GPU::MAX_RESOLUTION_SCALE; scale++)
{ {
char buf[32]; char buf[32];
std::snprintf(buf, sizeof(buf), "%ux (%ux%u)", scale, scale * GPU::VRAM_WIDTH, scale * GPU::VRAM_HEIGHT); std::snprintf(buf, sizeof(buf), "%ux (%ux%u)", scale, scale * VRAM_WIDTH, scale * VRAM_HEIGHT);
if (ImGui::MenuItem(buf, nullptr, current_internal_resolution == scale)) if (ImGui::MenuItem(buf, nullptr, current_internal_resolution == scale))
{ {
@ -1653,7 +1654,7 @@ void SDLHostInterface::Run()
if (System::IsRunning()) if (System::IsRunning())
{ {
System::UpdatePerformanceCounters(); System::EndFrame();
if (m_speed_limiter_enabled) if (m_speed_limiter_enabled)
System::Throttle(); System::Throttle();

View File

@ -11,6 +11,7 @@
#include "core/cpu_code_cache.h" #include "core/cpu_code_cache.h"
#include "core/dma.h" #include "core/dma.h"
#include "core/gpu.h" #include "core/gpu.h"
#include "core/gpu_backend.h"
#include "core/host_display.h" #include "core/host_display.h"
#include "core/mdec.h" #include "core/mdec.h"
#include "core/pgxp.h" #include "core/pgxp.h"
@ -825,8 +826,8 @@ void CommonHostInterface::DrawFPSWindow()
if (g_settings.display_show_resolution) if (g_settings.display_show_resolution)
{ {
const auto [effective_width, effective_height] = g_gpu->GetEffectiveDisplayResolution(); const auto [effective_width, effective_height] = g_gpu_backend->GetEffectiveDisplayResolution();
const bool interlaced = g_gpu->IsInterlacedDisplayEnabled(); const bool interlaced = g_gpu.IsInterlacedDisplayEnabled();
ImGui::Text("%ux%u (%s)", effective_width, effective_height, interlaced ? "interlaced" : "progressive"); ImGui::Text("%ux%u (%s)", effective_width, effective_height, interlaced ? "interlaced" : "progressive");
} }
@ -906,7 +907,7 @@ void CommonHostInterface::DrawOSDMessages()
void CommonHostInterface::DrawDebugWindows() void CommonHostInterface::DrawDebugWindows()
{ {
if (g_settings.debugging.show_gpu_state) if (g_settings.debugging.show_gpu_state)
g_gpu->DrawDebugStateWindow(); g_gpu.DrawDebugStateWindow();
if (g_settings.debugging.show_cdrom_state) if (g_settings.debugging.show_cdrom_state)
g_cdrom.DrawDebugWindow(); g_cdrom.DrawDebugWindow();
if (g_settings.debugging.show_timers_state) if (g_settings.debugging.show_timers_state)
@ -1419,7 +1420,7 @@ void CommonHostInterface::RegisterGraphicsHotkeys()
if (!pressed) if (!pressed)
{ {
g_settings.gpu_pgxp_enable = !g_settings.gpu_pgxp_enable; g_settings.gpu_pgxp_enable = !g_settings.gpu_pgxp_enable;
g_gpu->UpdateSettings(); g_gpu.UpdateSettings();
AddFormattedOSDMessage(5.0f, "PGXP is now %s.", AddFormattedOSDMessage(5.0f, "PGXP is now %s.",
g_settings.gpu_pgxp_enable ? "enabled" : "disabled"); g_settings.gpu_pgxp_enable ? "enabled" : "disabled");