diff --git a/src/common/rectangle.h b/src/common/rectangle.h index c7369e57e..d3d491555 100644 --- a/src/common/rectangle.h +++ b/src/common/rectangle.h @@ -1,5 +1,6 @@ #pragma once #include +#include #include #include #include @@ -63,10 +64,7 @@ struct Rectangle /// Assignment operator. constexpr Rectangle& operator=(const Rectangle& rhs) { - left = rhs.left; - top = rhs.top; - right = rhs.right; - bottom = rhs.bottom; + std::memcpy(this, &rhs, sizeof(Rectangle)); return *this; } diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index bc2fc30bc..f0cd48fa1 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -28,6 +28,8 @@ add_library(core dma.h gpu.cpp gpu.h + gpu_backend.cpp + gpu_backend.h gpu_commands.cpp gpu_hw.cpp gpu_hw.h diff --git a/src/core/bus.cpp b/src/core/bus.cpp index 7b8a70c0a..6954994f4 100644 --- a/src/core/bus.cpp +++ b/src/core/bus.cpp @@ -579,14 +579,14 @@ ALWAYS_INLINE static TickCount DoGPUAccess(u32 offset, u32& value) { if constexpr (type == MemoryAccessType::Read) { - value = g_gpu->ReadRegister(offset); + value = g_gpu.ReadRegister(offset); FixupUnalignedWordAccessW32(offset, value); return 2; } else { FixupUnalignedWordAccessW32(offset, value); - g_gpu->WriteRegister(offset, value); + g_gpu.WriteRegister(offset, value); return 0; } } diff --git a/src/core/core.vcxproj b/src/core/core.vcxproj index e0121765a..cabad4486 100644 --- a/src/core/core.vcxproj +++ b/src/core/core.vcxproj @@ -60,7 +60,9 @@ + + @@ -68,7 +70,6 @@ - @@ -109,6 +110,9 @@ + + + @@ -117,7 +121,6 @@ - diff --git a/src/core/core.vcxproj.filters b/src/core/core.vcxproj.filters index 84d774ca5..8a601c5dc 100644 --- a/src/core/core.vcxproj.filters +++ b/src/core/core.vcxproj.filters @@ -8,7 +8,6 @@ - @@ -49,6 +48,8 @@ + + @@ -61,7 +62,6 @@ - @@ -101,5 +101,8 @@ + + + - \ No newline at end of file + diff --git a/src/core/cpu_code_cache.cpp b/src/core/cpu_code_cache.cpp index 727f449d8..134ec92ac 100644 --- a/src/core/cpu_code_cache.cpp +++ 
b/src/core/cpu_code_cache.cpp @@ -129,7 +129,6 @@ static void ExecuteImpl() { CodeBlockKey next_block_key; - g_state.frame_done = false; while (!g_state.frame_done) { TimingEvents::UpdateCPUDowncount(); @@ -240,7 +239,6 @@ void Execute() void ExecuteRecompiler() { - g_state.frame_done = false; while (!g_state.frame_done) { TimingEvents::UpdateCPUDowncount(); diff --git a/src/core/cpu_core.cpp b/src/core/cpu_core.cpp index 30d7e1e3f..c9bd6935e 100644 --- a/src/core/cpu_core.cpp +++ b/src/core/cpu_core.cpp @@ -66,6 +66,8 @@ void Shutdown() void Reset() { + g_state.frame_done = true; + g_state.pending_ticks = 0; g_state.downcount = MAX_SLICE_SIZE; @@ -1368,7 +1370,6 @@ restart_instruction: template static void ExecuteImpl() { - g_state.frame_done = false; while (!g_state.frame_done) { TimingEvents::UpdateCPUDowncount(); diff --git a/src/core/cpu_core.h b/src/core/cpu_core.h index 43c14c99a..66de1210a 100644 --- a/src/core/cpu_core.h +++ b/src/core/cpu_core.h @@ -66,7 +66,7 @@ struct State bool branch_was_taken = false; bool exception_raised = false; bool interrupt_delay = false; - bool frame_done = false; + bool frame_done = true; // load delays Reg load_delay_reg = Reg::count; diff --git a/src/core/dma.cpp b/src/core/dma.cpp index d685f5158..66b5faf5d 100644 --- a/src/core/dma.cpp +++ b/src/core/dma.cpp @@ -449,17 +449,17 @@ TickCount DMA::TransferMemoryToDevice(Channel channel, u32 address, u32 incremen { case Channel::GPU: { - if (g_gpu->BeginDMAWrite()) + if (g_gpu.BeginDMAWrite()) { u8* ram_pointer = Bus::g_ram; for (u32 i = 0; i < word_count; i++) { u32 value; std::memcpy(&value, &ram_pointer[address], sizeof(u32)); - g_gpu->DMAWrite(address, value); + g_gpu.DMAWrite(address, value); address = (address + increment) & ADDRESS_MASK; } - g_gpu->EndDMAWrite(); + g_gpu.EndDMAWrite(); } } break; @@ -516,7 +516,7 @@ TickCount DMA::TransferDeviceToMemory(Channel channel, u32 address, u32 incremen switch (channel) { case Channel::GPU: - g_gpu->DMARead(dest_pointer, 
word_count); + g_gpu.DMARead(dest_pointer, word_count); break; case Channel::CDROM: diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 086865c83..3cc917c7c 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -4,6 +4,7 @@ #include "common/log.h" #include "common/state_wrapper.h" #include "dma.h" +#include "gpu_backend.h" #include "host_display.h" #include "host_interface.h" #include "interrupt_controller.h" @@ -16,7 +17,7 @@ #endif Log_SetChannel(GPU); -std::unique_ptr g_gpu; +GPU g_gpu; const GPU::GP0CommandHandlerTable GPU::s_GP0_command_handler_table = GPU::GenerateGP0CommandHandlerTable(); @@ -24,9 +25,8 @@ GPU::GPU() = default; GPU::~GPU() = default; -bool GPU::Initialize(HostDisplay* host_display) +void GPU::Initialize() { - m_host_display = host_display; m_force_progressive_scan = g_settings.gpu_disable_interlacing; m_force_ntsc_timings = g_settings.gpu_force_ntsc_timings; m_crtc_state.display_aspect_ratio = Settings::GetDisplayAspectRatioValue(g_settings.display_aspect_ratio); @@ -38,7 +38,14 @@ bool GPU::Initialize(HostDisplay* host_display) m_max_run_ahead = g_settings.gpu_max_run_ahead; m_console_is_pal = System::IsPALRegion(); UpdateCRTCConfig(); - return true; +} + +void GPU::Shutdown() +{ + m_command_tick_event.reset(); + m_crtc_tick_event.reset(); + m_fifo.Clear(); + std::vector().swap(m_blit_buffer); } void GPU::UpdateSettings() @@ -58,13 +65,8 @@ void GPU::UpdateSettings() // Crop mode calls this, so recalculate the display area UpdateCRTCDisplayParameters(); -} -void GPU::UpdateResolutionScale() {} - -std::tuple GPU::GetEffectiveDisplayResolution() -{ - return std::tie(m_crtc_state.display_vram_width, m_crtc_state.display_vram_height); + g_gpu_backend->PushCommand(g_gpu_backend->NewUpdateSettingsCommand()); } void GPU::Reset() @@ -72,6 +74,8 @@ void GPU::Reset() SoftReset(); m_set_texture_disable_mask = false; m_GPUREAD_latch = 0; + + g_gpu_backend->PushCommand(g_gpu_backend->NewResetCommand()); } void GPU::SoftReset() @@ -81,7 +85,7 @@ 
void GPU::SoftReset() m_GPUSTAT.bits = 0x14802000; m_GPUSTAT.pal_mode = System::IsPALRegion(); m_drawing_area.Set(0, 0, 0, 0); - m_drawing_area_changed = true; + UpdateDrawingArea(); m_drawing_offset = {}; std::memset(&m_crtc_state.regs, 0, sizeof(m_crtc_state.regs)); m_crtc_state.regs.horizontal_display_range = 0xC60260; @@ -98,9 +102,8 @@ void GPU::SoftReset() m_fifo.Clear(); m_blit_buffer.clear(); m_blit_remaining_words = 0; - SetDrawMode(0); - SetTexturePalette(0); - SetTextureWindow(0); + m_draw_mode.bits = 0; + m_texture_window.bits = 0; UpdateDMARequest(); UpdateCRTCConfig(); UpdateCRTCTickEvent(); @@ -117,27 +120,14 @@ bool GPU::DoState(StateWrapper& sw) sw.Do(&m_GPUSTAT.bits); - sw.Do(&m_draw_mode.mode_reg.bits); - sw.Do(&m_draw_mode.palette_reg); - sw.Do(&m_draw_mode.texture_window_value); - sw.Do(&m_draw_mode.texture_page_x); - sw.Do(&m_draw_mode.texture_page_y); - sw.Do(&m_draw_mode.texture_palette_x); - sw.Do(&m_draw_mode.texture_palette_y); - sw.Do(&m_draw_mode.texture_window_mask_x); - sw.Do(&m_draw_mode.texture_window_mask_y); - sw.Do(&m_draw_mode.texture_window_offset_x); - sw.Do(&m_draw_mode.texture_window_offset_y); - sw.Do(&m_draw_mode.texture_x_flip); - sw.Do(&m_draw_mode.texture_y_flip); - + sw.Do(&m_drawing_offset.x); + sw.Do(&m_drawing_offset.y); sw.Do(&m_drawing_area.left); sw.Do(&m_drawing_area.top); sw.Do(&m_drawing_area.right); sw.Do(&m_drawing_area.bottom); - sw.Do(&m_drawing_offset.x); - sw.Do(&m_drawing_offset.y); - sw.Do(&m_drawing_offset.x); + sw.Do(&m_draw_mode.bits); + sw.Do(&m_texture_window.bits); sw.Do(&m_console_is_pal); sw.Do(&m_set_texture_disable_mask); @@ -195,9 +185,7 @@ bool GPU::DoState(StateWrapper& sw) if (sw.IsReading()) { - m_draw_mode.texture_page_changed = true; - m_draw_mode.texture_window_changed = true; - m_drawing_area_changed = true; + UpdateDrawingArea(); UpdateDMARequest(); } @@ -206,37 +194,15 @@ bool GPU::DoState(StateWrapper& sw) if (sw.IsReading()) { - // Need to clear the mask bits since we want to 
pull it in from the copy. - const u32 old_GPUSTAT = m_GPUSTAT.bits; - m_GPUSTAT.check_mask_before_draw = false; - m_GPUSTAT.set_mask_while_drawing = false; - - // Still need a temporary here. - HeapArray temp; - sw.DoBytes(temp.data(), VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, temp.data()); - - // Restore mask setting. - m_GPUSTAT.bits = old_GPUSTAT; - UpdateCRTCConfig(); UpdateDisplay(); UpdateCRTCTickEvent(); UpdateCommandTickEvent(); } - else - { - ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - sw.DoBytes(m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); - } return !sw.HasError(); } -void GPU::ResetGraphicsAPIState() {} - -void GPU::RestoreGraphicsAPIState() {} - void GPU::UpdateDMARequest() { switch (m_blitter_state) @@ -818,9 +784,9 @@ void GPU::UpdateCommandTickEvent() bool GPU::ConvertScreenCoordinatesToBeamTicksAndLines(s32 window_x, s32 window_y, u32* out_tick, u32* out_line) const { - const auto [display_x, display_y] = m_host_display->ConvertWindowCoordinatesToDisplayCoordinates( - window_x, window_y, m_host_display->GetWindowWidth(), m_host_display->GetWindowHeight(), - m_host_display->GetDisplayTopMargin()); + HostDisplay* display = g_host_interface->GetDisplay(); + const auto [display_x, display_y] = display->ConvertWindowCoordinatesToDisplayCoordinates( + window_x, window_y, display->GetWindowWidth(), display->GetWindowHeight(), display->GetDisplayTopMargin()); Log_DebugPrintf("win %d,%d -> disp %d,%d (size %u,%u frac %f,%f)", window_x, window_y, display_x, display_y, m_crtc_state.display_width, m_crtc_state.display_height, static_cast(display_x) / static_cast(m_crtc_state.display_width), @@ -850,7 +816,7 @@ u32 GPU::ReadGPUREAD() // Read with correct wrap-around behavior. 
const u16 read_x = (m_vram_transfer.x + m_vram_transfer.col) % VRAM_WIDTH; const u16 read_y = (m_vram_transfer.y + m_vram_transfer.row) % VRAM_HEIGHT; - value |= ZeroExtend32(m_vram_ptr[read_y * VRAM_WIDTH + read_x]) << (i * 16); + value |= ZeroExtend32(g_gpu_backend->GetVRAM()[read_y * VRAM_WIDTH + read_x]) << (i * 16); if (++m_vram_transfer.col == m_vram_transfer.width) { @@ -1064,7 +1030,7 @@ void GPU::HandleGetGPUInfoCommand(u32 value) case 0x02: // Get Texture Window { Log_DebugPrintf("Get texture window"); - m_GPUREAD_latch = m_draw_mode.texture_window_value; + m_GPUREAD_latch = m_texture_window.bits; } break; @@ -1098,227 +1064,6 @@ void GPU::HandleGetGPUInfoCommand(u32 value) } } -void GPU::ClearDisplay() {} - -void GPU::UpdateDisplay() {} - -void GPU::ReadVRAM(u32 x, u32 y, u32 width, u32 height) {} - -void GPU::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) -{ - const u16 color16 = RGBA8888ToRGBA5551(color); - if ((x + width) <= VRAM_WIDTH && !IsInterlacedRenderingEnabled()) - { - for (u32 yoffs = 0; yoffs < height; yoffs++) - { - const u32 row = (y + yoffs) % VRAM_HEIGHT; - std::fill_n(&m_vram_ptr[row * VRAM_WIDTH + x], width, color16); - } - } - else if (IsInterlacedRenderingEnabled()) - { - // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field. 
- if (IsCRTCScanlinePending()) - SynchronizeCRTC(); - - const u32 active_field = GetActiveLineLSB(); - for (u32 yoffs = 0; yoffs < height; yoffs++) - { - const u32 row = (y + yoffs) % VRAM_HEIGHT; - if ((row & u32(1)) == active_field) - continue; - - u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH]; - for (u32 xoffs = 0; xoffs < width; xoffs++) - { - const u32 col = (x + xoffs) % VRAM_WIDTH; - row_ptr[col] = color16; - } - } - } - else - { - for (u32 yoffs = 0; yoffs < height; yoffs++) - { - const u32 row = (y + yoffs) % VRAM_HEIGHT; - u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH]; - for (u32 xoffs = 0; xoffs < width; xoffs++) - { - const u32 col = (x + xoffs) % VRAM_WIDTH; - row_ptr[col] = color16; - } - } - } -} - -void GPU::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) -{ - // Fast path when the copy is not oversized. - if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !m_GPUSTAT.IsMaskingEnabled()) - { - const u16* src_ptr = static_cast(data); - u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x]; - for (u32 yoffs = 0; yoffs < height; yoffs++) - { - std::copy_n(src_ptr, width, dst_ptr); - src_ptr += width; - dst_ptr += VRAM_WIDTH; - } - } - else - { - // Slow path when we need to handle wrap-around. - const u16* src_ptr = static_cast(data); - const u16 mask_and = m_GPUSTAT.GetMaskAND(); - const u16 mask_or = m_GPUSTAT.GetMaskOR(); - - for (u32 row = 0; row < height;) - { - u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH]; - for (u32 col = 0; col < width;) - { - // TODO: Handle unaligned reads... - u16* pixel_ptr = &dst_row_ptr[(x + col++) % VRAM_WIDTH]; - if (((*pixel_ptr) & mask_and) == 0) - *pixel_ptr = *(src_ptr++) | mask_or; - } - } - } -} - -void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) -{ - // Break up oversized copies. This behavior has not been verified on console. 
- if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH) - { - u32 remaining_rows = height; - u32 current_src_y = src_y; - u32 current_dst_y = dst_y; - while (remaining_rows > 0) - { - const u32 rows_to_copy = - std::min(remaining_rows, std::min(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y)); - - u32 remaining_columns = width; - u32 current_src_x = src_x; - u32 current_dst_x = dst_x; - while (remaining_columns > 0) - { - const u32 columns_to_copy = - std::min(remaining_columns, std::min(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x)); - CopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy); - current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH; - current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH; - remaining_columns -= columns_to_copy; - } - - current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT; - current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT; - remaining_rows -= rows_to_copy; - } - - return; - } - - // This doesn't have a fast path, but do we really need one? It's not common. - const u16 mask_and = m_GPUSTAT.GetMaskAND(); - const u16 mask_or = m_GPUSTAT.GetMaskOR(); - - // Copy in reverse when src_x < dst_x, this is verified on console. 
- if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH)) - { - for (u32 row = 0; row < height; row++) - { - const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - - for (s32 col = static_cast(width - 1); col >= 0; col--) - { - const u16 src_pixel = src_row_ptr[(src_x + static_cast(col)) % VRAM_WIDTH]; - u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast(col)) % VRAM_WIDTH]; - if ((*dst_pixel_ptr & mask_and) == 0) - *dst_pixel_ptr = src_pixel | mask_or; - } - } - } - else - { - for (u32 row = 0; row < height; row++) - { - const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; - - for (u32 col = 0; col < width; col++) - { - const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH]; - u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH]; - if ((*dst_pixel_ptr & mask_and) == 0) - *dst_pixel_ptr = src_pixel | mask_or; - } - } - } -} - -void GPU::DispatchRenderCommand() {} - -void GPU::FlushRender() {} - -void GPU::SetDrawMode(u16 value) -{ - DrawMode::Reg new_mode_reg{static_cast(value & DrawMode::Reg::MASK)}; - if (!m_set_texture_disable_mask) - new_mode_reg.texture_disable = false; - - if (new_mode_reg.bits == m_draw_mode.mode_reg.bits) - return; - - if ((new_mode_reg.bits & DrawMode::Reg::TEXTURE_PAGE_MASK) != - (m_draw_mode.mode_reg.bits & DrawMode::Reg::TEXTURE_PAGE_MASK)) - { - m_draw_mode.texture_page_x = new_mode_reg.GetTexturePageXBase(); - m_draw_mode.texture_page_y = new_mode_reg.GetTexturePageYBase(); - m_draw_mode.texture_page_changed = true; - } - - m_draw_mode.mode_reg.bits = new_mode_reg.bits; - - if (m_GPUSTAT.draw_to_displayed_field != new_mode_reg.draw_to_displayed_field) - FlushRender(); - - // Bits 0..10 are returned in the GPU status register. 
- m_GPUSTAT.bits = - (m_GPUSTAT.bits & ~(DrawMode::Reg::GPUSTAT_MASK)) | (ZeroExtend32(new_mode_reg.bits) & DrawMode::Reg::GPUSTAT_MASK); - m_GPUSTAT.texture_disable = m_draw_mode.mode_reg.texture_disable; -} - -void GPU::SetTexturePalette(u16 value) -{ - value &= DrawMode::PALETTE_MASK; - if (m_draw_mode.palette_reg == value) - return; - - m_draw_mode.texture_palette_x = ZeroExtend32(value & 0x3F) * 16; - m_draw_mode.texture_palette_y = ZeroExtend32(value >> 6); - m_draw_mode.palette_reg = value; - m_draw_mode.texture_page_changed = true; -} - -void GPU::SetTextureWindow(u32 value) -{ - value &= DrawMode::TEXTURE_WINDOW_MASK; - if (m_draw_mode.texture_window_value == value) - return; - - FlushRender(); - - m_draw_mode.texture_window_mask_x = value & UINT32_C(0x1F); - m_draw_mode.texture_window_mask_y = (value >> 5) & UINT32_C(0x1F); - m_draw_mode.texture_window_offset_x = (value >> 10) & UINT32_C(0x1F); - m_draw_mode.texture_window_offset_y = (value >> 15) & UINT32_C(0x1F); - m_draw_mode.texture_window_value = value; - m_draw_mode.texture_window_changed = true; -} - bool GPU::DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha) { auto fp = FileSystem::OpenManagedCFile(filename, "wb"); @@ -1417,7 +1162,7 @@ void GPU::DrawDebugStateWindow() ImGui::Columns(1); } - DrawRendererStats(is_idle_frame); + g_gpu_backend->DrawRendererStats(is_idle_frame); if (ImGui::CollapsingHeader("GPU", ImGuiTreeNodeFlags_DefaultOpen)) { @@ -1468,5 +1213,3 @@ void GPU::DrawDebugStateWindow() ImGui::End(); #endif } - -void GPU::DrawRendererStats(bool is_idle_frame) {} diff --git a/src/core/gpu.h b/src/core/gpu.h index 54078114b..39f339227 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -2,6 +2,7 @@ #include "common/bitfield.h" #include "common/fifo_queue.h" #include "common/rectangle.h" +#include "gpu_types.h" #include "timers.h" #include "types.h" #include @@ -13,12 +14,12 @@ class StateWrapper; -class HostDisplay; - class 
TimingEvent; class Timers; -class GPU +class GPUBackend; + +class GPU final { public: enum class BlitterState : u8 @@ -37,66 +38,12 @@ public: GPUREADtoCPU = 3 }; - enum class Primitive : u8 - { - Reserved = 0, - Polygon = 1, - Line = 2, - Rectangle = 3 - }; - - enum class DrawRectangleSize : u8 - { - Variable = 0, - R1x1 = 1, - R8x8 = 2, - R16x16 = 3 - }; - - enum class TextureMode : u8 - { - Palette4Bit = 0, - Palette8Bit = 1, - Direct16Bit = 2, - Reserved_Direct16Bit = 3, - - // Not register values. - RawTextureBit = 4, - RawPalette4Bit = RawTextureBit | Palette4Bit, - RawPalette8Bit = RawTextureBit | Palette8Bit, - RawDirect16Bit = RawTextureBit | Direct16Bit, - Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit, - - Disabled = 8 // Not a register value - }; - - enum class TransparencyMode : u8 - { - HalfBackgroundPlusHalfForeground = 0, - BackgroundPlusForeground = 1, - BackgroundMinusForeground = 2, - BackgroundPlusQuarterForeground = 3, - - Disabled = 4 // Not a register value - }; - enum : u32 { - VRAM_WIDTH = 1024, - VRAM_HEIGHT = 512, - VRAM_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16), - VRAM_WIDTH_MASK = VRAM_WIDTH - 1, - VRAM_HEIGHT_MASK = VRAM_HEIGHT - 1, - VRAM_COORD_MASK = 0x3FF, MAX_FIFO_SIZE = 4096, - TEXTURE_PAGE_WIDTH = 256, - TEXTURE_PAGE_HEIGHT = 256, - MAX_PRIMITIVE_WIDTH = 1024, - MAX_PRIMITIVE_HEIGHT = 512, DOT_TIMER_INDEX = 0, HBLANK_TIMER_INDEX = 1, - MAX_RESOLUTION_SCALE = 16, - DITHER_MATRIX_SIZE = 4 + MAX_RESOLUTION_SCALE = 16 }; enum : u16 @@ -117,17 +64,12 @@ public: // Base class constructor. GPU(); - virtual ~GPU(); + ~GPU(); - virtual bool IsHardwareRenderer() const = 0; - - virtual bool Initialize(HostDisplay* host_display); - virtual void Reset(); - virtual bool DoState(StateWrapper& sw); - - // Graphics API state reset/restore - call when drawing the UI etc. 
- virtual void ResetGraphicsAPIState(); - virtual void RestoreGraphicsAPIState(); + void Initialize(); + void Shutdown(); + void Reset(); + bool DoState(StateWrapper& sw); // Render statistics debug window. void DrawDebugStateWindow(); @@ -164,6 +106,20 @@ public: return (!m_force_progressive_scan) & m_GPUSTAT.SkipDrawingToActiveField(); } + /// Returns the interlaced mode to use when scanning out/displaying. + ALWAYS_INLINE GPUInterlacedDisplayMode GetInterlacedDisplayMode() const + { + if (IsInterlacedDisplayEnabled()) + { + return m_GPUSTAT.vertical_resolution ? GPUInterlacedDisplayMode::InterleavedFields : + GPUInterlacedDisplayMode::SeparateFields; + } + else + { + return GPUInterlacedDisplayMode::None; + } + } + /// Returns the number of pending GPU ticks. TickCount GetPendingCRTCTicks() const; TickCount GetPendingCommandTicks() const; @@ -178,25 +134,9 @@ public: void SynchronizeCRTC(); /// Recompile shaders/recreate framebuffers when needed. - virtual void UpdateSettings(); + void UpdateSettings(); - /// Updates the resolution scale when it's set to automatic. - virtual void UpdateResolutionScale(); - /// Returns the effective display resolution of the GPU. - virtual std::tuple GetEffectiveDisplayResolution(); - - // gpu_hw_d3d11.cpp - static std::unique_ptr CreateHardwareD3D11Renderer(); - - // gpu_hw_opengl.cpp - static std::unique_ptr CreateHardwareOpenGLRenderer(); - - // gpu_hw_vulkan.cpp - static std::unique_ptr CreateHardwareVulkanRenderer(); - - // gpu_sw.cpp - static std::unique_ptr CreateSoftwareRenderer(); // Converts window coordinates into horizontal ticks and scanlines. Returns false if out of range. Used for lightguns. bool ConvertScreenCoordinatesToBeamTicksAndLines(s32 window_x, s32 window_y, u32* out_tick, u32* out_line) const; @@ -204,7 +144,7 @@ public: // Returns the video clock frequency. 
TickCount GetCRTCFrequency() const; -protected: +private: TickCount CRTCTicksToSystemTicks(TickCount crtc_ticks, TickCount fractional_ticks) const; TickCount SystemTicksToCRTCTicks(TickCount sysclk_ticks, TickCount* fractional_ticks) const; @@ -215,161 +155,9 @@ protected: } ALWAYS_INLINE static constexpr TickCount SystemTicksToGPUTicks(TickCount sysclk_ticks) { return sysclk_ticks << 1; } - // Helper/format conversion functions. - static constexpr u8 Convert5To8(u8 x5) { return (x5 << 3) | (x5 & 7); } - static constexpr u8 Convert8To5(u8 x8) { return (x8 >> 3); } - - static constexpr u32 RGBA5551ToRGBA8888(u16 color) - { - u8 r = Truncate8(color & 31); - u8 g = Truncate8((color >> 5) & 31); - u8 b = Truncate8((color >> 10) & 31); - u8 a = Truncate8((color >> 15) & 1); - - // 00012345 -> 1234545 - b = (b << 3) | (b & 0b111); - g = (g << 3) | (g & 0b111); - r = (r << 3) | (r & 0b111); - a = a ? 255 : 0; - - return ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16) | (ZeroExtend32(a) << 24); - } - - static constexpr u16 RGBA8888ToRGBA5551(u32 color) - { - const u16 r = Truncate16((color >> 3) & 0x1Fu); - const u16 g = Truncate16((color >> 11) & 0x1Fu); - const u16 b = Truncate16((color >> 19) & 0x1Fu); - const u16 a = Truncate16((color >> 31) & 0x01u); - - return r | (g << 5) | (b << 10) | (a << 15); - } - - static constexpr std::tuple UnpackTexcoord(u16 texcoord) - { - return std::make_tuple(static_cast(texcoord), static_cast(texcoord >> 8)); - } - - static constexpr std::tuple UnpackColorRGB24(u32 rgb24) - { - return std::make_tuple(static_cast(rgb24), static_cast(rgb24 >> 8), static_cast(rgb24 >> 16)); - } - static constexpr u32 PackColorRGB24(u8 r, u8 g, u8 b) - { - return ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16); - } - static bool DumpVRAMToFile(const char* filename, u32 width, u32 height, u32 stride, const void* buffer, bool remove_alpha); - union RenderCommand - { - u32 bits; - - BitField color_for_first_vertex; - 
BitField raw_texture_enable; // not valid for lines - BitField transparency_enable; - BitField texture_enable; - BitField rectangle_size; // only for rectangles - BitField quad_polygon; // only for polygons - BitField polyline; // only for lines - BitField shading_enable; // 0 - flat, 1 = gouroud - BitField primitive; - - /// Returns true if texturing should be enabled. Depends on the primitive type. - bool IsTexturingEnabled() const { return (primitive != Primitive::Line) ? texture_enable : false; } - - /// Returns true if dithering should be enabled. Depends on the primitive type. - bool IsDitheringEnabled() const - { - switch (primitive) - { - case Primitive::Polygon: - return shading_enable || (texture_enable && !raw_texture_enable); - - case Primitive::Line: - return true; - - case Primitive::Rectangle: - default: - return false; - } - } - }; - - union VertexPosition - { - u32 bits; - - BitField x; - BitField y; - }; - - // Sprites/rectangles should be clipped to 12 bits before drawing. 
- static constexpr s32 TruncateVertexPosition(s32 x) { return SignExtendN<11, s32>(x); } - - struct NativeVertex - { - s16 x; - s16 y; - u32 color; - u16 texcoord; - }; - - union VRAMPixel - { - u16 bits; - - BitField r; - BitField g; - BitField b; - BitField c; - - u8 GetR8() const { return Convert5To8(r); } - u8 GetG8() const { return Convert5To8(g); } - u8 GetB8() const { return Convert5To8(b); } - - void Set(u8 r_, u8 g_, u8 b_, bool c_ = false) - { - bits = (ZeroExtend16(r_)) | (ZeroExtend16(g_) << 5) | (ZeroExtend16(b_) << 10) | (static_cast(c_) << 15); - } - - void ClampAndSet(u8 r_, u8 g_, u8 b_, bool c_ = false) - { - Set(std::min(r_, 0x1F), std::min(g_, 0x1F), std::min(b_, 0x1F), c_); - } - - void SetRGB24(u32 rgb24, bool c_ = false) - { - bits = Truncate16(((rgb24 >> 3) & 0x1F) | (((rgb24 >> 11) & 0x1F) << 5) | (((rgb24 >> 19) & 0x1F) << 10)) | - (static_cast(c_) << 15); - } - - void SetRGB24(u8 r8, u8 g8, u8 b8, bool c_ = false) - { - bits = (ZeroExtend16(r8 >> 3)) | (ZeroExtend16(g8 >> 3) << 5) | (ZeroExtend16(b8 >> 3) << 10) | - (static_cast(c_) << 15); - } - - void SetRGB24Dithered(u32 x, u32 y, u8 r8, u8 g8, u8 b8, bool c_ = false) - { - const s32 offset = DITHER_MATRIX[y & 3][x & 3]; - r8 = static_cast(std::clamp(static_cast(ZeroExtend32(r8)) + offset, 0, 255)); - g8 = static_cast(std::clamp(static_cast(ZeroExtend32(g8)) + offset, 0, 255)); - b8 = static_cast(std::clamp(static_cast(ZeroExtend32(b8)) + offset, 0, 255)); - SetRGB24(r8, g8, b8, c_); - } - - u32 ToRGB24() const - { - const u32 r_ = ZeroExtend32(r.GetValue()); - const u32 g_ = ZeroExtend32(g.GetValue()); - const u32 b_ = ZeroExtend32(b.GetValue()); - - return ((r_ << 3) | (r_ & 7)) | (((g_ << 3) | (g_ & 7)) << 8) | (((b_ << 3) | (b_ & 7)) << 16); - } - }; - void SoftReset(); // Sets dots per scanline @@ -390,21 +178,6 @@ protected: void CRTCTickEvent(TickCount ticks); void CommandTickEvent(TickCount ticks); - /// Returns 0 if the currently-displayed field is on odd lines (1,3,5,...) 
or 1 if even (2,4,6,...). - ALWAYS_INLINE u32 GetInterlacedDisplayField() const { return ZeroExtend32(m_crtc_state.interlaced_field); } - - /// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1. - ALWAYS_INLINE u32 GetActiveLineLSB() const { return ZeroExtend32(m_crtc_state.active_line_lsb); } - - /// Sets/decodes GP0(E1h) (set draw mode). - void SetDrawMode(u16 bits); - - /// Sets/decodes polygon/rectangle texture palette value. - void SetTexturePalette(u16 bits); - - /// Sets/decodes texture window bits. - void SetTextureWindow(u32 value); - u32 ReadGPUREAD(); void FinishVRAMWrite(); @@ -425,17 +198,6 @@ protected: void ExecuteCommands(); void HandleGetGPUInfoCommand(u32 value); - // Rendering in the backend - virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height); - virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color); - virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data); - virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height); - virtual void DispatchRenderCommand(); - virtual void FlushRender(); - virtual void ClearDisplay(); - virtual void UpdateDisplay(); - virtual void DrawRendererStats(bool is_idle_frame); - // These are **very** approximate. ALWAYS_INLINE void AddDrawTriangleTicks(u32 width, u32 height, bool shaded, bool textured, bool semitransparent) { @@ -470,21 +232,16 @@ protected: AddCommandTicks(std::max(width, height)); } - HostDisplay* m_host_display = nullptr; - std::unique_ptr m_crtc_tick_event; std::unique_ptr m_command_tick_event; - // Pointer to VRAM, used for reads/writes. In the hardware backends, this is the shadow buffer. 
- u16* m_vram_ptr = nullptr; - union GPUSTAT { u32 bits; BitField texture_page_x_base; BitField texture_page_y_base; - BitField semi_transparency_mode; - BitField texture_color_mode; + BitField semi_transparency_mode; + BitField texture_color_mode; BitField dither_enable; BitField draw_to_displayed_field; BitField set_mask_while_drawing; @@ -537,105 +294,18 @@ protected: } } m_GPUSTAT = {}; - struct DrawMode - { - static constexpr u16 PALETTE_MASK = UINT16_C(0b0111111111111111); - static constexpr u32 TEXTURE_WINDOW_MASK = UINT32_C(0b11111111111111111111); - - // bits in GP0(E1h) or texpage part of polygon - union Reg - { - static constexpr u16 MASK = 0b1111111111111; - static constexpr u16 TEXTURE_PAGE_MASK = UINT16_C(0b0000000000011111); - - // Polygon texpage commands only affect bits 0-8, 11 - static constexpr u16 POLYGON_TEXPAGE_MASK = 0b0000100111111111; - - // Bits 0..5 are returned in the GPU status register, latched at E1h/polygon draw time. - static constexpr u32 GPUSTAT_MASK = 0b11111111111; - - u16 bits; - - BitField texture_page_x_base; - BitField texture_page_y_base; - BitField transparency_mode; - BitField texture_mode; - BitField dither_enable; - BitField draw_to_displayed_field; - BitField texture_disable; - BitField texture_x_flip; - BitField texture_y_flip; - - u32 GetTexturePageXBase() const { return ZeroExtend32(texture_page_x_base.GetValue()) * 64; } - u32 GetTexturePageYBase() const { return ZeroExtend32(texture_page_y_base.GetValue()) * 256; } - }; - - // original values - Reg mode_reg; - u16 palette_reg; // from vertex - u32 texture_window_value; - - // decoded values - u32 texture_page_x; - u32 texture_page_y; - u32 texture_palette_x; - u32 texture_palette_y; - u8 texture_window_mask_x; // in 8 pixel steps - u8 texture_window_mask_y; // in 8 pixel steps - u8 texture_window_offset_x; // in 8 pixel steps - u8 texture_window_offset_y; // in 8 pixel steps - bool texture_x_flip; - bool texture_y_flip; - bool texture_page_changed; - bool 
texture_window_changed; - - /// Returns the texture/palette rendering mode. - TextureMode GetTextureMode() const { return mode_reg.texture_mode; } - - /// Returns the semi-transparency mode when enabled. - TransparencyMode GetTransparencyMode() const { return mode_reg.transparency_mode; } - - /// Returns true if the texture mode requires a palette. - bool IsUsingPalette() const { return (mode_reg.bits & (2 << 7)) == 0; } - - /// Returns a rectangle comprising the texture page area. - Common::Rectangle GetTexturePageRectangle() const - { - static constexpr std::array texture_page_widths = { - {TEXTURE_PAGE_WIDTH / 4, TEXTURE_PAGE_WIDTH / 2, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_WIDTH}}; - return Common::Rectangle::FromExtents(texture_page_x, texture_page_y, - texture_page_widths[static_cast(mode_reg.texture_mode.GetValue())], - TEXTURE_PAGE_HEIGHT); - } - - /// Returns a rectangle comprising the texture palette area. - Common::Rectangle GetTexturePaletteRectangle() const - { - static constexpr std::array palette_widths = {{16, 256, 0, 0}}; - return Common::Rectangle::FromExtents(texture_palette_x, texture_palette_y, - palette_widths[static_cast(mode_reg.texture_mode.GetValue())], 1); - } - - bool IsTexturePageChanged() const { return texture_page_changed; } - void SetTexturePageChanged() { texture_page_changed = true; } - void ClearTexturePageChangedFlag() { texture_page_changed = false; } - - bool IsTextureWindowChanged() const { return texture_window_changed; } - void SetTextureWindowChanged() { texture_window_changed = true; } - void ClearTextureWindowChangedFlag() { texture_window_changed = false; } - } m_draw_mode = {}; - - Common::Rectangle m_drawing_area{0, 0, VRAM_WIDTH, VRAM_HEIGHT}; - struct DrawingOffset { s32 x; s32 y; } m_drawing_offset = {}; + Common::Rectangle m_drawing_area{0, 0, VRAM_WIDTH, VRAM_HEIGHT}; + GPUDrawModeReg m_draw_mode{}; + GPUTextureWindowReg m_texture_window{}; + bool m_console_is_pal = false; bool m_set_texture_disable_mask = false; - 
bool m_drawing_area_changed = false; bool m_force_progressive_scan = false; bool m_force_ntsc_timings = false; @@ -733,7 +403,7 @@ protected: HeapFIFOQueue m_fifo; std::vector m_blit_buffer; u32 m_blit_remaining_words; - RenderCommand m_render_command{}; + GPURenderCommand m_render_command{}; ALWAYS_INLINE u32 FifoPop() { return Truncate32(m_fifo.Pop()); } ALWAYS_INLINE u32 FifoPeek() { return Truncate32(m_fifo.Peek()); } @@ -754,11 +424,17 @@ protected: Stats m_stats = {}; Stats m_last_stats = {}; -private: using GP0CommandHandler = bool (GPU::*)(); using GP0CommandHandlerTable = std::array; static GP0CommandHandlerTable GenerateGP0CommandHandlerTable(); + void ClearDisplay(); + void UpdateDisplay(); + void FillBackendCommandParameters(GPUBackendCommand* cmd) const; + void FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const; + void UpdateDrawingArea(); + void FlushRender(); + // Rendering commands, returns false if not enough data is provided bool HandleUnknownGP0Command(); bool HandleNOPCommand(); @@ -774,6 +450,7 @@ private: bool HandleRenderRectangleCommand(); bool HandleRenderLineCommand(); bool HandleRenderPolyLineCommand(); + void FinishPolyLineRenderCommand(); bool HandleFillRectangleCommand(); bool HandleCopyRectangleCPUToVRAMCommand(); bool HandleCopyRectangleVRAMToCPUCommand(); @@ -782,6 +459,4 @@ private: static const GP0CommandHandlerTable s_GP0_command_handler_table; }; -IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPU::TextureMode); - -extern std::unique_ptr g_gpu; +extern GPU g_gpu; diff --git a/src/core/gpu_backend.cpp b/src/core/gpu_backend.cpp new file mode 100644 index 000000000..ee095e975 --- /dev/null +++ b/src/core/gpu_backend.cpp @@ -0,0 +1,679 @@ +#include "gpu_backend.h" +#include "common/log.h" +#include "common/state_wrapper.h" +#include "settings.h" + +#include "gpu_hw_opengl.h" +#include "gpu_hw_vulkan.h" +#include "gpu_sw.h" + +#ifdef WIN32 +#include "gpu_hw_d3d11.h" +#endif + +Log_SetChannel(GPUBackend); + 
+std::unique_ptr g_gpu_backend; + +GPUBackend::GPUBackend() = default; + +GPUBackend::~GPUBackend() = default; + +static std::unique_ptr CreateBackend(GPURenderer backend) +{ + switch (backend) + { +#ifdef WIN32 + case GPURenderer::HardwareD3D11: + return std::make_unique(); +#endif + + case GPURenderer::HardwareOpenGL: + return std::make_unique(); + + case GPURenderer::HardwareVulkan: + return std::make_unique(); + + case GPURenderer::Software: + default: + return std::make_unique(); + } +} + +bool GPUBackend::Create(GPURenderer backend) +{ + g_gpu_backend = CreateBackend(backend); + if (!g_gpu_backend || !g_gpu_backend->Initialize()) + { + Log_ErrorPrintf("Failed to initialize GPU backend, falling back to software"); + g_gpu_backend.reset(); + g_gpu_backend = CreateBackend(GPURenderer::Software); + if (!g_gpu_backend->Initialize()) + { + g_gpu_backend.reset(); + return false; + } + } + + return true; +} + +bool GPUBackend::Initialize() +{ + return true; +} + +void GPUBackend::Reset() +{ + m_drawing_area = {}; + m_display_aspect_ratio = 1.0f; + m_display_width = 0; + m_display_height = 0; + m_display_origin_left = 0; + m_display_origin_top = 0; + m_display_vram_left = 0; + m_display_vram_top = 0; + m_display_vram_width = 0; + m_display_vram_height = 0; + m_display_vram_start_x = 0; + m_display_vram_start_y = 0; + m_display_interlace = GPUInterlacedDisplayMode::None; + m_display_interlace_field = 0; + m_display_enabled = false; + m_display_24bit = false; +} + +void GPUBackend::UpdateSettings() {} + +void GPUBackend::ResetGraphicsAPIState() {} + +void GPUBackend::RestoreGraphicsAPIState() {} + +bool GPUBackend::IsHardwareRenderer() const +{ + return false; +} + +void GPUBackend::UpdateResolutionScale() {} + +std::tuple GPUBackend::GetEffectiveDisplayResolution() +{ + return std::tie(m_display_vram_width, m_display_vram_height); +} + +void GPUBackend::DrawRendererStats(bool is_idle_frame) {} + +bool GPUBackend::DoState(StateWrapper& sw) +{ + if (sw.IsReading()) + { + 
// Still need a temporary here. + HeapArray temp; + sw.DoBytes(temp.data(), VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, temp.data(), {}); + } + else + { + FlushRender(); + ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); + sw.DoBytes(m_vram_ptr, VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16)); + } + + sw.Do(&m_drawing_area.left); + sw.Do(&m_drawing_area.top); + sw.Do(&m_drawing_area.right); + sw.Do(&m_drawing_area.bottom); + + sw.Do(&m_display_aspect_ratio); + sw.Do(&m_display_width); + sw.Do(&m_display_height); + sw.Do(&m_display_origin_left); + sw.Do(&m_display_origin_top); + sw.Do(&m_display_vram_left); + sw.Do(&m_display_vram_top); + sw.Do(&m_display_vram_width); + sw.Do(&m_display_vram_height); + sw.Do(&m_display_vram_start_x); + sw.Do(&m_display_vram_start_y); + sw.Do(&m_display_interlace); + sw.Do(&m_display_interlace_field); + sw.Do(&m_display_enabled); + sw.Do(&m_display_24bit); + + return !sw.HasError(); +} + +GPUBackendResetCommand* GPUBackend::NewResetCommand() +{ + GPUBackendResetCommand* cmd = static_cast(AllocateCommand(sizeof(GPUBackendResetCommand))); + cmd->type = GPUBackendCommandType::Reset; + cmd->size = cmd->Size(); + return cmd; +} + +GPUBackendUpdateSettingsCommand* GPUBackend::NewUpdateSettingsCommand() +{ + GPUBackendUpdateSettingsCommand* cmd = + static_cast(AllocateCommand(sizeof(GPUBackendUpdateSettingsCommand))); + cmd->type = GPUBackendCommandType::UpdateSettings; + cmd->size = cmd->Size(); + return cmd; +} + +GPUBackendUpdateResolutionScaleCommand* GPUBackend::NewUpdateResolutionScaleCommand() +{ + GPUBackendUpdateResolutionScaleCommand* cmd = static_cast( + AllocateCommand(sizeof(GPUBackendUpdateResolutionScaleCommand))); + cmd->type = GPUBackendCommandType::UpdateResolutionScale; + cmd->size = cmd->Size(); + return cmd; +} + +GPUBackendReadVRAMCommand* GPUBackend::NewReadVRAMCommand() +{ + GPUBackendReadVRAMCommand* cmd = + static_cast(AllocateCommand(sizeof(GPUBackendReadVRAMCommand))); + cmd->type = 
GPUBackendCommandType::ReadVRAM; + cmd->size = cmd->Size(); + return cmd; +} + +GPUBackendFillVRAMCommand* GPUBackend::NewFillVRAMCommand() +{ + GPUBackendFillVRAMCommand* cmd = + static_cast(AllocateCommand(sizeof(GPUBackendFillVRAMCommand))); + cmd->type = GPUBackendCommandType::FillVRAM; + cmd->size = cmd->Size(); + return cmd; +} + +GPUBackendUpdateVRAMCommand* GPUBackend::NewUpdateVRAMCommand(u32 num_words) +{ + const u32 size = sizeof(GPUBackendUpdateVRAMCommand) + (num_words * sizeof(u16)); + GPUBackendUpdateVRAMCommand* cmd = static_cast(AllocateCommand(size)); + cmd->type = GPUBackendCommandType::UpdateVRAM; + cmd->size = size; + return cmd; +} + +GPUBackendCopyVRAMCommand* GPUBackend::NewCopyVRAMCommand() +{ + GPUBackendCopyVRAMCommand* cmd = + static_cast(AllocateCommand(sizeof(GPUBackendCopyVRAMCommand))); + cmd->type = GPUBackendCommandType::CopyVRAM; + cmd->size = cmd->Size(); + return cmd; +} + +GPUBackendSetDrawingAreaCommand* GPUBackend::NewSetDrawingAreaCommand() +{ + GPUBackendSetDrawingAreaCommand* cmd = + static_cast(AllocateCommand(sizeof(GPUBackendSetDrawingAreaCommand))); + cmd->type = GPUBackendCommandType::SetDrawingArea; + cmd->size = cmd->Size(); + return cmd; +} + +GPUBackendDrawPolygonCommand* GPUBackend::NewDrawPolygonCommand(u32 num_vertices) +{ + const u32 size = sizeof(GPUBackendDrawPolygonCommand) + (num_vertices * sizeof(GPUBackendDrawPolygonCommand::Vertex)); + GPUBackendDrawPolygonCommand* cmd = static_cast(AllocateCommand(size)); + cmd->type = GPUBackendCommandType::DrawPolygon; + cmd->size = size; + cmd->num_vertices = Truncate16(num_vertices); + return cmd; +} + +GPUBackendDrawRectangleCommand* GPUBackend::NewDrawRectangleCommand() +{ + GPUBackendDrawRectangleCommand* cmd = + static_cast(AllocateCommand(sizeof(GPUBackendDrawRectangleCommand))); + cmd->type = GPUBackendCommandType::DrawRectangle; + cmd->size = cmd->Size(); + return cmd; +} + +GPUBackendDrawLineCommand* GPUBackend::NewDrawLineCommand(u32 num_vertices) +{ + 
const u32 size = sizeof(GPUBackendDrawLineCommand) + (num_vertices * sizeof(GPUBackendDrawLineCommand::Vertex));
+  GPUBackendDrawLineCommand* cmd = static_cast(AllocateCommand(size));
+  cmd->type = GPUBackendCommandType::DrawLine;
+  cmd->size = size; // header + vertex array, as in NewDrawPolygonCommand; num_vertices is not assigned yet here
+  cmd->num_vertices = Truncate16(num_vertices);
+  return cmd;
+}
+
+GPUBackendClearDisplayCommand* GPUBackend::NewClearDisplayCommand()
+{
+  GPUBackendClearDisplayCommand* cmd =
+    static_cast(AllocateCommand(sizeof(GPUBackendClearDisplayCommand))); // was sizeof(GPUBackendUpdateVRAMCommand)
+  cmd->type = GPUBackendCommandType::ClearDisplay;
+  cmd->size = cmd->Size();
+  return cmd;
+}
+
+GPUBackendUpdateDisplayCommand* GPUBackend::NewUpdateDisplayCommand()
+{
+  GPUBackendUpdateDisplayCommand* cmd =
+    static_cast(AllocateCommand(sizeof(GPUBackendUpdateDisplayCommand)));
+  cmd->type = GPUBackendCommandType::UpdateDisplay;
+  cmd->size = cmd->Size();
+  return cmd;
+}
+
+GPUBackendFlushRenderCommand* GPUBackend::NewFlushRenderCommand()
+{
+  GPUBackendFlushRenderCommand* cmd =
+    static_cast(AllocateCommand(sizeof(GPUBackendFlushRenderCommand)));
+  cmd->type = GPUBackendCommandType::FlushRender;
+  cmd->size = cmd->Size();
+  return cmd;
+}
+
+void* GPUBackend::AllocateCommand(u32 size)
+{
+  for (;;)
+  {
+    const u32 write_ptr = m_command_fifo_write_ptr.load();
+    const u32 available_size = COMMAND_QUEUE_SIZE - write_ptr;
+    if ((size + sizeof(GPUBackendSyncCommand)) > available_size)
+    {
+      Sync();
+      continue;
+    }
+
+    return &m_command_fifo_data[write_ptr];
+  }
+}
+
+u32 GPUBackend::GetPendingCommandSize() const
+{
+  const u32 read_ptr = m_command_fifo_read_ptr.load();
+  const u32 write_ptr = m_command_fifo_write_ptr.load();
+  return (write_ptr - read_ptr);
+}
+
+void GPUBackend::PushCommand(GPUBackendCommand* cmd)
+{
+  if (!g_settings.cpu_thread)
+  {
+    // single-thread mode
+    if (cmd->type != GPUBackendCommandType::Sync)
+      HandleCommand(cmd);
+  }
+  else
+  {
+    const u32 new_write_ptr = m_command_fifo_write_ptr.fetch_add(cmd->size) + cmd->size;
+
DebugAssert(new_write_ptr <= COMMAND_QUEUE_SIZE); + if (cmd->type == GPUBackendCommandType::Sync || cmd->type == GPUBackendCommandType::FrameDone || + (new_write_ptr - m_command_fifo_read_ptr.load()) >= THRESHOLD_TO_WAKE_GPU) + { + WakeGPUThread(); + } + } +} + +void GPUBackend::WakeGPUThread() +{ + std::unique_lock lock(m_sync_mutex); + if (!m_gpu_thread_sleeping.load()) + return; + + m_wake_gpu_thread_cv.notify_one(); +} + +void GPUBackend::Sync() +{ + if (!g_settings.cpu_thread) + return; + + // since we do this on wrap-around, it can't go through the regular path + const u32 write_ptr = m_command_fifo_write_ptr.load(); + Assert((COMMAND_QUEUE_SIZE - write_ptr) >= sizeof(GPUBackendSyncCommand)); + GPUBackendSyncCommand* cmd = reinterpret_cast(&m_command_fifo_data[write_ptr]); + cmd->type = GPUBackendCommandType::Sync; + cmd->size = cmd->Size(); + PushCommand(cmd); + + m_sync_event.Wait(); + m_sync_event.Reset(); +} + +void GPUBackend::CPUFrameDone() +{ + if (!g_settings.cpu_thread) + return; + + GPUBackendFrameDoneCommand* cmd = + reinterpret_cast(AllocateCommand(sizeof(GPUBackendFrameDoneCommand))); + cmd->type = GPUBackendCommandType::FrameDone; + cmd->size = cmd->Size(); + PushCommand(cmd); +} + +void GPUBackend::ProcessGPUCommands() +{ + for (;;) + { + const u32 write_ptr = m_command_fifo_write_ptr.load(); + u32 read_ptr = m_command_fifo_read_ptr.load(); + if (read_ptr == write_ptr) + return; + + while (read_ptr < write_ptr) + { + const GPUBackendCommand* cmd = reinterpret_cast(&m_command_fifo_data[read_ptr]); + read_ptr += cmd->size; + + if (cmd->type == GPUBackendCommandType::Sync) + { + Assert(read_ptr == m_command_fifo_write_ptr.load()); + m_command_fifo_read_ptr.store(0); + m_command_fifo_write_ptr.store(0); + m_sync_event.Signal(); + return; + } + else if (cmd->type == GPUBackendCommandType::FrameDone) + { + m_frame_done = true; + m_command_fifo_read_ptr.store(read_ptr); + return; + } + else + { + HandleCommand(cmd); + } + } + + 
m_command_fifo_read_ptr.store(read_ptr);
+  }
+}
+
+void GPUBackend::RunGPUFrame()
+{
+  m_frame_done = false;
+
+  for (;;)
+  {
+    ProcessGPUCommands(); // drain this backend's own queue instead of going through the g_gpu_backend global
+
+    if (m_frame_done)
+      break;
+
+    std::unique_lock lock(m_sync_mutex);
+    m_gpu_thread_sleeping.store(true);
+    m_wake_gpu_thread_cv.wait(lock, [this]() { return GetPendingCommandSize() > 0; }); // predicate: don't sleep on (or spuriously wake from) a queue state WakeGPUThread() already skipped notifying for
+    m_gpu_thread_sleeping.store(false);
+  }
+}
+
+void GPUBackend::EndGPUFrame()
+{
+  ProcessGPUCommands(); // same queue as the pointers asserted/reset below
+  Assert(m_command_fifo_read_ptr.load() == m_command_fifo_write_ptr.load());
+  m_command_fifo_read_ptr.store(0);
+  m_command_fifo_write_ptr.store(0);
+}
+
+void GPUBackend::SetScissorFromDrawingArea() {}
+
+void GPUBackend::HandleCommand(const GPUBackendCommand* cmd)
+{
+  switch (cmd->type)
+  {
+    case GPUBackendCommandType::ReadVRAM:
+    {
+      FlushRender();
+      const GPUBackendReadVRAMCommand* ccmd = static_cast(cmd);
+      ReadVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height));
+    }
+    break;
+
+    case GPUBackendCommandType::FillVRAM:
+    {
+      FlushRender();
+      const GPUBackendFillVRAMCommand* ccmd = static_cast(cmd);
+      FillVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height),
+               ccmd->color, ccmd->params);
+    }
+    break;
+
+    case GPUBackendCommandType::UpdateVRAM:
+    {
+      FlushRender();
+      const GPUBackendUpdateVRAMCommand* ccmd = static_cast(cmd);
+      UpdateVRAM(ZeroExtend32(ccmd->x), ZeroExtend32(ccmd->y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height),
+                 ccmd->data, ccmd->params);
+    }
+    break;
+
+    case GPUBackendCommandType::CopyVRAM:
+    {
+      FlushRender();
+      const GPUBackendCopyVRAMCommand* ccmd = static_cast(cmd);
+      CopyVRAM(ZeroExtend32(ccmd->src_x), ZeroExtend32(ccmd->src_y), ZeroExtend32(ccmd->dst_x),
+               ZeroExtend32(ccmd->dst_y), ZeroExtend32(ccmd->width), ZeroExtend32(ccmd->height), ccmd->params);
+    }
+    break;
+
+    case GPUBackendCommandType::SetDrawingArea:
+    {
+      FlushRender();
+      m_drawing_area = static_cast(cmd)->new_area;
+
SetScissorFromDrawingArea(); + } + break; + + case GPUBackendCommandType::DrawPolygon: + { + DrawPolygon(static_cast(cmd)); + } + break; + + case GPUBackendCommandType::DrawRectangle: + { + DrawRectangle(static_cast(cmd)); + } + break; + + case GPUBackendCommandType::DrawLine: + { + DrawLine(static_cast(cmd)); + } + break; + + case GPUBackendCommandType::ClearDisplay: + { + ClearDisplay(); + } + break; + + case GPUBackendCommandType::UpdateDisplay: + { + const GPUBackendUpdateDisplayCommand* ccmd = static_cast(cmd); + + m_display_aspect_ratio = ccmd->display_aspect_ratio; + m_display_width = ccmd->display_width; + m_display_height = ccmd->display_height; + m_display_origin_left = ccmd->display_origin_left; + m_display_origin_top = ccmd->display_origin_top; + m_display_vram_left = ccmd->display_vram_left; + m_display_vram_top = ccmd->display_vram_top; + m_display_vram_width = ccmd->display_vram_width; + m_display_vram_height = ccmd->display_vram_height; + m_display_vram_start_x = ccmd->display_vram_start_x; + m_display_vram_start_y = ccmd->display_vram_start_y; + m_display_interlace = ccmd->display_interlace; + m_display_interlace_field = ccmd->display_interlace_field; + m_display_enabled = ccmd->display_enabled; + m_display_24bit = ccmd->display_24bit; + + UpdateDisplay(); + } + break; + + case GPUBackendCommandType::FlushRender: + { + FlushRender(); + } + break; + + default: + break; + } +} + +void GPUBackend::SoftwareFillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) +{ + const u16 color16 = RGBA8888ToRGBA5551(color); + if ((x + width) <= VRAM_WIDTH && !params.interlaced_rendering) + { + for (u32 yoffs = 0; yoffs < height; yoffs++) + { + const u32 row = (y + yoffs) % VRAM_HEIGHT; + std::fill_n(&m_vram_ptr[row * VRAM_WIDTH + x], width, color16); + } + } + else if (params.interlaced_rendering) + { + // Hardware tests show that fills seem to break on the first two lines when the offset matches the displayed field. 
+    const u32 active_field = params.active_line_lsb;
+    for (u32 yoffs = 0; yoffs < height; yoffs++)
+    {
+      const u32 row = (y + yoffs) % VRAM_HEIGHT;
+      if ((row & u32(1)) == active_field)
+        continue;
+
+      u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
+      for (u32 xoffs = 0; xoffs < width; xoffs++)
+      {
+        const u32 col = (x + xoffs) % VRAM_WIDTH;
+        row_ptr[col] = color16;
+      }
+    }
+  }
+  else
+  {
+    for (u32 yoffs = 0; yoffs < height; yoffs++)
+    {
+      const u32 row = (y + yoffs) % VRAM_HEIGHT;
+      u16* row_ptr = &m_vram_ptr[row * VRAM_WIDTH];
+      for (u32 xoffs = 0; xoffs < width; xoffs++)
+      {
+        const u32 col = (x + xoffs) % VRAM_WIDTH;
+        row_ptr[col] = color16;
+      }
+    }
+  }
+}
+
+void GPUBackend::SoftwareUpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data,
+                                    GPUBackendCommandParameters params)
+{
+  // Fast path when the copy is not oversized.
+  if ((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT && !params.IsMaskingEnabled())
+  {
+    const u16* src_ptr = static_cast(data);
+    u16* dst_ptr = &m_vram_ptr[y * VRAM_WIDTH + x];
+    for (u32 yoffs = 0; yoffs < height; yoffs++)
+    {
+      std::copy_n(src_ptr, width, dst_ptr);
+      src_ptr += width;
+      dst_ptr += VRAM_WIDTH;
+    }
+  }
+  else
+  {
+    // Slow path when we need to handle wrap-around.
+    const u16* src_ptr = static_cast(data);
+    const u16 mask_and = params.GetMaskAND();
+    const u16 mask_or = params.GetMaskOR();
+
+    for (u32 row = 0; row < height;)
+    {
+      u16* dst_row_ptr = &m_vram_ptr[((y + row++) % VRAM_HEIGHT) * VRAM_WIDTH];
+      for (u32 col = 0; col < width; col++, src_ptr++) // consume one source pixel per destination pixel, even when the mask check rejects the write
+      {
+        // TODO: Handle unaligned reads...
+        u16* pixel_ptr = &dst_row_ptr[(x + col) % VRAM_WIDTH];
+        if (((*pixel_ptr) & mask_and) == 0)
+          *pixel_ptr = *src_ptr | mask_or;
+      }
+    }
+  }
+}
+
+void GPUBackend::SoftwareCopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height,
+                                  GPUBackendCommandParameters params)
+{
+  // Break up oversized copies. This behavior has not been verified on console.
+ if ((src_x + width) > VRAM_WIDTH || (dst_x + width) > VRAM_WIDTH) + { + u32 remaining_rows = height; + u32 current_src_y = src_y; + u32 current_dst_y = dst_y; + while (remaining_rows > 0) + { + const u32 rows_to_copy = + std::min(remaining_rows, std::min(VRAM_HEIGHT - current_src_y, VRAM_HEIGHT - current_dst_y)); + + u32 remaining_columns = width; + u32 current_src_x = src_x; + u32 current_dst_x = dst_x; + while (remaining_columns > 0) + { + const u32 columns_to_copy = + std::min(remaining_columns, std::min(VRAM_WIDTH - current_src_x, VRAM_WIDTH - current_dst_x)); + SoftwareCopyVRAM(current_src_x, current_src_y, current_dst_x, current_dst_y, columns_to_copy, rows_to_copy, + params); + current_src_x = (current_src_x + columns_to_copy) % VRAM_WIDTH; + current_dst_x = (current_dst_x + columns_to_copy) % VRAM_WIDTH; + remaining_columns -= columns_to_copy; + } + + current_src_y = (current_src_y + rows_to_copy) % VRAM_HEIGHT; + current_dst_y = (current_dst_y + rows_to_copy) % VRAM_HEIGHT; + remaining_rows -= rows_to_copy; + } + + return; + } + + // This doesn't have a fast path, but do we really need one? It's not common. + const u16 mask_and = params.GetMaskAND(); + const u16 mask_or = params.GetMaskOR(); + + // Copy in reverse when src_x < dst_x, this is verified on console. 
+ if (src_x < dst_x || ((src_x + width - 1) % VRAM_WIDTH) < ((dst_x + width - 1) % VRAM_WIDTH)) + { + for (u32 row = 0; row < height; row++) + { + const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + + for (s32 col = static_cast(width - 1); col >= 0; col--) + { + const u16 src_pixel = src_row_ptr[(src_x + static_cast(col)) % VRAM_WIDTH]; + u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + static_cast(col)) % VRAM_WIDTH]; + if ((*dst_pixel_ptr & mask_and) == 0) + *dst_pixel_ptr = src_pixel | mask_or; + } + } + } + else + { + for (u32 row = 0; row < height; row++) + { + const u16* src_row_ptr = &m_vram_ptr[((src_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + u16* dst_row_ptr = &m_vram_ptr[((dst_y + row) % VRAM_HEIGHT) * VRAM_WIDTH]; + + for (u32 col = 0; col < width; col++) + { + const u16 src_pixel = src_row_ptr[(src_x + col) % VRAM_WIDTH]; + u16* dst_pixel_ptr = &dst_row_ptr[(dst_x + col) % VRAM_WIDTH]; + if ((*dst_pixel_ptr & mask_and) == 0) + *dst_pixel_ptr = src_pixel | mask_or; + } + } + } +} diff --git a/src/core/gpu_backend.h b/src/core/gpu_backend.h new file mode 100644 index 000000000..cf5e987ac --- /dev/null +++ b/src/core/gpu_backend.h @@ -0,0 +1,143 @@ +#pragma once +#include "common/heap_array.h" +#include "common/event.h" +#include "gpu_types.h" +#include +#include +#include +#include + +class StateWrapper; + +class GPUBackend +{ +public: + GPUBackend(); + virtual ~GPUBackend(); + + ALWAYS_INLINE u16* GetVRAM() const { return m_vram_ptr; } + + static bool Create(GPURenderer backend); + + virtual bool Initialize(); + + // Graphics API state reset/restore - call when drawing the UI etc. + virtual void ResetGraphicsAPIState(); + virtual void RestoreGraphicsAPIState(); + + virtual bool IsHardwareRenderer() const; + + /// Recompile shaders/recreate framebuffers when needed. 
+ virtual void UpdateSettings(); + + /// Updates the resolution scale when it's set to automatic. + virtual void UpdateResolutionScale(); + + /// Returns the effective display resolution of the GPU. + virtual std::tuple GetEffectiveDisplayResolution(); + + virtual void DrawRendererStats(bool is_idle_frame); + + bool DoState(StateWrapper& sw); + + GPUBackendResetCommand* NewResetCommand(); + GPUBackendUpdateSettingsCommand* NewUpdateSettingsCommand(); + GPUBackendUpdateResolutionScaleCommand* NewUpdateResolutionScaleCommand(); + GPUBackendReadVRAMCommand* NewReadVRAMCommand(); + GPUBackendFillVRAMCommand* NewFillVRAMCommand(); + GPUBackendUpdateVRAMCommand* NewUpdateVRAMCommand(u32 num_words); + GPUBackendCopyVRAMCommand* NewCopyVRAMCommand(); + GPUBackendSetDrawingAreaCommand* NewSetDrawingAreaCommand(); + GPUBackendDrawPolygonCommand* NewDrawPolygonCommand(u32 num_vertices); + GPUBackendDrawRectangleCommand* NewDrawRectangleCommand(); + GPUBackendDrawLineCommand* NewDrawLineCommand(u32 num_vertices); + GPUBackendClearDisplayCommand* NewClearDisplayCommand(); + GPUBackendUpdateDisplayCommand* NewUpdateDisplayCommand(); + GPUBackendFlushRenderCommand* NewFlushRenderCommand(); + + void PushCommand(GPUBackendCommand* cmd); + void Sync(); + + /// Processes all pending GPU commands. 
+ void ProcessGPUCommands(); + + void CPUFrameDone(); + void RunGPUFrame(); + void EndGPUFrame(); + +protected: + void* AllocateCommand(u32 size); + u32 GetPendingCommandSize() const; + void WakeGPUThread(); + + virtual void Reset(); + virtual void ReadVRAM(u32 x, u32 y, u32 width, u32 height) = 0; + virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) = 0; + virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, + GPUBackendCommandParameters params) = 0; + virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) = 0; + virtual void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) = 0; + virtual void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) = 0; + virtual void DrawLine(const GPUBackendDrawLineCommand* cmd) = 0; + virtual void SetScissorFromDrawingArea(); + virtual void ClearDisplay() = 0; + virtual void UpdateDisplay() = 0; + virtual void FlushRender() = 0; + + void HandleCommand(const GPUBackendCommand* cmd); + + void SoftwareFillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params); + void SoftwareUpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params); + void SoftwareCopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params); + + u16* m_vram_ptr = nullptr; + + Common::Rectangle m_drawing_area{}; + + float m_display_aspect_ratio = 1.0f; + + // Size of the simulated screen in pixels. Depending on crop mode, this may include overscan area. + u16 m_display_width = 0; + u16 m_display_height = 0; + + // Top-left corner where the VRAM is displayed. Depending on the CRTC config, this may indicate padding. + u16 m_display_origin_left = 0; + u16 m_display_origin_top = 0; + + // Rectangle describing the displayed area of VRAM, in coordinates. 
+ u16 m_display_vram_left = 0; + u16 m_display_vram_top = 0; + u16 m_display_vram_width = 0; + u16 m_display_vram_height = 0; + u16 m_display_vram_start_x = 0; + u16 m_display_vram_start_y = 0; + + GPUInterlacedDisplayMode m_display_interlace = GPUInterlacedDisplayMode::None; + u8 m_display_interlace_field = 0; + bool m_display_enabled = false; + bool m_display_24bit = false; + + bool m_frame_done = false; + + Common::Event m_sync_event; + std::atomic_bool m_gpu_thread_sleeping{ false }; + + std::mutex m_sync_mutex; + std::condition_variable m_sync_cpu_thread_cv; + std::condition_variable m_wake_gpu_thread_cv; + bool m_sync_done = false; + + enum : u32 + { + COMMAND_QUEUE_SIZE = 8 * 1024 * 1024, + THRESHOLD_TO_WAKE_GPU = 256 + }; + + HeapArray m_command_fifo_data; + alignas(64) std::atomic m_command_fifo_read_ptr{0}; + alignas(64) std::atomic m_command_fifo_write_ptr{0}; +}; + +extern std::unique_ptr g_gpu_backend; \ No newline at end of file diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 1f379af3f..da091797d 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -2,7 +2,9 @@ #include "common/log.h" #include "common/string_util.h" #include "gpu.h" +#include "gpu_backend.h" #include "interrupt_controller.h" +#include "pgxp.h" #include "system.h" Log_SetChannel(GPU); @@ -21,6 +23,15 @@ static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero) return value == 0 ? 
value_for_zero : value; } +template +ALWAYS_INLINE static constexpr std::tuple MinMax(T v1, T v2) +{ + if (v1 > v2) + return std::tie(v2, v1); + else + return std::tie(v1, v2); +} + void GPU::ExecuteCommands() { m_syncing = true; @@ -91,7 +102,7 @@ void GPU::ExecuteCommands() // drop terminator m_fifo.RemoveOne(); Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount()); - DispatchRenderCommand(); + FinishPolyLineRenderCommand(); m_blit_buffer.clear(); EndCommand(); continue; @@ -132,16 +143,16 @@ GPU::GP0CommandHandlerTable GPU::GenerateGP0CommandHandlerTable() table[0x1F] = &GPU::HandleInterruptRequestCommand; for (u32 i = 0x20; i <= 0x7F; i++) { - const RenderCommand rc{i << 24}; + const GPURenderCommand rc{i << 24}; switch (rc.primitive) { - case Primitive::Polygon: + case GPUPrimitive::Polygon: table[i] = &GPU::HandleRenderPolygonCommand; break; - case Primitive::Line: + case GPUPrimitive::Line: table[i] = rc.polyline ? &GPU::HandleRenderPolyLineCommand : &GPU::HandleRenderLineCommand; break; - case Primitive::Rectangle: + case GPUPrimitive::Rectangle: table[i] = &GPU::HandleRenderRectangleCommand; break; default: @@ -218,7 +229,17 @@ bool GPU::HandleSetDrawModeCommand() { const u32 param = FifoPop() & 0x00FFFFFFu; Log_DebugPrintf("Set draw mode %08X", param); - SetDrawMode(Truncate16(param)); + + GPUDrawModeReg new_mode_reg{static_cast(param & GPUDrawModeReg::MASK)}; + if (!m_set_texture_disable_mask) + new_mode_reg.texture_disable = false; + + // Bits 0..10 are returned in the GPU status register. 
+ m_GPUSTAT.bits = (m_GPUSTAT.bits & ~(GPUDrawModeReg::GPUSTAT_MASK)) | + (ZeroExtend32(new_mode_reg.bits) & GPUDrawModeReg::GPUSTAT_MASK); + m_GPUSTAT.texture_disable = new_mode_reg.texture_disable; + m_draw_mode.bits = new_mode_reg.bits; + AddCommandTicks(1); EndCommand(); return true; @@ -227,10 +248,10 @@ bool GPU::HandleSetDrawModeCommand() bool GPU::HandleSetTextureWindowCommand() { const u32 param = FifoPop() & 0x00FFFFFFu; - SetTextureWindow(param); - Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_draw_mode.texture_window_mask_x, - m_draw_mode.texture_window_mask_y, m_draw_mode.texture_window_offset_x, - m_draw_mode.texture_window_offset_y); + + m_texture_window.bits = param; + Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_texture_window.mask_x, m_texture_window.mask_y, + m_texture_window.offset_x, m_texture_window.offset_y); AddCommandTicks(1); EndCommand(); @@ -245,11 +266,9 @@ bool GPU::HandleSetDrawingAreaTopLeftCommand() Log_DebugPrintf("Set drawing area top-left: (%u, %u)", left, top); if (m_drawing_area.left != left || m_drawing_area.top != top) { - FlushRender(); - m_drawing_area.left = left; m_drawing_area.top = top; - m_drawing_area_changed = true; + UpdateDrawingArea(); } AddCommandTicks(1); @@ -266,11 +285,9 @@ bool GPU::HandleSetDrawingAreaBottomRightCommand() Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.right, m_drawing_area.bottom); if (m_drawing_area.right != right || m_drawing_area.bottom != bottom) { - FlushRender(); - m_drawing_area.right = right; m_drawing_area.bottom = bottom; - m_drawing_area_changed = true; + UpdateDrawingArea(); } AddCommandTicks(1); @@ -304,10 +321,8 @@ bool GPU::HandleSetMaskBitCommand() constexpr u32 gpustat_mask = (1 << 11) | (1 << 12); const u32 gpustat_bits = (param & 0x03) << 11; if ((m_GPUSTAT.bits & gpustat_mask) != gpustat_bits) - { - FlushRender(); m_GPUSTAT.bits = (m_GPUSTAT.bits & ~gpustat_mask) | gpustat_bits; - } + Log_DebugPrintf("Set mask bit %u 
%u", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing), BoolToUInt32(m_GPUSTAT.check_mask_before_draw)); @@ -316,9 +331,64 @@ bool GPU::HandleSetMaskBitCommand() return true; } +void GPU::FillBackendCommandParameters(GPUBackendCommand* cmd) const +{ + cmd->params.bits = 0; + cmd->params.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; + cmd->params.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; + cmd->params.active_line_lsb = m_crtc_state.active_line_lsb; + cmd->params.interlaced_rendering = IsInterlacedRenderingEnabled(); +} + +void GPU::ClearDisplay() +{ + g_gpu_backend->PushCommand(g_gpu_backend->NewClearDisplayCommand()); +} + +void GPU::UpdateDisplay() +{ + GPUBackendUpdateDisplayCommand* cmd = g_gpu_backend->NewUpdateDisplayCommand(); + cmd->display_aspect_ratio = m_crtc_state.display_aspect_ratio; + cmd->display_width = m_crtc_state.display_width; + cmd->display_height = m_crtc_state.display_height; + cmd->display_origin_left = m_crtc_state.display_origin_left; + cmd->display_origin_top = m_crtc_state.display_origin_top; + cmd->display_vram_left = m_crtc_state.display_vram_left; + cmd->display_vram_top = m_crtc_state.display_vram_top; + cmd->display_vram_width = m_crtc_state.display_vram_width; + cmd->display_vram_height = m_crtc_state.display_vram_height; + cmd->display_vram_start_x = m_crtc_state.regs.X; + cmd->display_vram_start_y = m_crtc_state.regs.Y; + cmd->display_interlace = GetInterlacedDisplayMode(); + cmd->display_interlace_field = m_crtc_state.interlaced_display_field; + cmd->display_enabled = !m_GPUSTAT.display_disable; + cmd->display_24bit = m_GPUSTAT.display_area_color_depth_24; + g_gpu_backend->PushCommand(cmd); +} + +void GPU::FillDrawCommand(GPUBackendDrawCommand* cmd, GPURenderCommand rc) const +{ + FillBackendCommandParameters(cmd); + cmd->rc.bits = rc.bits; + cmd->draw_mode.bits = m_draw_mode.bits; + cmd->window.bits = m_texture_window.bits; +} + +void GPU::UpdateDrawingArea() +{ + GPUBackendSetDrawingAreaCommand* cmd 
= g_gpu_backend->NewSetDrawingAreaCommand(); + cmd->new_area = m_drawing_area; + g_gpu_backend->PushCommand(cmd); +} + +void GPU::FlushRender() +{ + g_gpu_backend->PushCommand(g_gpu_backend->NewFlushRenderCommand()); +} + bool GPU::HandleRenderPolygonCommand() { - const RenderCommand rc{FifoPeek(0)}; + const GPURenderCommand rc{FifoPeek(0)}; // shaded vertices use the colour from the first word for the first vertex const u32 words_per_vertex = 1 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.shading_enable); @@ -341,39 +411,155 @@ bool GPU::HandleRenderPolygonCommand() rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome", ZeroExtend32(num_vertices), ZeroExtend32(words_per_vertex), setup_ticks); + GPUBackendDrawPolygonCommand* cmd = g_gpu_backend->NewDrawPolygonCommand(num_vertices); + FillDrawCommand(cmd, rc); + // set draw state up if (rc.texture_enable) { const u16 texpage_attribute = Truncate16((rc.shading_enable ? FifoPeek(5) : FifoPeek(4)) >> 16); - SetDrawMode((texpage_attribute & DrawMode::Reg::POLYGON_TEXPAGE_MASK) | - (m_draw_mode.mode_reg.bits & ~DrawMode::Reg::POLYGON_TEXPAGE_MASK)); - SetTexturePalette(Truncate16(FifoPeek(2) >> 16)); + + m_GPUSTAT.bits = (m_GPUSTAT.bits & ~(GPUDrawModeReg::GPUSTAT_MASK)) | + (ZeroExtend32(texpage_attribute) & GPUDrawModeReg::GPUSTAT_MASK); + + cmd->draw_mode.bits = ((texpage_attribute & GPUDrawModeReg::POLYGON_TEXPAGE_MASK) | + (m_draw_mode.bits & ~GPUDrawModeReg::POLYGON_TEXPAGE_MASK)); + cmd->palette.bits = Truncate16(FifoPeek(2) >> 16); + } + else + { + cmd->palette.bits = 0; } m_stats.num_vertices += num_vertices; m_stats.num_polygons++; - m_render_command.bits = rc.bits; + m_fifo.RemoveOne(); - DispatchRenderCommand(); + const u32 first_color = rc.color_for_first_vertex; + const bool shaded = rc.shading_enable; + const bool textured = rc.texture_enable; + const bool pgxp = g_settings.gpu_pgxp_enable; + + bool valid_w = g_settings.gpu_pgxp_texture_correction; + for (u32 i 
= 0; i < num_vertices; i++) + { + GPUBackendDrawPolygonCommand::Vertex* vert = &cmd->vertices[i]; + vert->color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; + const u64 maddr_and_pos = m_fifo.Pop(); + const GPUVertexPosition vp{Truncate32(maddr_and_pos)}; + vert->x = m_drawing_offset.x + vp.x; + vert->y = m_drawing_offset.y + vp.y; + vert->precise_x = static_cast(vert->x); + vert->precise_y = static_cast(vert->y); + vert->precise_w = 1.0f; + vert->texcoord = textured ? Truncate16(FifoPop()) : 0; + const s32 native_x = m_drawing_offset.x + vp.x; + + if (pgxp) + { + valid_w &= PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, vert->x, vert->y, m_drawing_offset.x, + m_drawing_offset.y, &vert->precise_x, &vert->precise_y, &vert->precise_w); + } + } + if (pgxp && !valid_w) + { + for (u32 i = 0; i < num_vertices; i++) + cmd->vertices[i].precise_w = 1.0f; + } + + if (!IsDrawingAreaIsValid()) + { + EndCommand(); + return true; + } + + // Cull polygons which are too large. 
+ const auto [min_x_12, max_x_12] = MinMax(cmd->vertices[1].x, cmd->vertices[2].x); + const auto [min_y_12, max_y_12] = MinMax(cmd->vertices[1].y, cmd->vertices[2].y); + const s32 min_x = std::min(min_x_12, cmd->vertices[0].x); + const s32 max_x = std::max(max_x_12, cmd->vertices[0].x); + const s32 min_y = std::min(min_y_12, cmd->vertices[0].y); + const s32 max_y = std::max(max_y_12, cmd->vertices[0].y); + + if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) + { + Log_DebugPrintf("Culling too-large polygon: %d,%d %d,%d %d,%d", cmd->vertices[0].x, cmd->vertices[0].y, + cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[2].x, cmd->vertices[2].y); + + if (!rc.quad_polygon) + { + EndCommand(); + return true; + } + + // turn it into a degenerate triangle + std::memcpy(&cmd->vertices[0], &cmd->vertices[1], sizeof(GPUBackendDrawPolygonCommand::Vertex)); + cmd->bounds.SetInvalid(); + } + else + { + const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.right)); + const u32 clip_right = static_cast(std::clamp(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; + const u32 clip_top = static_cast(std::clamp(min_y, m_drawing_area.top, m_drawing_area.bottom)); + const u32 clip_bottom = static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; + + cmd->bounds.Set(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), Truncate16(clip_bottom)); + AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable, rc.texture_enable, + rc.transparency_enable); + } + + // quads + if (rc.quad_polygon) + { + const s32 min_x_123 = std::min(min_x_12, cmd->vertices[3].x); + const s32 max_x_123 = std::max(max_x_12, cmd->vertices[3].x); + const s32 min_y_123 = std::min(min_y_12, cmd->vertices[3].y); + const s32 max_y_123 = std::max(max_y_12, cmd->vertices[3].y); + + // Cull polygons which are too large. 
+ if ((max_x_123 - min_x_123) >= MAX_PRIMITIVE_WIDTH || (max_y_123 - min_y_123) >= MAX_PRIMITIVE_HEIGHT) + { + Log_DebugPrintf("Culling too-large polygon (quad second half): %d,%d %d,%d %d,%d", cmd->vertices[2].x, + cmd->vertices[2].y, cmd->vertices[1].x, cmd->vertices[1].y, cmd->vertices[0].x, + cmd->vertices[0].y); + + // turn it into a degenerate triangle + std::memcpy(&cmd->vertices[3], &cmd->vertices[2], sizeof(GPUBackendDrawPolygonCommand::Vertex)); + cmd->bounds.SetInvalid(); + } + else + { + const u32 clip_left = static_cast(std::clamp(min_x_123, m_drawing_area.left, m_drawing_area.right)); + const u32 clip_right = + static_cast(std::clamp(max_x_123, m_drawing_area.left, m_drawing_area.right)) + 1u; + const u32 clip_top = static_cast(std::clamp(min_y_123, m_drawing_area.top, m_drawing_area.bottom)); + const u32 clip_bottom = + static_cast(std::clamp(max_y_123, m_drawing_area.top, m_drawing_area.bottom)) + 1u; + + cmd->bounds.Include(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), Truncate16(clip_bottom)); + AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable, rc.texture_enable, + rc.transparency_enable); + } + } + + g_gpu_backend->PushCommand(cmd); + EndCommand(); return true; } bool GPU::HandleRenderRectangleCommand() { - const RenderCommand rc{FifoPeek(0)}; + const GPURenderCommand rc{FifoPeek(0)}; const u32 total_words = - 2 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.rectangle_size == DrawRectangleSize::Variable); + 2 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.rectangle_size == GPUDrawRectangleSize::Variable); CHECK_COMMAND_SIZE(total_words); if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) SynchronizeCRTC(); - if (rc.texture_enable) - SetTexturePalette(Truncate16(FifoPeek(2) >> 16)); - const TickCount setup_ticks = 16; AddCommandTicks(setup_ticks); @@ -384,17 +570,84 @@ bool GPU::HandleRenderRectangleCommand() m_stats.num_vertices++; m_stats.num_polygons++; - 
m_render_command.bits = rc.bits; m_fifo.RemoveOne(); - DispatchRenderCommand(); + GPUBackendDrawRectangleCommand* cmd = g_gpu_backend->NewDrawRectangleCommand(); + FillDrawCommand(cmd, rc); + cmd->color = rc.color_for_first_vertex; + cmd->draw_mode.bits = m_draw_mode.bits; + cmd->window.bits = m_texture_window.bits; + + const GPUVertexPosition vp{FifoPop()}; + cmd->x = TruncateGPUVertexPosition(m_drawing_offset.x + vp.x); + cmd->y = TruncateGPUVertexPosition(m_drawing_offset.y + vp.y); + + if (rc.texture_enable) + { + const u32 texcoord_and_palette = FifoPop(); + cmd->palette.bits = Truncate16(texcoord_and_palette >> 16); + cmd->texcoord = Truncate16(texcoord_and_palette); + } + else + { + cmd->palette.bits = 0; + cmd->texcoord = 0; + } + + switch (rc.rectangle_size) + { + case GPUDrawRectangleSize::R1x1: + cmd->width = 1; + cmd->height = 1; + break; + case GPUDrawRectangleSize::R8x8: + cmd->width = 8; + cmd->height = 8; + break; + case GPUDrawRectangleSize::R16x16: + cmd->width = 16; + cmd->height = 16; + break; + default: + { + const u32 width_and_height = FifoPop(); + cmd->width = static_cast(width_and_height & VRAM_WIDTH_MASK); + cmd->height = static_cast((width_and_height >> 16) & VRAM_HEIGHT_MASK); + + if (cmd->width >= MAX_PRIMITIVE_WIDTH || cmd->height >= MAX_PRIMITIVE_HEIGHT) + { + Log_DebugPrintf("Culling too-large rectangle: %d,%d %dx%d", cmd->x, cmd->y, cmd->width, cmd->height); + return true; + } + } + break; + } + + if (!IsDrawingAreaIsValid()) + { + EndCommand(); + return true; + } + + const u32 clip_left = static_cast(std::clamp(cmd->x, m_drawing_area.left, m_drawing_area.right)); + const u32 clip_right = + static_cast(std::clamp(cmd->x + cmd->width, m_drawing_area.left, m_drawing_area.right)) + 1u; + const u32 clip_top = static_cast(std::clamp(cmd->y, m_drawing_area.top, m_drawing_area.bottom)); + const u32 clip_bottom = + static_cast(std::clamp(cmd->y + cmd->height, m_drawing_area.top, m_drawing_area.bottom)) + 1u; + + 
cmd->bounds.Set(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), Truncate16(clip_bottom)); + AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.transparency_enable); + + g_gpu_backend->PushCommand(cmd); + EndCommand(); return true; } bool GPU::HandleRenderLineCommand() { - const RenderCommand rc{FifoPeek(0)}; + const GPURenderCommand rc{FifoPeek(0)}; const u32 total_words = rc.shading_enable ? 4 : 3; CHECK_COMMAND_SIZE(total_words); @@ -409,7 +662,59 @@ bool GPU::HandleRenderLineCommand() m_render_command.bits = rc.bits; m_fifo.RemoveOne(); - DispatchRenderCommand(); + GPUBackendDrawLineCommand* cmd = g_gpu_backend->NewDrawLineCommand(2); + FillDrawCommand(cmd, rc); + cmd->palette.bits = 0; + + if (rc.shading_enable) + { + cmd->vertices[0].color = rc.color_for_first_vertex; + const GPUVertexPosition start_pos{FifoPop()}; + cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; + cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; + + cmd->vertices[1].color = FifoPop() & UINT32_C(0x00FFFFFF); + const GPUVertexPosition end_pos{FifoPop()}; + cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; + cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; + } + else + { + cmd->vertices[0].color = rc.color_for_first_vertex; + cmd->vertices[1].color = rc.color_for_first_vertex; + + const GPUVertexPosition start_pos{FifoPop()}; + cmd->vertices[0].x = m_drawing_offset.x + start_pos.x; + cmd->vertices[0].y = m_drawing_offset.y + start_pos.y; + + const GPUVertexPosition end_pos{FifoPop()}; + cmd->vertices[1].x = m_drawing_offset.x + end_pos.x; + cmd->vertices[1].y = m_drawing_offset.y + end_pos.y; + } + + if (!IsDrawingAreaIsValid()) + { + EndCommand(); + return true; + } + + const auto [min_x, max_x] = MinMax(cmd->vertices[0].x, cmd->vertices[1].x); + const auto [min_y, max_y] = MinMax(cmd->vertices[0].y, cmd->vertices[1].y); + if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) + { 
+ Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", cmd->vertices[0].y, cmd->vertices[0].y, cmd->vertices[1].x, + cmd->vertices[1].y); + EndCommand(); + return true; + } + + const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.left)); + const u32 clip_right = static_cast(std::clamp(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; + const u32 clip_top = static_cast(std::clamp(min_y, m_drawing_area.top, m_drawing_area.bottom)); + const u32 clip_bottom = static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; + cmd->bounds.Set(Truncate16(clip_left), Truncate16(clip_top), Truncate16(clip_right), Truncate16(clip_bottom)); + AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable); + EndCommand(); return true; } @@ -417,7 +722,7 @@ bool GPU::HandleRenderLineCommand() bool GPU::HandleRenderPolyLineCommand() { // always read the first two vertices, we test for the terminator after that - const RenderCommand rc{FifoPeek(0)}; + const GPURenderCommand rc{FifoPeek(0)}; const u32 min_words = rc.shading_enable ? 3 : 4; CHECK_COMMAND_SIZE(min_words); @@ -446,6 +751,52 @@ bool GPU::HandleRenderPolyLineCommand() return true; } +void GPU::FinishPolyLineRenderCommand() +{ + // Multiply by two because we don't use line strips. + const u32 num_vertices = GetPolyLineVertexCount(); + if (!IsDrawingAreaIsValid()) + return; + + GPUBackendDrawLineCommand* cmd = g_gpu_backend->NewDrawLineCommand(num_vertices); + FillDrawCommand(cmd, m_render_command); + + u32 buffer_pos = 0; + const GPUVertexPosition start_vp{m_blit_buffer[buffer_pos++]}; + cmd->vertices[0].x = start_vp.x + m_drawing_offset.x; + cmd->vertices[0].y = start_vp.y + m_drawing_offset.y; + cmd->vertices[0].color = m_render_command.color_for_first_vertex; + cmd->bounds.SetInvalid(); + + const bool shaded = m_render_command.shading_enable; + for (u32 i = 1; i < num_vertices; i++) + { + cmd->vertices[i].color = + shaded ? 
(m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : m_render_command.color_for_first_vertex; + const GPUVertexPosition vp{m_blit_buffer[buffer_pos++]}; + cmd->vertices[i].x = m_drawing_offset.x + vp.x; + cmd->vertices[i].y = m_drawing_offset.y + vp.y; + + const auto [min_x, max_x] = MinMax(cmd->vertices[i - 1].x, cmd->vertices[i].y); + const auto [min_y, max_y] = MinMax(cmd->vertices[i - 1].x, cmd->vertices[i].y); + if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) + { + Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", cmd->vertices[i - 1].x, cmd->vertices[i - 1].y, + cmd->vertices[i].x, cmd->vertices[i].y); + } + else + { + const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.left)); + const u32 clip_right = static_cast(std::clamp(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; + const u32 clip_top = static_cast(std::clamp(min_y, m_drawing_area.top, m_drawing_area.bottom)); + const u32 clip_bottom = static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; + + cmd->bounds.Include(Truncate16(clip_left), Truncate16(clip_right), Truncate16(clip_top), Truncate16(clip_bottom)); + AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, m_render_command.shading_enable); + } + } +} + bool GPU::HandleFillRectangleCommand() { CHECK_COMMAND_SIZE(3); @@ -453,19 +804,22 @@ bool GPU::HandleFillRectangleCommand() if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) SynchronizeCRTC(); - FlushRender(); + GPUBackendFillVRAMCommand* cmd = g_gpu_backend->NewFillVRAMCommand(); + FillBackendCommandParameters(cmd); - const u32 color = FifoPop() & 0x00FFFFFF; - const u32 dst_x = FifoPeek() & 0x3F0; - const u32 dst_y = (FifoPop() >> 16) & VRAM_COORD_MASK; - const u32 width = ((FifoPeek() & VRAM_WIDTH_MASK) + 0xF) & ~0xF; - const u32 height = (FifoPop() >> 16) & VRAM_HEIGHT_MASK; + cmd->color = FifoPop() & 0x00FFFFFF; + cmd->x = Truncate16(FifoPeek() & 
0x3F0); + cmd->y = Truncate16((FifoPop() >> 16) & VRAM_COORD_MASK); + cmd->width = Truncate16(((FifoPeek() & VRAM_WIDTH_MASK) + 0xF) & ~0xF); + cmd->height = Truncate16((FifoPop() >> 16) & VRAM_HEIGHT_MASK); - Log_DebugPrintf("Fill VRAM rectangle offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, width, height); + Log_DebugPrintf("Fill VRAM rectangle offset=(%u,%u), size=(%u,%u)", cmd->x, cmd->y, cmd->width, cmd->height); - FillVRAM(dst_x, dst_y, width, height, color); + AddCommandTicks(46 + ((cmd->width / 8) + 9) * cmd->height); + + g_gpu_backend->PushCommand(cmd); m_stats.num_vram_fills++; - AddCommandTicks(46 + ((width / 8) + 9) * height); + EndCommand(); return true; } @@ -509,9 +863,17 @@ void GPU::FinishVRAMWrite() if (IsInterlacedRenderingEnabled() && IsCRTCScanlinePending()) SynchronizeCRTC(); - FlushRender(); + // TODO: skip this copy + const u32 num_words = static_cast(m_blit_buffer.size()) * 2u; + GPUBackendUpdateVRAMCommand* cmd = g_gpu_backend->NewUpdateVRAMCommand(num_words); + FillBackendCommandParameters(cmd); + cmd->x = m_vram_transfer.x; + cmd->y = m_vram_transfer.y; + cmd->width = m_vram_transfer.width; + cmd->height = m_vram_transfer.height; + std::memcpy(cmd->data, m_blit_buffer.data(), sizeof(u16) * num_words); + g_gpu_backend->PushCommand(cmd); - UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height, m_blit_buffer.data()); m_blit_buffer.clear(); m_vram_transfer = {}; m_blitter_state = BlitterState::Idle; @@ -532,17 +894,20 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand() m_vram_transfer.width, m_vram_transfer.height); DebugAssert(m_vram_transfer.col == 0 && m_vram_transfer.row == 0); - // all rendering should be done first... 
- FlushRender(); - // ensure VRAM shadow is up to date - ReadVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height); + GPUBackendReadVRAMCommand* cmd = g_gpu_backend->NewReadVRAMCommand(); + cmd->x = m_vram_transfer.x; + cmd->y = m_vram_transfer.y; + cmd->width = m_vram_transfer.width; + cmd->height = m_vram_transfer.height; + g_gpu_backend->PushCommand(cmd); + g_gpu_backend->Sync(); if (g_settings.debugging.dump_vram_to_cpu_copies) { DumpVRAMToFile(StringUtil::StdStringFromFormat("vram_to_cpu_copy_%u.png", s_vram_to_cpu_dump_id++).c_str(), m_vram_transfer.width, m_vram_transfer.height, sizeof(u16) * VRAM_WIDTH, - &m_vram_ptr[m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x], true); + g_gpu_backend->GetVRAM() + (m_vram_transfer.y * VRAM_WIDTH + m_vram_transfer.x), true); } // switch to pixel-by-pixel read state @@ -557,20 +922,22 @@ bool GPU::HandleCopyRectangleVRAMToVRAMCommand() CHECK_COMMAND_SIZE(4); m_fifo.RemoveOne(); - const u32 src_x = FifoPeek() & VRAM_COORD_MASK; - const u32 src_y = (FifoPop() >> 16) & VRAM_COORD_MASK; - const u32 dst_x = FifoPeek() & VRAM_COORD_MASK; - const u32 dst_y = (FifoPop() >> 16) & VRAM_COORD_MASK; - const u32 width = ReplaceZero(FifoPeek() & VRAM_WIDTH_MASK, 0x400); - const u32 height = ReplaceZero((FifoPop() >> 16) & VRAM_HEIGHT_MASK, 0x200); + GPUBackendCopyVRAMCommand* cmd = g_gpu_backend->NewCopyVRAMCommand(); + cmd->src_x = Truncate16(FifoPeek() & VRAM_COORD_MASK); + cmd->src_y = Truncate16((FifoPop() >> 16) & VRAM_COORD_MASK); + cmd->dst_x = Truncate16(FifoPeek() & VRAM_COORD_MASK); + cmd->dst_y = Truncate16((FifoPop() >> 16) & VRAM_COORD_MASK); + cmd->width = Truncate16(ReplaceZero(FifoPeek() & VRAM_WIDTH_MASK, 0x400)); + cmd->height = Truncate16(ReplaceZero((FifoPop() >> 16) & VRAM_HEIGHT_MASK, 0x200)); - Log_DebugPrintf("Copy rectangle from VRAM to VRAM src=(%u,%u), dst=(%u,%u), size=(%u,%u)", src_x, src_y, dst_x, dst_y, - width, height); + Log_DebugPrintf("Copy rectangle from VRAM to 
VRAM src=(%u,%u), dst=(%u,%u), size=(%u,%u)", cmd->src_x, cmd->src_y, + cmd->dst_x, cmd->dst_y, cmd->width, cmd->height); + + AddCommandTicks(ZeroExtend32(cmd->width) * ZeroExtend32(cmd->height) * 2); + + g_gpu_backend->PushCommand(cmd); - FlushRender(); - CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); m_stats.num_vram_copies++; - AddCommandTicks(width * height * 2); EndCommand(); return true; } diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index b4b937bda..1514f2c7c 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -14,25 +14,13 @@ #endif Log_SetChannel(GPU_HW); -template -ALWAYS_INLINE static constexpr std::tuple MinMax(T v1, T v2) -{ - if (v1 > v2) - return std::tie(v2, v1); - else - return std::tie(v1, v2); -} - ALWAYS_INLINE static bool ShouldUseUVLimits() { // We only need UV limits if PGXP is enabled, or texture filtering is enabled. return g_settings.gpu_pgxp_enable || g_settings.gpu_texture_filter != GPUTextureFilter::Nearest; } -GPU_HW::GPU_HW() : GPU() -{ - m_vram_ptr = m_vram_shadow.data(); -} +GPU_HW::GPU_HW() : GPUBackend() {} GPU_HW::~GPU_HW() = default; @@ -41,13 +29,14 @@ bool GPU_HW::IsHardwareRenderer() const return true; } -bool GPU_HW::Initialize(HostDisplay* host_display) +bool GPU_HW::Initialize() { - if (!GPU::Initialize(host_display)) + if (!GPUBackend::Initialize()) return false; + m_vram_ptr = m_vram_shadow.data(); m_resolution_scale = CalculateResolutionScale(); - m_render_api = host_display->GetRenderAPI(); + m_render_api = g_host_interface->GetDisplay()->GetRenderAPI(); m_true_color = g_settings.gpu_true_color; m_scaled_dithering = g_settings.gpu_scaled_dithering; m_texture_filtering = g_settings.gpu_texture_filter; @@ -58,7 +47,7 @@ bool GPU_HW::Initialize(HostDisplay* host_display) void GPU_HW::Reset() { - GPU::Reset(); + GPUBackend::Reset(); m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; @@ -72,22 +61,6 @@ void GPU_HW::Reset() SetFullVRAMDirtyRectangle(); } -bool GPU_HW::DoState(StateWrapper& sw) 
-{ - if (!GPU::DoState(sw)) - return false; - - // invalidate the whole VRAM read texture when loading state - if (sw.IsReading()) - { - m_batch_current_vertex_ptr = m_batch_start_vertex_ptr; - SetFullVRAMDirtyRectangle(); - ResetBatchVertexDepth(); - } - - return true; -} - void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed) { const u32 resolution_scale = CalculateResolutionScale(); @@ -100,10 +73,12 @@ void GPU_HW::UpdateHWSettings(bool* framebuffer_changed, bool* shaders_changed) if (m_resolution_scale != resolution_scale) { +#if FIXME g_host_interface->AddFormattedOSDMessage(10.0f, "Resolution scale set to %ux (display %ux%u, VRAM %ux%u)", resolution_scale, m_crtc_state.display_vram_width * resolution_scale, resolution_scale * m_crtc_state.display_vram_height, VRAM_WIDTH * resolution_scale, VRAM_HEIGHT * resolution_scale); +#endif } m_resolution_scale = resolution_scale; @@ -119,6 +94,7 @@ u32 GPU_HW::CalculateResolutionScale() const if (g_settings.gpu_resolution_scale != 0) return std::clamp(g_settings.gpu_resolution_scale, 1, m_max_resolution_scale); +#if FIXME // auto scaling const s32 height = (m_crtc_state.display_height != 0) ? 
static_cast(m_crtc_state.display_height) : 480; const s32 preferred_scale = @@ -126,11 +102,14 @@ u32 GPU_HW::CalculateResolutionScale() const Log_InfoPrintf("Height = %d, preferred scale = %d", height, preferred_scale); return static_cast(std::clamp(preferred_scale, 1, m_max_resolution_scale)); +#else + return 1; +#endif } void GPU_HW::UpdateResolutionScale() { - GPU::UpdateResolutionScale(); + GPUBackend::UpdateResolutionScale(); if (CalculateResolutionScale() != m_resolution_scale) UpdateSettings(); @@ -138,8 +117,7 @@ void GPU_HW::UpdateResolutionScale() std::tuple GPU_HW::GetEffectiveDisplayResolution() { - return std::make_tuple(m_crtc_state.display_vram_width * m_resolution_scale, - m_resolution_scale * m_crtc_state.display_vram_height); + return std::make_tuple(m_display_vram_width * m_resolution_scale, m_resolution_scale * m_display_vram_height); } void GPU_HW::PrintSettingsToLog() @@ -358,334 +336,119 @@ void GPU_HW::DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1 AddVertex(output[1]); } -void GPU_HW::LoadVertices() +void GPU_HW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) { - if (m_GPUSTAT.check_mask_before_draw) + SetupDraw(cmd); + if (cmd->params.check_mask_before_draw) m_current_depth++; - const RenderCommand rc{m_render_command.bits}; - const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg) << 16); + const GPURenderCommand rc{cmd->rc.bits}; + const u32 texpage = ZeroExtend32(cmd->draw_mode.bits) | (ZeroExtend32(cmd->palette.bits) << 16); const float depth = GetCurrentNormalizedVertexDepth(); - switch (rc.primitive) + DebugAssert(GetBatchVertexSpace() >= (rc.quad_polygon ? 
6u : 3u)); + + const u32 first_color = rc.color_for_first_vertex; + const bool shaded = rc.shading_enable; + const bool textured = rc.texture_enable; + const bool pgxp = g_settings.gpu_pgxp_enable; + + std::array vertices; + for (u32 i = 0; i < cmd->num_vertices; i++) { - case Primitive::Polygon: - { - DebugAssert(GetBatchVertexSpace() >= (rc.quad_polygon ? 6u : 3u)); - - const u32 first_color = rc.color_for_first_vertex; - const bool shaded = rc.shading_enable; - const bool textured = rc.texture_enable; - const bool pgxp = g_settings.gpu_pgxp_enable; - - const u32 num_vertices = rc.quad_polygon ? 4 : 3; - std::array vertices; - std::array, 4> native_vertex_positions; - bool valid_w = g_settings.gpu_pgxp_texture_correction; - for (u32 i = 0; i < num_vertices; i++) - { - const u32 color = (shaded && i > 0) ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; - const u64 maddr_and_pos = m_fifo.Pop(); - const VertexPosition vp{Truncate32(maddr_and_pos)}; - const u16 texcoord = textured ? Truncate16(FifoPop()) : 0; - const s32 native_x = m_drawing_offset.x + vp.x; - const s32 native_y = m_drawing_offset.y + vp.y; - native_vertex_positions[i][0] = native_x; - native_vertex_positions[i][1] = native_y; - vertices[i].Set(static_cast(native_x), static_cast(native_y), depth, 1.0f, color, texpage, - texcoord, 0xFFFF0000u); - - if (pgxp) - { - valid_w &= - PGXP::GetPreciseVertex(Truncate32(maddr_and_pos >> 32), vp.bits, native_x, native_y, m_drawing_offset.x, - m_drawing_offset.y, &vertices[i].x, &vertices[i].y, &vertices[i].w); - } - } - if (!valid_w) - { - for (BatchVertex& v : vertices) - v.w = 1.0f; - } - - if (rc.quad_polygon && m_resolution_scale > 1) - HandleFlippedQuadTextureCoordinates(vertices.data()); - - if (m_using_uv_limits && textured) - ComputePolygonUVLimits(vertices.data(), num_vertices); - - if (!IsDrawingAreaIsValid()) - return; - - // Cull polygons which are too large. 
- const auto [min_x_12, max_x_12] = MinMax(native_vertex_positions[1][0], native_vertex_positions[2][0]); - const auto [min_y_12, max_y_12] = MinMax(native_vertex_positions[1][1], native_vertex_positions[2][1]); - const s32 min_x = std::min(min_x_12, native_vertex_positions[0][0]); - const s32 max_x = std::max(max_x_12, native_vertex_positions[0][0]); - const s32 min_y = std::min(min_y_12, native_vertex_positions[0][1]); - const s32 max_y = std::max(max_y_12, native_vertex_positions[0][1]); - - if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) - { - Log_DebugPrintf("Culling too-large polygon: %d,%d %d,%d %d,%d", native_vertex_positions[0][0], - native_vertex_positions[0][1], native_vertex_positions[1][0], native_vertex_positions[1][1], - native_vertex_positions[2][0], native_vertex_positions[2][1]); - } - else - { - const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.right)); - const u32 clip_right = static_cast(std::clamp(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; - const u32 clip_top = static_cast(std::clamp(min_y, m_drawing_area.top, m_drawing_area.bottom)); - const u32 clip_bottom = - static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; - - m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom); - AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable, rc.texture_enable, - rc.transparency_enable); - - std::memcpy(m_batch_current_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3); - m_batch_current_vertex_ptr += 3; - } - - // quads - if (rc.quad_polygon) - { - const s32 min_x_123 = std::min(min_x_12, native_vertex_positions[3][0]); - const s32 max_x_123 = std::max(max_x_12, native_vertex_positions[3][0]); - const s32 min_y_123 = std::min(min_y_12, native_vertex_positions[3][1]); - const s32 max_y_123 = std::max(max_y_12, native_vertex_positions[3][1]); - - // Cull polygons which are too large. 
- if ((max_x_123 - min_x_123) >= MAX_PRIMITIVE_WIDTH || (max_y_123 - min_y_123) >= MAX_PRIMITIVE_HEIGHT) - { - Log_DebugPrintf("Culling too-large polygon (quad second half): %d,%d %d,%d %d,%d", - native_vertex_positions[2][0], native_vertex_positions[2][1], native_vertex_positions[1][0], - native_vertex_positions[1][1], native_vertex_positions[0][0], native_vertex_positions[0][1]); - } - else - { - const u32 clip_left = static_cast(std::clamp(min_x_123, m_drawing_area.left, m_drawing_area.right)); - const u32 clip_right = - static_cast(std::clamp(max_x_123, m_drawing_area.left, m_drawing_area.right)) + 1u; - const u32 clip_top = static_cast(std::clamp(min_y_123, m_drawing_area.top, m_drawing_area.bottom)); - const u32 clip_bottom = - static_cast(std::clamp(max_y_123, m_drawing_area.top, m_drawing_area.bottom)) + 1u; - - m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom); - AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable, rc.texture_enable, - rc.transparency_enable); - - AddVertex(vertices[2]); - AddVertex(vertices[1]); - AddVertex(vertices[3]); - } - } - } - break; - - case Primitive::Rectangle: - { - const u32 color = rc.color_for_first_vertex; - const VertexPosition vp{FifoPop()}; - const s32 pos_x = TruncateVertexPosition(m_drawing_offset.x + vp.x); - const s32 pos_y = TruncateVertexPosition(m_drawing_offset.y + vp.y); - - const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? 
Truncate16(FifoPop()) : 0); - u16 orig_tex_left = ZeroExtend16(texcoord_x); - u16 orig_tex_top = ZeroExtend16(texcoord_y); - s32 rectangle_width; - s32 rectangle_height; - switch (rc.rectangle_size) - { - case DrawRectangleSize::R1x1: - rectangle_width = 1; - rectangle_height = 1; - break; - case DrawRectangleSize::R8x8: - rectangle_width = 8; - rectangle_height = 8; - break; - case DrawRectangleSize::R16x16: - rectangle_width = 16; - rectangle_height = 16; - break; - default: - { - const u32 width_and_height = FifoPop(); - rectangle_width = static_cast(width_and_height & VRAM_WIDTH_MASK); - rectangle_height = static_cast((width_and_height >> 16) & VRAM_HEIGHT_MASK); - - if (rectangle_width >= MAX_PRIMITIVE_WIDTH || rectangle_height >= MAX_PRIMITIVE_HEIGHT) - { - Log_DebugPrintf("Culling too-large rectangle: %d,%d %dx%d", pos_x, pos_y, rectangle_width, - rectangle_height); - return; - } - } - break; - } - - // we can split the rectangle up into potentially 8 quads - DebugAssert(GetBatchVertexSpace() >= MAX_VERTICES_FOR_RECTANGLE); - - if (!IsDrawingAreaIsValid()) - return; - - // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat. 
- u16 tex_top = orig_tex_top; - for (s32 y_offset = 0; y_offset < rectangle_height;) - { - const s32 quad_height = std::min(rectangle_height - y_offset, TEXTURE_PAGE_WIDTH - tex_top); - const float quad_start_y = static_cast(pos_y + y_offset); - const float quad_end_y = quad_start_y + static_cast(quad_height); - const u16 tex_bottom = tex_top + static_cast(quad_height); - - u16 tex_left = orig_tex_left; - for (s32 x_offset = 0; x_offset < rectangle_width;) - { - const s32 quad_width = std::min(rectangle_width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left); - const float quad_start_x = static_cast(pos_x + x_offset); - const float quad_end_x = quad_start_x + static_cast(quad_width); - const u16 tex_right = tex_left + static_cast(quad_width); - const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1); - - AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits); - AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits); - AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits); - - AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits); - AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits); - AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom, uv_limits); - - x_offset += quad_width; - tex_left = 0; - } - - y_offset += quad_height; - tex_top = 0; - } - - const u32 clip_left = static_cast(std::clamp(pos_x, m_drawing_area.left, m_drawing_area.right)); - const u32 clip_right = - static_cast(std::clamp(pos_x + rectangle_width, m_drawing_area.left, m_drawing_area.right)) + 1u; - const u32 clip_top = static_cast(std::clamp(pos_y, m_drawing_area.top, m_drawing_area.bottom)); - const u32 clip_bottom = - static_cast(std::clamp(pos_y + rectangle_height, m_drawing_area.top, 
m_drawing_area.bottom)) + 1u; - - m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom); - AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.transparency_enable); - } - break; - - case Primitive::Line: - { - if (!rc.polyline) - { - DebugAssert(GetBatchVertexSpace() >= 2); - - u32 start_color, end_color; - VertexPosition start_pos, end_pos; - if (rc.shading_enable) - { - start_color = rc.color_for_first_vertex; - start_pos.bits = FifoPop(); - end_color = FifoPop() & UINT32_C(0x00FFFFFF); - end_pos.bits = FifoPop(); - } - else - { - start_color = end_color = rc.color_for_first_vertex; - start_pos.bits = FifoPop(); - end_pos.bits = FifoPop(); - } - - if (!IsDrawingAreaIsValid()) - return; - - s32 start_x = start_pos.x + m_drawing_offset.x; - s32 start_y = start_pos.y + m_drawing_offset.y; - s32 end_x = end_pos.x + m_drawing_offset.x; - s32 end_y = end_pos.y + m_drawing_offset.y; - const auto [min_x, max_x] = MinMax(start_x, end_x); - const auto [min_y, max_y] = MinMax(start_y, end_y); - if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) - { - Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", start_x, start_y, end_x, end_y); - return; - } - - const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.left)); - const u32 clip_right = static_cast(std::clamp(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; - const u32 clip_top = static_cast(std::clamp(min_y, m_drawing_area.top, m_drawing_area.bottom)); - const u32 clip_bottom = - static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; - - m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom); - AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable); - - // TODO: Should we do a PGXP lookup here? Most lines are 2D. 
- DrawLine(static_cast(start_x), static_cast(start_y), start_color, static_cast(end_x), - static_cast(end_y), end_color, depth); - } - else - { - // Multiply by two because we don't use line strips. - const u32 num_vertices = GetPolyLineVertexCount(); - DebugAssert(GetBatchVertexSpace() >= (num_vertices * 2)); - - if (!IsDrawingAreaIsValid()) - return; - - const bool shaded = rc.shading_enable; - - u32 buffer_pos = 0; - const VertexPosition start_vp{m_blit_buffer[buffer_pos++]}; - s32 start_x = start_vp.x + m_drawing_offset.x; - s32 start_y = start_vp.y + m_drawing_offset.y; - u32 start_color = rc.color_for_first_vertex; - - for (u32 i = 1; i < num_vertices; i++) - { - const u32 end_color = shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : start_color; - const VertexPosition vp{m_blit_buffer[buffer_pos++]}; - const s32 end_x = m_drawing_offset.x + vp.x; - const s32 end_y = m_drawing_offset.y + vp.y; - - const auto [min_x, max_x] = MinMax(start_x, end_x); - const auto [min_y, max_y] = MinMax(start_y, end_y); - if ((max_x - min_x) >= MAX_PRIMITIVE_WIDTH || (max_y - min_y) >= MAX_PRIMITIVE_HEIGHT) - { - Log_DebugPrintf("Culling too-large line: %d,%d - %d,%d", start_x, start_y, end_x, end_y); - } - else - { - const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.left)); - const u32 clip_right = - static_cast(std::clamp(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; - const u32 clip_top = static_cast(std::clamp(min_y, m_drawing_area.top, m_drawing_area.bottom)); - const u32 clip_bottom = - static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; - - m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom); - AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable); - - // TODO: Should we do a PGXP lookup here? Most lines are 2D. 
- DrawLine(static_cast(start_x), static_cast(start_y), start_color, static_cast(end_x), - static_cast(end_y), end_color, depth); - } - - start_x = end_x; - start_y = end_y; - start_color = end_color; - } - } - } - break; - - default: - UnreachableCode(); - break; + const GPUBackendDrawPolygonCommand::Vertex& v = cmd->vertices[i]; + vertices[i].Set(v.precise_x, v.precise_y, depth, v.precise_w, v.color, texpage, v.texcoord, 0xFFFF0000u); } + + if (rc.quad_polygon && m_resolution_scale > 1) + HandleFlippedQuadTextureCoordinates(vertices.data()); + + if (m_using_uv_limits && textured) + ComputePolygonUVLimits(vertices.data(), cmd->num_vertices); + + std::memcpy(m_batch_current_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3); + m_batch_current_vertex_ptr += 3; + + // quads + if (rc.quad_polygon) + { + AddVertex(vertices[2]); + AddVertex(vertices[1]); + AddVertex(vertices[3]); + } + + IncludeVRAMDityRectangle(cmd->bounds); +} + +void GPU_HW::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) +{ + SetupDraw(cmd); + if (cmd->params.check_mask_before_draw) + m_current_depth++; + + const GPURenderCommand rc{cmd->rc.bits}; + const u32 color = cmd->color; + const u32 texpage = ZeroExtend32(cmd->draw_mode.bits) | (ZeroExtend32(cmd->palette.bits) << 16); + const float depth = GetCurrentNormalizedVertexDepth(); + u16 orig_tex_left = cmd->texcoord & 0xFFu; + u16 orig_tex_top = cmd->texcoord >> 8; + + // Split the rectangle into multiple quads if it's greater than 256x256, as the texture page should repeat. 
+ u16 tex_top = orig_tex_top; + for (u16 y_offset = 0; y_offset < cmd->height;) + { + const u16 quad_height = std::min(cmd->height - y_offset, TEXTURE_PAGE_WIDTH - tex_top); + const float quad_start_y = static_cast(cmd->y + y_offset); + const float quad_end_y = quad_start_y + static_cast(quad_height); + const u16 tex_bottom = tex_top + static_cast(quad_height); + + u16 tex_left = orig_tex_left; + for (u16 x_offset = 0; x_offset < cmd->width;) + { + const u16 quad_width = std::min(cmd->width - x_offset, TEXTURE_PAGE_HEIGHT - tex_left); + const float quad_start_x = static_cast(cmd->x + x_offset); + const float quad_end_x = quad_start_x + static_cast(quad_width); + const u16 tex_right = tex_left + static_cast(quad_width); + const u32 uv_limits = BatchVertex::PackUVLimits(tex_left, tex_right - 1, tex_top, tex_bottom - 1); + + AddNewVertex(quad_start_x, quad_start_y, depth, 1.0f, color, texpage, tex_left, tex_top, uv_limits); + AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits); + AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits); + + AddNewVertex(quad_start_x, quad_end_y, depth, 1.0f, color, texpage, tex_left, tex_bottom, uv_limits); + AddNewVertex(quad_end_x, quad_start_y, depth, 1.0f, color, texpage, tex_right, tex_top, uv_limits); + AddNewVertex(quad_end_x, quad_end_y, depth, 1.0f, color, texpage, tex_right, tex_bottom, uv_limits); + + x_offset += quad_width; + tex_left = 0; + } + + y_offset += quad_height; + tex_top = 0; + } + + IncludeVRAMDityRectangle(cmd->bounds); +} + +void GPU_HW::DrawLine(const GPUBackendDrawLineCommand* cmd) +{ + SetupDraw(cmd); + if (cmd->params.check_mask_before_draw) + m_current_depth++; + + const GPURenderCommand rc{cmd->rc.bits}; + const float depth = GetCurrentNormalizedVertexDepth(); + + for (u32 i = 1; i < cmd->num_vertices; i++) + { + const GPUBackendDrawLineCommand::Vertex& start = cmd->vertices[i - 1u]; + const 
GPUBackendDrawLineCommand::Vertex& end = cmd->vertices[i]; + + DrawLine(static_cast(start.x), static_cast(start.y), start.color, static_cast(end.x), + static_cast(end.y), end.color, depth); + } + + IncludeVRAMDityRectangle(cmd->bounds); } void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom) @@ -696,7 +459,8 @@ void GPU_HW::CalcScissorRect(int* left, int* top, int* right, int* bottom) *bottom = std::max((m_drawing_area.bottom + 1) * m_resolution_scale, *top + 1); } -GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const +GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color, + GPUBackendCommandParameters params) const { // drop precision unless true colour is enabled if (!m_true_color) @@ -705,7 +469,7 @@ GPU_HW::VRAMFillUBOData GPU_HW::GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 VRAMFillUBOData uniforms; std::tie(uniforms.u_fill_color[0], uniforms.u_fill_color[1], uniforms.u_fill_color[2], uniforms.u_fill_color[3]) = RGBA8ToFloat(color); - uniforms.u_interlaced_displayed_field = GetActiveLineLSB(); + uniforms.u_interlaced_displayed_field = params.active_line_lsb; return uniforms; } @@ -725,7 +489,8 @@ Common::Rectangle GPU_HW::GetVRAMTransferBounds(u32 x, u32 y, u32 width, u3 return out_rc; } -GPU_HW::VRAMWriteUBOData GPU_HW::GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset) const +GPU_HW::VRAMWriteUBOData GPU_HW::GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset, + GPUBackendCommandParameters params) const { const VRAMWriteUBOData uniforms = {(x % VRAM_WIDTH), (y % VRAM_HEIGHT), @@ -734,23 +499,24 @@ GPU_HW::VRAMWriteUBOData GPU_HW::GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u3 width, height, buffer_offset, - m_GPUSTAT.set_mask_while_drawing ? 0x8000u : 0x00, + params.set_mask_while_drawing ? 
0x8000u : 0x00, GetCurrentNormalizedVertexDepth()}; return uniforms; } -bool GPU_HW::UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const +bool GPU_HW::UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) const { // masking enabled, oversized, or overlapping - return (m_GPUSTAT.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || + return (params.IsMaskingEnabled() || ((src_x % VRAM_WIDTH) + width) > VRAM_WIDTH || ((src_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || ((dst_x % VRAM_WIDTH) + width) > VRAM_WIDTH || ((dst_y % VRAM_HEIGHT) + height) > VRAM_HEIGHT || Common::Rectangle::FromExtents(src_x, src_y, width, height) .Intersects(Common::Rectangle::FromExtents(dst_x, dst_y, width, height))); } -GPU_HW::VRAMCopyUBOData GPU_HW::GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, - u32 height) const +GPU_HW::VRAMCopyUBOData GPU_HW::GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) const { const VRAMCopyUBOData uniforms = {(src_x % VRAM_WIDTH) * m_resolution_scale, (src_y % VRAM_HEIGHT) * m_resolution_scale, @@ -760,7 +526,7 @@ GPU_HW::VRAMCopyUBOData GPU_HW::GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst ((dst_y + height) % VRAM_HEIGHT) * m_resolution_scale, width * m_resolution_scale, height * m_resolution_scale, - m_GPUSTAT.set_mask_while_drawing ? 1u : 0u, + params.set_mask_while_drawing ? 1u : 0u, GetCurrentNormalizedVertexDepth()}; return uniforms; @@ -770,6 +536,7 @@ void GPU_HW::IncludeVRAMDityRectangle(const Common::Rectangle& rect) { m_vram_dirty_rect.Include(rect); +#if FIXME // the vram area can include the texture page, but the game can leave it as-is. 
in this case, set it as dirty so the // shadow texture is updated if (!m_draw_mode.IsTexturePageChanged() && @@ -778,6 +545,13 @@ void GPU_HW::IncludeVRAMDityRectangle(const Common::Rectangle& rect) { m_draw_mode.SetTexturePageChanged(); } +#endif +} + +void GPU_HW::IncludeVRAMDityRectangle(const Common::Rectangle& rect) +{ + IncludeVRAMDityRectangle(Common::Rectangle(ZeroExtend32(rect.left), ZeroExtend32(rect.top), + ZeroExtend32(rect.right), ZeroExtend32(rect.bottom))); } void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices) @@ -793,20 +567,20 @@ void GPU_HW::EnsureVertexBufferSpace(u32 required_vertices) MapBatchVertexPointer(required_vertices); } -void GPU_HW::EnsureVertexBufferSpaceForCurrentCommand() +void GPU_HW::EnsureVertexBufferSpace(const GPUBackendDrawCommand* cmd) { u32 required_vertices; - switch (m_render_command.primitive) + switch (cmd->type) { - case Primitive::Polygon: - required_vertices = m_render_command.quad_polygon ? 6 : 3; + case GPUBackendCommandType::DrawPolygon: + required_vertices = cmd->rc.quad_polygon ? 6 : 3; break; - case Primitive::Rectangle: + case GPUBackendCommandType::DrawRectangle: required_vertices = MAX_VERTICES_FOR_RECTANGLE; break; - case Primitive::Line: + case GPUBackendCommandType::DrawLine: default: - required_vertices = m_render_command.polyline ? 
(GetPolyLineVertexCount() * 6u) : 6u; + required_vertices = static_cast(cmd)->num_vertices * 3u; break; } @@ -836,50 +610,55 @@ void GPU_HW::ResetBatchVertexDepth() m_current_depth = 1; } -void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) +void GPU_HW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) { IncludeVRAMDityRectangle( Common::Rectangle::FromExtents(x, y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT)); } -void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) +void GPU_HW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) { DebugAssert((x + width) <= VRAM_WIDTH && (y + height) <= VRAM_HEIGHT); IncludeVRAMDityRectangle(Common::Rectangle::FromExtents(x, y, width, height)); - if (m_GPUSTAT.check_mask_before_draw) + if (params.check_mask_before_draw) { // set new vertex counter since we want this to take into consideration previous masked pixels m_current_depth++; } } -void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) +void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) { IncludeVRAMDityRectangle( Common::Rectangle::FromExtents(dst_x, dst_y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT)); - if (m_GPUSTAT.check_mask_before_draw) + if (params.check_mask_before_draw) { // set new vertex counter since we want this to take into consideration previous masked pixels m_current_depth++; } } -void GPU_HW::DispatchRenderCommand() +void GPU_HW::SetupDraw(const GPUBackendDrawCommand* cmd) { - const RenderCommand rc{m_render_command.bits}; + const GPURenderCommand rc{cmd->rc.bits}; - TextureMode texture_mode; + GPUTextureMode texture_mode; if (rc.IsTexturingEnabled()) { // texture page changed - check that the new page doesn't intersect the drawing area - if (m_draw_mode.IsTexturePageChanged()) + if 
((cmd->draw_mode.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK) != + (m_last_texture_page_bits.bits & GPUDrawModeReg::TEXTURE_PAGE_MASK) || + true) { - m_draw_mode.ClearTexturePageChangedFlag(); + m_last_texture_page_bits.bits = cmd->draw_mode.bits; + if (m_vram_dirty_rect.Valid() && - (m_draw_mode.GetTexturePageRectangle().Intersects(m_vram_dirty_rect) || - (m_draw_mode.IsUsingPalette() && m_draw_mode.GetTexturePaletteRectangle().Intersects(m_vram_dirty_rect)))) + (m_last_texture_page_bits.GetTexturePageRectangle().Intersects(m_vram_dirty_rect) || + (m_last_texture_page_bits.IsUsingPalette() && + m_last_texture_page_bits.GetTexturePaletteRectangle().Intersects(m_vram_dirty_rect)))) { // Log_DevPrintf("Invalidating VRAM read cache due to drawing area overlap"); if (!IsFlushed()) @@ -889,32 +668,32 @@ void GPU_HW::DispatchRenderCommand() } } - texture_mode = m_draw_mode.GetTextureMode(); + texture_mode = cmd->draw_mode.texture_mode; if (rc.raw_texture_enable) { texture_mode = - static_cast(static_cast(texture_mode) | static_cast(TextureMode::RawTextureBit)); + static_cast(static_cast(texture_mode) | static_cast(GPUTextureMode::RawTextureBit)); } } else { - texture_mode = TextureMode::Disabled; + texture_mode = GPUTextureMode::Disabled; } // has any state changed which requires a new batch? - const TransparencyMode transparency_mode = - rc.transparency_enable ? m_draw_mode.GetTransparencyMode() : TransparencyMode::Disabled; - const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? m_GPUSTAT.dither_enable : false; + const GPUTransparencyMode transparency_mode = + rc.transparency_enable ? cmd->draw_mode.transparency_mode : GPUTransparencyMode::Disabled; + const bool dithering_enable = (!m_true_color && rc.IsDitheringEnabled()) ? 
cmd->draw_mode.dither_enable : false; if (m_batch.texture_mode != texture_mode || m_batch.transparency_mode != transparency_mode || dithering_enable != m_batch.dithering) { FlushRender(); } - EnsureVertexBufferSpaceForCurrentCommand(); + EnsureVertexBufferSpace(cmd); // transparency mode change - if (m_batch.transparency_mode != transparency_mode && transparency_mode != TransparencyMode::Disabled) + if (m_batch.transparency_mode != transparency_mode && transparency_mode != GPUTransparencyMode::Disabled) { static constexpr float transparent_alpha[4][2] = {{0.5f, 0.5f}, {1.0f, 1.0f}, {1.0f, 1.0f}, {0.25f, 1.0f}}; m_batch_ubo_data.u_src_alpha_factor = transparent_alpha[static_cast(transparency_mode)][0]; @@ -922,19 +701,19 @@ void GPU_HW::DispatchRenderCommand() m_batch_ubo_dirty = true; } - if (m_batch.check_mask_before_draw != m_GPUSTAT.check_mask_before_draw || - m_batch.set_mask_while_drawing != m_GPUSTAT.set_mask_while_drawing) + if (m_batch.check_mask_before_draw != cmd->params.check_mask_before_draw || + m_batch.set_mask_while_drawing != cmd->params.set_mask_while_drawing) { - m_batch.check_mask_before_draw = m_GPUSTAT.check_mask_before_draw; - m_batch.set_mask_while_drawing = m_GPUSTAT.set_mask_while_drawing; + m_batch.check_mask_before_draw = cmd->params.check_mask_before_draw; + m_batch.set_mask_while_drawing = cmd->params.set_mask_while_drawing; m_batch_ubo_data.u_set_mask_while_drawing = BoolToUInt32(m_batch.set_mask_while_drawing); m_batch_ubo_dirty = true; } - m_batch.interlacing = IsInterlacedRenderingEnabled(); + m_batch.interlacing = cmd->params.interlaced_rendering; if (m_batch.interlacing) { - const u32 displayed_field = GetActiveLineLSB(); + const u32 displayed_field = cmd->params.active_line_lsb; m_batch_ubo_dirty |= (m_batch_ubo_data.u_interlaced_displayed_field != displayed_field); m_batch_ubo_data.u_interlaced_displayed_field = displayed_field; } @@ -944,18 +723,16 @@ void GPU_HW::DispatchRenderCommand() m_batch.transparency_mode = 
transparency_mode; m_batch.dithering = dithering_enable; - if (m_draw_mode.IsTextureWindowChanged()) + if (m_last_texture_window_reg.bits != cmd->window.bits) { - m_draw_mode.ClearTextureWindowChangedFlag(); + m_last_texture_window_reg.bits = cmd->window.bits; - m_batch_ubo_data.u_texture_window_mask[0] = ZeroExtend32(m_draw_mode.texture_window_mask_x); - m_batch_ubo_data.u_texture_window_mask[1] = ZeroExtend32(m_draw_mode.texture_window_mask_y); - m_batch_ubo_data.u_texture_window_offset[0] = ZeroExtend32(m_draw_mode.texture_window_offset_x); - m_batch_ubo_data.u_texture_window_offset[1] = ZeroExtend32(m_draw_mode.texture_window_offset_y); + m_batch_ubo_data.u_texture_window_mask[0] = ZeroExtend32(cmd->window.mask_x.GetValue()); + m_batch_ubo_data.u_texture_window_mask[1] = ZeroExtend32(cmd->window.mask_y.GetValue()); + m_batch_ubo_data.u_texture_window_offset[0] = ZeroExtend32(cmd->window.offset_x.GetValue()); + m_batch_ubo_data.u_texture_window_offset[1] = ZeroExtend32(cmd->window.offset_y.GetValue()); m_batch_ubo_dirty = true; } - - LoadVertices(); } void GPU_HW::FlushRender() @@ -1020,8 +797,10 @@ void GPU_HW::DrawRendererStats(bool is_idle_frame) ImGui::TextUnformatted("Effective Display Resolution:"); ImGui::NextColumn(); +#if FIXME ImGui::Text("%ux%u", m_crtc_state.display_vram_width * m_resolution_scale, m_crtc_state.display_vram_height * m_resolution_scale); +#endif ImGui::NextColumn(); ImGui::TextUnformatted("True Color:"); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 1d64de71d..27fb672ee 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -1,6 +1,6 @@ #pragma once #include "common/heap_array.h" -#include "gpu.h" +#include "gpu_backend.h" #include "host_display.h" #include #include @@ -8,7 +8,7 @@ #include #include -class GPU_HW : public GPU +class GPU_HW : public GPUBackend { public: enum class BatchRenderMode : u8 @@ -19,22 +19,14 @@ public: OnlyTransparent }; - enum class InterlacedRenderMode : u8 - { - None, - InterleavedFields, - 
SeparateFields - }; - GPU_HW(); virtual ~GPU_HW(); virtual bool IsHardwareRenderer() const override; - virtual bool Initialize(HostDisplay* host_display) override; + virtual bool Initialize() override; virtual void Reset() override; - virtual bool DoState(StateWrapper& sw) override; - + void UpdateResolutionScale() override final; std::tuple GetEffectiveDisplayResolution() override final; @@ -94,8 +86,8 @@ protected: struct BatchConfig { - TextureMode texture_mode; - TransparencyMode transparency_mode; + GPUTextureMode texture_mode; + GPUTransparencyMode transparency_mode; bool dithering; bool interlacing; bool set_mask_while_drawing; @@ -105,15 +97,15 @@ protected: // on a per-pixel basis, and the opaque pixels shouldn't be blended at all. bool NeedsTwoPassRendering() const { - return transparency_mode == GPU::TransparencyMode::BackgroundMinusForeground && - texture_mode != TextureMode::Disabled; + return transparency_mode == GPUTransparencyMode::BackgroundMinusForeground && + texture_mode != GPUTextureMode::Disabled; } // Returns the render mode for this batch. BatchRenderMode GetRenderMode() const { - return transparency_mode == TransparencyMode::Disabled ? BatchRenderMode::TransparencyDisabled : - BatchRenderMode::TransparentAndOpaque; + return transparency_mode == GPUTransparencyMode::Disabled ? 
BatchRenderMode::TransparencyDisabled : + BatchRenderMode::TransparentAndOpaque; } }; @@ -179,7 +171,6 @@ protected: virtual void UpdateVRAMReadTexture(); virtual void UpdateDepthBufferFromMaskBit() = 0; - virtual void SetScissorFromDrawingArea() = 0; virtual void MapBatchVertexPointer(u32 required_vertices) = 0; virtual void UnmapBatchVertexPointer(u32 used_vertices) = 0; virtual void UploadUniformBuffer(const void* uniforms, u32 uniforms_size) = 0; @@ -187,12 +178,9 @@ protected: u32 CalculateResolutionScale() const; - void SetFullVRAMDirtyRectangle() - { - m_vram_dirty_rect.Set(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - m_draw_mode.SetTexturePageChanged(); - } + void SetFullVRAMDirtyRectangle() { m_vram_dirty_rect.Set(0, 0, VRAM_WIDTH, VRAM_HEIGHT); } void ClearVRAMDirtyRectangle() { m_vram_dirty_rect.SetInvalid(); } + void IncludeVRAMDityRectangle(const Common::Rectangle& rect); void IncludeVRAMDityRectangle(const Common::Rectangle& rect); bool IsFlushed() const { return m_batch_current_vertex_ptr == m_batch_start_vertex_ptr; } @@ -200,7 +188,7 @@ protected: u32 GetBatchVertexSpace() const { return static_cast(m_batch_end_vertex_ptr - m_batch_current_vertex_ptr); } u32 GetBatchVertexCount() const { return static_cast(m_batch_current_vertex_ptr - m_batch_start_vertex_ptr); } void EnsureVertexBufferSpace(u32 required_vertices); - void EnsureVertexBufferSpaceForCurrentCommand(); + void EnsureVertexBufferSpace(const GPUBackendDrawCommand* cmd); void ResetBatchVertexDepth(); /// Returns the value to be written to the depth buffer for the current operation for mask bit emulation. @@ -209,43 +197,41 @@ protected: return 1.0f - (static_cast(m_current_depth) / 65535.0f); } - /// Returns the interlaced mode to use when scanning out/displaying. - ALWAYS_INLINE InterlacedRenderMode GetInterlacedRenderMode() const - { - if (IsInterlacedDisplayEnabled()) - { - return m_GPUSTAT.vertical_resolution ? 
InterlacedRenderMode::InterleavedFields : - InterlacedRenderMode::SeparateFields; - } - else - { - return InterlacedRenderMode::None; - } - } - - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void DispatchRenderCommand() override; + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override; + void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override; + void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) override; + void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override; + void DrawLine(const GPUBackendDrawLineCommand* cmd) override; + void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) override; void FlushRender() override; void DrawRendererStats(bool is_idle_frame) override; void CalcScissorRect(int* left, int* top, int* right, int* bottom); - std::tuple ScaleVRAMCoordinates(s32 x, s32 y) const + ALWAYS_INLINE std::tuple ScaleVRAMCoordinates(s32 x, s32 y) const { return std::make_tuple(x * s32(m_resolution_scale), y * s32(m_resolution_scale)); } + ALWAYS_INLINE Common::Rectangle ScaleVRAMRect(const Common::Rectangle& rect) + { + return rect * m_resolution_scale; + } + /// Computes the area affected by a VRAM transfer, including wrap-around of X. Common::Rectangle GetVRAMTransferBounds(u32 x, u32 y, u32 width, u32 height) const; /// Returns true if the VRAM copy shader should be used (oversized copies, masking). 
- bool UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const; + bool UseVRAMCopyShader(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) const; - VRAMFillUBOData GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color) const; - VRAMWriteUBOData GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset) const; - VRAMCopyUBOData GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) const; + VRAMFillUBOData GetVRAMFillUBOData(u32 x, u32 y, u32 width, u32 height, u32 color, + GPUBackendCommandParameters params) const; + VRAMWriteUBOData GetVRAMWriteUBOData(u32 x, u32 y, u32 width, u32 height, u32 buffer_offset, + GPUBackendCommandParameters params) const; + VRAMCopyUBOData GetVRAMCopyUBOData(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) const; /// Expands a line into two triangles. void DrawLine(float x0, float y0, u32 col0, float x1, float y1, u32 col1, float depth); @@ -257,6 +243,8 @@ protected: static void ComputePolygonUVLimits(BatchVertex* vertices, u32 num_vertices); static bool AreUVLimitsNeeded(); + void SetupDraw(const GPUBackendDrawCommand* cmd); + HeapArray m_vram_shadow; BatchVertex* m_batch_start_vertex_ptr = nullptr; @@ -280,12 +268,16 @@ protected: // Bounding box of VRAM area that the GPU has drawn into. 
Common::Rectangle m_vram_dirty_rect; + GPUDrawModeReg m_last_texture_page_bits{}; + GPUTextureWindowReg m_last_texture_window_reg{}; + // Statistics RendererStats m_renderer_stats = {}; RendererStats m_last_renderer_stats = {}; // Changed state bool m_batch_ubo_dirty = true; + bool m_drawing_area_changed = false; private: enum : u32 @@ -294,8 +286,6 @@ private: MAX_BATCH_VERTEX_COUNT = VERTEX_BUFFER_SIZE / sizeof(BatchVertex) }; - void LoadVertices(); - ALWAYS_INLINE void AddVertex(const BatchVertex& v) { std::memcpy(m_batch_current_vertex_ptr, &v, sizeof(BatchVertex)); diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index 97eb39507..9d38c5770 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -13,8 +13,8 @@ GPU_HW_D3D11::GPU_HW_D3D11() = default; GPU_HW_D3D11::~GPU_HW_D3D11() { - if (m_host_display) - m_host_display->ClearDisplayTexture(); + if (g_host_interface->GetDisplay()) + g_host_interface->GetDisplay()->ClearDisplayTexture(); m_context->ClearState(); @@ -22,8 +22,9 @@ GPU_HW_D3D11::~GPU_HW_D3D11() DestroyStateObjects(); } -bool GPU_HW_D3D11::Initialize(HostDisplay* host_display) +bool GPU_HW_D3D11::Initialize() { + HostDisplay* host_display = g_host_interface->GetDisplay(); if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::D3D11) { Log_ErrorPrintf("Host render API is incompatible"); @@ -32,7 +33,7 @@ bool GPU_HW_D3D11::Initialize(HostDisplay* host_display) SetCapabilities(); - if (!GPU_HW::Initialize(host_display)) + if (!GPU_HW::Initialize()) return false; m_device = static_cast(host_display->GetRenderDevice()); @@ -92,8 +93,6 @@ void GPU_HW_D3D11::Reset() void GPU_HW_D3D11::ResetGraphicsAPIState() { - GPU_HW::ResetGraphicsAPIState(); - m_context->GSSetShader(nullptr, nullptr, 0); // In D3D11 we can't leave a buffer mapped across a Present() call. 
@@ -126,7 +125,7 @@ void GPU_HW_D3D11::UpdateSettings() if (framebuffer_changed) { - m_host_display->ClearDisplayTexture(); + g_host_interface->GetDisplay()->ClearDisplayTexture(); CreateFramebuffer(); } @@ -333,8 +332,7 @@ bool GPU_HW_D3D11::CreateStateObjects() for (u8 transparency_mode = 0; transparency_mode < 5; transparency_mode++) { bl_desc = CD3D11_BLEND_DESC(CD3D11_DEFAULT()); - if (transparency_mode != static_cast(TransparencyMode::Disabled) || - m_texture_filtering != GPUTextureFilter::Nearest) + if (transparency_mode != static_cast(GPUTransparencyMode::Disabled) || m_texture_filtering != GPUTextureFilter::Nearest) { bl_desc.RenderTarget[0].BlendEnable = TRUE; bl_desc.RenderTarget[0].SrcBlend = D3D11_BLEND_ONE; @@ -342,7 +340,7 @@ bool GPU_HW_D3D11::CreateStateObjects() bl_desc.RenderTarget[0].SrcBlendAlpha = D3D11_BLEND_ONE; bl_desc.RenderTarget[0].DestBlendAlpha = D3D11_BLEND_ZERO; bl_desc.RenderTarget[0].BlendOp = - (transparency_mode == static_cast(TransparencyMode::BackgroundMinusForeground)) ? + (transparency_mode == static_cast(GPUTransparencyMode::BackgroundMinusForeground)) ? 
D3D11_BLEND_OP_REV_SUBTRACT : D3D11_BLEND_OP_ADD; bl_desc.RenderTarget[0].BlendOpAlpha = D3D11_BLEND_OP_ADD; @@ -371,8 +369,8 @@ void GPU_HW_D3D11::DestroyStateObjects() bool GPU_HW_D3D11::CompileShaders() { - GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering, - m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend); + GPU_HW_ShaderGen shadergen(g_host_interface->GetDisplay()->GetRenderAPI(), m_resolution_scale, m_true_color, + m_scaled_dithering, m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend); Common::Timer compile_time; const int progress_total = 1 + 1 + 2 + (4 * 9 * 2 * 2) + 7 + (2 * 3); @@ -442,7 +440,7 @@ bool GPU_HW_D3D11::CompileShaders() for (u8 interlacing = 0; interlacing < 2; interlacing++) { const std::string ps = shadergen.GenerateBatchFragmentShader( - static_cast(render_mode), static_cast(texture_mode), + static_cast(render_mode), static_cast(texture_mode), ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); m_batch_pixel_shaders[render_mode][texture_mode][dithering][interlacing] = @@ -505,8 +503,8 @@ bool GPU_HW_D3D11::CompileShaders() { for (u8 interlacing = 0; interlacing < 3; interlacing++) { - const std::string ps = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), - static_cast(interlacing)); + const std::string ps = shadergen.GenerateDisplayFragmentShader( + ConvertToBoolUnchecked(depth_24bit), static_cast(interlacing)); m_display_pixel_shaders[depth_24bit][interlacing] = m_shader_cache.GetPixelShader(m_device.Get(), ps); if (!m_display_pixel_shaders[depth_24bit][interlacing]) return false; @@ -608,7 +606,7 @@ void GPU_HW_D3D11::DrawUtilityShader(ID3D11PixelShader* shader, const void* unif void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vertex, u32 num_vertices) { - const bool textured = (m_batch.texture_mode != TextureMode::Disabled); + const bool textured = 
(m_batch.texture_mode != GPUTextureMode::Disabled); m_context->VSSetShader(m_batch_vertex_shaders[BoolToUInt8(textured)].Get(), nullptr, 0); @@ -617,8 +615,8 @@ void GPU_HW_D3D11::DrawBatchVertices(BatchRenderMode render_mode, u32 base_verte .Get(), nullptr, 0); - const TransparencyMode transparency_mode = - (render_mode == BatchRenderMode::OnlyOpaque) ? TransparencyMode::Disabled : m_batch.transparency_mode; + const GPUTransparencyMode transparency_mode = + (render_mode == BatchRenderMode::OnlyOpaque) ? GPUTransparencyMode::Disabled : m_batch.transparency_mode; m_context->OMSetBlendState(m_batch_blend_states[static_cast(transparency_mode)].Get(), nullptr, 0xFFFFFFFFu); m_context->OMSetDepthStencilState( m_batch.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0); @@ -637,46 +635,44 @@ void GPU_HW_D3D11::SetScissorFromDrawingArea() void GPU_HW_D3D11::ClearDisplay() { - GPU_HW::ClearDisplay(); - static constexpr std::array clear_color = {0.0f, 0.0f, 0.0f, 1.0f}; m_context->ClearRenderTargetView(m_display_texture.GetD3DRTV(), clear_color.data()); } void GPU_HW_D3D11::UpdateDisplay() { - GPU_HW::UpdateDisplay(); + HostDisplay* display = g_host_interface->GetDisplay(); if (g_settings.debugging.show_vram) { - m_host_display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), - 0, 0, m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, - static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); + display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 0, 0, + m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); + display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, + static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); } else { - const u32 vram_offset_x = m_crtc_state.display_vram_left; - const u32 
vram_offset_y = m_crtc_state.display_vram_top; + const u32 vram_offset_x = m_display_vram_left; + const u32 vram_offset_y = m_display_vram_top; const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale; const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale; - const u32 display_width = m_crtc_state.display_vram_width; - const u32 display_height = m_crtc_state.display_vram_height; + const u32 display_width = m_display_vram_width; + const u32 display_height = m_display_vram_height; const u32 scaled_display_width = display_width * m_resolution_scale; const u32 scaled_display_height = display_height * m_resolution_scale; - const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); + const GPUInterlacedDisplayMode interlaced = m_display_interlace; - if (IsDisplayDisabled()) + if (!m_display_enabled) { - m_host_display->ClearDisplayTexture(); + display->ClearDisplayTexture(); } - else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && + else if (!m_display_24bit && interlaced == GPUInterlacedDisplayMode::None && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) { - m_host_display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(), - m_vram_texture.GetHeight(), scaled_vram_offset_x, scaled_vram_offset_y, - scaled_display_width, scaled_display_height); + display->SetDisplayTexture(m_vram_texture.GetD3DSRV(), m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), + scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, + scaled_display_height); } else { @@ -684,28 +680,26 @@ void GPU_HW_D3D11::UpdateDisplay() m_context->OMSetDepthStencilState(m_depth_disabled_state.Get(), 0); m_context->PSSetShaderResources(0, 1, m_vram_texture.GetD3DSRVArray()); - const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? 
GetInterlacedDisplayField() : 0; - const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale; - const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * m_resolution_scale; + const u32 reinterpret_field_offset = + (interlaced != GPUInterlacedDisplayMode::None) ? m_display_interlace_field : 0; + const u32 reinterpret_start_x = m_display_vram_start_x * m_resolution_scale; + const u32 reinterpret_crop_left = (m_display_vram_left - m_display_vram_start_x) * m_resolution_scale; const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, reinterpret_crop_left, reinterpret_field_offset}; ID3D11PixelShader* display_pixel_shader = - m_display_pixel_shaders[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)].Get(); + m_display_pixel_shaders[BoolToUInt8(m_display_24bit)][static_cast(interlaced)].Get(); SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height); DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms)); - m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(), - m_display_texture.GetHeight(), 0, 0, scaled_display_width, - scaled_display_height); + display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(), + m_display_texture.GetHeight(), 0, 0, scaled_display_width, scaled_display_height); RestoreGraphicsAPIState(); } - m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, - m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, - m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, - m_crtc_state.display_aspect_ratio); + display->SetDisplayParameters(m_display_width, m_display_height, m_display_origin_left, m_display_origin_top, + m_display_width, m_display_height, m_display_aspect_ratio); } } @@ -742,50 +736,50 @@ void GPU_HW_D3D11::ReadVRAM(u32 x, u32 y, u32 width, u32 height) 
RestoreGraphicsAPIState(); } -void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) +void GPU_HW_D3D11::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) { if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) { // CPU round trip if oversized for now. Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - GPU::FillVRAM(x, y, width, height, color); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); + SoftwareFillVRAM(x, y, width, height, color, params); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), params); return; } - GPU_HW::FillVRAM(x, y, width, height, color); + GPU_HW::FillVRAM(x, y, width, height, color, params); - const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); + const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color, params); m_context->OMSetDepthStencilState(m_depth_test_always_state.Get(), 0); SetViewportAndScissor(x * m_resolution_scale, y * m_resolution_scale, width * m_resolution_scale, height * m_resolution_scale); - DrawUtilityShader(IsInterlacedRenderingEnabled() ? m_vram_interlaced_fill_pixel_shader.Get() : - m_vram_fill_pixel_shader.Get(), + DrawUtilityShader(params.interlaced_rendering ? 
m_vram_interlaced_fill_pixel_shader.Get() : + m_vram_fill_pixel_shader.Get(), &uniforms, sizeof(uniforms)); RestoreGraphicsAPIState(); } -void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) +void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) { const Common::Rectangle bounds = GetVRAMTransferBounds(x, y, width, height); - GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data); + GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data, params); const u32 num_pixels = width * height; const auto map_result = m_texture_stream_buffer.Map(m_context.Get(), sizeof(u16), num_pixels * sizeof(u16)); std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16)); m_texture_stream_buffer.Unmap(m_context.Get(), num_pixels * sizeof(u16)); - const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned); + const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned, params); m_context->OMSetDepthStencilState( - m_GPUSTAT.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0); + params.check_mask_before_draw ? 
m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0); m_context->PSSetShaderResources(0, 1, m_texture_stream_buffer_srv_r16ui.GetAddressOf()); // the viewport should already be set to the full vram, so just adjust the scissor - const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; + const Common::Rectangle scaled_bounds(ScaleVRAMRect(bounds)); SetScissor(scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), scaled_bounds.GetHeight()); DrawUtilityShader(m_vram_write_pixel_shader.Get(), &uniforms, sizeof(uniforms)); @@ -793,9 +787,10 @@ void GPU_HW_D3D11::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* d RestoreGraphicsAPIState(); } -void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) +void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) { - if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height)) + if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height, params)) { const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); @@ -803,18 +798,18 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt UpdateVRAMReadTexture(); IncludeVRAMDityRectangle(dst_bounds); - const VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height); + const VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height, params); - const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); + const Common::Rectangle dst_bounds_scaled(ScaleVRAMRect(dst_bounds)); SetViewportAndScissor(dst_bounds_scaled.left, dst_bounds_scaled.top, dst_bounds_scaled.GetWidth(), dst_bounds_scaled.GetHeight()); m_context->OMSetDepthStencilState( - m_GPUSTAT.check_mask_before_draw ? 
m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0); + params.check_mask_before_draw ? m_depth_test_less_state.Get() : m_depth_test_always_state.Get(), 0); m_context->PSSetShaderResources(0, 1, m_vram_read_texture.GetD3DSRVArray()); DrawUtilityShader(m_vram_copy_pixel_shader.Get(), &uniforms, sizeof(uniforms)); RestoreGraphicsAPIState(); - if (m_GPUSTAT.check_mask_before_draw) + if (params.check_mask_before_draw) m_current_depth++; return; @@ -826,7 +821,7 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt if (m_vram_dirty_rect.Intersects(Common::Rectangle::FromExtents(src_x, src_y, width, height))) UpdateVRAMReadTexture(); - GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); + GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params); src_x *= m_resolution_scale; src_y *= m_resolution_scale; @@ -841,7 +836,7 @@ void GPU_HW_D3D11::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 widt void GPU_HW_D3D11::UpdateVRAMReadTexture() { - const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; + const Common::Rectangle scaled_rect(ScaleVRAMRect(m_vram_dirty_rect)); const CD3D11_BOX src_box(scaled_rect.left, scaled_rect.top, 0, scaled_rect.right, scaled_rect.bottom, 1); m_context->CopySubresourceRegion(m_vram_read_texture, 0, scaled_rect.left, scaled_rect.top, 0, m_vram_texture, 0, &src_box); @@ -864,7 +859,3 @@ void GPU_HW_D3D11::UpdateDepthBufferFromMaskBit() RestoreGraphicsAPIState(); } -std::unique_ptr GPU::CreateHardwareD3D11Renderer() -{ - return std::make_unique(); -} diff --git a/src/core/gpu_hw_d3d11.h b/src/core/gpu_hw_d3d11.h index bbf7eb116..5a3c8a44c 100644 --- a/src/core/gpu_hw_d3d11.h +++ b/src/core/gpu_hw_d3d11.h @@ -19,7 +19,7 @@ public: GPU_HW_D3D11(); ~GPU_HW_D3D11() override; - bool Initialize(HostDisplay* host_display) override; + bool Initialize() override; void Reset() override; void ResetGraphicsAPIState() override; @@ -30,9 +30,9 @@ protected: void ClearDisplay() 
override; void UpdateDisplay() override; void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override; + void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override; + void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, GPUBackendCommandParameters params) override; void UpdateVRAMReadTexture() override; void UpdateDepthBufferFromMaskBit() override; void SetScissorFromDrawingArea() override; diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 2284815f7..4baceb67b 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -21,9 +21,9 @@ GPU_HW_OpenGL::~GPU_HW_OpenGL() if (m_texture_buffer_r16ui_texture != 0) glDeleteTextures(1, &m_texture_buffer_r16ui_texture); - if (m_host_display) + if (g_host_interface->GetDisplay()) { - m_host_display->ClearDisplayTexture(); + g_host_interface->GetDisplay()->ClearDisplayTexture(); ResetGraphicsAPIState(); } @@ -32,8 +32,9 @@ GPU_HW_OpenGL::~GPU_HW_OpenGL() glUseProgram(0); } -bool GPU_HW_OpenGL::Initialize(HostDisplay* host_display) +bool GPU_HW_OpenGL::Initialize() { + HostDisplay* host_display = g_host_interface->GetDisplay(); if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::OpenGL && host_display->GetRenderAPI() != HostDisplay::RenderAPI::OpenGLES) { @@ -45,7 +46,7 @@ bool GPU_HW_OpenGL::Initialize(HostDisplay* host_display) m_shader_cache.Open(IsGLES(), g_host_interface->GetShaderCacheBasePath()); - if (!GPU_HW::Initialize(host_display)) + if (!GPU_HW::Initialize()) return false; if (!CreateFramebuffer()) @@ -130,7 +131,7 @@ void 
GPU_HW_OpenGL::UpdateSettings() if (framebuffer_changed) { - m_host_display->ClearDisplayTexture(); + g_host_interface->GetDisplay()->ClearDisplayTexture(); CreateFramebuffer(); } if (shaders_changed) @@ -358,8 +359,8 @@ bool GPU_HW_OpenGL::CreateTextureBuffer() bool GPU_HW_OpenGL::CompilePrograms() { const bool use_binding_layout = GPU_HW_ShaderGen::UseGLSLBindingLayout(); - GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering, - m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend); + GPU_HW_ShaderGen shadergen(g_host_interface->GetDisplay()->GetRenderAPI(), m_resolution_scale, m_true_color, + m_scaled_dithering, m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend); Common::Timer compile_time; const int progress_total = (4 * 9 * 2 * 2) + (2 * 3) + 5; @@ -383,10 +384,10 @@ bool GPU_HW_OpenGL::CompilePrograms() { for (u8 interlacing = 0; interlacing < 2; interlacing++) { - const bool textured = (static_cast(texture_mode) != TextureMode::Disabled); + const bool textured = (static_cast(texture_mode) != GPUTextureMode::Disabled); const std::string batch_vs = shadergen.GenerateBatchVertexShader(textured); const std::string fs = shadergen.GenerateBatchFragmentShader( - static_cast(render_mode), static_cast(texture_mode), + static_cast(render_mode), static_cast(texture_mode), ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); const auto link_callback = [this, textured, use_binding_layout](GL::Program& prog) { @@ -444,7 +445,7 @@ bool GPU_HW_OpenGL::CompilePrograms() { const std::string vs = shadergen.GenerateScreenQuadVertexShader(); const std::string fs = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), - static_cast(interlaced)); + static_cast(interlaced)); std::optional prog = m_shader_cache.GetProgram(vs, {}, fs, [this, use_binding_layout](GL::Program& prog) { @@ -558,23 +559,24 @@ void 
GPU_HW_OpenGL::DrawBatchVertices(BatchRenderMode render_mode, u32 base_vert [BoolToUInt8(m_batch.dithering)][BoolToUInt8(m_batch.interlacing)]; prog.Bind(); - if (m_batch.texture_mode != TextureMode::Disabled) + if (m_batch.texture_mode != GPUTextureMode::Disabled) m_vram_read_texture.Bind(); - if (m_batch.transparency_mode == TransparencyMode::Disabled || render_mode == BatchRenderMode::OnlyOpaque) + if (m_batch.transparency_mode == GPUTransparencyMode::Disabled || render_mode == BatchRenderMode::OnlyOpaque) { glDisable(GL_BLEND); } else { glEnable(GL_BLEND); - glBlendEquationSeparate( - m_batch.transparency_mode == TransparencyMode::BackgroundMinusForeground ? GL_FUNC_REVERSE_SUBTRACT : GL_FUNC_ADD, - GL_FUNC_ADD); + glBlendEquationSeparate(m_batch.transparency_mode == GPUTransparencyMode::BackgroundMinusForeground ? + GL_FUNC_REVERSE_SUBTRACT : + GL_FUNC_ADD, + GL_FUNC_ADD); glBlendFuncSeparate(GL_ONE, m_supports_dual_source_blend ? GL_SRC1_ALPHA : GL_SRC_ALPHA, GL_ONE, GL_ZERO); } - glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS); + glDepthFunc(m_batch.check_mask_before_draw ? 
GL_GEQUAL : GL_ALWAYS); glDrawArrays(GL_TRIANGLES, m_batch_base_vertex, num_vertices); } @@ -606,8 +608,6 @@ void GPU_HW_OpenGL::UploadUniformBuffer(const void* data, u32 data_size) void GPU_HW_OpenGL::ClearDisplay() { - GPU_HW::ClearDisplay(); - m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER); glDisable(GL_SCISSOR_TEST); glClearColor(0.0f, 0.0f, 0.0f, 1.0f); @@ -618,41 +618,40 @@ void GPU_HW_OpenGL::ClearDisplay() void GPU_HW_OpenGL::UpdateDisplay() { - GPU_HW::UpdateDisplay(); - + HostDisplay* display = g_host_interface->GetDisplay(); if (g_settings.debugging.show_vram) { - m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture.GetGLId())), - m_vram_texture.GetWidth(), static_cast(m_vram_texture.GetHeight()), 0, - m_vram_texture.GetHeight(), m_vram_texture.GetWidth(), - -static_cast(m_vram_texture.GetHeight())); - m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, - static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); + display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture.GetGLId())), + m_vram_texture.GetWidth(), static_cast(m_vram_texture.GetHeight()), 0, + m_vram_texture.GetHeight(), m_vram_texture.GetWidth(), + -static_cast(m_vram_texture.GetHeight())); + display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, + static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); } else { - const u32 vram_offset_x = m_crtc_state.display_vram_left; - const u32 vram_offset_y = m_crtc_state.display_vram_top; + const u32 vram_offset_x = m_display_vram_left; + const u32 vram_offset_y = m_display_vram_top; const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale; const u32 scaled_vram_offset_y = vram_offset_y * m_resolution_scale; - const u32 display_width = m_crtc_state.display_vram_width; - const u32 display_height = m_crtc_state.display_vram_height; - const u32 scaled_display_width = display_width * m_resolution_scale; - const u32 
scaled_display_height = display_height * m_resolution_scale; - const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); + const u32 display_width = m_display_vram_width; + const u32 display_height = m_display_vram_height; + const u32 scaled_display_width = m_display_width * m_resolution_scale; + const u32 scaled_display_height = m_display_height * m_resolution_scale; + const GPUInterlacedDisplayMode interlaced = m_display_interlace; - if (IsDisplayDisabled()) + if (!m_display_enabled) { - m_host_display->ClearDisplayTexture(); + display->ClearDisplayTexture(); } - else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == GPU_HW::InterlacedRenderMode::None && + else if (!m_display_24bit && interlaced == GPUInterlacedDisplayMode::None && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) { - m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture.GetGLId())), - m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), scaled_vram_offset_x, - m_vram_texture.GetHeight() - scaled_vram_offset_y, scaled_display_width, - -static_cast(scaled_display_height)); + display->SetDisplayTexture(reinterpret_cast(static_cast(m_vram_texture.GetGLId())), + m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), scaled_vram_offset_x, + m_vram_texture.GetHeight() - scaled_vram_offset_y, scaled_display_width, + -static_cast(scaled_display_height)); } else { @@ -660,16 +659,17 @@ void GPU_HW_OpenGL::UpdateDisplay() glDisable(GL_SCISSOR_TEST); glDisable(GL_DEPTH_TEST); - m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)].Bind(); + m_display_programs[BoolToUInt8(m_display_24bit)][static_cast(interlaced)].Bind(); m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER); m_vram_texture.Bind(); - const u8 height_div2 = BoolToUInt8(interlaced == GPU_HW::InterlacedRenderMode::SeparateFields); - const u32 
reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0; + const u8 height_div2 = BoolToUInt8(interlaced == GPUInterlacedDisplayMode::SeparateFields); + const u32 reinterpret_field_offset = + (interlaced != GPUInterlacedDisplayMode::None) ? m_display_interlace_field : 0; const u32 scaled_flipped_vram_offset_y = m_vram_texture.GetHeight() - scaled_vram_offset_y - reinterpret_field_offset - (scaled_display_height >> height_div2); - const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale; - const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * m_resolution_scale; + const u32 reinterpret_start_x = m_display_vram_start_x * m_resolution_scale; + const u32 reinterpret_crop_left = (m_display_vram_left - m_display_vram_start_x) * m_resolution_scale; const u32 uniforms[4] = {reinterpret_start_x, scaled_flipped_vram_offset_y, reinterpret_crop_left, reinterpret_field_offset}; UploadUniformBuffer(uniforms, sizeof(uniforms)); @@ -679,10 +679,9 @@ void GPU_HW_OpenGL::UpdateDisplay() glBindVertexArray(m_attributeless_vao_id); glDrawArrays(GL_TRIANGLES, 0, 3); - m_host_display->SetDisplayTexture(reinterpret_cast(static_cast(m_display_texture.GetGLId())), - m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0, - scaled_display_height, scaled_display_width, - -static_cast(scaled_display_height)); + display->SetDisplayTexture(reinterpret_cast(static_cast(m_display_texture.GetGLId())), + m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0, scaled_display_height, + scaled_display_width, -static_cast(scaled_display_height)); // restore state glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_vram_fbo_id); @@ -692,10 +691,8 @@ void GPU_HW_OpenGL::UpdateDisplay() glEnable(GL_SCISSOR_TEST); } - m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, - m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, - 
m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, - m_crtc_state.display_aspect_ratio); + display->SetDisplayParameters(m_display_width, m_display_height, m_display_origin_left, m_display_origin_top, + m_display_vram_width, m_display_vram_height, m_display_aspect_ratio); } } @@ -730,19 +727,19 @@ void GPU_HW_OpenGL::ReadVRAM(u32 x, u32 y, u32 width, u32 height) RestoreGraphicsAPIState(); } -void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) +void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) { if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) { // CPU round trip if oversized for now. Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - GPU::FillVRAM(x, y, width, height, color); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); + SoftwareFillVRAM(x, y, width, height, color, params); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), params); return; } - GPU_HW::FillVRAM(x, y, width, height, color); + GPU_HW::FillVRAM(x, y, width, height, color, params); // scale coordinates x *= m_resolution_scale; @@ -753,7 +750,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) glScissor(x, m_vram_texture.GetHeight() - y - height, width, height); // fast path when not using interlaced rendering - if (!IsInterlacedRenderingEnabled()) + if (!params.interlaced_rendering) { const auto [r, g, b, a] = RGBA8ToFloat(m_true_color ? 
color : RGBA5551ToRGBA8888(RGBA8888ToRGBA5551(color))); glClearColor(r, g, b, a); @@ -763,7 +760,7 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) } else { - const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); + const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color, params); m_vram_interlaced_fill_program.Bind(); UploadUniformBuffer(&uniforms, sizeof(uniforms)); @@ -776,13 +773,14 @@ void GPU_HW_OpenGL::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) } } -void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) +void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, + GPUBackendCommandParameters params) { const u32 num_pixels = width * height; if (num_pixels < m_max_texture_buffer_size || m_use_ssbo_for_vram_writes) { const Common::Rectangle bounds = GetVRAMTransferBounds(x, y, width, height); - GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data); + GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data, params); const auto map_result = m_texture_stream_buffer->Map(sizeof(u16), num_pixels * sizeof(u16)); std::memcpy(map_result.pointer, data, num_pixels * sizeof(u16)); @@ -790,7 +788,7 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* m_texture_stream_buffer->Unbind(); glDisable(GL_BLEND); - glDepthFunc(m_GPUSTAT.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS); + glDepthFunc(params.check_mask_before_draw ? 
GL_GEQUAL : GL_ALWAYS); m_vram_write_program.Bind(); if (m_use_ssbo_for_vram_writes) @@ -798,11 +796,11 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* else glBindTexture(GL_TEXTURE_BUFFER, m_texture_buffer_r16ui_texture); - const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned); + const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, map_result.index_aligned, params); UploadUniformBuffer(&uniforms, sizeof(uniforms)); // the viewport should already be set to the full vram, so just adjust the scissor - const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; + const Common::Rectangle scaled_bounds(ScaleVRAMRect(bounds)); glScissor(scaled_bounds.left, m_vram_texture.GetHeight() - scaled_bounds.top - scaled_bounds.GetHeight(), scaled_bounds.GetWidth(), scaled_bounds.GetHeight()); @@ -818,12 +816,12 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* // CPU round trip if oversized for now. 
Log_WarningPrintf("Oversized VRAM update (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - GPU::UpdateVRAM(x, y, width, height, data); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); + SoftwareUpdateVRAM(x, y, width, height, data, params); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), params); return; } - GPU_HW::UpdateVRAM(x, y, width, height, data); + GPU_HW::UpdateVRAM(x, y, width, height, data, params); const auto map_result = m_texture_stream_buffer->Map(sizeof(u32), num_pixels * sizeof(u32)); @@ -881,9 +879,10 @@ void GPU_HW_OpenGL::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* } } -void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) +void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) { - if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height)) + if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height, params)) { const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); @@ -891,14 +890,14 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid UpdateVRAMReadTexture(); IncludeVRAMDityRectangle(dst_bounds); - VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height); + VRAMCopyUBOData uniforms = GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height, params); uniforms.u_src_y = m_vram_texture.GetHeight() - uniforms.u_src_y - uniforms.u_height; uniforms.u_dst_y = m_vram_texture.GetHeight() - uniforms.u_dst_y - uniforms.u_height; UploadUniformBuffer(&uniforms, sizeof(uniforms)); glDisable(GL_SCISSOR_TEST); glDisable(GL_BLEND); - glDepthFunc(m_GPUSTAT.check_mask_before_draw ? 
GL_GEQUAL : GL_ALWAYS); + glDepthFunc(params.check_mask_before_draw ? GL_GEQUAL : GL_ALWAYS); const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); glViewport(dst_bounds_scaled.left, @@ -910,13 +909,13 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid RestoreGraphicsAPIState(); - if (m_GPUSTAT.check_mask_before_draw) + if (params.check_mask_before_draw) m_current_depth++; return; } - GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); + GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params); src_x *= m_resolution_scale; src_y *= m_resolution_scale; @@ -951,7 +950,7 @@ void GPU_HW_OpenGL::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid void GPU_HW_OpenGL::UpdateVRAMReadTexture() { - const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; + const Common::Rectangle scaled_rect = ScaleVRAMRect(m_vram_dirty_rect); const u32 width = scaled_rect.GetWidth(); const u32 height = scaled_rect.GetHeight(); const u32 x = scaled_rect.left; @@ -996,8 +995,3 @@ void GPU_HW_OpenGL::UpdateDepthBufferFromMaskBit() glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glEnable(GL_SCISSOR_TEST); } - -std::unique_ptr GPU::CreateHardwareOpenGLRenderer() -{ - return std::make_unique(); -} diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index 3e8720b06..75d92db17 100644 --- a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -15,7 +15,7 @@ public: GPU_HW_OpenGL(); ~GPU_HW_OpenGL() override; - bool Initialize(HostDisplay* host_display) override; + bool Initialize() override; void Reset() override; void ResetGraphicsAPIState() override; @@ -26,9 +26,10 @@ protected: void ClearDisplay() override; void UpdateDisplay() override; void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; - void CopyVRAM(u32 src_x, u32 
src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override; + void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override; + void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) override; void UpdateVRAMReadTexture() override; void UpdateDepthBufferFromMaskBit() override; void SetScissorFromDrawingArea() override; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index d0050c8d7..58d0d7ca6 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -17,7 +17,7 @@ GPU_HW_ShaderGen::~GPU_HW_ShaderGen() = default; void GPU_HW_ShaderGen::WriteCommonFunctions(std::stringstream& ss) { ss << "CONSTANT uint RESOLUTION_SCALE = " << m_resolution_scale << "u;\n"; - ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << GPU::VRAM_WIDTH << ", " << GPU::VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n"; + ss << "CONSTANT uint2 VRAM_SIZE = uint2(" << VRAM_WIDTH << ", " << VRAM_HEIGHT << ") * RESOLUTION_SCALE;\n"; ss << "CONSTANT float2 RCP_VRAM_SIZE = float2(1.0, 1.0) / float2(VRAM_SIZE);\n"; ss << R"( @@ -628,12 +628,11 @@ void FilteredSampleFromVRAM(uint4 texpage, float2 coords, float4 uv_limits, } std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, - GPU::TextureMode texture_mode, bool dithering, - bool interlacing) + GPUTextureMode texture_mode, bool dithering, bool interlacing) { - const GPU::TextureMode actual_texture_mode = texture_mode & ~GPU::TextureMode::RawTextureBit; - const bool raw_texture = (texture_mode & GPU::TextureMode::RawTextureBit) == GPU::TextureMode::RawTextureBit; - const bool textured = (texture_mode != GPU::TextureMode::Disabled); + const GPUTextureMode actual_texture_mode = texture_mode & ~GPUTextureMode::RawTextureBit; + const bool raw_texture = 
(texture_mode & GPUTextureMode::RawTextureBit) == GPUTextureMode::RawTextureBit; + const bool textured = (texture_mode != GPUTextureMode::Disabled); const bool use_dual_source = m_supports_dual_source_blend && ((transparency != GPU_HW::BatchRenderMode::TransparencyDisabled && transparency != GPU_HW::BatchRenderMode::OnlyOpaque) || @@ -646,10 +645,9 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod DefineMacro(ss, "TRANSPARENCY_ONLY_TRANSPARENT", transparency == GPU_HW::BatchRenderMode::OnlyTransparent); DefineMacro(ss, "TEXTURED", textured); DefineMacro(ss, "PALETTE", - actual_texture_mode == GPU::TextureMode::Palette4Bit || - actual_texture_mode == GPU::TextureMode::Palette8Bit); - DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPU::TextureMode::Palette4Bit); - DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPU::TextureMode::Palette8Bit); + actual_texture_mode == GPUTextureMode::Palette4Bit || actual_texture_mode == GPUTextureMode::Palette8Bit); + DefineMacro(ss, "PALETTE_4_BIT", actual_texture_mode == GPUTextureMode::Palette4Bit); + DefineMacro(ss, "PALETTE_8_BIT", actual_texture_mode == GPUTextureMode::Palette8Bit); DefineMacro(ss, "RAW_TEXTURE", raw_texture); DefineMacro(ss, "DITHERING", dithering); DefineMacro(ss, "DITHERING_SCALED", m_scaled_dithering); @@ -671,7 +669,7 @@ std::string GPU_HW_ShaderGen::GenerateBatchFragmentShader(GPU_HW::BatchRenderMod { if (i > 0) ss << ", "; - ss << GPU::DITHER_MATRIX[i / 4][i % 4]; + ss << DITHER_MATRIX[i / 4][i % 4]; } if (m_glsl) ss << " );\n"; @@ -967,14 +965,13 @@ std::string GPU_HW_ShaderGen::GenerateInterlacedFillFragmentShader() return ss.str(); } -std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, - GPU_HW::InterlacedRenderMode interlace_mode) +std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, GPUInterlacedDisplayMode interlace_mode) { std::stringstream ss; WriteHeader(ss); DefineMacro(ss, "DEPTH_24BIT", 
depth_24bit); - DefineMacro(ss, "INTERLACED", interlace_mode != GPU_HW::InterlacedRenderMode::None); - DefineMacro(ss, "INTERLEAVED", interlace_mode == GPU_HW::InterlacedRenderMode::InterleavedFields); + DefineMacro(ss, "INTERLACED", interlace_mode != GPUInterlacedDisplayMode::None); + DefineMacro(ss, "INTERLEAVED", interlace_mode == GPUInterlacedDisplayMode::InterleavedFields); WriteCommonFunctions(ss); DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_crop_left", "uint u_field_offset"}, true); diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index a881d4b41..36b79249a 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -10,10 +10,10 @@ public: ~GPU_HW_ShaderGen(); std::string GenerateBatchVertexShader(bool textured); - std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPU::TextureMode texture_mode, + std::string GenerateBatchFragmentShader(GPU_HW::BatchRenderMode transparency, GPUTextureMode texture_mode, bool dithering, bool interlacing); std::string GenerateInterlacedFillFragmentShader(); - std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode); + std::string GenerateDisplayFragmentShader(bool depth_24bit, GPUInterlacedDisplayMode interlace_mode); std::string GenerateVRAMReadFragmentShader(); std::string GenerateVRAMWriteFragmentShader(bool use_ssbo); std::string GenerateVRAMCopyFragmentShader(); diff --git a/src/core/gpu_hw_vulkan.cpp b/src/core/gpu_hw_vulkan.cpp index 1f82a979e..f9413eefa 100644 --- a/src/core/gpu_hw_vulkan.cpp +++ b/src/core/gpu_hw_vulkan.cpp @@ -17,17 +17,18 @@ GPU_HW_Vulkan::GPU_HW_Vulkan() = default; GPU_HW_Vulkan::~GPU_HW_Vulkan() { - if (m_host_display) + if (g_host_interface->GetDisplay()) { - m_host_display->ClearDisplayTexture(); + g_host_interface->GetDisplay()->ClearDisplayTexture(); ResetGraphicsAPIState(); } DestroyResources(); } -bool GPU_HW_Vulkan::Initialize(HostDisplay* host_display) +bool 
GPU_HW_Vulkan::Initialize() { + HostDisplay* host_display = g_host_interface->GetDisplay(); if (host_display->GetRenderAPI() != HostDisplay::RenderAPI::Vulkan) { Log_ErrorPrintf("Host render API is incompatible"); @@ -37,7 +38,7 @@ bool GPU_HW_Vulkan::Initialize(HostDisplay* host_display) Assert(g_vulkan_shader_cache); SetCapabilities(); - if (!GPU_HW::Initialize(host_display)) + if (!GPU_HW::Initialize()) return false; if (!CreatePipelineLayouts()) @@ -131,7 +132,7 @@ void GPU_HW_Vulkan::UpdateSettings() if (shaders_changed) { // clear it since we draw a loading screen and it's not in the correct state - m_host_display->ClearDisplayTexture(); + g_host_interface->GetDisplay()->ClearDisplayTexture(); DestroyPipelines(); CompilePipelines(); } @@ -583,8 +584,8 @@ bool GPU_HW_Vulkan::CompilePipelines() VkDevice device = g_vulkan_context->GetDevice(); VkPipelineCache pipeline_cache = g_vulkan_shader_cache->GetPipelineCache(); - GPU_HW_ShaderGen shadergen(m_host_display->GetRenderAPI(), m_resolution_scale, m_true_color, m_scaled_dithering, - m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend); + GPU_HW_ShaderGen shadergen(g_host_interface->GetDisplay()->GetRenderAPI(), m_resolution_scale, m_true_color, + m_scaled_dithering, m_texture_filtering, m_using_uv_limits, m_supports_dual_source_blend); Common::Timer compile_time; const int progress_total = 2 + (4 * 9 * 2 * 2) + (2 * 4 * 5 * 9 * 2 * 2) + 1 + 2 + 2 + 2 + 2 + (2 * 3); @@ -629,7 +630,7 @@ bool GPU_HW_Vulkan::CompilePipelines() for (u8 interlacing = 0; interlacing < 2; interlacing++) { const std::string fs = shadergen.GenerateBatchFragmentShader( - static_cast(render_mode), static_cast(texture_mode), + static_cast(render_mode), static_cast(texture_mode), ConvertToBoolUnchecked(dithering), ConvertToBoolUnchecked(interlacing)); VkShaderModule shader = g_vulkan_shader_cache->GetFragmentShader(fs); @@ -658,7 +659,7 @@ bool GPU_HW_Vulkan::CompilePipelines() { for (u8 interlacing = 0; interlacing < 2; 
interlacing++) { - const bool textured = (static_cast(texture_mode) != TextureMode::Disabled); + const bool textured = (static_cast(texture_mode) != GPUTextureMode::Disabled); gpbuilder.SetPipelineLayout(m_batch_pipeline_layout); gpbuilder.SetRenderPass(m_vram_render_pass, 0); @@ -683,7 +684,7 @@ bool GPU_HW_Vulkan::CompilePipelines() (depth_test != 0) ? VK_COMPARE_OP_GREATER_OR_EQUAL : VK_COMPARE_OP_ALWAYS); gpbuilder.SetNoBlendingState(); - if ((static_cast(transparency_mode) != TransparencyMode::Disabled && + if ((static_cast(transparency_mode) != GPUTransparencyMode::Disabled && (static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && static_cast(render_mode) != BatchRenderMode::OnlyOpaque)) || m_texture_filtering != GPUTextureFilter::Nearest) @@ -691,7 +692,8 @@ bool GPU_HW_Vulkan::CompilePipelines() gpbuilder.SetBlendAttachment( 0, true, VK_BLEND_FACTOR_ONE, m_supports_dual_source_blend ? VK_BLEND_FACTOR_SRC1_ALPHA : VK_BLEND_FACTOR_SRC_ALPHA, - (static_cast(transparency_mode) == TransparencyMode::BackgroundMinusForeground && + (static_cast(transparency_mode) == + GPUTransparencyMode::BackgroundMinusForeground && static_cast(render_mode) != BatchRenderMode::TransparencyDisabled && static_cast(render_mode) != BatchRenderMode::OnlyOpaque) ? 
VK_BLEND_OP_REVERSE_SUBTRACT : @@ -874,7 +876,7 @@ bool GPU_HW_Vulkan::CompilePipelines() for (u8 interlace_mode = 0; interlace_mode < 3; interlace_mode++) { VkShaderModule fs = g_vulkan_shader_cache->GetFragmentShader(shadergen.GenerateDisplayFragmentShader( - ConvertToBoolUnchecked(depth_24), static_cast(interlace_mode))); + ConvertToBoolUnchecked(depth_24), static_cast(interlace_mode))); if (fs == VK_NULL_HANDLE) return false; @@ -940,7 +942,6 @@ void GPU_HW_Vulkan::SetScissorFromDrawingArea() void GPU_HW_Vulkan::ClearDisplay() { - GPU_HW::ClearDisplay(); EndRenderPass(); VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); @@ -953,51 +954,51 @@ void GPU_HW_Vulkan::ClearDisplay() void GPU_HW_Vulkan::UpdateDisplay() { - GPU_HW::UpdateDisplay(); - EndRenderPass(); + HostDisplay* display = g_host_interface->GetDisplay(); if (g_settings.debugging.show_vram) { m_vram_texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_host_display->SetDisplayTexture(&m_vram_texture, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 0, 0, - m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); - m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, - static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); + display->SetDisplayTexture(&m_vram_texture, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), 0, 0, + m_vram_texture.GetWidth(), m_vram_texture.GetHeight()); + display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, + static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); } else { - const u32 vram_offset_x = m_crtc_state.display_vram_left; - const u32 vram_offset_y = m_crtc_state.display_vram_top; + const u32 vram_offset_x = m_display_vram_left; + const u32 vram_offset_y = m_display_vram_top; const u32 scaled_vram_offset_x = vram_offset_x * m_resolution_scale; const u32 scaled_vram_offset_y = vram_offset_y * 
m_resolution_scale; - const u32 display_width = m_crtc_state.display_vram_width; - const u32 display_height = m_crtc_state.display_vram_height; + const u32 display_width = m_display_vram_width; + const u32 display_height = m_display_vram_height; const u32 scaled_display_width = display_width * m_resolution_scale; const u32 scaled_display_height = display_height * m_resolution_scale; - const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); + const GPUInterlacedDisplayMode interlaced = m_display_interlace; - if (IsDisplayDisabled()) + if (!m_display_enabled) { - m_host_display->ClearDisplayTexture(); + display->ClearDisplayTexture(); } - else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && + else if (!m_display_24bit && interlaced == GPUInterlacedDisplayMode::None && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) { m_vram_texture.TransitionToLayout(g_vulkan_context->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_host_display->SetDisplayTexture(&m_vram_texture, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), - scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, - scaled_display_height); + display->SetDisplayTexture(&m_vram_texture, m_vram_texture.GetWidth(), m_vram_texture.GetHeight(), + scaled_vram_offset_x, scaled_vram_offset_y, scaled_display_width, + scaled_display_height); } else { EndRenderPass(); - const u32 reinterpret_field_offset = (interlaced != InterlacedRenderMode::None) ? GetInterlacedDisplayField() : 0; - const u32 reinterpret_start_x = m_crtc_state.regs.X * m_resolution_scale; - const u32 reinterpret_crop_left = (m_crtc_state.display_vram_left - m_crtc_state.regs.X) * m_resolution_scale; + const u32 reinterpret_field_offset = + (interlaced != GPUInterlacedDisplayMode::None) ? 
m_display_interlace_field : 0; + const u32 reinterpret_start_x = m_display_vram_start_x * m_resolution_scale; + const u32 reinterpret_crop_left = (m_display_vram_left - m_display_vram_start_x) * m_resolution_scale; const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y + reinterpret_field_offset, reinterpret_crop_left, reinterpret_field_offset}; @@ -1007,9 +1008,8 @@ void GPU_HW_Vulkan::UpdateDisplay() BeginRenderPass(m_display_render_pass, m_display_framebuffer, 0, 0, scaled_display_width, scaled_display_height); - vkCmdBindPipeline( - cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_display_pipelines[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)]); + vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, + m_display_pipelines[BoolToUInt8(m_display_24bit)][static_cast(interlaced)]); vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), uniforms); vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, @@ -1022,16 +1022,14 @@ void GPU_HW_Vulkan::UpdateDisplay() m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); m_display_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_host_display->SetDisplayTexture(&m_display_texture, m_display_texture.GetWidth(), m_display_texture.GetHeight(), - 0, 0, scaled_display_width, scaled_display_height); + display->SetDisplayTexture(&m_display_texture, m_display_texture.GetWidth(), m_display_texture.GetHeight(), 0, 0, + scaled_display_width, scaled_display_height); RestoreGraphicsAPIState(); } - m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, - m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, - m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, - m_crtc_state.display_aspect_ratio); + display->SetDisplayParameters(m_display_width, 
m_display_height, m_display_origin_left, m_display_origin_top, + m_display_width, m_display_height, m_display_aspect_ratio); } } @@ -1080,19 +1078,19 @@ void GPU_HW_Vulkan::ReadVRAM(u32 x, u32 y, u32 width, u32 height) RestoreGraphicsAPIState(); } -void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) +void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) { if ((x + width) > VRAM_WIDTH || (y + height) > VRAM_HEIGHT) { // CPU round trip if oversized for now. Log_WarningPrintf("Oversized VRAM fill (%u-%u, %u-%u), CPU round trip", x, x + width, y, y + height); ReadVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT); - GPU::FillVRAM(x, y, width, height, color); - UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data()); + SoftwareFillVRAM(x, y, width, height, color, params); + UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, m_vram_shadow.data(), params); return; } - GPU_HW::FillVRAM(x, y, width, height, color); + GPU_HW::FillVRAM(x, y, width, height, color, params); x *= m_resolution_scale; y *= m_resolution_scale; @@ -1102,21 +1100,22 @@ void GPU_HW_Vulkan::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) BeginVRAMRenderPass(); VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color); + const VRAMFillUBOData uniforms = GetVRAMFillUBOData(x, y, width, height, color, params); vkCmdPushConstants(cmdbuf, m_no_samplers_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), &uniforms); vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_vram_fill_pipelines[BoolToUInt8(IsInterlacedRenderingEnabled())]); + m_vram_fill_pipelines[BoolToUInt8(params.interlaced_rendering)]); Vulkan::Util::SetViewportAndScissor(cmdbuf, x, y, width, height); vkCmdDraw(cmdbuf, 3, 1, 0, 0); RestoreGraphicsAPIState(); } -void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) +void 
GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, + GPUBackendCommandParameters params) { const Common::Rectangle bounds = GetVRAMTransferBounds(x, y, width, height); - GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data); + GPU_HW::UpdateVRAM(bounds.left, bounds.top, bounds.GetWidth(), bounds.GetHeight(), data, params); const u32 data_size = width * height * sizeof(u16); const u32 alignment = std::max(sizeof(u16), static_cast(g_vulkan_context->GetTexelBufferAlignment())); @@ -1140,16 +1139,16 @@ void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* BeginVRAMRenderPass(); VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); - const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, start_index); + const VRAMWriteUBOData uniforms = GetVRAMWriteUBOData(x, y, width, height, start_index, params); vkCmdPushConstants(cmdbuf, m_vram_write_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), &uniforms); vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_vram_write_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw)]); + m_vram_write_pipelines[BoolToUInt8(params.check_mask_before_draw)]); vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_vram_write_pipeline_layout, 0, 1, &m_vram_write_descriptor_set, 0, nullptr); // the viewport should already be set to the full vram, so just adjust the scissor - const Common::Rectangle scaled_bounds = bounds * m_resolution_scale; + const Common::Rectangle scaled_bounds(ScaleVRAMRect(bounds)); Vulkan::Util::SetScissor(cmdbuf, scaled_bounds.left, scaled_bounds.top, scaled_bounds.GetWidth(), scaled_bounds.GetHeight()); vkCmdDraw(cmdbuf, 3, 1, 0, 0); @@ -1157,9 +1156,10 @@ void GPU_HW_Vulkan::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* RestoreGraphicsAPIState(); } -void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 
height) +void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) { - if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height)) + if (UseVRAMCopyShader(src_x, src_y, dst_x, dst_y, width, height, params)) { const Common::Rectangle src_bounds = GetVRAMTransferBounds(src_x, src_y, width, height); const Common::Rectangle dst_bounds = GetVRAMTransferBounds(dst_x, dst_y, width, height); @@ -1167,14 +1167,14 @@ void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid UpdateVRAMReadTexture(); IncludeVRAMDityRectangle(dst_bounds); - const VRAMCopyUBOData uniforms(GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height)); - const Common::Rectangle dst_bounds_scaled(dst_bounds * m_resolution_scale); + const VRAMCopyUBOData uniforms(GetVRAMCopyUBOData(src_x, src_y, dst_x, dst_y, width, height, params)); + const Common::Rectangle dst_bounds_scaled(ScaleVRAMRect(dst_bounds)); BeginVRAMRenderPass(); VkCommandBuffer cmdbuf = g_vulkan_context->GetCurrentCommandBuffer(); vkCmdBindPipeline(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, - m_vram_copy_pipelines[BoolToUInt8(m_GPUSTAT.check_mask_before_draw)]); + m_vram_copy_pipelines[BoolToUInt8(params.check_mask_before_draw)]); vkCmdBindDescriptorSets(cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, m_single_sampler_pipeline_layout, 0, 1, &m_vram_copy_descriptor_set, 0, nullptr); vkCmdPushConstants(cmdbuf, m_single_sampler_pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, sizeof(uniforms), @@ -1184,13 +1184,13 @@ void GPU_HW_Vulkan::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 wid vkCmdDraw(cmdbuf, 3, 1, 0, 0); RestoreGraphicsAPIState(); - if (m_GPUSTAT.check_mask_before_draw) + if (params.check_mask_before_draw) m_current_depth++; return; } - GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); + GPU_HW::CopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params); src_x *= m_resolution_scale; src_y *= 
m_resolution_scale; @@ -1224,7 +1224,7 @@ void GPU_HW_Vulkan::UpdateVRAMReadTexture() m_vram_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); m_vram_read_texture.TransitionToLayout(cmdbuf, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - const auto scaled_rect = m_vram_dirty_rect * m_resolution_scale; + const Common::Rectangle scaled_rect(ScaleVRAMRect(m_vram_dirty_rect)); const VkImageCopy copy{{VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, {static_cast(scaled_rect.left), static_cast(scaled_rect.top), 0}, {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u}, @@ -1262,8 +1262,3 @@ void GPU_HW_Vulkan::UpdateDepthBufferFromMaskBit() RestoreGraphicsAPIState(); } - -std::unique_ptr GPU::CreateHardwareVulkanRenderer() -{ - return std::make_unique(); -} diff --git a/src/core/gpu_hw_vulkan.h b/src/core/gpu_hw_vulkan.h index 24a6863c0..e182b9c30 100644 --- a/src/core/gpu_hw_vulkan.h +++ b/src/core/gpu_hw_vulkan.h @@ -14,7 +14,7 @@ public: GPU_HW_Vulkan(); ~GPU_HW_Vulkan() override; - bool Initialize(HostDisplay* host_display) override; + bool Initialize() override; void Reset() override; void ResetGraphicsAPIState() override; @@ -25,9 +25,10 @@ protected: void ClearDisplay() override; void UpdateDisplay() override; void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; - void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; - void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; - void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override; + void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override; + void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) override; void UpdateVRAMReadTexture() override; void UpdateDepthBufferFromMaskBit() override; void 
SetScissorFromDrawingArea() override; diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index b602826cc..aa6110c2d 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -6,16 +6,29 @@ #include Log_SetChannel(GPU_SW); +static constexpr std::tuple UnpackTexcoord(u16 texcoord) +{ + return std::make_tuple(static_cast(texcoord), static_cast(texcoord >> 8)); +} + +static constexpr std::tuple UnpackColorRGB24(u32 rgb24) +{ + return std::make_tuple(static_cast(rgb24), static_cast(rgb24 >> 8), static_cast(rgb24 >> 16)); +} +static constexpr u32 PackColorRGB24(u8 r, u8 g, u8 b) +{ + return ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16); +} + GPU_SW::GPU_SW() { m_vram.fill(0); - m_vram_ptr = m_vram.data(); } GPU_SW::~GPU_SW() { - if (m_host_display) - m_host_display->ClearDisplayTexture(); + if (g_host_interface->GetDisplay()) + g_host_interface->GetDisplay()->ClearDisplayTexture(); } bool GPU_SW::IsHardwareRenderer() const @@ -23,21 +36,22 @@ bool GPU_SW::IsHardwareRenderer() const return false; } -bool GPU_SW::Initialize(HostDisplay* host_display) +bool GPU_SW::Initialize() { - if (!GPU::Initialize(host_display)) + if (!GPUBackend::Initialize()) return false; - m_display_texture = host_display->CreateTexture(VRAM_WIDTH, VRAM_HEIGHT, nullptr, 0, true); + m_display_texture = g_host_interface->GetDisplay()->CreateTexture(VRAM_WIDTH, VRAM_HEIGHT, nullptr, 0, true); if (!m_display_texture) return false; + m_vram_ptr = m_vram.data(); return true; } void GPU_SW::Reset() { - GPU::Reset(); + GPUBackend::Reset(); m_vram.fill(0); } @@ -151,211 +165,122 @@ void GPU_SW::UpdateDisplay() // fill display texture m_display_texture_buffer.resize(VRAM_WIDTH * VRAM_HEIGHT); + HostDisplay* display = g_host_interface->GetDisplay(); + if (!g_settings.debugging.show_vram) { - if (IsDisplayDisabled()) + if (!m_display_enabled) { - m_host_display->ClearDisplayTexture(); + display->ClearDisplayTexture(); return; } - const u32 vram_offset_y = 
m_crtc_state.display_vram_top; - const u32 display_width = m_crtc_state.display_vram_width; - const u32 display_height = m_crtc_state.display_vram_height; - const u32 texture_offset_x = m_crtc_state.display_vram_left - m_crtc_state.regs.X; - if (IsInterlacedDisplayEnabled()) + const u32 vram_offset_x = m_display_vram_left; + const u32 vram_offset_y = m_display_vram_top; + const u32 display_width = m_display_vram_width; + const u32 display_height = m_display_vram_height; + const u32 texture_offset_x = m_display_vram_left - m_display_vram_start_x; + if (m_display_interlace != GPUInterlacedDisplayMode::None) { - const u32 field = GetInterlacedDisplayField(); - if (m_GPUSTAT.display_area_color_depth_24) + const u32 field = m_display_interlace_field; + const bool interleaved = (m_display_interlace == GPUInterlacedDisplayMode::InterleavedFields); + if (m_display_24bit) { - CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH, - VRAM_WIDTH, display_width + texture_offset_x, display_height, true, m_GPUSTAT.vertical_resolution); + CopyOut24Bit(m_display_vram_start_x, vram_offset_y + field, + m_display_texture_buffer.data() + field * VRAM_WIDTH, VRAM_WIDTH, display_width + texture_offset_x, + display_height, true, interleaved); } else { - CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH, - VRAM_WIDTH, display_width + texture_offset_x, display_height, true, m_GPUSTAT.vertical_resolution); + CopyOut15Bit(m_display_vram_start_x, vram_offset_y + field, + m_display_texture_buffer.data() + field * VRAM_WIDTH, VRAM_WIDTH, display_width + texture_offset_x, + display_height, true, interleaved); } } else { - if (m_GPUSTAT.display_area_color_depth_24) + if (m_display_24bit) { - CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH, + CopyOut24Bit(m_display_vram_start_x, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH, 
display_width + texture_offset_x, display_height, false, false); } else { - CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH, + CopyOut15Bit(m_display_vram_start_x, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH, display_width + texture_offset_x, display_height, false, false); } } - m_host_display->UpdateTexture(m_display_texture.get(), 0, 0, display_width, display_height, - m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32)); - m_host_display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, texture_offset_x, 0, - display_width, display_height); - m_host_display->SetDisplayParameters(m_crtc_state.display_width, m_crtc_state.display_height, - m_crtc_state.display_origin_left, m_crtc_state.display_origin_top, - m_crtc_state.display_vram_width, m_crtc_state.display_vram_height, - m_crtc_state.display_aspect_ratio); + display->UpdateTexture(m_display_texture.get(), 0, 0, display_width, display_height, + m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32)); + display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, texture_offset_x, 0, + display_width, display_height); + display->SetDisplayParameters(m_display_width, m_display_height, m_display_origin_left, m_display_origin_top, + m_display_vram_width, m_display_vram_height, m_display_aspect_ratio); } else { CopyOut15Bit(0, 0, m_display_texture_buffer.data(), VRAM_WIDTH, VRAM_WIDTH, VRAM_HEIGHT, false, false); - m_host_display->UpdateTexture(m_display_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT, - m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32)); - m_host_display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, - VRAM_HEIGHT); - m_host_display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, - static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); + display->UpdateTexture(m_display_texture.get(), 0, 0, VRAM_WIDTH, 
VRAM_HEIGHT, m_display_texture_buffer.data(), + VRAM_WIDTH * sizeof(u32)); + display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT); + display->SetDisplayParameters(VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, + static_cast(VRAM_WIDTH) / static_cast(VRAM_HEIGHT)); } } -void GPU_SW::DispatchRenderCommand() +void GPU_SW::ReadVRAM(u32 x, u32 y, u32 width, u32 height) { - const RenderCommand rc{m_render_command.bits}; - const bool dithering_enable = rc.IsDitheringEnabled() && m_GPUSTAT.dither_enable; + // no-op +} - switch (rc.primitive) - { - case Primitive::Polygon: - { - const u32 first_color = rc.color_for_first_vertex; - const bool shaded = rc.shading_enable; - const bool textured = rc.texture_enable; +void GPU_SW::FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) +{ + SoftwareFillVRAM(x, y, width, height, color, params); +} - const u32 num_vertices = rc.quad_polygon ? 4 : 3; - std::array vertices; - for (u32 i = 0; i < num_vertices; i++) - { - SWVertex& vert = vertices[i]; - const u32 color_rgb = (shaded && i > 0) ? 
(FifoPop() & UINT32_C(0x00FFFFFF)) : first_color; - vert.color_r = Truncate8(color_rgb); - vert.color_g = Truncate8(color_rgb >> 8); - vert.color_b = Truncate8(color_rgb >> 16); +void GPU_SW::UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) +{ + SoftwareUpdateVRAM(x, y, width, height, data, params); +} - const VertexPosition vp{FifoPop()}; - vert.x = vp.x; - vert.y = vp.y; +void GPU_SW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) +{ + SoftwareCopyVRAM(src_x, src_y, dst_x, dst_y, width, height, params); +} - if (textured) - { - std::tie(vert.texcoord_x, vert.texcoord_y) = UnpackTexcoord(Truncate16(FifoPop())); - } - else - { - vert.texcoord_x = 0; - vert.texcoord_y = 0; - } - } +void GPU_SW::DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) +{ + const GPURenderCommand rc{cmd->rc.bits}; + const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; - if (!IsDrawingAreaIsValid()) - return; + const DrawTriangleFunction DrawFunction = GetDrawTriangleFunction( + rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable); - const DrawTriangleFunction DrawFunction = GetDrawTriangleFunction( - rc.shading_enable, rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable, dithering_enable); + (this->*DrawFunction)(cmd, &cmd->vertices[0], &cmd->vertices[1], &cmd->vertices[2]); + if (rc.quad_polygon) + (this->*DrawFunction)(cmd, &cmd->vertices[2], &cmd->vertices[1], &cmd->vertices[3]); +} - (this->*DrawFunction)(&vertices[0], &vertices[1], &vertices[2]); - if (num_vertices > 3) - (this->*DrawFunction)(&vertices[2], &vertices[1], &vertices[3]); - } - break; +void GPU_SW::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) +{ + const GPURenderCommand rc{cmd->rc.bits}; + const bool dithering_enable = rc.IsDitheringEnabled() && cmd->draw_mode.dither_enable; - case 
Primitive::Rectangle: - { - const auto [r, g, b] = UnpackColorRGB24(rc.color_for_first_vertex); - const VertexPosition vp{FifoPop()}; - const u32 texcoord_and_palette = rc.texture_enable ? FifoPop() : 0; - const auto [texcoord_x, texcoord_y] = UnpackTexcoord(Truncate16(texcoord_and_palette)); + const DrawRectangleFunction DrawFunction = + GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); - s32 width; - s32 height; - switch (rc.rectangle_size) - { - case DrawRectangleSize::R1x1: - width = 1; - height = 1; - break; - case DrawRectangleSize::R8x8: - width = 8; - height = 8; - break; - case DrawRectangleSize::R16x16: - width = 16; - height = 16; - break; - default: - { - const u32 width_and_height = FifoPop(); - width = static_cast(width_and_height & VRAM_WIDTH_MASK); - height = static_cast((width_and_height >> 16) & VRAM_HEIGHT_MASK); + (this->*DrawFunction)(cmd); +} - if (width >= MAX_PRIMITIVE_WIDTH || height >= MAX_PRIMITIVE_HEIGHT) - { - Log_DebugPrintf("Culling too-large rectangle: %d,%d %dx%d", vp.x.GetValue(), vp.y.GetValue(), width, - height); - return; - } - } - break; - } +void GPU_SW::DrawLine(const GPUBackendDrawLineCommand* cmd) +{ + const DrawLineFunction DrawFunction = + GetDrawLineFunction(cmd->rc.shading_enable, cmd->rc.transparency_enable, cmd->IsDitheringEnabled()); - if (!IsDrawingAreaIsValid()) - return; - - const DrawRectangleFunction DrawFunction = - GetDrawRectangleFunction(rc.texture_enable, rc.raw_texture_enable, rc.transparency_enable); - - (this->*DrawFunction)(vp.x, vp.y, width, height, r, g, b, texcoord_x, texcoord_y); - } - break; - - case Primitive::Line: - { - const u32 first_color = rc.color_for_first_vertex; - const bool shaded = rc.shading_enable; - - const DrawLineFunction DrawFunction = GetDrawLineFunction(shaded, rc.transparency_enable, dithering_enable); - - std::array vertices = {}; - u32 buffer_pos = 0; - - // first vertex - SWVertex* p0 = &vertices[0]; - SWVertex* p1 = &vertices[1]; - 
p0->SetPosition(VertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : Truncate32(FifoPop())}); - p0->SetColorRGB24(first_color); - - // remaining vertices in line strip - const u32 num_vertices = rc.polyline ? GetPolyLineVertexCount() : 2; - for (u32 i = 1; i < num_vertices; i++) - { - if (rc.polyline) - { - p1->SetColorRGB24(shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color); - p1->SetPosition(VertexPosition{m_blit_buffer[buffer_pos++]}); - } - else - { - p1->SetColorRGB24(shaded ? (FifoPop() & UINT32_C(0x00FFFFFF)) : first_color); - p1->SetPosition(VertexPosition{Truncate32(FifoPop())}); - } - - // down here because of the FIFO pops - if (IsDrawingAreaIsValid()) - (this->*DrawFunction)(p0, p1); - - // swap p0/p1 so that the last vertex is used as the first for the next line - std::swap(p0, p1); - } - } - break; - - default: - UnreachableCode(); - break; - } + for (u16 i = 1; i < cmd->num_vertices; i++) + (this->*DrawFunction)(cmd, &cmd->vertices[i - 1], &cmd->vertices[i]); } enum : u32 @@ -383,7 +308,9 @@ constexpr u8 FixedColorToInt(FixedPointColor r) return Truncate8(r >> 12); } -bool GPU_SW::IsClockwiseWinding(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2) +bool GPU_SW::IsClockwiseWinding(const GPUBackendDrawPolygonCommand::Vertex* v0, + const GPUBackendDrawPolygonCommand::Vertex* v1, + const GPUBackendDrawPolygonCommand::Vertex* v2) { const s32 abx = v1->x - v0->x; const s32 aby = v1->y - v0->y; @@ -407,7 +334,9 @@ static constexpr u8 Interpolate(u8 v0, u8 v1, u8 v2, s32 w0, s32 w1, s32 w2, s32 template -void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2) +void GPU_SW::DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, + const GPUBackendDrawPolygonCommand::Vertex* v1, + const GPUBackendDrawPolygonCommand::Vertex* v2) { #define orient2d(ax, ay, bx, by, cx, cy) ((bx - ax) * (cy - ay) - (by - ay) * (cx - ax)) @@ -415,12 +344,12 @@ void 
GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex if (IsClockwiseWinding(v0, v1, v2)) std::swap(v1, v2); - const s32 px0 = v0->x + m_drawing_offset.x; - const s32 py0 = v0->y + m_drawing_offset.y; - const s32 px1 = v1->x + m_drawing_offset.x; - const s32 py1 = v1->y + m_drawing_offset.y; - const s32 px2 = v2->x + m_drawing_offset.x; - const s32 py2 = v2->y + m_drawing_offset.y; + const s32 px0 = v0->x; + const s32 py0 = v0->y; + const s32 px1 = v1->x; + const s32 py1 = v1->y; + const s32 px2 = v2->x; + const s32 py2 = v2->y; // Barycentric coordinates at minX/minY corner const s32 ws = orient2d(px0, py0, px1, py1, px2, py2); @@ -434,16 +363,11 @@ void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex s32 min_y = std::min(py0, std::min(py1, py2)); s32 max_y = std::max(py0, std::max(py1, py2)); - // reject triangles which cover the whole vram area - if (static_cast(max_x - min_x) > MAX_PRIMITIVE_WIDTH || static_cast(max_y - min_y) > MAX_PRIMITIVE_HEIGHT) - return; - // clip to drawing area min_x = std::clamp(min_x, static_cast(m_drawing_area.left), static_cast(m_drawing_area.right)); max_x = std::clamp(max_x, static_cast(m_drawing_area.left), static_cast(m_drawing_area.right)); min_y = std::clamp(min_y, static_cast(m_drawing_area.top), static_cast(m_drawing_area.bottom)); max_y = std::clamp(max_y, static_cast(m_drawing_area.top), static_cast(m_drawing_area.bottom)); - AddDrawTriangleTicks(max_x - min_x + 1, max_y - min_y + 1, shading_enable, texture_enable, transparency_enable); // compute per-pixel increments const s32 a01 = py0 - py1, b01 = px1 - px0; @@ -476,17 +400,17 @@ void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex const s32 b2 = row_w2; const u8 r = - shading_enable ? Interpolate(v0->color_r, v1->color_r, v2->color_r, b0, b1, b2, ws, half_ws) : v0->color_r; + shading_enable ? 
Interpolate(v0->GetR(), v1->GetR(), v2->GetR(), b0, b1, b2, ws, half_ws) : v0->GetR(); const u8 g = - shading_enable ? Interpolate(v0->color_g, v1->color_g, v2->color_g, b0, b1, b2, ws, half_ws) : v0->color_g; + shading_enable ? Interpolate(v0->GetG(), v1->GetG(), v2->GetG(), b0, b1, b2, ws, half_ws) : v0->GetG(); const u8 b = - shading_enable ? Interpolate(v0->color_b, v1->color_b, v2->color_b, b0, b1, b2, ws, half_ws) : v0->color_b; + shading_enable ? Interpolate(v0->GetB(), v1->GetB(), v2->GetB(), b0, b1, b2, ws, half_ws) : v0->GetB(); - const u8 texcoord_x = Interpolate(v0->texcoord_x, v1->texcoord_x, v2->texcoord_x, b0, b1, b2, ws, half_ws); - const u8 texcoord_y = Interpolate(v0->texcoord_y, v1->texcoord_y, v2->texcoord_y, b0, b1, b2, ws, half_ws); + const u8 u = texture_enable ? Interpolate(v0->GetU(), v1->GetU(), v2->GetU(), b0, b1, b2, ws, half_ws) : 0; + const u8 v = texture_enable ? Interpolate(v0->GetV(), v1->GetV(), v2->GetV(), b0, b1, b2, ws, half_ws) : 0; ShadePixel( - static_cast(x), static_cast(y), r, g, b, texcoord_x, texcoord_y); + cmd, static_cast(x), static_cast(y), r, g, b, u, v); } row_w0 += a12; @@ -534,42 +458,31 @@ GPU_SW::DrawTriangleFunction GPU_SW::GetDrawTriangleFunction(bool shading_enable } template -void GPU_SW::DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x, - u8 origin_texcoord_y) +void GPU_SW::DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) { - const s32 start_x = TruncateVertexPosition(m_drawing_offset.x + origin_x); - const s32 start_y = TruncateVertexPosition(m_drawing_offset.y + origin_y); + const s32 origin_x = cmd->x; + const s32 origin_y = cmd->y; + const auto [r, g, b] = UnpackColorRGB24(cmd->color); + const auto [origin_texcoord_x, origin_texcoord_y] = UnpackTexcoord(cmd->texcoord); + for (u32 offset_y = 0; offset_y < cmd->height; offset_y++) { - const u32 clip_left = static_cast(std::clamp(start_x, m_drawing_area.left, m_drawing_area.right)); - const u32 
clip_right = - static_cast(std::clamp(start_x + static_cast(width), m_drawing_area.left, m_drawing_area.right)) + - 1u; - const u32 clip_top = static_cast(std::clamp(start_y, m_drawing_area.top, m_drawing_area.bottom)); - const u32 clip_bottom = - static_cast(std::clamp(start_y + static_cast(height), m_drawing_area.top, m_drawing_area.bottom)) + - 1u; - AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, texture_enable, transparency_enable); - } - - for (u32 offset_y = 0; offset_y < height; offset_y++) - { - const s32 y = start_y + static_cast(offset_y); + const s32 y = origin_y + static_cast(offset_y); if (y < static_cast(m_drawing_area.top) || y > static_cast(m_drawing_area.bottom)) continue; const u8 texcoord_y = Truncate8(ZeroExtend32(origin_texcoord_y) + offset_y); - for (u32 offset_x = 0; offset_x < width; offset_x++) + for (u32 offset_x = 0; offset_x < cmd->width; offset_x++) { - const s32 x = start_x + static_cast(offset_x); + const s32 x = origin_x + static_cast(offset_x); if (x < static_cast(m_drawing_area.left) || x > static_cast(m_drawing_area.right)) continue; const u8 texcoord_x = Truncate8(ZeroExtend32(origin_texcoord_x) + offset_x); ShadePixel( - static_cast(x), static_cast(y), r, g, b, texcoord_x, texcoord_y); + cmd, static_cast(x), static_cast(y), r, g, b, texcoord_x, texcoord_y); } } } @@ -583,7 +496,7 @@ constexpr GPU_SW::DitherLUT GPU_SW::ComputeDitherLUT() { for (s32 value = 0; value < DITHER_LUT_SIZE; value++) { - const s32 dithered_value = (value + DITHER_MATRIX[i][j]) >> 3; + const s32 dithered_value = (value + GPU::DITHER_MATRIX[i][j]) >> 3; lut[i][j][value] = static_cast((dithered_value < 0) ? 0 : ((dithered_value > 31) ? 
31 : dithered_value)); } } @@ -594,7 +507,8 @@ constexpr GPU_SW::DitherLUT GPU_SW::ComputeDitherLUT() static constexpr GPU_SW::DitherLUT s_dither_lut = GPU_SW::ComputeDitherLUT(); template -void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y) +void GPU_SW::ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, + u8 texcoord_x, u8 texcoord_y) { VRAMPixel color; bool transparent; @@ -602,38 +516,41 @@ void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 tex { // Apply texture window // TODO: Precompute the second half - texcoord_x = (texcoord_x & ~(m_draw_mode.texture_window_mask_x * 8u)) | - ((m_draw_mode.texture_window_offset_x & m_draw_mode.texture_window_mask_x) * 8u); - texcoord_y = (texcoord_y & ~(m_draw_mode.texture_window_mask_y * 8u)) | - ((m_draw_mode.texture_window_offset_y & m_draw_mode.texture_window_mask_y) * 8u); + texcoord_x = (texcoord_x & ~(cmd->window.mask_x * 8u)) | ((cmd->window.offset_x & cmd->window.mask_x) * 8u); + texcoord_y = (texcoord_y & ~(cmd->window.mask_y * 8u)) | ((cmd->window.offset_y & cmd->window.mask_y) * 8u); VRAMPixel texture_color; - switch (m_draw_mode.GetTextureMode()) + switch (cmd->draw_mode.texture_mode) { - case GPU::TextureMode::Palette4Bit: + case GPUTextureMode::Palette4Bit: { - const u16 palette_value = GetPixel((m_draw_mode.texture_page_x + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH, - (m_draw_mode.texture_page_y + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); + const u16 palette_value = + GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 4)) % VRAM_WIDTH, + (cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); const u16 palette_index = (palette_value >> ((texcoord_x % 4) * 4)) & 0x0Fu; - texture_color.bits = GetPixel((m_draw_mode.texture_palette_x + ZeroExtend32(palette_index)) % VRAM_WIDTH, - m_draw_mode.texture_palette_y); + + const u32 px = 
(cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH; + const u32 py = cmd->palette.GetYBase(); + texture_color.bits = + GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase()); } break; - case GPU::TextureMode::Palette8Bit: + case GPUTextureMode::Palette8Bit: { - const u16 palette_value = GetPixel((m_draw_mode.texture_page_x + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH, - (m_draw_mode.texture_page_y + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); + const u16 palette_value = + GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x / 2)) % VRAM_WIDTH, + (cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); const u16 palette_index = (palette_value >> ((texcoord_x % 2) * 8)) & 0xFFu; - texture_color.bits = GetPixel((m_draw_mode.texture_palette_x + ZeroExtend32(palette_index)) % VRAM_WIDTH, - m_draw_mode.texture_palette_y); + texture_color.bits = + GetPixel((cmd->palette.GetXBase() + ZeroExtend32(palette_index)) % VRAM_WIDTH, cmd->palette.GetYBase()); } break; default: { - texture_color.bits = GetPixel((m_draw_mode.texture_page_x + ZeroExtend32(texcoord_x)) % VRAM_WIDTH, - (m_draw_mode.texture_page_y + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); + texture_color.bits = GetPixel((cmd->draw_mode.GetTexturePageBaseX() + ZeroExtend32(texcoord_x)) % VRAM_WIDTH, + (cmd->draw_mode.GetTexturePageBaseY() + ZeroExtend32(texcoord_y)) % VRAM_HEIGHT); } break; } @@ -684,18 +601,18 @@ void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 tex color.Set(func(bg_color.r.GetValue(), color.r.GetValue()), func(bg_color.g.GetValue(), color.g.GetValue()), \ func(bg_color.b.GetValue(), color.b.GetValue()), color.c.GetValue()) - switch (m_draw_mode.GetTransparencyMode()) + switch (cmd->draw_mode.transparency_mode) { - case GPU::TransparencyMode::HalfBackgroundPlusHalfForeground: + case GPUTransparencyMode::HalfBackgroundPlusHalfForeground: BLEND_RGB(BLEND_AVERAGE); 
break; - case GPU::TransparencyMode::BackgroundPlusForeground: + case GPUTransparencyMode::BackgroundPlusForeground: BLEND_RGB(BLEND_ADD); break; - case GPU::TransparencyMode::BackgroundMinusForeground: + case GPUTransparencyMode::BackgroundMinusForeground: BLEND_RGB(BLEND_SUBTRACT); break; - case GPU::TransparencyMode::BackgroundPlusQuarterForeground: + case GPUTransparencyMode::BackgroundPlusQuarterForeground: BLEND_RGB(BLEND_QUARTER); break; default: @@ -715,14 +632,14 @@ void GPU_SW::ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 tex UNREFERENCED_VARIABLE(transparent); } - const u16 mask_and = m_GPUSTAT.GetMaskAND(); + const u16 mask_and = cmd->params.GetMaskAND(); if ((bg_color.bits & mask_and) != 0) return; - if (IsInterlacedRenderingEnabled() && GetActiveLineLSB() == (static_cast(y) & 1u)) + if (cmd->params.interlaced_rendering && cmd->params.active_line_lsb == (Truncate8(static_cast(y)) & 1u)) return; - SetPixel(static_cast(x), static_cast(y), color.bits | m_GPUSTAT.GetMaskOR()); + SetPixel(static_cast(x), static_cast(y), color.bits | cmd->params.GetMaskOR()); } constexpr FixedPointCoord GetLineCoordStep(s32 delta, s32 k) @@ -747,7 +664,8 @@ constexpr FixedPointColor GetLineColorStep(s32 delta, s32 k) } template -void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1) +void GPU_SW::DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0, + const GPUBackendDrawLineCommand::Vertex* p1) { // Algorithm based on Mednafen. 
if (p0->x > p1->x) @@ -757,21 +675,6 @@ void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1) const s32 dy = p1->y - p0->y; const s32 k = std::max(std::abs(dx), std::abs(dy)); - { - // TODO: Move to base class - const s32 min_x = std::min(p0->x, p1->x); - const s32 max_x = std::max(p0->x, p1->x); - const s32 min_y = std::min(p0->y, p1->y); - const s32 max_y = std::max(p0->y, p1->y); - - const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.left)); - const u32 clip_right = static_cast(std::clamp(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; - const u32 clip_top = static_cast(std::clamp(min_y, m_drawing_area.top, m_drawing_area.bottom)); - const u32 clip_bottom = static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; - - AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, shading_enable); - } - FixedPointCoord step_x, step_y; FixedPointColor step_r, step_g, step_b; if (k > 0) @@ -781,9 +684,9 @@ void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1) if constexpr (shading_enable) { - step_r = GetLineColorStep(s32(ZeroExtend32(p1->color_r)) - s32(ZeroExtend32(p0->color_r)), k); - step_g = GetLineColorStep(s32(ZeroExtend32(p1->color_g)) - s32(ZeroExtend32(p0->color_g)), k); - step_b = GetLineColorStep(s32(ZeroExtend32(p1->color_b)) - s32(ZeroExtend32(p0->color_b)), k); + step_r = GetLineColorStep(s32(ZeroExtend32(p1->GetR())) - s32(ZeroExtend32(p0->GetR())), k); + step_g = GetLineColorStep(s32(ZeroExtend32(p1->GetG())) - s32(ZeroExtend32(p0->GetG())), k); + step_b = GetLineColorStep(s32(ZeroExtend32(p1->GetB())) - s32(ZeroExtend32(p0->GetB())), k); } else { @@ -803,24 +706,25 @@ void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1) FixedPointCoord current_x = IntToFixedCoord(p0->x); FixedPointCoord current_y = IntToFixedCoord(p0->y); - FixedPointColor current_r = IntToFixedColor(p0->color_r); - FixedPointColor current_g = IntToFixedColor(p0->color_g); - 
FixedPointColor current_b = IntToFixedColor(p0->color_b); + FixedPointColor current_r = IntToFixedColor(p0->GetR()); + FixedPointColor current_g = IntToFixedColor(p0->GetG()); + FixedPointColor current_b = IntToFixedColor(p0->GetB()); for (s32 i = 0; i <= k; i++) { - const s32 x = m_drawing_offset.x + FixedToIntCoord(current_x); - const s32 y = m_drawing_offset.y + FixedToIntCoord(current_y); + // FIXME: Draw offset should be applied here + const s32 x = /*m_drawing_offset.x + */ FixedToIntCoord(current_x); + const s32 y = /*m_drawing_offset.y + */ FixedToIntCoord(current_y); - const u8 r = shading_enable ? FixedColorToInt(current_r) : p0->color_r; - const u8 g = shading_enable ? FixedColorToInt(current_g) : p0->color_g; - const u8 b = shading_enable ? FixedColorToInt(current_b) : p0->color_b; + const u8 r = shading_enable ? FixedColorToInt(current_r) : p0->GetR(); + const u8 g = shading_enable ? FixedColorToInt(current_g) : p0->GetG(); + const u8 b = shading_enable ? FixedColorToInt(current_b) : p0->GetB(); if (x >= static_cast(m_drawing_area.left) && x <= static_cast(m_drawing_area.right) && y >= static_cast(m_drawing_area.top) && y <= static_cast(m_drawing_area.bottom)) { - ShadePixel(static_cast(x), static_cast(y), r, g, b, - 0, 0); + ShadePixel(cmd, static_cast(x), static_cast(y), r, + g, b, 0, 0); } current_x += step_x; @@ -863,7 +767,7 @@ GPU_SW::DrawRectangleFunction GPU_SW::GetDrawRectangleFunction(bool texture_enab return funcs[u8(texture_enable)][u8(raw_texture_enable)][u8(transparency_enable)]; } -std::unique_ptr GPU::CreateSoftwareRenderer() +void GPU_SW::FlushRender() { - return std::make_unique(); + // no-op } diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index a9113e2fb..08f6a368d 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -1,12 +1,13 @@ #pragma once #include "gpu.h" +#include "gpu_backend.h" #include #include #include class HostDisplayTexture; -class GPU_SW final : public GPU +class GPU_SW final : public GPUBackend { public: 
GPU_SW(); @@ -14,7 +15,7 @@ public: bool IsHardwareRenderer() const override; - bool Initialize(HostDisplay* host_display) override; + bool Initialize() override; void Reset() override; u16 GetPixel(u32 x, u32 y) const { return m_vram[VRAM_WIDTH * y + x]; } @@ -28,20 +29,61 @@ public: static constexpr DitherLUT ComputeDitherLUT(); protected: - struct SWVertex - { - s32 x, y; - u8 color_r, color_g, color_b; - u8 texcoord_x, texcoord_y; + static constexpr u8 Convert5To8(u8 x5) { return (x5 << 3) | (x5 & 7); } + static constexpr u8 Convert8To5(u8 x8) { return (x8 >> 3); } - ALWAYS_INLINE void SetPosition(VertexPosition p) + union VRAMPixel + { + u16 bits; + + BitField r; + BitField g; + BitField b; + BitField c; + + u8 GetR8() const { return Convert5To8(r); } + u8 GetG8() const { return Convert5To8(g); } + u8 GetB8() const { return Convert5To8(b); } + + void Set(u8 r_, u8 g_, u8 b_, bool c_ = false) { - x = p.x; - y = p.y; + bits = (ZeroExtend16(r_)) | (ZeroExtend16(g_) << 5) | (ZeroExtend16(b_) << 10) | (static_cast(c_) << 15); } - ALWAYS_INLINE void SetColorRGB24(u32 color) { std::tie(color_r, color_g, color_b) = UnpackColorRGB24(color); } - ALWAYS_INLINE void SetTexcoord(u16 value) { std::tie(texcoord_x, texcoord_y) = UnpackTexcoord(value); } + void ClampAndSet(u8 r_, u8 g_, u8 b_, bool c_ = false) + { + Set(std::min(r_, 0x1F), std::min(g_, 0x1F), std::min(b_, 0x1F), c_); + } + + void SetRGB24(u32 rgb24, bool c_ = false) + { + bits = Truncate16(((rgb24 >> 3) & 0x1F) | (((rgb24 >> 11) & 0x1F) << 5) | (((rgb24 >> 19) & 0x1F) << 10)) | + (static_cast(c_) << 15); + } + + void SetRGB24(u8 r8, u8 g8, u8 b8, bool c_ = false) + { + bits = (ZeroExtend16(r8 >> 3)) | (ZeroExtend16(g8 >> 3) << 5) | (ZeroExtend16(b8 >> 3) << 10) | + (static_cast(c_) << 15); + } + + void SetRGB24Dithered(u32 x, u32 y, u8 r8, u8 g8, u8 b8, bool c_ = false) + { + const s32 offset = GPU::DITHER_MATRIX[y & 3][x & 3]; + r8 = static_cast(std::clamp(static_cast(ZeroExtend32(r8)) + offset, 0, 255)); + 
g8 = static_cast(std::clamp(static_cast(ZeroExtend32(g8)) + offset, 0, 255)); + b8 = static_cast(std::clamp(static_cast(ZeroExtend32(b8)) + offset, 0, 255)); + SetRGB24(r8, g8, b8, c_); + } + + u32 ToRGB24() const + { + const u32 r_ = ZeroExtend32(r.GetValue()); + const u32 g_ = ZeroExtend32(g.GetValue()); + const u32 b_ = ZeroExtend32(b.GetValue()); + + return ((r_ << 3) | (r_ & 7)) | (((g_ << 3) | (g_ & 7)) << 8) | (((b_ << 3) | (b_ & 7)) << 16); + } }; ////////////////////////////////////////////////////////////////////////// @@ -53,39 +95,51 @@ protected: bool interleaved); void ClearDisplay() override; void UpdateDisplay() override; + void ReadVRAM(u32 x, u32 y, u32 width, u32 height) override; + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color, GPUBackendCommandParameters params) override; + void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data, GPUBackendCommandParameters params) override; + void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height, + GPUBackendCommandParameters params) override; ////////////////////////////////////////////////////////////////////////// // Rasterization ////////////////////////////////////////////////////////////////////////// - void DispatchRenderCommand() override; + void DrawPolygon(const GPUBackendDrawPolygonCommand* cmd) override; + void DrawLine(const GPUBackendDrawLineCommand* cmd) override; + void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd) override; + void FlushRender() override; - static bool IsClockwiseWinding(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2); + static bool IsClockwiseWinding(const GPUBackendDrawPolygonCommand::Vertex* v0, const GPUBackendDrawPolygonCommand::Vertex* v1, + const GPUBackendDrawPolygonCommand::Vertex* v2); template - void ShadePixel(u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, u8 texcoord_x, u8 texcoord_y); + void ShadePixel(const GPUBackendDrawCommand* cmd, u32 x, u32 y, u8 color_r, u8 color_g, u8 color_b, 
u8 texcoord_x, + u8 texcoord_y); template - void DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2); + void DrawTriangle(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, + const GPUBackendDrawPolygonCommand::Vertex* v1, const GPUBackendDrawPolygonCommand::Vertex* v2); - using DrawTriangleFunction = void (GPU_SW::*)(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2); + using DrawTriangleFunction = void (GPU_SW::*)(const GPUBackendDrawPolygonCommand* cmd, const GPUBackendDrawPolygonCommand::Vertex* v0, + const GPUBackendDrawPolygonCommand::Vertex* v1, + const GPUBackendDrawPolygonCommand::Vertex* v2); DrawTriangleFunction GetDrawTriangleFunction(bool shading_enable, bool texture_enable, bool raw_texture_enable, bool transparency_enable, bool dithering_enable); template - void DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, u8 origin_texcoord_x, - u8 origin_texcoord_y); + void DrawRectangle(const GPUBackendDrawRectangleCommand* cmd); - using DrawRectangleFunction = void (GPU_SW::*)(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 r, u8 g, u8 b, - u8 origin_texcoord_x, u8 origin_texcoord_y); + using DrawRectangleFunction = void (GPU_SW::*)(const GPUBackendDrawRectangleCommand* cmd); DrawRectangleFunction GetDrawRectangleFunction(bool texture_enable, bool raw_texture_enable, bool transparency_enable); template - void DrawLine(const SWVertex* p0, const SWVertex* p1); + void DrawLine(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0, const GPUBackendDrawLineCommand::Vertex* p1); - using DrawLineFunction = void (GPU_SW::*)(const SWVertex* p0, const SWVertex* p1); + using DrawLineFunction = void (GPU_SW::*)(const GPUBackendDrawLineCommand* cmd, const GPUBackendDrawLineCommand::Vertex* p0, + const GPUBackendDrawLineCommand::Vertex* p1); DrawLineFunction GetDrawLineFunction(bool shading_enable, bool transparency_enable, bool 
dithering_enable); std::vector m_display_texture_buffer; diff --git a/src/core/gpu_types.h b/src/core/gpu_types.h new file mode 100644 index 000000000..ffc06fb64 --- /dev/null +++ b/src/core/gpu_types.h @@ -0,0 +1,455 @@ +#pragma once +#include "common/bitfield.h" +#include "common/rectangle.h" +#include "types.h" +#include + +enum : u32 +{ + VRAM_WIDTH = 1024, + VRAM_HEIGHT = 512, + VRAM_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16), + VRAM_WIDTH_MASK = VRAM_WIDTH - 1, + VRAM_HEIGHT_MASK = VRAM_HEIGHT - 1, + VRAM_COORD_MASK = 0x3FF, + TEXTURE_PAGE_WIDTH = 256, + TEXTURE_PAGE_HEIGHT = 256, + MAX_PRIMITIVE_WIDTH = 1024, + MAX_PRIMITIVE_HEIGHT = 512, + DITHER_MATRIX_SIZE = 4 +}; + +enum class GPUPrimitive : u8 +{ + Reserved = 0, + Polygon = 1, + Line = 2, + Rectangle = 3 +}; + +enum class GPUDrawRectangleSize : u8 +{ + Variable = 0, + R1x1 = 1, + R8x8 = 2, + R16x16 = 3 +}; + +enum class GPUTextureMode : u8 +{ + Palette4Bit = 0, + Palette8Bit = 1, + Direct16Bit = 2, + Reserved_Direct16Bit = 3, + + // Not register values. 
+ RawTextureBit = 4, + RawPalette4Bit = RawTextureBit | Palette4Bit, + RawPalette8Bit = RawTextureBit | Palette8Bit, + RawDirect16Bit = RawTextureBit | Direct16Bit, + Reserved_RawDirect16Bit = RawTextureBit | Reserved_Direct16Bit, + + Disabled = 8 // Not a register value +}; + +IMPLEMENT_ENUM_CLASS_BITWISE_OPERATORS(GPUTextureMode); + +enum class GPUTransparencyMode : u8 +{ + HalfBackgroundPlusHalfForeground = 0, + BackgroundPlusForeground = 1, + BackgroundMinusForeground = 2, + BackgroundPlusQuarterForeground = 3, + + Disabled = 4 // Not a register value +}; + +enum class GPUInterlacedDisplayMode : u8 +{ + None, + InterleavedFields, + SeparateFields +}; + +union GPURenderCommand +{ + u32 bits; + + BitField color_for_first_vertex; + BitField raw_texture_enable; // not valid for lines + BitField transparency_enable; + BitField texture_enable; + BitField rectangle_size; // only for rectangles + BitField quad_polygon; // only for polygons + BitField polyline; // only for lines + BitField shading_enable; // 0 = flat, 1 = Gouraud + BitField primitive; + + /// Returns true if texturing should be enabled. Depends on the primitive type. + ALWAYS_INLINE bool IsTexturingEnabled() const { return (primitive != GPUPrimitive::Line) ? texture_enable : false; } + + /// Returns true if dithering should be enabled. Depends on the primitive type. + ALWAYS_INLINE bool IsDitheringEnabled() const + { + switch (primitive) + { + case GPUPrimitive::Polygon: + return shading_enable || (texture_enable && !raw_texture_enable); + + case GPUPrimitive::Line: + return true; + + case GPUPrimitive::Rectangle: + default: + return false; + } + } +}; + +// Helper/format conversion functions. 
+static constexpr u32 RGBA5551ToRGBA8888(u16 color) +{ + u8 r = Truncate8(color & 31); + u8 g = Truncate8((color >> 5) & 31); + u8 b = Truncate8((color >> 10) & 31); + u8 a = Truncate8((color >> 15) & 1); + + // 00012345 -> 12345345 + b = (b << 3) | (b & 0b111); + g = (g << 3) | (g & 0b111); + r = (r << 3) | (r & 0b111); + a = a ? 255 : 0; + + return ZeroExtend32(r) | (ZeroExtend32(g) << 8) | (ZeroExtend32(b) << 16) | (ZeroExtend32(a) << 24); +} + +static constexpr u16 RGBA8888ToRGBA5551(u32 color) +{ + const u16 r = Truncate16((color >> 3) & 0x1Fu); + const u16 g = Truncate16((color >> 11) & 0x1Fu); + const u16 b = Truncate16((color >> 19) & 0x1Fu); + const u16 a = Truncate16((color >> 31) & 0x01u); + + return r | (g << 5) | (b << 10) | (a << 15); +} + +union GPUVertexPosition +{ + u32 bits; + + BitField x; + BitField y; +}; + +// Sprites/rectangles should be clipped to 12 bits before drawing. +static constexpr s32 TruncateGPUVertexPosition(s32 x) +{ + return SignExtendN<11, s32>(x); +} + +// bits in GP0(E1h) or texpage part of polygon +union GPUDrawModeReg +{ + static constexpr u16 MASK = 0b1111111111111; + static constexpr u16 TEXTURE_PAGE_MASK = UINT16_C(0b0000000000011111); + + // Polygon texpage commands only affect bits 0-8, 11 + static constexpr u16 POLYGON_TEXPAGE_MASK = 0b0000100111111111; + + // Bits 0..10 are returned in the GPU status register, latched at E1h/polygon draw time. 
+ static constexpr u32 GPUSTAT_MASK = 0b11111111111; + + u16 bits; + + BitField texture_page_x_base; + BitField texture_page_y_base; + BitField transparency_mode; + BitField texture_mode; + BitField dither_enable; + BitField draw_to_displayed_field; + BitField texture_disable; + BitField texture_x_flip; + BitField texture_y_flip; + + ALWAYS_INLINE u16 GetTexturePageBaseX() const { return ZeroExtend16(texture_page_x_base.GetValue()) * 64; } + ALWAYS_INLINE u16 GetTexturePageBaseY() const { return ZeroExtend16(texture_page_y_base.GetValue()) * 256; } + + /// Returns true if the texture mode requires a palette. + bool IsUsingPalette() const { return (bits & (2 << 7)) == 0; } + + /// Returns a rectangle comprising the texture page area. + Common::Rectangle GetTexturePageRectangle() const + { + static constexpr std::array texture_page_widths = { + {TEXTURE_PAGE_WIDTH / 4, TEXTURE_PAGE_WIDTH / 2, TEXTURE_PAGE_WIDTH, TEXTURE_PAGE_WIDTH}}; + return Common::Rectangle::FromExtents(GetTexturePageBaseX(), GetTexturePageBaseY(), + texture_page_widths[static_cast(texture_mode.GetValue())], + TEXTURE_PAGE_HEIGHT); + } + + /// Returns a rectangle comprising the texture palette area. + Common::Rectangle GetTexturePaletteRectangle() const + { + static constexpr std::array palette_widths = {{16, 256, 0, 0}}; + return Common::Rectangle::FromExtents(GetTexturePageBaseX(), GetTexturePageBaseY(), + palette_widths[static_cast(texture_mode.GetValue())], 1); + } +}; + +union GPUTexturePaletteReg +{ + static constexpr u16 MASK = UINT16_C(0b0111111111111111); + + u16 bits; + + BitField x; + BitField y; + + ALWAYS_INLINE u32 GetXBase() const { return static_cast(x) * 16u; } + ALWAYS_INLINE u32 GetYBase() const { return static_cast(y); } +}; + +union GPUTextureWindowReg +{ + static constexpr u32 MASK = 0b11111111111111111111; + + u32 bits; + + BitField mask_x; + BitField mask_y; + BitField offset_x; + BitField offset_y; +}; + +// 4x4 dither matrix. 
+static constexpr s32 DITHER_MATRIX[DITHER_MATRIX_SIZE][DITHER_MATRIX_SIZE] = {{-4, +0, -3, +1}, // row 0 + {+2, -2, +3, -1}, // row 1 + {-3, +1, -4, +0}, // row 2 + {+4, -1, +2, -2}}; // row 3 + +enum class GPUBackendCommandType : u8 +{ + Sync, + FrameDone, + Reset, + UpdateSettings, + UpdateResolutionScale, + ReadVRAM, + FillVRAM, + UpdateVRAM, + CopyVRAM, + SetDrawingArea, + DrawPolygon, + DrawRectangle, + DrawLine, + ClearDisplay, + UpdateDisplay, + FlushRender +}; + +union GPUBackendCommandParameters +{ + u8 bits; + + BitField interlaced_rendering; + + /// Returns 0 if the currently-displayed field is on an even line in VRAM, otherwise 1. + BitField active_line_lsb; + + BitField set_mask_while_drawing; + BitField check_mask_before_draw; + + ALWAYS_INLINE bool IsMaskingEnabled() const { return (bits & 12u) != 0u; } + + // During transfer/render operations, if ((dst_pixel & mask_and) == 0) { pixel = src_pixel | mask_or } + u16 GetMaskAND() const + { + // return check_mask_before_draw ? 0x8000 : 0x0000; + return Truncate16((bits << 12) & 0x8000); + } + u16 GetMaskOR() const + { + // return set_mask_while_drawing ? 
0x8000 : 0x0000; + return Truncate16((bits << 13) & 0x8000); + } +}; + +struct GPUBackendCommand +{ + GPUBackendCommandType type; + GPUBackendCommandParameters params; + u32 size; +}; + +struct GPUBackendSyncCommand : public GPUBackendCommand +{ + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendSyncCommand); } +}; + +struct GPUBackendFrameDoneCommand : public GPUBackendCommand +{ + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendFrameDoneCommand); } +}; + +struct GPUBackendResetCommand : public GPUBackendCommand +{ + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendResetCommand); } +}; + +struct GPUBackendUpdateSettingsCommand : public GPUBackendCommand +{ + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendUpdateSettingsCommand); } +}; + +struct GPUBackendUpdateResolutionScaleCommand : public GPUBackendCommand +{ + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendUpdateResolutionScaleCommand); } +}; + +struct GPUBackendReadVRAMCommand : public GPUBackendCommand +{ + u16 x; + u16 y; + u16 width; + u16 height; + + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendReadVRAMCommand); } +}; + +struct GPUBackendFillVRAMCommand : public GPUBackendCommand +{ + u16 x; + u16 y; + u16 width; + u16 height; + u32 color; + + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendFillVRAMCommand); } +}; + +struct GPUBackendUpdateVRAMCommand : public GPUBackendCommand +{ + u16 x; + u16 y; + u16 width; + u16 height; + u16 data[0]; + + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendUpdateVRAMCommand) + (sizeof(u16) * width * height); } +}; + +struct GPUBackendCopyVRAMCommand : public GPUBackendCommand +{ + u16 src_x; + u16 src_y; + u16 dst_x; + u16 dst_y; + u16 width; + u16 height; + + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendCopyVRAMCommand); } +}; + +struct GPUBackendSetDrawingAreaCommand : public GPUBackendCommand +{ + Common::Rectangle new_area; + + ALWAYS_INLINE u32 Size() const { return 
sizeof(GPUBackendSetDrawingAreaCommand); } +}; + +struct GPUBackendDrawCommand : public GPUBackendCommand +{ + GPURenderCommand rc; + GPUDrawModeReg draw_mode; + GPUTexturePaletteReg palette; + GPUTextureWindowReg window; + Common::Rectangle bounds; + + ALWAYS_INLINE bool IsDitheringEnabled() const { return rc.IsDitheringEnabled() && draw_mode.dither_enable; } +}; + +struct GPUBackendDrawPolygonCommand : public GPUBackendDrawCommand +{ + u16 num_vertices; + + struct Vertex + { + float precise_x, precise_y, precise_w; + s32 x, y; + u32 color; + u16 texcoord; + + ALWAYS_INLINE u8 GetR() const { return Truncate8(color); } + ALWAYS_INLINE u8 GetG() const { return Truncate8(color >> 8); } + ALWAYS_INLINE u8 GetB() const { return Truncate8(color >> 16); } + ALWAYS_INLINE u8 GetU() const { return Truncate8(texcoord); } + ALWAYS_INLINE u8 GetV() const { return Truncate8(texcoord >> 8); } + }; + + Vertex vertices[0]; + + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendDrawPolygonCommand) + sizeof(Vertex) * num_vertices; } +}; + +struct GPUBackendDrawRectangleCommand : public GPUBackendDrawCommand +{ + s32 x, y; + u16 width, height; + u16 texcoord; + u32 color; + + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendDrawRectangleCommand); } +}; + +struct GPUBackendDrawLineCommand : public GPUBackendDrawCommand +{ + u16 num_vertices; + + struct Vertex + { + s32 x, y; + u32 color; + + ALWAYS_INLINE u8 GetR() const { return Truncate8(color); } + ALWAYS_INLINE u8 GetG() const { return Truncate8(color >> 8); } + ALWAYS_INLINE u8 GetB() const { return Truncate8(color >> 16); } + }; + + Vertex vertices[0]; + + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendDrawLineCommand) + sizeof(Vertex) * num_vertices; } +}; + +struct GPUBackendClearDisplayCommand : public GPUBackendCommand +{ + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendClearDisplayCommand); } +}; + +struct GPUBackendUpdateDisplayCommand : public GPUBackendCommand +{ + float 
display_aspect_ratio; + u16 display_width; + u16 display_height; + u16 display_origin_left; + u16 display_origin_top; + u16 display_vram_left; + u16 display_vram_top; + u16 display_vram_width; + u16 display_vram_height; + u16 display_vram_start_x; + u16 display_vram_start_y; + GPUInterlacedDisplayMode display_interlace; + + /// Returns 0 if the currently-displayed field is on odd lines (1,3,5,...) or 1 if even (2,4,6,...). + u8 display_interlace_field; + + bool display_enabled; + bool display_24bit; + + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendUpdateDisplayCommand); } +}; + +struct GPUBackendFlushRenderCommand : public GPUBackendCommand +{ + ALWAYS_INLINE u32 Size() const { return sizeof(GPUBackendFlushRenderCommand); } +}; diff --git a/src/core/host_interface.cpp b/src/core/host_interface.cpp index 600b94bd5..def14078d 100644 --- a/src/core/host_interface.cpp +++ b/src/core/host_interface.cpp @@ -12,6 +12,7 @@ #include "cpu_core.h" #include "dma.h" #include "gpu.h" +#include "gpu_backend.h" #include "gte.h" #include "host_display.h" #include "pgxp.h" @@ -554,7 +555,7 @@ void HostInterface::CheckForSettingsChanges(const Settings& old_settings) g_settings.display_active_start_offset != old_settings.display_active_start_offset || g_settings.display_active_end_offset != old_settings.display_active_end_offset) { - g_gpu->UpdateSettings(); + g_gpu.UpdateSettings(); } if (g_settings.gpu_pgxp_enable != old_settings.gpu_pgxp_enable || @@ -726,7 +727,7 @@ void HostInterface::ToggleSoftwareRendering() if (System::IsShutdown() || g_settings.gpu_renderer == GPURenderer::Software) return; - const GPURenderer new_renderer = g_gpu->IsHardwareRenderer() ? GPURenderer::Software : g_settings.gpu_renderer; + const GPURenderer new_renderer = g_gpu_backend->IsHardwareRenderer() ? 
GPURenderer::Software : g_settings.gpu_renderer; AddFormattedOSDMessage(5.0f, "Switching to %s renderer...", Settings::GetRendererDisplayName(new_renderer)); System::RecreateGPU(new_renderer); @@ -743,9 +744,9 @@ void HostInterface::ModifyResolutionScale(s32 increment) if (!System::IsShutdown()) { - g_gpu->RestoreGraphicsAPIState(); - g_gpu->UpdateSettings(); - g_gpu->ResetGraphicsAPIState(); + g_gpu_backend->RestoreGraphicsAPIState(); + g_gpu_backend->UpdateSettings(); + g_gpu_backend->ResetGraphicsAPIState(); } } diff --git a/src/core/namco_guncon.cpp b/src/core/namco_guncon.cpp index b3bf49488..816a95723 100644 --- a/src/core/namco_guncon.cpp +++ b/src/core/namco_guncon.cpp @@ -159,7 +159,7 @@ void NamcoGunCon::UpdatePosition() // are we within the active display area? u32 tick, line; - if (mouse_x < 0 || mouse_y < 0 || !g_gpu->ConvertScreenCoordinatesToBeamTicksAndLines(mouse_x, mouse_y, &tick, &line)) + if (mouse_x < 0 || mouse_y < 0 || !g_gpu.ConvertScreenCoordinatesToBeamTicksAndLines(mouse_x, mouse_y, &tick, &line)) { Log_DebugPrintf("Lightgun out of range for window coordinates %d,%d", mouse_x, mouse_y); m_position_x = 0x01; @@ -168,7 +168,7 @@ void NamcoGunCon::UpdatePosition() } // 8MHz units for X = 44100*768*11/7 = 53222400 / 8000000 = 6.6528 - const double divider = static_cast(g_gpu->GetCRTCFrequency()) / 8000000.0; + const double divider = static_cast(g_gpu.GetCRTCFrequency()) / 8000000.0; m_position_x = static_cast(static_cast(tick) / static_cast(divider)); m_position_y = static_cast(line); Log_DebugPrintf("Lightgun window coordinates %d,%d -> tick %u line %u 8mhz ticks %u", mouse_x, mouse_y, tick, line, diff --git a/src/core/settings.h b/src/core/settings.h index 08373fd1c..1d182cbc6 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -72,6 +72,7 @@ struct Settings CPUExecutionMode cpu_execution_mode = CPUExecutionMode::Interpreter; bool cpu_recompiler_memory_exceptions = false; bool cpu_recompiler_icache = false; + bool cpu_thread = 
true; float emulation_speed = 1.0f; bool speed_limiter_enabled = true; diff --git a/src/core/system.cpp b/src/core/system.cpp index b861fada4..a8c2b84f1 100644 --- a/src/core/system.cpp +++ b/src/core/system.cpp @@ -14,6 +14,7 @@ #include "cpu_core.h" #include "dma.h" #include "gpu.h" +#include "gpu_backend.h" #include "gte.h" #include "host_display.h" #include "host_interface.h" @@ -28,9 +29,12 @@ #include "spu.h" #include "timers.h" #include +#include #include #include #include +#include +#include Log_SetChannel(System); #ifdef WIN32 @@ -59,7 +63,6 @@ static std::unique_ptr OpenCDImage(const char* path, bool force_preload static bool DoLoadState(ByteStream* stream, bool force_software_renderer); static bool DoState(StateWrapper& sw); -static bool CreateGPU(GPURenderer renderer); static bool Initialize(bool force_software_renderer); @@ -95,6 +98,17 @@ static u32 s_last_global_tick_counter = 0; static Common::Timer s_fps_timer; static Common::Timer s_frame_timer; +static float s_average_cpu_frame_time_accumulator = 0.0f; +static float s_worst_cpu_frame_time_accumulator = 0.0f; +static float s_worst_cpu_frame_time = 0.0f; +static float s_average_cpu_frame_time = 0.0f; +static Common::Timer s_cpu_frame_timer; + +static void StartCPUThread(); +static void WakeCPUThread(); +static void WaitForCPUThread(); +static void StopCPUThread(); + // Playlist of disc images. 
static std::vector s_media_playlist; static std::string s_media_playlist_filename; @@ -153,12 +167,6 @@ u32 GetInternalFrameNumber() return s_internal_frame_number; } -void FrameDone() -{ - s_frame_number++; - CPU::g_state.frame_done = true; -} - void IncrementInternalFrameNumber() { s_internal_frame_number++; @@ -194,10 +202,18 @@ float GetAverageFrameTime() { return s_average_frame_time; } +float GetAverageCPUFrameTime() +{ + return s_average_cpu_frame_time; +} float GetWorstFrameTime() { return s_worst_frame_time; } +float GetWorstCPUFrameTime() +{ + return s_worst_cpu_frame_time; +} float GetThrottleFrequency() { return s_throttle_frequency; @@ -469,22 +485,22 @@ std::optional GetRegionForPath(const char* image_path) bool RecreateGPU(GPURenderer renderer) { - g_gpu->RestoreGraphicsAPIState(); + g_gpu_backend->RestoreGraphicsAPIState(); // save current state std::unique_ptr state_stream = ByteStream_CreateGrowableMemoryStream(); StateWrapper sw(state_stream.get(), StateWrapper::Mode::Write); - const bool state_valid = g_gpu->DoState(sw) && TimingEvents::DoState(sw); + const bool state_valid = g_gpu_backend->DoState(sw); if (!state_valid) Log_ErrorPrintf("Failed to save old GPU state when switching renderers"); - g_gpu->ResetGraphicsAPIState(); + g_gpu_backend->ResetGraphicsAPIState(); + g_gpu_backend.reset(); // create new renderer - g_gpu.reset(); - if (!CreateGPU(renderer)) + if (!GPUBackend::Create(renderer)) { - Panic("Failed to recreate GPU"); + Panic("Failed to recreate GPU backend"); return false; } @@ -492,10 +508,9 @@ bool RecreateGPU(GPURenderer renderer) { state_stream->SeekAbsolute(0); sw.SetMode(StateWrapper::Mode::Read); - g_gpu->RestoreGraphicsAPIState(); - g_gpu->DoState(sw); - TimingEvents::DoState(sw); - g_gpu->ResetGraphicsAPIState(); + g_gpu_backend->RestoreGraphicsAPIState(); + g_gpu_backend->DoState(sw); + g_gpu_backend->ResetGraphicsAPIState(); } return true; @@ -526,6 +541,7 @@ bool Boot(const SystemBootParameters& params) 
Assert(s_media_playlist.empty()); s_state = State::Starting; s_region = g_settings.region; + // g_settings.cpu_thread = false; if (params.state_stream) { @@ -705,15 +721,16 @@ bool Initialize(bool force_software_renderer) s_fps_timer.Reset(); s_frame_timer.Reset(); + if (!GPUBackend::Create(force_software_renderer ? GPURenderer::Software : g_settings.gpu_renderer)) + return false; + TimingEvents::Initialize(); CPU::Initialize(); CPU::CodeCache::Initialize(g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler); Bus::Initialize(); - if (!CreateGPU(force_software_renderer ? GPURenderer::Software : g_settings.gpu_renderer)) - return false; - + g_gpu.Initialize(); g_dma.Initialize(); g_interrupt_controller.Initialize(); @@ -726,6 +743,10 @@ bool Initialize(bool force_software_renderer) g_sio.Initialize(); UpdateThrottlePeriod(); + + if (g_settings.cpu_thread) + StartCPUThread(); + return true; } @@ -734,13 +755,19 @@ void Shutdown() if (s_state == State::Shutdown) return; + s_state = State::Stopping; + + if (g_settings.cpu_thread) + StopCPUThread(); + g_sio.Shutdown(); g_mdec.Shutdown(); g_spu.Shutdown(); g_timers.Shutdown(); g_pad.Shutdown(); g_cdrom.Shutdown(); - g_gpu.reset(); + g_gpu_backend.reset(); + g_gpu.Shutdown(); g_interrupt_controller.Shutdown(); g_dma.Shutdown(); CPU::CodeCache::Shutdown(); @@ -756,44 +783,6 @@ void Shutdown() s_state = State::Shutdown; } -bool CreateGPU(GPURenderer renderer) -{ - switch (renderer) - { - case GPURenderer::HardwareOpenGL: - g_gpu = GPU::CreateHardwareOpenGLRenderer(); - break; - - case GPURenderer::HardwareVulkan: - g_gpu = GPU::CreateHardwareVulkanRenderer(); - break; - -#ifdef WIN32 - case GPURenderer::HardwareD3D11: - g_gpu = GPU::CreateHardwareD3D11Renderer(); - break; -#endif - - case GPURenderer::Software: - default: - g_gpu = GPU::CreateSoftwareRenderer(); - break; - } - - if (!g_gpu || !g_gpu->Initialize(g_host_interface->GetDisplay())) - { - Log_ErrorPrintf("Failed to initialize GPU, falling back to 
software"); - g_gpu.reset(); - g_gpu = GPU::CreateSoftwareRenderer(); - if (!g_gpu->Initialize(g_host_interface->GetDisplay())) - return false; - } - - // we put this here rather than in Initialize() because of the virtual calls - g_gpu->Reset(); - return true; -} - bool DoState(StateWrapper& sw) { if (!sw.DoMarker("System")) @@ -818,12 +807,15 @@ bool DoState(StateWrapper& sw) if (!sw.DoMarker("InterruptController") || !g_interrupt_controller.DoState(sw)) return false; - g_gpu->RestoreGraphicsAPIState(); - const bool gpu_result = sw.DoMarker("GPU") && g_gpu->DoState(sw); - g_gpu->ResetGraphicsAPIState(); + g_gpu_backend->RestoreGraphicsAPIState(); + const bool gpu_result = sw.DoMarker("GPUBackend") && g_gpu_backend->DoState(sw); + g_gpu_backend->ResetGraphicsAPIState(); if (!gpu_result) return false; + if (!sw.DoMarker("GPU") || !g_gpu.DoState(sw)) + return false; + if (!sw.DoMarker("CDROM") || !g_cdrom.DoState(sw)) return false; @@ -853,14 +845,14 @@ void Reset() if (IsShutdown()) return; - g_gpu->RestoreGraphicsAPIState(); + g_gpu_backend->RestoreGraphicsAPIState(); CPU::Reset(); CPU::CodeCache::Flush(); Bus::Reset(); g_dma.Reset(); g_interrupt_controller.Reset(); - g_gpu->Reset(); + g_gpu.Reset(); g_cdrom.Reset(); g_pad.Reset(); g_timers.Reset(); @@ -872,7 +864,7 @@ void Reset() TimingEvents::Reset(); ResetPerformanceCounters(); - g_gpu->ResetGraphicsAPIState(); + g_gpu_backend->ResetGraphicsAPIState(); } bool LoadState(ByteStream* state) @@ -1052,12 +1044,12 @@ bool SaveState(ByteStream* state, u32 screenshot_size /* = 128 */) { header.offset_to_data = static_cast(state->GetPosition()); - g_gpu->RestoreGraphicsAPIState(); + g_gpu_backend->RestoreGraphicsAPIState(); StateWrapper sw(state, StateWrapper::Mode::Write); const bool result = DoState(sw); - g_gpu->ResetGraphicsAPIState(); + g_gpu_backend->ResetGraphicsAPIState(); if (!result) return false; @@ -1077,12 +1069,14 @@ bool SaveState(ByteStream* state, u32 screenshot_size /* = 128 */) return true; } -void 
RunFrame() +static std::thread s_cpu_thread; +static std::atomic_bool s_cpu_thread_running{false}; +static std::atomic_bool s_cpu_thread_sleeping{false}; +static std::mutex s_cpu_thread_wake_mutex; +static std::condition_variable s_cpu_thread_wake_cv; + +static void ExecuteCPUFrame() { - s_frame_timer.Reset(); - - g_gpu->RestoreGraphicsAPIState(); - switch (g_settings.cpu_execution_mode) { case CPUExecutionMode::Recompiler: @@ -1105,11 +1099,100 @@ void RunFrame() // Generate any pending samples from the SPU before sleeping, this way we reduce the chances of underruns. g_spu.GeneratePendingSamples(); +} + +static void CPUThreadFunction() +{ + for (;;) + { + { + std::unique_lock lock(s_cpu_thread_wake_mutex); + s_cpu_thread_sleeping.store(true); + s_cpu_thread_wake_cv.wait(lock, []() { return !CPU::g_state.frame_done || s_state == State::Stopping; }); + s_cpu_thread_sleeping.store(false); + + if (s_state == State::Stopping) + break; + } + + s_cpu_frame_timer.Reset(); + + ExecuteCPUFrame(); + // Time the frame with the CPU thread's own timer (reset above); s_frame_timer belongs to RunFrame() on the host thread. + { + const float frame_time = static_cast(s_cpu_frame_timer.GetTimeMilliseconds()); + s_average_cpu_frame_time_accumulator += frame_time; + s_worst_cpu_frame_time_accumulator = std::max(s_worst_cpu_frame_time_accumulator, frame_time); + } + } + + s_cpu_thread_running.store(false); +} + +void StartCPUThread() +{ + Assert(!s_cpu_thread.joinable() && !s_cpu_thread_running.load()); + s_cpu_thread_running.store(true); + s_cpu_thread = std::thread(CPUThreadFunction); +} + +void StopCPUThread() +{ + Assert(s_state == State::Stopping); + while (s_cpu_thread_running.load()) + { + WakeCPUThread(); + g_gpu_backend->ProcessGPUCommands(); + } + + if (s_cpu_thread.joinable()) + s_cpu_thread.join(); +} + +void WakeCPUThread() +{ + std::unique_lock lock(s_cpu_thread_wake_mutex); + if (!s_cpu_thread_sleeping.load()) + return; + + CPU::g_state.frame_done = false; + s_cpu_thread_wake_cv.notify_one(); +} + +void WaitForCPUThread() +{ + while (!s_cpu_thread_sleeping.load()) + ; +} + +void RunFrame() +{
+ s_frame_timer.Reset(); + + g_gpu_backend->RestoreGraphicsAPIState(); + + if (!g_settings.cpu_thread) + { + CPU::g_state.frame_done = false; + ExecuteCPUFrame(); + } + else + { + WakeCPUThread(); + g_gpu_backend->RunGPUFrame(); + } if (s_cheat_list) s_cheat_list->Apply(); - g_gpu->ResetGraphicsAPIState(); + g_gpu_backend->ResetGraphicsAPIState(); +} + +void FrameDone() +{ + s_frame_number++; + CPU::g_state.frame_done = true; + g_gpu_backend->CPUFrameDone(); } void SetThrottleFrequency(float frequency) @@ -1163,8 +1246,15 @@ void Throttle() s_last_throttle_time += s_throttle_period; } -void UpdatePerformanceCounters() +void EndFrame() { + if (g_settings.cpu_thread) + { + // finish up anything the CPU pushed to the GPU after vblank + WaitForCPUThread(); + g_gpu_backend->EndGPUFrame(); + } + const float frame_time = static_cast(s_frame_timer.GetTimeMilliseconds()); s_average_frame_time_accumulator += frame_time; s_worst_frame_time_accumulator = std::max(s_worst_frame_time_accumulator, frame_time); @@ -1179,8 +1269,12 @@ void UpdatePerformanceCounters() s_worst_frame_time = s_worst_frame_time_accumulator; s_worst_frame_time_accumulator = 0.0f; + s_worst_cpu_frame_time = s_worst_cpu_frame_time_accumulator; + s_worst_cpu_frame_time_accumulator = 0.0f; s_average_frame_time = s_average_frame_time_accumulator / frames_presented; s_average_frame_time_accumulator = 0.0f; + s_average_cpu_frame_time = s_average_cpu_frame_time_accumulator / frames_presented; + s_average_cpu_frame_time_accumulator = 0.0f; s_vps = static_cast(frames_presented / time); s_last_frame_number = s_frame_number; s_fps = static_cast(s_internal_frame_number - s_last_internal_frame_number) / time; @@ -1191,6 +1285,11 @@ void UpdatePerformanceCounters() s_last_global_tick_counter = global_tick_counter; s_fps_timer.Reset(); +#ifndef WIN32 + Log_InfoPrintf("FPS: %.2f VPS: %.2f Average: %.2fms (%.2fms CPU) Worst: %.2fms (%.2fms CPU)", s_fps, s_vps, + s_average_frame_time, s_average_cpu_frame_time, 
s_worst_frame_time, s_worst_cpu_frame_time); +#endif + g_host_interface->OnSystemPerformanceCountersUpdated(); } diff --git a/src/core/system.h b/src/core/system.h index 6b0085099..0e9941762 100644 --- a/src/core/system.h +++ b/src/core/system.h @@ -45,7 +45,8 @@ enum class State Shutdown, Starting, Running, - Paused + Paused, + Stopping }; /// Returns true if the filename is a PlayStation executable we can inject. @@ -93,7 +94,9 @@ float GetFPS(); float GetVPS(); float GetEmulationSpeed(); float GetAverageFrameTime(); +float GetAverageCPUFrameTime(); float GetWorstFrameTime(); +float GetWorstCPUFrameTime(); float GetThrottleFrequency(); bool Boot(const SystemBootParameters& params); @@ -117,7 +120,7 @@ void UpdateThrottlePeriod(); /// Throttles the system, i.e. sleeps until it's time to execute the next frame. void Throttle(); -void UpdatePerformanceCounters(); +void EndFrame(); void ResetPerformanceCounters(); // Access controllers for simulating input. diff --git a/src/core/timers.cpp b/src/core/timers.cpp index a51b8bd7c..892c5bb91 100644 --- a/src/core/timers.cpp +++ b/src/core/timers.cpp @@ -194,8 +194,8 @@ u32 Timers::ReadRegister(u32 offset) if (timer_index < 2 && cs.external_counting_enabled) { // timers 0/1 depend on the GPU - if (timer_index == 0 || g_gpu->IsCRTCScanlinePending()) - g_gpu->SynchronizeCRTC(); + if (timer_index == 0 || g_gpu.IsCRTCScanlinePending()) + g_gpu.SynchronizeCRTC(); } m_sysclk_event->InvokeEarly(); @@ -208,8 +208,8 @@ u32 Timers::ReadRegister(u32 offset) if (timer_index < 2 && cs.external_counting_enabled) { // timers 0/1 depend on the GPU - if (timer_index == 0 || g_gpu->IsCRTCScanlinePending()) - g_gpu->SynchronizeCRTC(); + if (timer_index == 0 || g_gpu.IsCRTCScanlinePending()) + g_gpu.SynchronizeCRTC(); } m_sysclk_event->InvokeEarly(); @@ -244,8 +244,8 @@ void Timers::WriteRegister(u32 offset, u32 value) if (timer_index < 2 && cs.external_counting_enabled) { // timers 0/1 depend on the GPU - if (timer_index == 0 || 
g_gpu->IsCRTCScanlinePending()) - g_gpu->SynchronizeCRTC(); + if (timer_index == 0 || g_gpu.IsCRTCScanlinePending()) + g_gpu.SynchronizeCRTC(); } m_sysclk_event->InvokeEarly(); diff --git a/src/duckstation-qt/qthostinterface.cpp b/src/duckstation-qt/qthostinterface.cpp index 4ca8e9fa3..c4371d454 100644 --- a/src/duckstation-qt/qthostinterface.cpp +++ b/src/duckstation-qt/qthostinterface.cpp @@ -8,6 +8,7 @@ #include "core/cheats.h" #include "core/controller.h" #include "core/gpu.h" +#include "core/gpu_backend.h" #include "core/system.h" #include "frontend-common/game_list.h" #include "frontend-common/imgui_styles.h" @@ -432,7 +433,7 @@ void QtHostInterface::onHostDisplayWindowResized(int width, int height) // re-render the display, since otherwise it will be out of date and stretched if paused if (!System::IsShutdown()) { - g_gpu->UpdateResolutionScale(); + g_gpu_backend->UpdateResolutionScale(); renderDisplay(); } } @@ -550,7 +551,7 @@ void QtHostInterface::updateDisplayState() if (!System::IsShutdown()) { - g_gpu->UpdateResolutionScale(); + g_gpu_backend->UpdateResolutionScale(); redrawDisplayWindow(); } UpdateSpeedLimiterState(); @@ -1247,7 +1248,7 @@ void QtHostInterface::threadEntryPoint() renderDisplay(); - System::UpdatePerformanceCounters(); + System::EndFrame(); if (m_speed_limiter_enabled) System::Throttle(); diff --git a/src/duckstation-sdl/sdl_host_interface.cpp b/src/duckstation-sdl/sdl_host_interface.cpp index 2a05fe629..7edeeba53 100644 --- a/src/duckstation-sdl/sdl_host_interface.cpp +++ b/src/duckstation-sdl/sdl_host_interface.cpp @@ -7,6 +7,7 @@ #include "core/cheats.h" #include "core/controller.h" #include "core/gpu.h" +#include "core/gpu_backend.h" #include "core/host_display.h" #include "core/system.h" #include "frontend-common/icon.h" @@ -376,7 +377,7 @@ bool SDLHostInterface::SetFullscreen(bool enabled) m_display->ResizeRenderWindow(window_width, window_height); if (!System::IsShutdown()) - g_gpu->UpdateResolutionScale(); + 
g_gpu_backend->UpdateResolutionScale(); m_fullscreen = enabled; return true; @@ -534,7 +535,7 @@ void SDLHostInterface::HandleSDLEvent(const SDL_Event* event) UpdateFramebufferScale(); if (!System::IsShutdown()) - g_gpu->UpdateResolutionScale(); + g_gpu_backend->UpdateResolutionScale(); } else if (event->window.event == SDL_WINDOWEVENT_MOVED) { @@ -824,11 +825,11 @@ void SDLHostInterface::DrawMainMenuBar() } else { - ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (420.0f * framebuffer_scale)); - ImGui::Text("Average: %.2fms", System::GetAverageFrameTime()); + ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (500.0f * framebuffer_scale)); + ImGui::Text("Average: %.2fms / %.2fms", System::GetAverageFrameTime(), System::GetAverageCPUFrameTime()); - ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (310.0f * framebuffer_scale)); - ImGui::Text("Worst: %.2fms", System::GetWorstFrameTime()); + ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (350.0f * framebuffer_scale)); + ImGui::Text("Worst: %.2fms / %.2fms", System::GetWorstFrameTime(), System::GetWorstCPUFrameTime()); ImGui::SetCursorPosX(ImGui::GetIO().DisplaySize.x - (210.0f * framebuffer_scale)); @@ -907,7 +908,7 @@ void SDLHostInterface::DrawQuickSettingsMenu() for (u32 scale = 1; scale <= GPU::MAX_RESOLUTION_SCALE; scale++) { char buf[32]; - std::snprintf(buf, sizeof(buf), "%ux (%ux%u)", scale, scale * GPU::VRAM_WIDTH, scale * GPU::VRAM_HEIGHT); + std::snprintf(buf, sizeof(buf), "%ux (%ux%u)", scale, scale * VRAM_WIDTH, scale * VRAM_HEIGHT); if (ImGui::MenuItem(buf, nullptr, current_internal_resolution == scale)) { @@ -1653,7 +1654,7 @@ void SDLHostInterface::Run() if (System::IsRunning()) { - System::UpdatePerformanceCounters(); + System::EndFrame(); if (m_speed_limiter_enabled) System::Throttle(); diff --git a/src/frontend-common/common_host_interface.cpp b/src/frontend-common/common_host_interface.cpp index eee65a9c1..505e4ac62 100644 --- a/src/frontend-common/common_host_interface.cpp +++ 
b/src/frontend-common/common_host_interface.cpp @@ -11,6 +11,7 @@ #include "core/cpu_code_cache.h" #include "core/dma.h" #include "core/gpu.h" +#include "core/gpu_backend.h" #include "core/host_display.h" #include "core/mdec.h" #include "core/pgxp.h" @@ -825,8 +826,8 @@ void CommonHostInterface::DrawFPSWindow() if (g_settings.display_show_resolution) { - const auto [effective_width, effective_height] = g_gpu->GetEffectiveDisplayResolution(); - const bool interlaced = g_gpu->IsInterlacedDisplayEnabled(); + const auto [effective_width, effective_height] = g_gpu_backend->GetEffectiveDisplayResolution(); + const bool interlaced = g_gpu.IsInterlacedDisplayEnabled(); ImGui::Text("%ux%u (%s)", effective_width, effective_height, interlaced ? "interlaced" : "progressive"); } @@ -906,7 +907,7 @@ void CommonHostInterface::DrawOSDMessages() void CommonHostInterface::DrawDebugWindows() { if (g_settings.debugging.show_gpu_state) - g_gpu->DrawDebugStateWindow(); + g_gpu.DrawDebugStateWindow(); if (g_settings.debugging.show_cdrom_state) g_cdrom.DrawDebugWindow(); if (g_settings.debugging.show_timers_state) @@ -1419,7 +1420,7 @@ void CommonHostInterface::RegisterGraphicsHotkeys() if (!pressed) { g_settings.gpu_pgxp_enable = !g_settings.gpu_pgxp_enable; - g_gpu->UpdateSettings(); + g_gpu.UpdateSettings(); AddFormattedOSDMessage(5.0f, "PGXP is now %s.", g_settings.gpu_pgxp_enable ? "enabled" : "disabled");