From 1757932b3a255bb352355faa67c735d49a16d292 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Sun, 19 Apr 2020 01:16:58 +1000 Subject: [PATCH] GPU: Implement FIFO and timings This will cause a slight performance loss. I've left some knobs in which can be tweaked to mitigate this, but the goal is to be compatible with all games which require them. --- src/core/dma.cpp | 8 - src/core/gpu.cpp | 157 +++++++----- src/core/gpu.h | 102 +++++--- src/core/gpu_commands.cpp | 460 +++++++++++++++++++++------------- src/core/gpu_hw.cpp | 57 +++-- src/core/gpu_hw.h | 4 +- src/core/gpu_sw.cpp | 71 ++++-- src/core/gpu_sw.h | 2 +- src/core/host_interface.cpp | 6 +- src/core/save_state_version.h | 2 +- src/core/settings.cpp | 2 + src/core/settings.h | 2 + 12 files changed, 562 insertions(+), 311 deletions(-) diff --git a/src/core/dma.cpp b/src/core/dma.cpp index 21f82197e..1ebda89b9 100644 --- a/src/core/dma.cpp +++ b/src/core/dma.cpp @@ -259,14 +259,6 @@ void DMA::TransferChannel(Channel channel) if (word_count > 0) TransferMemoryToDevice(channel, (current_address + sizeof(header)) & ADDRESS_MASK, 4, word_count); - // Self-referencing DMA loops.. not sure how these are happening? - if (current_address == next_address) - { - Log_ErrorPrintf("HACK: Aborting self-referencing DMA loop @ 0x%08X. Something went wrong to generate this.", - current_address); - break; - } - current_address = next_address; if (current_address & UINT32_C(0x800000)) break; diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 2537efe13..2dc6f7eb9 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -38,16 +38,19 @@ bool GPU::Initialize(HostDisplay* host_display, System* system, DMA* dma, Interr void GPU::UpdateSettings() { - m_force_progressive_scan = m_system->GetSettings().gpu_disable_interlacing; + const Settings& settings = m_system->GetSettings(); - if (m_force_ntsc_timings != m_system->GetSettings().gpu_force_ntsc_timings) + m_force_progressive_scan = settings.gpu_disable_interlacing; + m_fifo_size = settings.gpu_fifo_size; + m_max_run_ahead = settings.gpu_max_run_ahead; + + if (m_force_ntsc_timings != settings.gpu_force_ntsc_timings) { - m_force_ntsc_timings = m_system->GetSettings().gpu_force_ntsc_timings; + m_force_ntsc_timings = settings.gpu_force_ntsc_timings; UpdateCRTCConfig(); } - m_crtc_state.display_aspect_ratio = - Settings::GetDisplayAspectRatioValue(m_system->GetSettings().display_aspect_ratio); + m_crtc_state.display_aspect_ratio = Settings::GetDisplayAspectRatioValue(settings.display_aspect_ratio); // Crop mode calls this, so recalculate the display area UpdateCRTCDisplayParameters(); @@ -77,11 +80,13 @@ void GPU::SoftReset() m_crtc_state.current_scanline = 0; m_crtc_state.in_hblank = false; m_crtc_state.in_vblank = false; - m_state = State::Idle; - m_blitter_ticks = 0; + m_blitter_state = BlitterState::Idle; + m_command_ticks = 0; m_command_total_words = 0; m_vram_transfer = {}; - m_GP0_buffer.clear(); + m_fifo.Clear(); + m_blit_buffer.clear(); + m_blit_remaining_words = 0; SetDrawMode(0); SetTexturePalette(0); SetTextureWindow(0); @@ -148,8 +153,8 @@ bool GPU::DoState(StateWrapper& sw) sw.Do(&m_crtc_state.in_hblank); sw.Do(&m_crtc_state.in_vblank); - sw.Do(&m_state); - sw.Do(&m_blitter_ticks); + sw.Do(&m_blitter_state); + sw.Do(&m_command_ticks); sw.Do(&m_command_total_words); sw.Do(&m_GPUREAD_latch); @@ -160,7 +165,13 @@ bool GPU::DoState(StateWrapper& sw) sw.Do(&m_vram_transfer.col); sw.Do(&m_vram_transfer.row); - sw.Do(&m_GP0_buffer); + sw.Do(&m_fifo); + sw.Do(&m_blit_buffer); + sw.Do(&m_blit_remaining_words); + sw.Do(&m_render_command.bits); + + sw.Do(&m_max_run_ahead); + sw.Do(&m_fifo_size); if (sw.IsReading()) { @@ -207,16 +218,26 @@ void GPU::RestoreGraphicsAPIState() {} void GPU::UpdateDMARequest() { - // we can kill the blitter ticks here if enough time has passed - if (m_blitter_ticks > 0 && GetPendingGPUTicks() >= m_blitter_ticks) - m_blitter_ticks = 0; + switch (m_blitter_state) + { + case BlitterState::Idle: + m_GPUSTAT.gpu_idle = (m_command_ticks <= 0); + m_GPUSTAT.ready_to_send_vram = false; + m_GPUSTAT.ready_to_recieve_dma = (m_fifo.GetSize() < m_fifo_size); + break; - const bool blitter_idle = (m_blitter_ticks <= 0); + case BlitterState::WritingVRAM: + m_GPUSTAT.gpu_idle = false; + m_GPUSTAT.ready_to_send_vram = false; + m_GPUSTAT.ready_to_recieve_dma = (m_fifo.GetSize() < m_fifo_size); + break; - m_GPUSTAT.ready_to_send_vram = (blitter_idle && m_state == State::ReadingVRAM); - m_GPUSTAT.ready_to_recieve_cmd = (blitter_idle && m_state == State::Idle); - m_GPUSTAT.ready_to_recieve_dma = - blitter_idle && (m_state == State::Idle || (m_state != State::ReadingVRAM && m_command_total_words > 0)); + case BlitterState::ReadingVRAM: + m_GPUSTAT.gpu_idle = false; + m_GPUSTAT.ready_to_send_vram = true; + m_GPUSTAT.ready_to_recieve_dma = false; + break; + } bool dma_request; switch (m_GPUSTAT.dma_direction) @@ -226,15 +247,15 @@ void GPU::UpdateDMARequest() break; case DMADirection::FIFO: - dma_request = blitter_idle && m_state >= State::ReadingVRAM; // FIFO not full/full + dma_request = m_GPUSTAT.ready_to_recieve_dma; break; case DMADirection::CPUtoGP0: - dma_request = blitter_idle && m_GPUSTAT.ready_to_recieve_dma; + dma_request = m_GPUSTAT.ready_to_recieve_dma; break; case DMADirection::GPUREADtoCPU: - dma_request = blitter_idle && m_GPUSTAT.ready_to_send_vram; + dma_request = m_GPUSTAT.ready_to_send_vram; break; default: @@ -256,7 +277,7 @@ u32 GPU::ReadRegister(u32 offset) { // code can be dependent on the odd/even bit, so update the GPU state when reading. // we can mitigate this slightly by only updating when the raster is actually hitting a new line - if (IsRasterScanlinePending()) + if (IsRasterScanlineOrCommandPending()) Synchronize(); return m_GPUSTAT.bits; @@ -273,7 +294,8 @@ void GPU::WriteRegister(u32 offset, u32 value) switch (offset) { case 0x00: - WriteGP0(value); + m_fifo.Push(value); + ExecuteCommands(); return; case 0x04: @@ -305,21 +327,12 @@ void GPU::DMAWrite(const u32* words, u32 word_count) { case DMADirection::CPUtoGP0: { - std::copy(words, words + word_count, std::back_inserter(m_GP0_buffer)); - ExecuteCommands(); - - if (m_state == State::WritingVRAM) - { - Assert(m_blitter_ticks == 0); - m_blitter_ticks = GetPendingGPUTicks() + word_count; - - // reschedule GPU tick event - const TickCount sysclk_ticks = GPUTicksToSystemTicks(word_count); - if (m_tick_event->GetTicksUntilNextExecution() > sysclk_ticks) - m_tick_event->Schedule(sysclk_ticks); - + m_fifo.PushRange(words, word_count); + m_fifo_pushed = true; + if (!m_syncing) + ExecuteCommands(); + else UpdateDMARequest(); - } } break; @@ -332,6 +345,22 @@ void GPU::DMAWrite(const u32* words, u32 word_count) } } +void GPU::AddCommandTicks(TickCount ticks) +{ + if (m_command_ticks != 0) + { + m_command_ticks += ticks; + return; + } + + m_command_ticks = GetPendingGPUTicks() + ticks; + + // reschedule GPU tick event if it would execute later than this command finishes + const TickCount sysclk_ticks = GPUTicksToSystemTicks(ticks); + if (m_tick_event->GetTicksUntilNextExecution() > sysclk_ticks) + m_tick_event->Schedule(sysclk_ticks); +} + void GPU::Synchronize() { m_tick_event->InvokeEarly(); @@ -547,7 +576,7 @@ void GPU::UpdateSliceTicks() (m_crtc_state.horizontal_display_end - m_crtc_state.current_tick_in_scanline); m_tick_event->Schedule( - GPUTicksToSystemTicks((m_blitter_ticks > 0) ? std::min(m_blitter_ticks, ticks_until_vblank) : ticks_until_vblank)); + GPUTicksToSystemTicks((m_command_ticks > 0) ? std::min(m_command_ticks, ticks_until_vblank) : ticks_until_vblank)); } bool GPU::IsRasterScanlinePending() const @@ -555,6 +584,13 @@ bool GPU::IsRasterScanlinePending() const return (GetPendingGPUTicks() + m_crtc_state.current_tick_in_scanline) >= m_crtc_state.horizontal_total; } +bool GPU::IsRasterScanlineOrCommandPending() const +{ + const TickCount pending_ticks = GetPendingGPUTicks(); + return ((pending_ticks + m_crtc_state.current_tick_in_scanline) >= m_crtc_state.horizontal_total) || + (m_command_ticks > 0 && pending_ticks > m_command_ticks); +} + void GPU::Execute(TickCount ticks) { // convert cpu/master clock to GPU ticks, accounting for partial cycles because of the non-integer divider @@ -565,13 +601,12 @@ void GPU::Execute(TickCount ticks) m_crtc_state.current_tick_in_scanline += gpu_ticks; // handle blits - TickCount blit_ticks_remaining = gpu_ticks; - while (m_blitter_ticks > 0 && blit_ticks_remaining > 0) + if (m_command_ticks > 0) { - const TickCount slice = std::min(blit_ticks_remaining, m_blitter_ticks); - m_blitter_ticks -= slice; - blit_ticks_remaining -= slice; - UpdateDMARequest(); + m_command_ticks -= gpu_ticks; + ExecuteCommands(); + if (m_command_ticks < 0) + m_command_ticks = 0; } } @@ -672,7 +707,7 @@ void GPU::Execute(TickCount ticks) u32 GPU::ReadGPUREAD() { - if (m_state != State::ReadingVRAM) + if (m_blitter_state != BlitterState::ReadingVRAM) return m_GPUREAD_latch; // Read two pixels out of VRAM and combine them. Zero fill odd pixel counts. @@ -692,11 +727,12 @@ u32 GPU::ReadGPUREAD() { Log_DebugPrintf("End of VRAM->CPU transfer"); m_vram_transfer = {}; - m_state = State::Idle; + m_blitter_state = BlitterState::Idle; UpdateDMARequest(); // end of transfer, catch up on any commands which were written (unlikely) ExecuteCommands(); + UpdateDMARequest(); break; } } @@ -706,12 +742,6 @@ u32 GPU::ReadGPUREAD() return value; } -void GPU::WriteGP0(u32 value) -{ - m_GP0_buffer.push_back(value); - ExecuteCommands(); -} - void GPU::WriteGP1(u32 value) { const u8 command = Truncate8(value >> 24); @@ -729,11 +759,13 @@ void GPU::WriteGP1(u32 value) case 0x01: // Clear FIFO { Log_DebugPrintf("GP1 clear FIFO"); - m_state = State::Idle; + m_blitter_state = BlitterState::Idle; m_command_total_words = 0; m_vram_transfer = {}; - m_GP0_buffer.clear(); - m_blitter_ticks = 0; + m_fifo.Clear(); + m_blit_buffer.clear(); + m_blit_remaining_words = 0; + m_command_ticks = 0; UpdateDMARequest(); } break; @@ -756,9 +788,12 @@ void GPU::WriteGP1(u32 value) case 0x04: // DMA Direction { - m_GPUSTAT.dma_direction = static_cast(param); Log_DebugPrintf("DMA direction <- 0x%02X", static_cast(m_GPUSTAT.dma_direction.GetValue())); - UpdateDMARequest(); + if (m_GPUSTAT.dma_direction != static_cast(param)) + { + m_GPUSTAT.dma_direction = static_cast(param); + UpdateDMARequest(); + } } break; @@ -827,8 +862,10 @@ void GPU::WriteGP1(u32 value) if (m_GPUSTAT.bits != new_GPUSTAT.bits) { + // Have to be careful when setting this because Synchronize() can modify GPUSTAT. + static constexpr u32 SET_MASK = UINT32_C(0b00000000011111110100000000000000); Synchronize(); - m_GPUSTAT.bits = new_GPUSTAT.bits; + m_GPUSTAT.bits = (m_GPUSTAT.bits & ~SET_MASK) | (new_GPUSTAT.bits & SET_MASK); UpdateCRTCConfig(); } } @@ -1023,7 +1060,7 @@ void GPU::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 he } } -void GPU::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) {} +void GPU::DispatchRenderCommand() {} void GPU::FlushRender() {} diff --git a/src/core/gpu.h b/src/core/gpu.h index ce85a3eed..86a3ece82 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -1,5 +1,6 @@ #pragma once #include "common/bitfield.h" +#include "common/fifo_queue.h" #include "common/rectangle.h" #include "timers.h" #include "types.h" @@ -23,13 +24,12 @@ class Timers; class GPU { public: - enum class State : u8 + enum class BlitterState : u8 { Idle, - WaitingForParameters, - ExecutingCommand, ReadingVRAM, - WritingVRAM + WritingVRAM, + DrawingPolyLine }; enum class DMADirection : u32 @@ -88,13 +88,14 @@ public: VRAM_WIDTH = 1024, VRAM_HEIGHT = 512, VRAM_SIZE = VRAM_WIDTH * VRAM_HEIGHT * sizeof(u16), + MAX_FIFO_SIZE = 4096, TEXTURE_PAGE_WIDTH = 256, TEXTURE_PAGE_HEIGHT = 256, MAX_PRIMITIVE_WIDTH = 1024, MAX_PRIMITIVE_HEIGHT = 512, DOT_TIMER_INDEX = 0, HBLANK_TIMER_INDEX = 1, - MAX_RESOLUTION_SCALE = 16, + MAX_RESOLUTION_SCALE = 16 }; enum : u16 @@ -143,6 +144,9 @@ public: /// Returns true if enough ticks have passed for the raster to be on the next line. bool IsRasterScanlinePending() const; + /// Returns true if a raster scanline or command execution is pending. + bool IsRasterScanlineOrCommandPending() const; + // Synchronizes the CRTC, updating the hblank timer. void Synchronize(); @@ -347,10 +351,20 @@ protected: void SetTextureWindow(u32 value); u32 ReadGPUREAD(); - void WriteGP0(u32 value); + void FinishVRAMWrite(); + + /// Returns the number of vertices in the buffered poly-line. + ALWAYS_INLINE u32 GetPolyLineVertexCount() const + { + return (static_cast(m_blit_buffer.size()) + BoolToUInt32(m_render_command.shading_enable)) >> + BoolToUInt8(m_render_command.shading_enable); + } + + void AddCommandTicks(TickCount ticks); + void WriteGP1(u32 value); - void ExecuteCommands(); void EndCommand(); + void ExecuteCommands(); void HandleGetGPUInfoCommand(u32 value); // Rendering in the backend @@ -358,11 +372,30 @@ protected: virtual void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color); virtual void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data); virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height); - virtual void DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr); + virtual void DispatchRenderCommand(); virtual void FlushRender(); virtual void UpdateDisplay(); virtual void DrawRendererStats(bool is_idle_frame); + // These are **very** approximate. + ALWAYS_INLINE void AddDrawTriangleTicks(u32 width, u32 height, bool textured, bool shaded) + { +#if 0 + const u32 draw_ticks = static_cast((std::abs(x1 * (y2 - y3) + x2 * (y3 - y1) + x3 * (y1 - y2)) + 1u) / 2u); +#else + const u32 draw_ticks = (width * height) / 2; +#endif + AddCommandTicks(draw_ticks << BoolToUInt8(textured | shaded)); + } + ALWAYS_INLINE void AddDrawRectangleTicks(u32 width, u32 height, bool textured) + { + AddCommandTicks((width * height) << BoolToUInt8(textured)); + } + ALWAYS_INLINE void AddDrawLineTicks(u32 width, u32 height, bool shaded) + { + AddCommandTicks(std::max(width, height)); + } + HostDisplay* m_host_display = nullptr; System* m_system = nullptr; DMA* m_dma = nullptr; @@ -397,7 +430,7 @@ protected: BitField display_disable; BitField interrupt_request; BitField dma_data_request; - BitField ready_to_recieve_cmd; + BitField gpu_idle; BitField ready_to_send_vram; BitField ready_to_recieve_dma; BitField dma_direction; @@ -595,13 +628,17 @@ protected: bool in_vblank; } m_crtc_state = {}; - State m_state = State::Idle; - TickCount m_blitter_ticks = 0; + BlitterState m_blitter_state = BlitterState::Idle; + TickCount m_command_ticks = 0; u32 m_command_total_words = 0; /// GPUREAD value for non-VRAM-reads. u32 m_GPUREAD_latch = 0; + /// True if currently executing/syncing. + bool m_syncing = false; + bool m_fifo_pushed = false; + struct VRAMTransfer { u16 x; @@ -612,7 +649,13 @@ protected: u16 row; } m_vram_transfer = {}; - std::vector m_GP0_buffer; + HeapFIFOQueue m_fifo; + std::vector m_blit_buffer; + u32 m_blit_remaining_words; + RenderCommand m_render_command{}; + + TickCount m_max_run_ahead = 128; + u32 m_fifo_size = 128; struct Stats { @@ -627,26 +670,29 @@ protected: Stats m_last_stats = {}; private: - using GP0CommandHandler = bool (GPU::*)(const u32*&, u32); + using GP0CommandHandler = bool (GPU::*)(); using GP0CommandHandlerTable = std::array; static GP0CommandHandlerTable GenerateGP0CommandHandlerTable(); // Rendering commands, returns false if not enough data is provided - bool HandleUnknownGP0Command(const u32*& command_ptr, u32 command_size); - bool HandleNOPCommand(const u32*& command_ptr, u32 command_size); - bool HandleClearCacheCommand(const u32*& command_ptr, u32 command_size); - bool HandleInterruptRequestCommand(const u32*& command_ptr, u32 command_size); - bool HandleSetDrawModeCommand(const u32*& command_ptr, u32 command_size); - bool HandleSetTextureWindowCommand(const u32*& command_ptr, u32 command_size); - bool HandleSetDrawingAreaTopLeftCommand(const u32*& command_ptr, u32 command_size); - bool HandleSetDrawingAreaBottomRightCommand(const u32*& command_ptr, u32 command_size); - bool HandleSetDrawingOffsetCommand(const u32*& command_ptr, u32 command_size); - bool HandleSetMaskBitCommand(const u32*& command_ptr, u32 command_size); - bool HandleRenderCommand(const u32*& command_ptr, u32 command_size); - bool HandleFillRectangleCommand(const u32*& command_ptr, u32 command_size); - bool HandleCopyRectangleCPUToVRAMCommand(const u32*& command_ptr, u32 command_size); - bool HandleCopyRectangleVRAMToCPUCommand(const u32*& command_ptr, u32 command_size); - bool HandleCopyRectangleVRAMToVRAMCommand(const u32*& command_ptr, u32 command_size); + bool HandleUnknownGP0Command(); + bool HandleNOPCommand(); + bool HandleClearCacheCommand(); + bool HandleInterruptRequestCommand(); + bool HandleSetDrawModeCommand(); + bool HandleSetTextureWindowCommand(); + bool HandleSetDrawingAreaTopLeftCommand(); + bool HandleSetDrawingAreaBottomRightCommand(); + bool HandleSetDrawingOffsetCommand(); + bool HandleSetMaskBitCommand(); + bool HandleRenderPolygonCommand(); + bool HandleRenderRectangleCommand(); + bool HandleRenderLineCommand(); + bool HandleRenderPolyLineCommand(); + bool HandleFillRectangleCommand(); + bool HandleCopyRectangleCPUToVRAMCommand(); + bool HandleCopyRectangleVRAMToCPUCommand(); + bool HandleCopyRectangleVRAMToVRAMCommand(); static const GP0CommandHandlerTable s_GP0_command_handler_table; }; diff --git a/src/core/gpu_commands.cpp b/src/core/gpu_commands.cpp index 5fa81d504..5a879ab3c 100644 --- a/src/core/gpu_commands.cpp +++ b/src/core/gpu_commands.cpp @@ -7,10 +7,9 @@ Log_SetChannel(GPU); #define CHECK_COMMAND_SIZE(num_words) \ - if (command_size < num_words) \ + if (m_fifo.GetSize() < num_words) \ { \ m_command_total_words = num_words; \ - m_state = State::WaitingForParameters; \ return false; \ } @@ -24,33 +23,95 @@ static constexpr u32 ReplaceZero(u32 value, u32 value_for_zero) void GPU::ExecuteCommands() { - Assert(m_GP0_buffer.size() < 1048576); + m_syncing = true; - const u32* command_ptr = m_GP0_buffer.data(); - u32 command_size = static_cast(m_GP0_buffer.size()); - while (m_state != State::ReadingVRAM && command_size > 0 && command_size >= m_command_total_words) + for (;;) { - const u32 command = command_ptr[0] >> 24; - const u32* old_command_ptr = command_ptr; - if (!(this->*s_GP0_command_handler_table[command])(command_ptr, command_size)) - break; + if (m_command_ticks <= m_max_run_ahead && !m_fifo.IsEmpty()) + { + switch (m_blitter_state) + { + case BlitterState::Idle: + { + const u32 command = m_fifo.Peek(0) >> 24; + if ((this->*s_GP0_command_handler_table[command])()) + continue; + else + break; + } - const u32 words_used = static_cast(command_ptr - old_command_ptr); - DebugAssert(words_used <= command_size); - command_size -= words_used; + case BlitterState::WritingVRAM: + { + DebugAssert(m_blit_remaining_words > 0); + const u32 words_to_copy = std::min(m_blit_remaining_words, m_fifo.GetSize()); + const size_t old_size = m_blit_buffer.size(); + m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy); + m_fifo.PopRange(&m_blit_buffer[old_size], words_to_copy); + m_blit_remaining_words -= words_to_copy; + AddCommandTicks(words_to_copy); + + Log_DebugPrintf("VRAM write burst of %u words, %u words remaining", words_to_copy, m_blit_remaining_words); + if (m_blit_remaining_words == 0) + FinishVRAMWrite(); + + continue; + } + + case BlitterState::ReadingVRAM: + { + Panic("shouldn't be here"); + } + break; + + case BlitterState::DrawingPolyLine: + { + const u32 words_per_vertex = m_render_command.shading_enable ? 2 : 1; + u32 terminator_index = + m_render_command.shading_enable ? ((static_cast(m_blit_buffer.size()) & 1u) ^ 1u) : 0u; + for (; terminator_index < m_fifo.GetSize(); terminator_index += words_per_vertex) + { + // polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000. + // terminator is on the first word for the vertex + if ((m_fifo.Peek(terminator_index) & UINT32_C(0xF000F000)) == UINT32_C(0x50005000)) + break; + } + + const bool found_terminator = (terminator_index < m_fifo.GetSize()); + const u32 words_to_copy = std::min(terminator_index, m_fifo.GetSize()); + if (words_to_copy > 0) + { + const size_t old_size = m_blit_buffer.size(); + m_blit_buffer.resize(m_blit_buffer.size() + words_to_copy); + m_fifo.PopRange(&m_blit_buffer[old_size], words_to_copy); + } + + Log_DebugPrintf("Added %u words to polyline", words_to_copy); + if (found_terminator) + { + // drop terminator + m_fifo.RemoveOne(); + Log_DebugPrintf("Drawing poly-line with %u vertices", GetPolyLineVertexCount()); + DispatchRenderCommand(); + m_blit_buffer.clear(); + EndCommand(); + } + } + break; + } + } + + m_fifo_pushed = false; + UpdateDMARequest(); + if (!m_fifo_pushed) + break; } - if (command_size == 0) - m_GP0_buffer.clear(); - else if (command_ptr > m_GP0_buffer.data()) - m_GP0_buffer.erase(m_GP0_buffer.begin(), m_GP0_buffer.begin() + (command_ptr - m_GP0_buffer.data())); - - UpdateDMARequest(); + m_syncing = false; } void GPU::EndCommand() { - m_state = State::Idle; + m_blitter_state = BlitterState::Idle; m_command_total_words = 0; } @@ -67,7 +128,24 @@ GPU::GP0CommandHandlerTable GPU::GenerateGP0CommandHandlerTable() table[i] = &GPU::HandleNOPCommand; table[0x1F] = &GPU::HandleInterruptRequestCommand; for (u32 i = 0x20; i <= 0x7F; i++) - table[i] = &GPU::HandleRenderCommand; + { + const RenderCommand rc{i << 24}; + switch (rc.primitive) + { + case Primitive::Polygon: + table[i] = &GPU::HandleRenderPolygonCommand; + break; + case Primitive::Line: + table[i] = rc.polyline ? &GPU::HandleRenderPolyLineCommand : &GPU::HandleRenderLineCommand; + break; + case Primitive::Rectangle: + table[i] = &GPU::HandleRenderRectangleCommand; + break; + default: + table[i] = &GPU::HandleUnknownGP0Command; + break; + } + } table[0xE0] = &GPU::HandleNOPCommand; table[0xE1] = &GPU::HandleSetDrawModeCommand; table[0xE2] = &GPU::HandleSetTextureWindowCommand; @@ -87,30 +165,31 @@ GPU::GP0CommandHandlerTable GPU::GenerateGP0CommandHandlerTable() return table; } -bool GPU::HandleUnknownGP0Command(const u32*& command_ptr, u32 command_size) +bool GPU::HandleUnknownGP0Command() { - const u32 command = *(command_ptr++) >> 24; + const u32 command = m_fifo.Pop() >> 24; Log_ErrorPrintf("Unimplemented GP0 command 0x%02X", command); EndCommand(); return true; } -bool GPU::HandleNOPCommand(const u32*& command_ptr, u32 command_size) +bool GPU::HandleNOPCommand() { - command_ptr++; + m_fifo.RemoveOne(); EndCommand(); return true; } -bool GPU::HandleClearCacheCommand(const u32*& command_ptr, u32 command_size) +bool GPU::HandleClearCacheCommand() { Log_DebugPrintf("GP0 clear cache"); - command_ptr++; + m_fifo.RemoveOne(); + AddCommandTicks(1); EndCommand(); return true; } -bool GPU::HandleInterruptRequestCommand(const u32*& command_ptr, u32 command_size) +bool GPU::HandleInterruptRequestCommand() { Log_WarningPrintf("GP0 interrupt request"); if (!m_GPUSTAT.interrupt_request) @@ -119,35 +198,38 @@ bool GPU::HandleInterruptRequestCommand(const u32*& command_ptr, u32 command_siz m_interrupt_controller->InterruptRequest(InterruptController::IRQ::GPU); } - command_ptr++; + m_fifo.RemoveOne(); + AddCommandTicks(1); EndCommand(); return true; } -bool GPU::HandleSetDrawModeCommand(const u32*& command_ptr, u32 command_size) +bool GPU::HandleSetDrawModeCommand() { - const u32 param = *(command_ptr++) & 0x00FFFFFF; + const u32 param = m_fifo.Pop() & 0x00FFFFFFu; Log_DebugPrintf("Set draw mode %08X", param); SetDrawMode(Truncate16(param)); + AddCommandTicks(1); EndCommand(); return true; } -bool GPU::HandleSetTextureWindowCommand(const u32*& command_ptr, u32 command_size) +bool GPU::HandleSetTextureWindowCommand() { - const u32 param = *(command_ptr++) & 0x00FFFFFF; + const u32 param = m_fifo.Pop() & 0x00FFFFFFu; SetTextureWindow(param); Log_DebugPrintf("Set texture window %02X %02X %02X %02X", m_draw_mode.texture_window_mask_x, m_draw_mode.texture_window_mask_y, m_draw_mode.texture_window_offset_x, m_draw_mode.texture_window_offset_y); + AddCommandTicks(1); EndCommand(); return true; } -bool GPU::HandleSetDrawingAreaTopLeftCommand(const u32*& command_ptr, u32 command_size) +bool GPU::HandleSetDrawingAreaTopLeftCommand() { - const u32 param = *(command_ptr++) & 0x00FFFFFF; + const u32 param = m_fifo.Pop() & 0x00FFFFFFu; const u32 left = param & 0x3FF; const u32 top = (param >> 10) & 0x1FF; Log_DebugPrintf("Set drawing area top-left: (%u, %u)", left, top); @@ -160,16 +242,17 @@ bool GPU::HandleSetDrawingAreaTopLeftCommand(const u32*& command_ptr, u32 comman m_drawing_area_changed = true; } + AddCommandTicks(1); EndCommand(); return true; } -bool GPU::HandleSetDrawingAreaBottomRightCommand(const u32*& command_ptr, u32 command_size) +bool GPU::HandleSetDrawingAreaBottomRightCommand() { - const u32 param = *(command_ptr++) & 0x00FFFFFF; + const u32 param = m_fifo.Pop() & 0x00FFFFFFu; - const u32 right = param & 0x3FF; - const u32 bottom = (param >> 10) & 0x1FF; + const u32 right = param & 0x3FFu; + const u32 bottom = (param >> 10) & 0x1FFu; Log_DebugPrintf("Set drawing area bottom-right: (%u, %u)", m_drawing_area.right, m_drawing_area.bottom); if (m_drawing_area.right != right || m_drawing_area.bottom != bottom) { @@ -180,15 +263,16 @@ bool GPU::HandleSetDrawingAreaBottomRightCommand(const u32*& command_ptr, u32 co m_drawing_area_changed = true; } + AddCommandTicks(1); EndCommand(); return true; } -bool GPU::HandleSetDrawingOffsetCommand(const u32*& command_ptr, u32 command_size) +bool GPU::HandleSetDrawingOffsetCommand() { - const u32 param = *(command_ptr++) & 0x00FFFFFF; - const s32 x = SignExtendN<11, s32>(param & 0x7FF); - const s32 y = SignExtendN<11, s32>((param >> 11) & 0x7FF); + const u32 param = m_fifo.Pop() & 0x00FFFFFFu; + const s32 x = SignExtendN<11, s32>(param & 0x7FFu); + const s32 y = SignExtendN<11, s32>((param >> 11) & 0x7FFu); Log_DebugPrintf("Set drawing offset (%d, %d)", m_drawing_offset.x, m_drawing_offset.y); if (m_drawing_offset.x != x || m_drawing_offset.y != y) { @@ -198,13 +282,14 @@ bool GPU::HandleSetDrawingOffsetCommand(const u32*& command_ptr, u32 command_siz m_drawing_offset.y = y; } + AddCommandTicks(1); EndCommand(); return true; } -bool GPU::HandleSetMaskBitCommand(const u32*& command_ptr, u32 command_size) +bool GPU::HandleSetMaskBitCommand() { - const u32 param = *(command_ptr++) & 0x00FFFFFF; + const u32 param = m_fifo.Pop() & 0x00FFFFFFu; constexpr u32 gpustat_mask = (1 << 11) | (1 << 12); const u32 gpustat_bits = (param & 0x03) << 11; @@ -216,172 +301,198 @@ bool GPU::HandleSetMaskBitCommand(const u32*& command_ptr, u32 command_size) Log_DebugPrintf("Set mask bit %u %u", BoolToUInt32(m_GPUSTAT.set_mask_while_drawing), BoolToUInt32(m_GPUSTAT.check_mask_before_draw)); + AddCommandTicks(1); EndCommand(); return true; } -bool GPU::HandleRenderCommand(const u32*& command_ptr, u32 command_size) +bool GPU::HandleRenderPolygonCommand() { - const RenderCommand rc{command_ptr[0]}; - u8 words_per_vertex; - u32 num_vertices; - u32 total_words; - switch (rc.primitive) - { - case Primitive::Polygon: - { - // shaded vertices use the colour from the first word for the first vertex - words_per_vertex = 1 + BoolToUInt8(rc.texture_enable) + BoolToUInt8(rc.shading_enable); - num_vertices = rc.quad_polygon ? 4 : 3; - total_words = words_per_vertex * num_vertices + BoolToUInt8(!rc.shading_enable); - CHECK_COMMAND_SIZE(total_words); - - // set draw state up - if (rc.texture_enable) - { - const u16 texpage_attribute = Truncate16((rc.shading_enable ? command_ptr[5] : command_ptr[4]) >> 16); - SetDrawMode((texpage_attribute & DrawMode::Reg::POLYGON_TEXPAGE_MASK) | - (m_draw_mode.mode_reg.bits & ~DrawMode::Reg::POLYGON_TEXPAGE_MASK)); - SetTexturePalette(Truncate16(command_ptr[2] >> 16)); - } - } - break; - - case Primitive::Line: - { - words_per_vertex = 1 + BoolToUInt8(rc.shading_enable); - if (rc.polyline) - { - // polyline must have at least two vertices, and the terminator is (word & 0xf000f000) == 0x50005000. terminator - // is on the first word for the vertex - num_vertices = 2; - bool found_terminator = false; - for (u32 pos = rc.shading_enable ? 4 : 3; pos < command_size; pos += words_per_vertex) - { - if ((command_ptr[pos] & UINT32_C(0xF000F000)) == UINT32_C(0x50005000)) - { - found_terminator = true; - break; - } - - num_vertices++; - } - if (!found_terminator) - return false; - - total_words = words_per_vertex * num_vertices + BoolToUInt32(!rc.shading_enable) + 1; - } - else - { - num_vertices = 2; - total_words = words_per_vertex * num_vertices + BoolToUInt32(!rc.shading_enable); - } - } - break; - - case Primitive::Rectangle: - { - words_per_vertex = - 2 + BoolToUInt8(rc.texture_enable) + BoolToUInt8(rc.rectangle_size == DrawRectangleSize::Variable); - num_vertices = 1; - total_words = words_per_vertex; - - if (rc.texture_enable) - SetTexturePalette(Truncate16(command_ptr[2] >> 16)); - } - break; - - default: - UnreachableCode(); - return true; - } + const RenderCommand rc{m_fifo.Peek(0)}; + // shaded vertices use the colour from the first word for the first vertex + const u32 words_per_vertex = 1 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.shading_enable); + const u32 num_vertices = rc.quad_polygon ? 4 : 3; + const u32 total_words = words_per_vertex * num_vertices + BoolToUInt32(!rc.shading_enable); CHECK_COMMAND_SIZE(total_words); - static constexpr std::array primitive_names = {{"", "polygon", "line", "rectangle"}}; - - Log_TracePrintf("Render %s %s %s %s %s (%u verts, %u words per vert)", rc.quad_polygon ? "four-point" : "three-point", - rc.transparency_enable ? "semi-transparent" : "opaque", - rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome", - primitive_names[static_cast(rc.primitive.GetValue())], ZeroExtend32(num_vertices), - ZeroExtend32(words_per_vertex)); - if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending()) Synchronize(); - DispatchRenderCommand(rc, num_vertices, command_ptr); - command_ptr += total_words; + Log_TracePrintf( + "Render %s %s %s %s polygon (%u verts, %u words per vert)", rc.quad_polygon ? "four-point" : "three-point", + rc.transparency_enable ? "semi-transparent" : "opaque", rc.texture_enable ? "textured" : "non-textured", + rc.shading_enable ? "shaded" : "monochrome", ZeroExtend32(num_vertices), ZeroExtend32(words_per_vertex)); + + // set draw state up + if (rc.texture_enable) + { + const u16 texpage_attribute = Truncate16((rc.shading_enable ? m_fifo.Peek(5) : m_fifo.Peek(4)) >> 16); + SetDrawMode((texpage_attribute & DrawMode::Reg::POLYGON_TEXPAGE_MASK) | + (m_draw_mode.mode_reg.bits & ~DrawMode::Reg::POLYGON_TEXPAGE_MASK)); + SetTexturePalette(Truncate16(m_fifo.Peek(2) >> 16)); + } + m_stats.num_vertices += num_vertices; m_stats.num_polygons++; + m_render_command.bits = rc.bits; + m_fifo.RemoveOne(); + + DispatchRenderCommand(); EndCommand(); return true; } -bool GPU::HandleFillRectangleCommand(const u32*& command_ptr, u32 command_size) +bool GPU::HandleRenderRectangleCommand() +{ + const RenderCommand rc{m_fifo.Peek(0)}; + const u32 total_words = + 2 + BoolToUInt32(rc.texture_enable) + BoolToUInt32(rc.rectangle_size == DrawRectangleSize::Variable); + + CHECK_COMMAND_SIZE(total_words); + + if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending()) + Synchronize(); + + if (rc.texture_enable) + SetTexturePalette(Truncate16(m_fifo.Peek(2) >> 16)); + + Log_TracePrintf("Render %s %s %s rectangle (%u words)", rc.transparency_enable ? "semi-transparent" : "opaque", + rc.texture_enable ? "textured" : "non-textured", rc.shading_enable ? "shaded" : "monochrome", + total_words); + + m_stats.num_vertices++; + m_stats.num_polygons++; + m_render_command.bits = rc.bits; + m_fifo.RemoveOne(); + + DispatchRenderCommand(); + EndCommand(); + return true; +} + +bool GPU::HandleRenderLineCommand() +{ + const RenderCommand rc{m_fifo.Peek(0)}; + const u32 total_words = rc.shading_enable ? 4 : 3; + CHECK_COMMAND_SIZE(total_words); + + if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending()) + Synchronize(); + + Log_TracePrintf("Render %s %s line (%u total words)", rc.transparency_enable ? "semi-transparent" : "opaque", + rc.shading_enable ? "shaded" : "monochrome", total_words); + + m_stats.num_vertices += 2; + m_stats.num_polygons++; + m_render_command.bits = rc.bits; + m_fifo.RemoveOne(); + + DispatchRenderCommand(); + EndCommand(); + return true; +} + +bool GPU::HandleRenderPolyLineCommand() +{ + // always read the first two vertices, we test for the terminator after that + const RenderCommand rc{m_fifo.Peek(0)}; + const u32 min_words = rc.shading_enable ? 3 : 4; + CHECK_COMMAND_SIZE(min_words); + + if (IsInterlacedRenderingEnabled() && IsRasterScanlinePending()) + Synchronize(); + + Log_TracePrintf("Render %s %s poly-line", rc.transparency_enable ? "semi-transparent" : "opaque", + rc.shading_enable ? "shaded" : "monochrome"); + + m_render_command.bits = rc.bits; + m_fifo.RemoveOne(); + + const u32 words_to_pop = min_words - 1; + m_blit_buffer.resize(words_to_pop); + m_fifo.PopRange(m_blit_buffer.data(), words_to_pop); + + // polyline goes via a different path through the blit buffer + m_blitter_state = BlitterState::DrawingPolyLine; + m_command_total_words = 0; + return true; +} + +bool GPU::HandleFillRectangleCommand() { CHECK_COMMAND_SIZE(3); FlushRender(); - const u32 color = command_ptr[0] & 0x00FFFFFF; - const u32 dst_x = command_ptr[1] & 0x3F0; - const u32 dst_y = (command_ptr[1] >> 16) & 0x3FF; - const u32 width = ((command_ptr[2] & 0x3FF) + 0xF) & ~0xF; - const u32 height = (command_ptr[2] >> 16) & 0x1FF; - command_ptr += 3; + const u32 color = m_fifo.Pop() & 0x00FFFFFF; + const u32 dst_x = m_fifo.Peek() & 0x3F0; + const u32 dst_y = (m_fifo.Pop() >> 16) & 0x3FF; + const u32 width = ((m_fifo.Peek() & 0x3FF) + 0xF) & ~0xF; + const u32 height = (m_fifo.Pop() >> 16) & 0x1FF; Log_DebugPrintf("Fill VRAM rectangle offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, width, height); FillVRAM(dst_x, dst_y, width, height, color); m_stats.num_vram_fills++; + AddCommandTicks(46 + ((width / 8) + 9) * height); EndCommand(); return true; } -bool GPU::HandleCopyRectangleCPUToVRAMCommand(const u32*& command_ptr, u32 command_size) +bool GPU::HandleCopyRectangleCPUToVRAMCommand() { CHECK_COMMAND_SIZE(3); + m_fifo.RemoveOne(); - const u32 copy_width = ReplaceZero(command_ptr[2] & 0x3FF, 0x400); - const u32 copy_height = ReplaceZero((command_ptr[2] >> 16) & 0x1FF, 0x200); + const u32 dst_x = m_fifo.Peek() & 0x3FF; + const u32 dst_y = (m_fifo.Pop() >> 16) & 0x3FF; + const u32 copy_width = ReplaceZero(m_fifo.Peek() & 0x3FF, 0x400); + const u32 copy_height = ReplaceZero((m_fifo.Pop() >> 16) & 0x1FF, 0x200); const u32 num_pixels = copy_width * copy_height; - const u32 num_words = 3 + ((num_pixels + 1) / 2); - if (command_size < num_words) - { - m_command_total_words = num_words; - m_state = State::WritingVRAM; - return false; - } - - const u32 dst_x = command_ptr[1] & 0x3FF; - const u32 dst_y = (command_ptr[1] >> 16) & 0x3FF; + const u32 num_words = ((num_pixels + 1) / 2); Log_DebugPrintf("Copy rectangle from CPU to VRAM offset=(%u,%u), size=(%u,%u)", dst_x, dst_y, copy_width, copy_height); - if (m_system->GetSettings().debugging.dump_cpu_to_vram_copies) - { - DumpVRAMToFile(StringUtil::StdStringFromFormat("cpu_to_vram_copy_%u.png", s_cpu_to_vram_dump_id++).c_str(), - copy_width, copy_height, sizeof(u16) * copy_width, &command_ptr[3], true); - } - - FlushRender(); - UpdateVRAM(dst_x, dst_y, copy_width, copy_height, &command_ptr[3]); - command_ptr += num_words; - m_stats.num_vram_writes++; EndCommand(); + + m_blitter_state = BlitterState::WritingVRAM; + m_blit_buffer.reserve(num_words); + m_blit_remaining_words = num_words; + m_vram_transfer.x = Truncate16(dst_x); + m_vram_transfer.y = Truncate16(dst_y); + m_vram_transfer.width = Truncate16(copy_width); + m_vram_transfer.height = Truncate16(copy_height); return true; } -bool GPU::HandleCopyRectangleVRAMToCPUCommand(const u32*& command_ptr, u32 command_size) +void GPU::FinishVRAMWrite() +{ + if (m_system->GetSettings().debugging.dump_cpu_to_vram_copies) + { + DumpVRAMToFile(StringUtil::StdStringFromFormat("cpu_to_vram_copy_%u.png", s_cpu_to_vram_dump_id++).c_str(), + m_vram_transfer.width, m_vram_transfer.height, sizeof(u16) * m_vram_transfer.width, + m_blit_buffer.data(), true); + } + + FlushRender(); + UpdateVRAM(m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height, m_blit_buffer.data()); + m_blit_buffer.clear(); + m_vram_transfer = {}; + m_blitter_state = BlitterState::Idle; + m_stats.num_vram_writes++; +} + +bool GPU::HandleCopyRectangleVRAMToCPUCommand() { CHECK_COMMAND_SIZE(3); + m_fifo.RemoveOne(); - m_vram_transfer.width = ((Truncate16(command_ptr[2]) - 1) & 0x3FF) + 1; - m_vram_transfer.height = ((Truncate16(command_ptr[2] >> 16) - 1) & 0x1FF) + 1; - m_vram_transfer.x = Truncate16(command_ptr[1] & 0x3FF); - m_vram_transfer.y = Truncate16((command_ptr[1] >> 16) & 0x3FF); - command_ptr += 3; + m_vram_transfer.x = Truncate16(m_fifo.Peek() & 0x3FF); + m_vram_transfer.y = Truncate16((m_fifo.Pop() >> 16) & 0x3FF); + m_vram_transfer.width = ((Truncate16(m_fifo.Peek()) - 1) & 0x3FF) + 1; + m_vram_transfer.height = ((Truncate16(m_fifo.Pop() >> 16) - 1) & 0x1FF) + 1; Log_DebugPrintf("Copy rectangle from VRAM to CPU offset=(%u,%u), size=(%u,%u)", m_vram_transfer.x, m_vram_transfer.y, m_vram_transfer.width, m_vram_transfer.height); @@ -402,22 +513,22 @@ bool GPU::HandleCopyRectangleVRAMToCPUCommand(const u32*& command_ptr, u32 comma // switch to pixel-by-pixel read state m_stats.num_vram_reads++; - m_state = State::ReadingVRAM; + m_blitter_state = BlitterState::ReadingVRAM; m_command_total_words = 0; return true; } -bool GPU::HandleCopyRectangleVRAMToVRAMCommand(const u32*& command_ptr, u32 command_size) +bool GPU::HandleCopyRectangleVRAMToVRAMCommand() { CHECK_COMMAND_SIZE(4); + m_fifo.RemoveOne(); - const u32 src_x = command_ptr[1] & 0x3FF; - const u32 src_y = (command_ptr[1] >> 16) & 0x3FF; - const u32 dst_x = command_ptr[2] & 0x3FF; - const u32 dst_y = (command_ptr[2] >> 16) & 0x3FF; - const u32 width = ReplaceZero(command_ptr[3] & 0x3FF, 0x400); - const u32 height = ReplaceZero((command_ptr[3] >> 16) & 0x1FF, 0x200); - command_ptr += 4; + const u32 src_x = m_fifo.Peek() & 0x3FF; + const u32 src_y = (m_fifo.Pop() >> 16) & 0x3FF; + const u32 dst_x = m_fifo.Peek() & 0x3FF; + const u32 dst_y = (m_fifo.Pop() >> 16) & 0x3FF; + const u32 width = ReplaceZero(m_fifo.Peek() & 0x3FF, 0x400); + const u32 height = ReplaceZero((m_fifo.Pop() >> 16) & 0x1FF, 0x200); Log_DebugPrintf("Copy rectangle from VRAM to VRAM src=(%u,%u), dst=(%u,%u), size=(%u,%u)", src_x, src_y, dst_x, dst_y, width, height); @@ -425,6 +536,7 @@ bool GPU::HandleCopyRectangleVRAMToVRAMCommand(const u32*& command_ptr, u32 comm FlushRender(); CopyVRAM(src_x, src_y, dst_x, dst_y, width, height); m_stats.num_vram_copies++; + AddCommandTicks(width * height * 2); EndCommand(); return true; } diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 585ecd0a8..17e236763 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -172,8 +172,9 @@ void GPU_HW::HandleFlippedQuadTextureCoordinates(BatchVertex* vertices) } } -void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr) +void GPU_HW::LoadVertices() { + const RenderCommand rc{m_render_command.bits}; const u32 texpage = ZeroExtend32(m_draw_mode.mode_reg.bits) | (ZeroExtend32(m_draw_mode.palette_reg) << 16); // TODO: Move this to the GPU.. @@ -181,20 +182,19 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command { case Primitive::Polygon: { - DebugAssert(num_vertices == 3 || num_vertices == 4); EnsureVertexBufferSpace(rc.quad_polygon ? 6 : 3); const u32 first_color = rc.color_for_first_vertex; const bool shaded = rc.shading_enable; const bool textured = rc.texture_enable; - u32 buffer_pos = 1; + const u32 num_vertices = rc.quad_polygon ? 4 : 3; std::array vertices; for (u32 i = 0; i < num_vertices; i++) { - const u32 color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color; - const VertexPosition vp{command_ptr[buffer_pos++]}; - const u16 packed_texcoord = textured ? Truncate16(command_ptr[buffer_pos++]) : 0; + const u32 color = (shaded && i > 0) ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color; + const VertexPosition vp{m_fifo.Pop()}; + const u16 packed_texcoord = textured ? Truncate16(m_fifo.Pop()) : 0; vertices[i].Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, color, texpage, packed_texcoord); } @@ -226,6 +226,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom); + AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.shading_enable); std::memcpy(m_batch_current_vertex_ptr, vertices.data(), sizeof(BatchVertex) * 3); m_batch_current_vertex_ptr += 3; @@ -255,6 +256,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command static_cast(std::clamp(max_y_123, m_drawing_area.top, m_drawing_area.bottom)) + 1u; m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom); + AddDrawTriangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable, rc.shading_enable); AddVertex(vertices[2]); AddVertex(vertices[1]); @@ -266,14 +268,12 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command case Primitive::Rectangle: { - u32 buffer_pos = 1; const u32 color = rc.color_for_first_vertex; - const VertexPosition vp{command_ptr[buffer_pos++]}; + const VertexPosition vp{m_fifo.Pop()}; const s32 pos_x = m_drawing_offset.x + vp.x; const s32 pos_y = m_drawing_offset.y + vp.y; - const auto [texcoord_x, texcoord_y] = - UnpackTexcoord(rc.texture_enable ? Truncate16(command_ptr[buffer_pos++]) : 0); + const auto [texcoord_x, texcoord_y] = UnpackTexcoord(rc.texture_enable ? Truncate16(m_fifo.Pop()) : 0); u16 orig_tex_left = ZeroExtend16(texcoord_x); u16 orig_tex_top = ZeroExtend16(texcoord_y); s32 rectangle_width; @@ -293,9 +293,12 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command rectangle_height = 16; break; default: - rectangle_width = static_cast(command_ptr[buffer_pos] & 0xFFFF); - rectangle_height = static_cast(command_ptr[buffer_pos] >> 16); - break; + { + const u32 width_and_height = m_fifo.Pop(); + rectangle_width = static_cast(width_and_height & 0xFFFF); + rectangle_height = static_cast(width_and_height >> 16); + } + break; } if (rectangle_width >= MAX_PRIMITIVE_WIDTH || rectangle_height >= MAX_PRIMITIVE_HEIGHT) @@ -350,22 +353,35 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command static_cast(std::clamp(pos_y + rectangle_height, m_drawing_area.top, m_drawing_area.bottom)) + 1u; m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom); + AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, rc.texture_enable); } break; case Primitive::Line: { - EnsureVertexBufferSpace(num_vertices * 2); + const u32 num_vertices = rc.polyline ? GetPolyLineVertexCount() : 2; + EnsureVertexBufferSpace(num_vertices); const u32 first_color = rc.color_for_first_vertex; const bool shaded = rc.shading_enable; - u32 buffer_pos = 1; BatchVertex last_vertex; + u32 buffer_pos = 0; for (u32 i = 0; i < num_vertices; i++) { - const u32 color = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color; - const VertexPosition vp{command_ptr[buffer_pos++]}; + u32 color; + VertexPosition vp; + + if (rc.polyline) + { + color = (shaded && i > 0) ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color; + vp.bits = m_blit_buffer[buffer_pos++]; + } + else + { + color = (shaded && i > 0) ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color; + vp.bits = m_fifo.Pop(); + } BatchVertex vertex; vertex.Set(m_drawing_offset.x + vp.x, m_drawing_offset.y + vp.y, color, 0, 0); @@ -394,6 +410,7 @@ void GPU_HW::LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; m_vram_dirty_rect.Include(clip_left, clip_right, clip_top, clip_bottom); + AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, rc.shading_enable); } } @@ -485,8 +502,10 @@ void GPU_HW::CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 Common::Rectangle::FromExtents(dst_x, dst_y, width, height).Clamped(0, 0, VRAM_WIDTH, VRAM_HEIGHT)); } -void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) +void GPU_HW::DispatchRenderCommand() { + const RenderCommand rc{m_render_command.bits}; + TextureMode texture_mode; if (rc.IsTexturingEnabled()) { @@ -574,7 +593,7 @@ void GPU_HW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32 m_batch_ubo_dirty = true; } - LoadVertices(rc, num_vertices, command_ptr); + LoadVertices(); } void GPU_HW::FlushRender() diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 06100d957..6cdc2cea1 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -145,7 +145,7 @@ protected: void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; - void DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) override; + void DispatchRenderCommand() override; void FlushRender() override; void DrawRendererStats(bool is_idle_frame) override; @@ -198,7 +198,7 @@ private: static BatchPrimitive GetPrimitiveForCommand(RenderCommand rc); - void LoadVertices(RenderCommand rc, u32 num_vertices, const u32* command_ptr); + void LoadVertices(); ALWAYS_INLINE void AddVertex(const BatchVertex& v) { diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index 14cfebb28..d118d6722 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -204,8 +204,9 @@ void GPU_SW::UpdateDisplay() } } -void GPU_SW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) +void GPU_SW::DispatchRenderCommand() { + const RenderCommand rc{m_render_command.bits}; const bool dithering_enable = rc.IsDitheringEnabled() && m_GPUSTAT.dither_enable; switch (rc.primitive) @@ -216,24 +217,23 @@ void GPU_SW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32 const bool shaded = rc.shading_enable; const bool textured = rc.texture_enable; + const u32 num_vertices = rc.quad_polygon ? 4 : 3; std::array vertices; - - u32 buffer_pos = 1; for (u32 i = 0; i < num_vertices; i++) { SWVertex& vert = vertices[i]; - const u32 color_rgb = (shaded && i > 0) ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color; + const u32 color_rgb = (shaded && i > 0) ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color; vert.color_r = Truncate8(color_rgb); vert.color_g = Truncate8(color_rgb >> 8); vert.color_b = Truncate8(color_rgb >> 16); - const VertexPosition vp{command_ptr[buffer_pos++]}; + const VertexPosition vp{m_fifo.Pop()}; vert.x = vp.x; vert.y = vp.y; if (textured) { - std::tie(vert.texcoord_x, vert.texcoord_y) = UnpackTexcoord(Truncate16(command_ptr[buffer_pos++])); + std::tie(vert.texcoord_x, vert.texcoord_y) = UnpackTexcoord(Truncate16(m_fifo.Pop())); } else { @@ -253,10 +253,9 @@ void GPU_SW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32 case Primitive::Rectangle: { - u32 buffer_pos = 1; const auto [r, g, b] = UnpackColorRGB24(rc.color_for_first_vertex); - const VertexPosition vp{command_ptr[buffer_pos++]}; - const u32 texcoord_and_palette = rc.texture_enable ? command_ptr[buffer_pos++] : 0; + const VertexPosition vp{m_fifo.Pop()}; + const u32 texcoord_and_palette = rc.texture_enable ? m_fifo.Pop() : 0; const auto [texcoord_x, texcoord_y] = UnpackTexcoord(Truncate16(texcoord_and_palette)); s32 width; @@ -276,9 +275,12 @@ void GPU_SW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32 height = 16; break; default: - width = static_cast(command_ptr[buffer_pos] & UINT32_C(0xFFFF)); - height = static_cast(command_ptr[buffer_pos] >> 16); - break; + { + const u32 width_and_height = m_fifo.Pop(); + width = static_cast(width_and_height & UINT32_C(0xFFFF)); + height = static_cast(width_and_height >> 16); + } + break; } const DrawRectangleFunction DrawFunction = @@ -296,19 +298,28 @@ void GPU_SW::DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32 const DrawLineFunction DrawFunction = GetDrawLineFunction(shaded, rc.transparency_enable, dithering_enable); std::array vertices = {}; - u32 buffer_pos = 1; + u32 buffer_pos = 0; // first vertex SWVertex* p0 = &vertices[0]; SWVertex* p1 = &vertices[1]; - p0->SetPosition(VertexPosition{command_ptr[buffer_pos++]}); + p0->SetPosition(VertexPosition{rc.polyline ? m_blit_buffer[buffer_pos++] : m_fifo.Pop()}); p0->SetColorRGB24(first_color); // remaining vertices in line strip + const u32 num_vertices = rc.polyline ? GetPolyLineVertexCount() : 2; for (u32 i = 1; i < num_vertices; i++) { - p1->SetColorRGB24(shaded ? (command_ptr[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color); - p1->SetPosition(VertexPosition{command_ptr[buffer_pos++]}); + if (rc.polyline) + { + p1->SetColorRGB24(shaded ? (m_blit_buffer[buffer_pos++] & UINT32_C(0x00FFFFFF)) : first_color); + p1->SetPosition(VertexPosition{m_blit_buffer[buffer_pos++]}); + } + else + { + p1->SetColorRGB24(shaded ? (m_fifo.Pop() & UINT32_C(0x00FFFFFF)) : first_color); + p1->SetPosition(VertexPosition{m_fifo.Pop()}); + } (this->*DrawFunction)(p0, p1); @@ -408,6 +419,7 @@ void GPU_SW::DrawTriangle(const SWVertex* v0, const SWVertex* v1, const SWVertex max_x = std::clamp(max_x, static_cast(m_drawing_area.left), static_cast(m_drawing_area.right)); min_y = std::clamp(min_y, static_cast(m_drawing_area.top), static_cast(m_drawing_area.bottom)); max_y = std::clamp(max_y, static_cast(m_drawing_area.top), static_cast(m_drawing_area.bottom)); + AddDrawTriangleTicks(max_x - min_x + 1, max_y - min_y + 1, texture_enable, shading_enable); // compute per-pixel increments const s32 a01 = py0 - py1, b01 = px1 - px0; @@ -501,6 +513,18 @@ void GPU_SW::DrawRectangle(s32 origin_x, s32 origin_y, u32 width, u32 height, u8 origin_x += m_drawing_offset.x; origin_y += m_drawing_offset.y; + { + const u32 clip_left = static_cast(std::clamp(origin_x, m_drawing_area.left, m_drawing_area.right)); + const u32 clip_right = + static_cast(std::clamp(origin_x + static_cast(width), m_drawing_area.left, m_drawing_area.right)) + + 1u; + const u32 clip_top = static_cast(std::clamp(origin_y, m_drawing_area.top, m_drawing_area.bottom)); + const u32 clip_bottom = static_cast(std::clamp(origin_y + static_cast(height), m_drawing_area.top, + m_drawing_area.bottom)) + + 1u; + AddDrawRectangleTicks(clip_right - clip_left, clip_bottom - clip_top, texture_enable); + } + for (u32 offset_y = 0; offset_y < height; offset_y++) { const s32 y = origin_y + static_cast(offset_y); @@ -690,6 +714,21 @@ void GPU_SW::DrawLine(const SWVertex* p0, const SWVertex* p1) const s32 dy = p1->y - p0->y; const s32 k = std::max(std::abs(dx), std::abs(dy)); + { + // TODO: Move to base class + const s32 min_x = std::min(p0->x, p1->x); + const s32 max_x = std::max(p0->x, p1->x); + const s32 min_y = std::min(p0->y, p1->y); + const s32 max_y = std::max(p0->y, p1->y); + + const u32 clip_left = static_cast(std::clamp(min_x, m_drawing_area.left, m_drawing_area.left)); + const u32 clip_right = static_cast(std::clamp(max_x, m_drawing_area.left, m_drawing_area.right)) + 1u; + const u32 clip_top = static_cast(std::clamp(min_y, m_drawing_area.top, m_drawing_area.bottom)); + const u32 clip_bottom = static_cast(std::clamp(max_y, m_drawing_area.top, m_drawing_area.bottom)) + 1u; + + AddDrawLineTicks(clip_right - clip_left, clip_bottom - clip_top, shading_enable); + } + FixedPointCoord step_x, step_y; FixedPointColor step_r, step_g, step_b; if (k > 0) diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index dae7bb340..13562a7e3 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -51,7 +51,7 @@ protected: // Rasterization ////////////////////////////////////////////////////////////////////////// - void DispatchRenderCommand(RenderCommand rc, u32 num_vertices, const u32* command_ptr) override; + void DispatchRenderCommand() override; static bool IsClockwiseWinding(const SWVertex* v0, const SWVertex* v1, const SWVertex* v2); diff --git a/src/core/host_interface.cpp b/src/core/host_interface.cpp index 1ea742a1f..0d5691f15 100644 --- a/src/core/host_interface.cpp +++ b/src/core/host_interface.cpp @@ -924,6 +924,8 @@ void HostInterface::UpdateSettings(const std::function& apply_callback) const AudioBackend old_audio_backend = m_settings.audio_backend; const GPURenderer old_gpu_renderer = m_settings.gpu_renderer; const u32 old_gpu_resolution_scale = m_settings.gpu_resolution_scale; + const u32 old_gpu_fifo_size = m_settings.gpu_fifo_size; + const u32 old_gpu_max_run_ahead = m_settings.gpu_max_run_ahead; const bool old_gpu_true_color = m_settings.gpu_true_color; const bool old_gpu_scaled_dithering = m_settings.gpu_scaled_dithering; const bool old_gpu_texture_filtering = m_settings.gpu_texture_filtering; @@ -978,8 +980,8 @@ void HostInterface::UpdateSettings(const std::function& apply_callback) m_system->SetCPUExecutionMode(m_settings.cpu_execution_mode); } - if (m_settings.gpu_resolution_scale != old_gpu_resolution_scale || - m_settings.gpu_true_color != old_gpu_true_color || + if (m_settings.gpu_resolution_scale != old_gpu_resolution_scale || m_settings.gpu_fifo_size != old_gpu_fifo_size || + m_settings.gpu_max_run_ahead != old_gpu_max_run_ahead || m_settings.gpu_true_color != old_gpu_true_color || m_settings.gpu_scaled_dithering != old_gpu_scaled_dithering || m_settings.gpu_texture_filtering != old_gpu_texture_filtering || m_settings.gpu_disable_interlacing != old_gpu_disable_interlacing || diff --git a/src/core/save_state_version.h b/src/core/save_state_version.h index 5f883c3c4..7dec24c36 100644 --- a/src/core/save_state_version.h +++ b/src/core/save_state_version.h @@ -2,4 +2,4 @@ #include "types.h" static constexpr u32 SAVE_STATE_MAGIC = 0x43435544; -static constexpr u32 SAVE_STATE_VERSION = 22; +static constexpr u32 SAVE_STATE_VERSION = 23; diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 054275bdb..10bcffb1c 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -23,6 +23,8 @@ void Settings::Load(SettingsInterface& si) gpu_renderer = ParseRendererName(si.GetStringValue("GPU", "Renderer", GetRendererName(DEFAULT_GPU_RENDERER)).c_str()) .value_or(DEFAULT_GPU_RENDERER); gpu_resolution_scale = static_cast(si.GetIntValue("GPU", "ResolutionScale", 1)); + gpu_fifo_size = static_cast(si.GetIntValue("GPU", "FIFOSize", 128)); + gpu_max_run_ahead = static_cast(si.GetIntValue("GPU", "MaxRunAhead", 128)); gpu_use_debug_device = si.GetBoolValue("GPU", "UseDebugDevice", false); gpu_true_color = si.GetBoolValue("GPU", "TrueColor", true); gpu_scaled_dithering = si.GetBoolValue("GPU", "ScaledDithering", false); diff --git a/src/core/settings.h b/src/core/settings.h index e4306f5e4..86e48a29b 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -47,6 +47,8 @@ struct Settings GPURenderer gpu_renderer = GPURenderer::Software; u32 gpu_resolution_scale = 1; + u32 gpu_fifo_size = 128; + u32 gpu_max_run_ahead = 128; bool gpu_use_debug_device = false; bool gpu_true_color = true; bool gpu_scaled_dithering = false;