From 0090fee30e61d2085f33bd9238375f25393ca4f5 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sat, 3 Aug 2024 14:23:47 +1000 Subject: [PATCH] GPU partial scanout shenanigans, WIP --- src/core/gpu.cpp | 42 +++++++++++++++++++++++++++++++++++++----- src/core/gpu.h | 9 ++++++++- src/core/gpu_hw.cpp | 4 ++-- src/core/gpu_hw.h | 2 +- src/core/gpu_sw.cpp | 41 +++++++++++++++++++++++------------------ src/core/gpu_sw.h | 8 ++++---- src/core/timers.cpp | 30 ++++++++++++++++++++++-------- 7 files changed, 97 insertions(+), 39 deletions(-) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 6822c268e..8e8563969 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -196,7 +196,7 @@ void GPU::Reset(bool clear_vram) m_command_tick_event.Deactivate(); SoftReset(); - UpdateDisplay(); + UpdateDisplay(false, 0, 0); } void GPU::SoftReset() @@ -373,7 +373,7 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ { UpdateCRTCConfig(); if (update_display) - UpdateDisplay(); + UpdateDisplay(false, 0, 0); UpdateCommandTickEvent(); } @@ -950,6 +950,9 @@ void GPU::CRTCTickEvent(TickCount ticks) Timers::AddTicks(HBLANK_TIMER_INDEX, static_cast(hblank_timer_ticks)); } + if (m_crtc_state.start_address_changed) + DoPartialScanout(); + UpdateCRTCTickEvent(); return; } @@ -995,6 +998,9 @@ void GPU::CRTCTickEvent(TickCount ticks) m_crtc_state.in_vblank = false; } + if (m_crtc_state.start_address_changed) + DoPartialScanout(); + const bool new_vblank = m_crtc_state.current_scanline < m_crtc_state.vertical_display_start || m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end; if (m_crtc_state.in_vblank != new_vblank) @@ -1003,10 +1009,14 @@ void GPU::CRTCTickEvent(TickCount ticks) { DEBUG_LOG("Now in v-blank"); - // flush any pending draws and "scan out" the image // TODO: move present in here I guess - FlushRender(); - UpdateDisplay(); + if (m_crtc_state.last_scanout_line < m_crtc_state.display_vram_height) + { + UpdateDisplay(true, 
m_crtc_state.last_scanout_line, m_crtc_state.display_vram_height); + m_crtc_state.last_scanout_line = 0; + m_crtc_state.start_address_changed = false; + } + TimingEvents::SetFrameDone(); // switch fields early. this is needed so we draw to the correct one. @@ -1070,6 +1080,20 @@ void GPU::CRTCTickEvent(TickCount ticks) UpdateCRTCTickEvent(); } +void GPU::DoPartialScanout() +{ + const u32 vram_first_line = m_crtc_state.vertical_visible_start + m_crtc_state.display_origin_top; + const u32 vram_line = (m_crtc_state.current_scanline < vram_first_line) ? 0 : std::min(m_crtc_state.current_scanline - vram_first_line, m_crtc_state.display_vram_height); + if (vram_line != m_crtc_state.last_scanout_line && m_crtc_state.start_address_changed) + { + if (m_crtc_state.last_scanout_line < m_crtc_state.display_vram_height) + UpdateDisplay(true, m_crtc_state.last_scanout_line, vram_line); + + m_crtc_state.last_scanout_line = vram_line; + m_crtc_state.start_address_changed = false; + } +} + void GPU::CommandTickEvent(TickCount ticks) { m_pending_command_ticks -= SystemTicksToGPUTicks(ticks); @@ -1286,8 +1310,16 @@ void GPU::WriteGP1(u32 value) System::IncrementInternalFrameNumber(); if (m_crtc_state.regs.display_address_start != new_value) { + m_crtc_state.start_address_changed = true; SynchronizeCRTC(); m_crtc_state.regs.display_address_start = new_value; + + if (!m_crtc_state.in_vblank) + { + GL_INS_FMT("Display address start set to ({},{}) at scanline {}", m_crtc_state.regs.X.GetValue(), + m_crtc_state.regs.Y.GetValue(), m_crtc_state.current_scanline); + } + UpdateCRTCDisplayParameters(); OnBufferSwapped(); } diff --git a/src/core/gpu.h b/src/core/gpu.h index d1398d3cd..6b33c1c74 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -193,6 +193,7 @@ public: // Returns the number of visible lines. 
ALWAYS_INLINE u16 GetCRTCActiveStartLine() const { return m_crtc_state.vertical_display_start; } ALWAYS_INLINE u16 GetCRTCActiveEndLine() const { return m_crtc_state.vertical_display_end; } + ALWAYS_INLINE u32 GetCRTCCurrentScanline() const { return m_crtc_state.current_scanline; } // Returns the video clock frequency. TickCount GetCRTCFrequency() const; @@ -258,7 +259,9 @@ protected: void UpdateCRTCDisplayParameters(); // Update ticks for this execution slice +public: void UpdateCRTCTickEvent(); + protected: void UpdateCommandTickEvent(); // Updates dynamic bits in GPUSTAT (ready to send VRAM/ready to receive DMA) @@ -268,6 +271,7 @@ protected: // Ticks for hblank/vblank. void CRTCTickEvent(TickCount ticks); void CommandTickEvent(TickCount ticks); + void DoPartialScanout(); /// Returns 0 if the currently-displayed field is on odd lines (1,3,5,...) or 1 if even (2,4,6,...). ALWAYS_INLINE u32 GetInterlacedDisplayField() const { return ZeroExtend32(m_crtc_state.interlaced_field); } @@ -315,7 +319,7 @@ protected: virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height); virtual void DispatchRenderCommand() = 0; virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0; - virtual void UpdateDisplay() = 0; + virtual void UpdateDisplay(bool partial, u32 start_line, u32 end_line) = 0; virtual void DrawRendererStats(); virtual void OnBufferSwapped(); @@ -536,6 +540,9 @@ protected: TickCount fractional_dot_ticks; // only used when timer0 is enabled + u32 last_scanout_line; + bool start_address_changed; + bool in_hblank; bool in_vblank; diff --git a/src/core/gpu_hw.cpp b/src/core/gpu_hw.cpp index 7b19ba065..4aca24e36 100644 --- a/src/core/gpu_hw.cpp +++ b/src/core/gpu_hw.cpp @@ -503,7 +503,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings) UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, false, false); if (m_write_mask_as_depth) UpdateDepthBufferFromMaskBit(); - UpdateDisplay(); + UpdateDisplay(false, 0, 0); } else 
if (m_vram_depth_texture && depth_buffer_changed) { @@ -3593,7 +3593,7 @@ void GPU_HW::FlushRender() } } -void GPU_HW::UpdateDisplay() +void GPU_HW::UpdateDisplay(bool partial, u32 start_line, u32 end_line) { FlushRender(); DeactivateROV(); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 7eccdd751..e20d49127 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -71,7 +71,7 @@ public: std::tuple GetEffectiveDisplayResolution(bool scaled = true) override; std::tuple GetFullDisplayResolution(bool scaled = true) override; - void UpdateDisplay() override; + void UpdateDisplay(bool partial, u32 start_line, u32 end_line) override; private: enum : u32 diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index e2f9110a8..284e619aa 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -93,7 +93,7 @@ GPUTexture* GPU_SW::GetDisplayTexture(u32 width, u32 height, GPUTexture::Format ClearDisplayTexture(); g_gpu_device->RecycleTexture(std::move(m_upload_texture)); m_upload_texture = - g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::DynamicTexture, format, nullptr, 0); + g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, format, nullptr, 0); if (!m_upload_texture) [[unlikely]] ERROR_LOG("Failed to create {}x{} {} texture", width, height, static_cast(format)); } @@ -202,7 +202,7 @@ ALWAYS_INLINE void CopyOutRow16(const u16* src_p } template -ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 line_skip) +ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 skip_y, u32 width, u32 height, u32 line_skip) { using OutputPixelType = std::conditional_tMap(reinterpret_cast(&dst_ptr), &dst_stride, 0, 0, width, height); + const bool mapped = (skip_y == 0 && texture->Map(reinterpret_cast(&dst_ptr), &dst_stride, 0, 0, width, height)); // Fast path when not wrapping around. 
if ((src_x + width) <= VRAM_WIDTH && (src_y + height) <= VRAM_HEIGHT) @@ -248,13 +251,14 @@ ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, if (mapped) texture->Unmap(); else - texture->Update(0, 0, width, height, m_upload_buffer.data(), dst_stride); + texture->Update(0, skip_y, width, height, m_upload_buffer.data(), dst_stride); return true; } template -ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip) +ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 skip_y, u32 width, u32 height, + u32 line_skip) { using OutputPixelType = std::conditional_t(src_x, src_y, width, height, line_skip); + return CopyOut15Bit(src_x, src_y, skip_y, width, height, line_skip); case GPUTexture::Format::RGB565: - return CopyOut15Bit(src_x, src_y, width, height, line_skip); + return CopyOut15Bit(src_x, src_y, skip_y, width, height, line_skip); case GPUTexture::Format::RGBA8: - return CopyOut15Bit(src_x, src_y, width, height, line_skip); + return CopyOut15Bit(src_x, src_y, skip_y, width, height, line_skip); case GPUTexture::Format::BGRA8: - return CopyOut15Bit(src_x, src_y, width, height, line_skip); + return CopyOut15Bit(src_x, src_y, skip_y, width, height, line_skip); default: UnreachableCode(); @@ -403,16 +407,16 @@ bool GPU_SW::CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u3 switch (m_24bit_display_format) { case GPUTexture::Format::RGBA5551: - return CopyOut24Bit(src_x, src_y, skip_x, width, height, line_skip); + return CopyOut24Bit(src_x, src_y, skip_x, skip_y, width, height, line_skip); case GPUTexture::Format::RGB565: - return CopyOut24Bit(src_x, src_y, skip_x, width, height, line_skip); + return CopyOut24Bit(src_x, src_y, skip_x, skip_y, width, height, line_skip); case GPUTexture::Format::RGBA8: - return CopyOut24Bit(src_x, src_y, skip_x, width, height, line_skip); + return CopyOut24Bit(src_x, src_y, skip_x, skip_y, width, 
height, line_skip); case GPUTexture::Format::BGRA8: - return CopyOut24Bit(src_x, src_y, skip_x, width, height, line_skip); + return CopyOut24Bit(src_x, src_y, skip_x, skip_y, width, height, line_skip); default: UnreachableCode(); @@ -420,7 +424,7 @@ bool GPU_SW::CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u3 } } -void GPU_SW::UpdateDisplay() +void GPU_SW::UpdateDisplay(bool partial, u32 start_line, u32 end_line) { // fill display texture m_backend.Sync(true); @@ -440,13 +444,14 @@ void GPU_SW::UpdateDisplay() const u32 vram_offset_y = m_crtc_state.display_vram_top + ((interlaced && m_GPUSTAT.vertical_resolution) ? field : 0); const u32 skip_x = is_24bit ? (m_crtc_state.display_vram_left - m_crtc_state.regs.X) : 0; + const u32 skip_y = partial ? start_line : 0; const u32 read_width = m_crtc_state.display_vram_width; const u32 read_height = interlaced ? (m_crtc_state.display_vram_height / 2) : m_crtc_state.display_vram_height; if (IsInterlacedDisplayEnabled()) { const u32 line_skip = m_GPUSTAT.vertical_resolution; - if (CopyOut(vram_offset_x, vram_offset_y, skip_x, read_width, read_height, line_skip, is_24bit)) + if (CopyOut(vram_offset_x, vram_offset_y, skip_x, skip_y, read_width, read_height, line_skip, is_24bit)) { SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, read_width, read_height); if (is_24bit && g_settings.display_24bit_chroma_smoothing) @@ -462,7 +467,7 @@ void GPU_SW::UpdateDisplay() } else { - if (CopyOut(vram_offset_x, vram_offset_y, skip_x, read_width, read_height, 0, is_24bit)) + if (CopyOut(vram_offset_x, vram_offset_y, skip_x, skip_y, read_width, read_height, 0, is_24bit)) { SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, read_width, read_height); if (is_24bit && g_settings.display_24bit_chroma_smoothing) @@ -472,7 +477,7 @@ void GPU_SW::UpdateDisplay() } else { - if (CopyOut(0, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, 0, false)) + if (CopyOut(0, 0, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, 0, false)) 
SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, VRAM_WIDTH, VRAM_HEIGHT); } } diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index 7e510ffaa..813cfe00f 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -44,14 +44,14 @@ protected: void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override; template - bool CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 line_skip); + bool CopyOut15Bit(u32 src_x, u32 src_y, u32 skip_y, u32 width, u32 height, u32 line_skip); template - bool CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip); + bool CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 skip_y, u32 width, u32 height, u32 line_skip); - bool CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip, bool is_24bit); + bool CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 skip_y, u32 width, u32 height, u32 line_skip, bool is_24bit); - void UpdateDisplay() override; + void UpdateDisplay(bool partial, u32 start_line, u32 end_line) override; void DispatchRenderCommand() override; diff --git a/src/core/timers.cpp b/src/core/timers.cpp index 3dc0ff66d..5ba60e4fc 100644 --- a/src/core/timers.cpp +++ b/src/core/timers.cpp @@ -254,7 +254,7 @@ void Timers::CheckForIRQ(u32 timer, u32 old_counter) if (!cs.irq_done || cs.mode.irq_repeat) { // this is actually low for a few cycles - DEBUG_LOG("Raising timer {} pulse IRQ", timer); + DEBUG_LOG("Raising timer {} pulse IRQ @ {}", timer, g_gpu->GetCRTCCurrentScanline()); InterruptController::SetLineState(irqnum, false); InterruptController::SetLineState(irqnum, true); } @@ -377,8 +377,8 @@ void Timers::WriteRegister(u32 offset, u32 value) case 0x00: { const u32 old_counter = cs.counter; - DEBUG_LOG("Timer {} write counter {}", timer_index, value); - cs.counter = value & u32(0xFFFF); + DEBUG_LOG("Timer {} write counter {}", timer_index, ZeroExtend32(Truncate16(value))); + cs.counter = ZeroExtend32(Truncate16(value)); CheckForIRQ(timer_index, 
old_counter); if (timer_index == 2 || !cs.external_counting_enabled) UpdateSysClkEvent(); @@ -388,28 +388,42 @@ void Timers::WriteRegister(u32 offset, u32 value) case 0x04: { static constexpr u32 WRITE_MASK = 0b1110001111111111; + const bool prev_external_counting_enabled = cs.external_counting_enabled; - DEBUG_LOG("Timer {} write mode register 0x{:04X}", timer_index, value); + DEBUG_LOG("Timer {} write mode register 0x{:04X} @ scanline {}", timer_index, value, g_gpu->GetCRTCCurrentScanline()); cs.mode.bits = (value & WRITE_MASK) | (cs.mode.bits & ~WRITE_MASK); cs.use_external_clock = (cs.mode.clock_source & (timer_index == 2 ? 2 : 1)) != 0; + UpdateCountingEnabled(cs); + + // Need to re-sync GPU if ext counting changed, since we're resetting the counter. + if (timer_index < 2 && !prev_external_counting_enabled && cs.external_counting_enabled) + { + if (timer_index == 0 || g_gpu->IsCRTCScanlinePending()) + g_gpu->SynchronizeCRTC(); + } + cs.counter = 0; cs.irq_done = false; InterruptController::SetLineState( static_cast(static_cast(InterruptController::IRQ::TMR0) + timer_index), false); - UpdateCountingEnabled(cs); CheckForIRQ(timer_index, cs.counter); - UpdateSysClkEvent(); + if (timer_index == 2 || !cs.external_counting_enabled) + UpdateSysClkEvent(); + else if (timer_index < 2 && cs.external_counting_enabled) + g_gpu->UpdateCRTCTickEvent(); } break; case 0x08: { - DEBUG_LOG("Timer {} write target 0x{:04X}", timer_index, ZeroExtend32(Truncate16(value))); - cs.target = value & u32(0xFFFF); + DEBUG_LOG("Timer {} write target {} @ {}", timer_index, ZeroExtend32(Truncate16(value)), g_gpu->GetCRTCCurrentScanline()); + cs.target = ZeroExtend32(Truncate16(value)); CheckForIRQ(timer_index, cs.counter); if (timer_index == 2 || !cs.external_counting_enabled) UpdateSysClkEvent(); + else if (timer_index < 2 && cs.external_counting_enabled) + g_gpu->UpdateCRTCTickEvent(); } break;