From 36df91aba021e811f0e906ec63e547ef1cf80b8a Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sat, 27 Apr 2024 21:50:18 +1000 Subject: [PATCH] GPU: Improve CRTC hblank accuracy And implement Timer0 gating, it was missing previously. --- src/core/gpu.cpp | 154 ++++++++++++++++++++++++++++++-------------- src/core/gpu.h | 23 ++++++- src/core/timers.cpp | 11 ++-- 3 files changed, 130 insertions(+), 58 deletions(-) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index 62998ab8d..b4dd0751d 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -607,24 +607,10 @@ void GPU::UpdateCRTCConfig() static constexpr std::array dot_clock_dividers = {{10, 8, 5, 4, 7, 7, 7, 7}}; CRTCState& cs = m_crtc_state; - if (m_GPUSTAT.pal_mode) - { - cs.vertical_total = PAL_TOTAL_LINES; - cs.current_scanline %= PAL_TOTAL_LINES; - cs.horizontal_total = PAL_TICKS_PER_LINE; - cs.horizontal_sync_start = PAL_HSYNC_TICKS; - cs.current_tick_in_scanline %= System::ScaleTicksToOverclock(PAL_TICKS_PER_LINE); - } - else - { - cs.vertical_total = NTSC_TOTAL_LINES; - cs.current_scanline %= NTSC_TOTAL_LINES; - cs.horizontal_total = NTSC_TICKS_PER_LINE; - cs.horizontal_sync_start = NTSC_HSYNC_TICKS; - cs.current_tick_in_scanline %= System::ScaleTicksToOverclock(NTSC_TICKS_PER_LINE); - } - - cs.in_hblank = (cs.current_tick_in_scanline >= cs.horizontal_sync_start); + cs.vertical_total = m_GPUSTAT.pal_mode ? PAL_TOTAL_LINES : NTSC_TOTAL_LINES; + cs.horizontal_total = m_GPUSTAT.pal_mode ? PAL_TICKS_PER_LINE : NTSC_TICKS_PER_LINE; + cs.horizontal_active_start = m_GPUSTAT.pal_mode ? PAL_HORIZONTAL_ACTIVE_START : NTSC_HORIZONTAL_ACTIVE_START; + cs.horizontal_active_end = m_GPUSTAT.pal_mode ? 
PAL_HORIZONTAL_ACTIVE_END : NTSC_HORIZONTAL_ACTIVE_END; const u8 horizontal_resolution_index = m_GPUSTAT.horizontal_resolution_1 | (m_GPUSTAT.horizontal_resolution_2 << 2); cs.dot_clock_divider = dot_clock_dividers[horizontal_resolution_index]; @@ -658,8 +644,17 @@ void GPU::UpdateCRTCConfig() static_cast(System::ScaleTicksToOverclock(static_cast(cs.horizontal_display_start))); cs.horizontal_display_end = static_cast(System::ScaleTicksToOverclock(static_cast(cs.horizontal_display_end))); + cs.horizontal_active_start = + static_cast(System::ScaleTicksToOverclock(static_cast(cs.horizontal_active_start))); + cs.horizontal_active_end = + static_cast(System::ScaleTicksToOverclock(static_cast(cs.horizontal_active_end))); cs.horizontal_total = static_cast(System::ScaleTicksToOverclock(static_cast(cs.horizontal_total))); + cs.current_tick_in_scanline %= cs.horizontal_total; + cs.UpdateHBlankFlag(); + + cs.current_scanline %= cs.vertical_total; + System::SetThrottleFrequency(ComputeVerticalFrequency()); UpdateCRTCDisplayParameters(); @@ -872,20 +867,36 @@ void GPU::UpdateCRTCTickEvent() ticks_until_event = std::min(ticks_until_event, std::max(ticks_until_irq, 0)); } -#if 0 - const TickCount ticks_until_hblank = - (m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_display_end) ? - (m_crtc_state.horizontal_total - m_crtc_state.current_tick_in_scanline + m_crtc_state.horizontal_display_end) : - (m_crtc_state.horizontal_display_end - m_crtc_state.current_tick_in_scanline); -#endif + if (Timers::IsSyncEnabled(DOT_TIMER_INDEX)) + { + // This could potentially be optimized to skip the time the gate is active, if we're resetting and free running. + // But realistically, I've only seen sync off (most games), or reset+pause on gate (Konami Lightgun games). 
+ TickCount ticks_until_hblank_start_or_end; + if (m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_active_end) + { + ticks_until_hblank_start_or_end = + m_crtc_state.horizontal_total - m_crtc_state.current_tick_in_scanline + m_crtc_state.horizontal_active_start; + } + else if (m_crtc_state.current_tick_in_scanline < m_crtc_state.horizontal_active_start) + { + ticks_until_hblank_start_or_end = m_crtc_state.horizontal_active_start - m_crtc_state.current_tick_in_scanline; + } + else + { + ticks_until_hblank_start_or_end = m_crtc_state.horizontal_active_end - m_crtc_state.current_tick_in_scanline; + } + + ticks_until_event = std::min(ticks_until_event, ticks_until_hblank_start_or_end); + } m_crtc_tick_event->Schedule(CRTCTicksToSystemTicks(ticks_until_event, m_crtc_state.fractional_ticks)); } bool GPU::IsCRTCScanlinePending() const { + // TODO: Most of these should be fields, not lines. const TickCount ticks = (GetPendingCRTCTicks() + m_crtc_state.current_tick_in_scanline); - return (ticks >= (m_crtc_state.in_hblank ? 
m_crtc_state.horizontal_total : m_crtc_state.horizontal_sync_start)); + return (ticks >= m_crtc_state.horizontal_total); } bool GPU::IsCommandCompletionPending() const @@ -896,28 +907,32 @@ bool GPU::IsCommandCompletionPending() const void GPU::CRTCTickEvent(TickCount ticks) { // convert cpu/master clock to GPU ticks, accounting for partial cycles because of the non-integer divider - { - const TickCount gpu_ticks = SystemTicksToCRTCTicks(ticks, &m_crtc_state.fractional_ticks); - m_crtc_state.current_tick_in_scanline += gpu_ticks; + const TickCount prev_tick = m_crtc_state.current_tick_in_scanline; + const TickCount gpu_ticks = SystemTicksToCRTCTicks(ticks, &m_crtc_state.fractional_ticks); + m_crtc_state.current_tick_in_scanline += gpu_ticks; - if (Timers::IsUsingExternalClock(DOT_TIMER_INDEX)) - { - m_crtc_state.fractional_dot_ticks += gpu_ticks; - const TickCount dots = m_crtc_state.fractional_dot_ticks / m_crtc_state.dot_clock_divider; - m_crtc_state.fractional_dot_ticks = m_crtc_state.fractional_dot_ticks % m_crtc_state.dot_clock_divider; - if (dots > 0) - Timers::AddTicks(DOT_TIMER_INDEX, dots); - } + if (Timers::IsUsingExternalClock(DOT_TIMER_INDEX)) + { + m_crtc_state.fractional_dot_ticks += gpu_ticks; + const TickCount dots = m_crtc_state.fractional_dot_ticks / m_crtc_state.dot_clock_divider; + m_crtc_state.fractional_dot_ticks = m_crtc_state.fractional_dot_ticks % m_crtc_state.dot_clock_divider; + if (dots > 0) + Timers::AddTicks(DOT_TIMER_INDEX, dots); } if (m_crtc_state.current_tick_in_scanline < m_crtc_state.horizontal_total) { - // short path when we execute <1 line.. this shouldn't occur often. - const bool old_hblank = m_crtc_state.in_hblank; - const bool new_hblank = (m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_sync_start); - m_crtc_state.in_hblank = new_hblank; - if (!old_hblank && new_hblank && Timers::IsUsingExternalClock(HBLANK_TIMER_INDEX)) - Timers::AddTicks(HBLANK_TIMER_INDEX, 1); + // short path when we execute <1 line.. 
this shouldn't occur often, except when gated (konami lightgun games). + m_crtc_state.UpdateHBlankFlag(); + Timers::SetGate(DOT_TIMER_INDEX, m_crtc_state.in_hblank); + if (Timers::IsUsingExternalClock(HBLANK_TIMER_INDEX)) + { + const u32 hblank_timer_ticks = + BoolToUInt32(m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_active_end) - + BoolToUInt32(prev_tick >= m_crtc_state.horizontal_active_end); + if (hblank_timer_ticks > 0) + Timers::AddTicks(HBLANK_TIMER_INDEX, static_cast(hblank_timer_ticks)); + } UpdateCRTCTickEvent(); return; @@ -927,16 +942,23 @@ void GPU::CRTCTickEvent(TickCount ticks) m_crtc_state.current_tick_in_scanline %= m_crtc_state.horizontal_total; #if 0 Log_WarningPrintf("Old line: %u, new line: %u, drawing %u", m_crtc_state.current_scanline, - m_crtc_state.current_scanline + lines_to_draw, lines_to_draw); + m_crtc_state.current_scanline + lines_to_draw, lines_to_draw); #endif - const bool old_hblank = m_crtc_state.in_hblank; - const bool new_hblank = (m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_sync_start); - m_crtc_state.in_hblank = new_hblank; + m_crtc_state.UpdateHBlankFlag(); + Timers::SetGate(DOT_TIMER_INDEX, m_crtc_state.in_hblank); + if (Timers::IsUsingExternalClock(HBLANK_TIMER_INDEX)) { - const u32 hblank_timer_ticks = BoolToUInt32(!old_hblank) + BoolToUInt32(new_hblank) + (lines_to_draw - 1); - Timers::AddTicks(HBLANK_TIMER_INDEX, static_cast(hblank_timer_ticks)); + // lines_to_draw => number of times ticks passed horizontal_total. + // Subtract one if we were previously in hblank, but only on that line. If it was previously less than + // horizontal_active_start, we still want to add one, because hblank would have gone inactive, and then active again + // during the line. Finally add the current line being drawn, if hblank went inactive->active during the line. 
+ const u32 hblank_timer_ticks = + lines_to_draw - BoolToUInt32(prev_tick >= m_crtc_state.horizontal_active_end) + + BoolToUInt32(m_crtc_state.current_tick_in_scanline >= m_crtc_state.horizontal_active_end); + if (hblank_timer_ticks > 0) + Timers::AddTicks(HBLANK_TIMER_INDEX, static_cast(hblank_timer_ticks)); } while (lines_to_draw > 0) @@ -1080,11 +1102,45 @@ bool GPU::ConvertDisplayCoordinatesToBeamTicksAndLines(float display_x, float di *out_line = (static_cast(std::round(display_y)) >> BoolToUInt8(m_GPUSTAT.vertical_interlace)) + m_crtc_state.vertical_visible_start; - *out_tick = static_cast(std::round(display_x * static_cast(m_crtc_state.dot_clock_divider))) + + *out_tick = static_cast(System::ScaleTicksToOverclock( + static_cast(std::round(display_x * static_cast(m_crtc_state.dot_clock_divider))))) + m_crtc_state.horizontal_visible_start; return true; } +void GPU::GetBeamPosition(u32* out_ticks, u32* out_line) +{ + const u32 current_tick = (GetPendingCRTCTicks() + m_crtc_state.current_tick_in_scanline); + *out_line = + (m_crtc_state.current_scanline + (current_tick / m_crtc_state.horizontal_total)) % m_crtc_state.vertical_total; + *out_ticks = current_tick % m_crtc_state.horizontal_total; +} + +TickCount GPU::GetSystemTicksUntilTicksAndLine(u32 ticks, u32 line) +{ + u32 current_tick, current_line; + GetBeamPosition(&current_tick, &current_line); + + u32 ticks_to_target; + if (ticks >= current_tick) + { + ticks_to_target = ticks - current_tick; + } + else + { + ticks_to_target = (m_crtc_state.horizontal_total - current_tick) + ticks; + current_line = (current_line + 1) % m_crtc_state.vertical_total; + } + + const u32 lines_to_target = + (line >= current_line) ? 
(line - current_line) : ((m_crtc_state.vertical_total - current_line) + line); + + const TickCount total_ticks_to_target = + static_cast((lines_to_target * m_crtc_state.horizontal_total) + ticks_to_target); + + return CRTCTicksToSystemTicks(total_ticks_to_target, m_crtc_state.fractional_ticks); +} + u32 GPU::ReadGPUREAD() { if (m_blitter_state != BlitterState::ReadingVRAM) diff --git a/src/core/gpu.h b/src/core/gpu.h index 9ecda46ff..7f534aeb0 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -67,10 +67,8 @@ public: enum : u16 { NTSC_TICKS_PER_LINE = 3413, - NTSC_HSYNC_TICKS = 200, NTSC_TOTAL_LINES = 263, PAL_TICKS_PER_LINE = 3406, - PAL_HSYNC_TICKS = 200, // actually one more on odd lines PAL_TOTAL_LINES = 314, }; @@ -183,8 +181,19 @@ public: bool ConvertDisplayCoordinatesToBeamTicksAndLines(float display_x, float display_y, float x_scale, u32* out_tick, u32* out_line) const; + // Returns the current beam position. + void GetBeamPosition(u32* out_ticks, u32* out_line); + + // Returns the number of system clock ticks until the specified tick/line. + TickCount GetSystemTicksUntilTicksAndLine(u32 ticks, u32 line); + + // Returns the number of visible lines. + ALWAYS_INLINE u16 GetCRTCActiveStartLine() const { return m_crtc_state.vertical_display_start; } + ALWAYS_INLINE u16 GetCRTCActiveEndLine() const { return m_crtc_state.vertical_display_end; } + // Returns the video clock frequency. TickCount GetCRTCFrequency() const; + u16 GetCRTCDotClockDivider() const { return m_crtc_state.dot_clock_divider; } // Dumps raw VRAM to a file. 
bool DumpVRAMToFile(const char* filename); @@ -517,8 +526,10 @@ protected: u16 vertical_display_start; u16 vertical_display_end; + u16 horizontal_active_start; + u16 horizontal_active_end; + u16 horizontal_total; - u16 horizontal_sync_start; // <- not currently saved to state, so we don't have to bump the version u16 vertical_total; TickCount fractional_ticks; @@ -533,6 +544,12 @@ protected: u8 interlaced_field; // 0 = odd, 1 = even u8 interlaced_display_field; u8 active_line_lsb; + + ALWAYS_INLINE void UpdateHBlankFlag() + { + in_hblank = + (current_tick_in_scanline < horizontal_active_start || current_tick_in_scanline >= horizontal_active_end); + } } m_crtc_state = {}; BlitterState m_blitter_state = BlitterState::Idle; diff --git a/src/core/timers.cpp b/src/core/timers.cpp index eb5cb49d9..ecf2b3d01 100644 --- a/src/core/timers.cpp +++ b/src/core/timers.cpp @@ -166,7 +166,9 @@ void Timers::SetGate(u32 timer, bool state) if (!cs.mode.sync_enable) return; - if (cs.counting_enabled && !cs.use_external_clock) + // Because the gate prevents counting in or outside of the gate, we need a correct counter. + // For reset, we _can_ skip it, until the gate clears. 
+ if (!cs.use_external_clock && (cs.mode.sync_mode != SyncMode::ResetOnGate || !state)) s_sysclk_event->InvokeEarly(); if (state) @@ -415,11 +417,8 @@ void Timers::UpdateCountingEnabled(CounterState& cs) switch (cs.mode.sync_mode) { case SyncMode::PauseOnGate: - cs.counting_enabled = !cs.gate; - break; - case SyncMode::ResetOnGate: - cs.counting_enabled = true; + cs.counting_enabled = !cs.gate; break; case SyncMode::ResetAndRunOnGate: @@ -489,7 +488,7 @@ void Timers::DrawDebugStateWindow() const float framebuffer_scale = Host::GetOSDScale(); - ImGui::SetNextWindowSize(ImVec2(800.0f * framebuffer_scale, 100.0f * framebuffer_scale), ImGuiCond_FirstUseEver); + ImGui::SetNextWindowSize(ImVec2(800.0f * framebuffer_scale, 115.0f * framebuffer_scale), ImGuiCond_FirstUseEver); if (!ImGui::Begin("Timer State", nullptr)) { ImGui::End();