GPU partial scanout shenanigans, WIP

This commit is contained in:
Stenzek 2024-08-03 14:23:47 +10:00
parent 5b590d434b
commit 0090fee30e
No known key found for this signature in database
7 changed files with 97 additions and 39 deletions

View File

@ -196,7 +196,7 @@ void GPU::Reset(bool clear_vram)
m_command_tick_event.Deactivate();
SoftReset();
UpdateDisplay();
UpdateDisplay(false, 0, 0);
}
void GPU::SoftReset()
@ -373,7 +373,7 @@ bool GPU::DoState(StateWrapper& sw, GPUTexture** host_texture, bool update_displ
{
UpdateCRTCConfig();
if (update_display)
UpdateDisplay();
UpdateDisplay(false, 0, 0);
UpdateCommandTickEvent();
}
@ -950,6 +950,9 @@ void GPU::CRTCTickEvent(TickCount ticks)
Timers::AddTicks(HBLANK_TIMER_INDEX, static_cast<TickCount>(hblank_timer_ticks));
}
if (m_crtc_state.start_address_changed)
DoPartialScanout();
UpdateCRTCTickEvent();
return;
}
@ -995,6 +998,9 @@ void GPU::CRTCTickEvent(TickCount ticks)
m_crtc_state.in_vblank = false;
}
if (m_crtc_state.start_address_changed)
DoPartialScanout();
const bool new_vblank = m_crtc_state.current_scanline < m_crtc_state.vertical_display_start ||
m_crtc_state.current_scanline >= m_crtc_state.vertical_display_end;
if (m_crtc_state.in_vblank != new_vblank)
@ -1003,10 +1009,14 @@ void GPU::CRTCTickEvent(TickCount ticks)
{
DEBUG_LOG("Now in v-blank");
// flush any pending draws and "scan out" the image
// TODO: move present in here I guess
FlushRender();
UpdateDisplay();
if (m_crtc_state.last_scanout_line < m_crtc_state.display_vram_height)
{
UpdateDisplay(true, m_crtc_state.last_scanout_line, m_crtc_state.display_vram_height);
m_crtc_state.last_scanout_line = 0;
m_crtc_state.start_address_changed = false;
}
TimingEvents::SetFrameDone();
// switch fields early. this is needed so we draw to the correct one.
@ -1070,6 +1080,20 @@ void GPU::CRTCTickEvent(TickCount ticks)
UpdateCRTCTickEvent();
}
void GPU::DoPartialScanout()
{
const u32 vram_first_line = m_crtc_state.vertical_visible_start + m_crtc_state.display_origin_top;
const u32 vram_line = (m_crtc_state.current_scanline < vram_first_line) ? 0 : std::min<u32>(m_crtc_state.current_scanline - vram_first_line, m_crtc_state.display_vram_height);
if (vram_line != m_crtc_state.last_scanout_line && m_crtc_state.start_address_changed)
{
if (m_crtc_state.last_scanout_line < m_crtc_state.display_vram_height)
UpdateDisplay(true, m_crtc_state.last_scanout_line, vram_line);
m_crtc_state.last_scanout_line = vram_line;
m_crtc_state.start_address_changed = false;
}
}
void GPU::CommandTickEvent(TickCount ticks)
{
m_pending_command_ticks -= SystemTicksToGPUTicks(ticks);
@ -1286,8 +1310,16 @@ void GPU::WriteGP1(u32 value)
System::IncrementInternalFrameNumber();
if (m_crtc_state.regs.display_address_start != new_value)
{
m_crtc_state.start_address_changed = true;
SynchronizeCRTC();
m_crtc_state.regs.display_address_start = new_value;
if (!m_crtc_state.in_vblank)
{
GL_INS_FMT("Display address start set to ({},{}) at scanline {}", m_crtc_state.regs.X.GetValue(),
m_crtc_state.regs.Y.GetValue(), m_crtc_state.current_scanline);
}
UpdateCRTCDisplayParameters();
OnBufferSwapped();
}

View File

@ -193,6 +193,7 @@ public:
// Returns the number of visible lines.
ALWAYS_INLINE u16 GetCRTCActiveStartLine() const { return m_crtc_state.vertical_display_start; }
ALWAYS_INLINE u16 GetCRTCActiveEndLine() const { return m_crtc_state.vertical_display_end; }
ALWAYS_INLINE u32 GetCRTCCurrentScanline() const { return m_crtc_state.current_scanline; }
// Returns the video clock frequency.
TickCount GetCRTCFrequency() const;
@ -258,7 +259,9 @@ protected:
void UpdateCRTCDisplayParameters();
// Update ticks for this execution slice
public:
void UpdateCRTCTickEvent();
protected:
void UpdateCommandTickEvent();
// Updates dynamic bits in GPUSTAT (ready to send VRAM/ready to receive DMA)
@ -268,6 +271,7 @@ protected:
// Ticks for hblank/vblank.
void CRTCTickEvent(TickCount ticks);
void CommandTickEvent(TickCount ticks);
void DoPartialScanout();
/// Returns 0 if the currently-displayed field is on odd lines (1,3,5,...) or 1 if even (2,4,6,...).
ALWAYS_INLINE u32 GetInterlacedDisplayField() const { return ZeroExtend32(m_crtc_state.interlaced_field); }
@ -315,7 +319,7 @@ protected:
virtual void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height);
virtual void DispatchRenderCommand() = 0;
virtual void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) = 0;
virtual void UpdateDisplay() = 0;
virtual void UpdateDisplay(bool partial, u32 start_line, u32 end_line) = 0;
virtual void DrawRendererStats();
virtual void OnBufferSwapped();
@ -536,6 +540,9 @@ protected:
TickCount fractional_dot_ticks; // only used when timer0 is enabled
u32 last_scanout_line;
bool start_address_changed;
bool in_hblank;
bool in_vblank;

View File

@ -503,7 +503,7 @@ void GPU_HW::UpdateSettings(const Settings& old_settings)
UpdateVRAM(0, 0, VRAM_WIDTH, VRAM_HEIGHT, g_vram, false, false);
if (m_write_mask_as_depth)
UpdateDepthBufferFromMaskBit();
UpdateDisplay();
UpdateDisplay(false, 0, 0);
}
else if (m_vram_depth_texture && depth_buffer_changed)
{
@ -3593,7 +3593,7 @@ void GPU_HW::FlushRender()
}
}
void GPU_HW::UpdateDisplay()
void GPU_HW::UpdateDisplay(bool partial, u32 start_line, u32 end_line)
{
FlushRender();
DeactivateROV();

View File

@ -71,7 +71,7 @@ public:
std::tuple<u32, u32> GetEffectiveDisplayResolution(bool scaled = true) override;
std::tuple<u32, u32> GetFullDisplayResolution(bool scaled = true) override;
void UpdateDisplay() override;
void UpdateDisplay(bool partial, u32 start_line, u32 end_line) override;
private:
enum : u32

View File

@ -93,7 +93,7 @@ GPUTexture* GPU_SW::GetDisplayTexture(u32 width, u32 height, GPUTexture::Format
ClearDisplayTexture();
g_gpu_device->RecycleTexture(std::move(m_upload_texture));
m_upload_texture =
g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::DynamicTexture, format, nullptr, 0);
g_gpu_device->FetchTexture(width, height, 1, 1, 1, GPUTexture::Type::Texture, format, nullptr, 0);
if (!m_upload_texture) [[unlikely]]
ERROR_LOG("Failed to create {}x{} {} texture", width, height, static_cast<u32>(format));
}
@ -202,7 +202,7 @@ ALWAYS_INLINE void CopyOutRow16<GPUTexture::Format::BGRA8, u32>(const u16* src_p
}
template<GPUTexture::Format display_format>
ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 line_skip)
ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 skip_y, u32 width, u32 height, u32 line_skip)
{
using OutputPixelType =
std::conditional_t<display_format == GPUTexture::Format::RGBA8 || display_format == GPUTexture::Format::BGRA8, u32,
@ -212,9 +212,12 @@ ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width,
if (!texture) [[unlikely]]
return false;
src_y += skip_y;
height -= skip_y;
u32 dst_stride = width * sizeof(OutputPixelType);
u8* dst_ptr = m_upload_buffer.data();
const bool mapped = texture->Map(reinterpret_cast<void**>(&dst_ptr), &dst_stride, 0, 0, width, height);
const bool mapped = (skip_y == 0 && texture->Map(reinterpret_cast<void**>(&dst_ptr), &dst_stride, 0, 0, width, height));
// Fast path when not wrapping around.
if ((src_x + width) <= VRAM_WIDTH && (src_y + height) <= VRAM_HEIGHT)
@ -248,13 +251,14 @@ ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32 width,
if (mapped)
texture->Unmap();
else
texture->Update(0, 0, width, height, m_upload_buffer.data(), dst_stride);
texture->Update(0, skip_y, width, height, m_upload_buffer.data(), dst_stride);
return true;
}
template<GPUTexture::Format display_format>
ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip)
ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 skip_y, u32 width, u32 height,
u32 line_skip)
{
using OutputPixelType =
std::conditional_t<display_format == GPUTexture::Format::RGBA8 || display_format == GPUTexture::Format::BGRA8, u32,
@ -374,7 +378,7 @@ ALWAYS_INLINE_RELEASE bool GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x
return true;
}
bool GPU_SW::CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip, bool is_24bit)
bool GPU_SW::CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 skip_y, u32 width, u32 height, u32 line_skip, bool is_24bit)
{
if (!is_24bit)
{
@ -383,16 +387,16 @@ bool GPU_SW::CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u3
switch (m_16bit_display_format)
{
case GPUTexture::Format::RGBA5551:
return CopyOut15Bit<GPUTexture::Format::RGBA5551>(src_x, src_y, width, height, line_skip);
return CopyOut15Bit<GPUTexture::Format::RGBA5551>(src_x, src_y, skip_y, width, height, line_skip);
case GPUTexture::Format::RGB565:
return CopyOut15Bit<GPUTexture::Format::RGB565>(src_x, src_y, width, height, line_skip);
return CopyOut15Bit<GPUTexture::Format::RGB565>(src_x, src_y, skip_y, width, height, line_skip);
case GPUTexture::Format::RGBA8:
return CopyOut15Bit<GPUTexture::Format::RGBA8>(src_x, src_y, width, height, line_skip);
return CopyOut15Bit<GPUTexture::Format::RGBA8>(src_x, src_y, skip_y, width, height, line_skip);
case GPUTexture::Format::BGRA8:
return CopyOut15Bit<GPUTexture::Format::BGRA8>(src_x, src_y, width, height, line_skip);
return CopyOut15Bit<GPUTexture::Format::BGRA8>(src_x, src_y, skip_y, width, height, line_skip);
default:
UnreachableCode();
@ -403,16 +407,16 @@ bool GPU_SW::CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u3
switch (m_24bit_display_format)
{
case GPUTexture::Format::RGBA5551:
return CopyOut24Bit<GPUTexture::Format::RGBA5551>(src_x, src_y, skip_x, width, height, line_skip);
return CopyOut24Bit<GPUTexture::Format::RGBA5551>(src_x, src_y, skip_x, skip_y, width, height, line_skip);
case GPUTexture::Format::RGB565:
return CopyOut24Bit<GPUTexture::Format::RGB565>(src_x, src_y, skip_x, width, height, line_skip);
return CopyOut24Bit<GPUTexture::Format::RGB565>(src_x, src_y, skip_x, skip_y, width, height, line_skip);
case GPUTexture::Format::RGBA8:
return CopyOut24Bit<GPUTexture::Format::RGBA8>(src_x, src_y, skip_x, width, height, line_skip);
return CopyOut24Bit<GPUTexture::Format::RGBA8>(src_x, src_y, skip_x, skip_y, width, height, line_skip);
case GPUTexture::Format::BGRA8:
return CopyOut24Bit<GPUTexture::Format::BGRA8>(src_x, src_y, skip_x, width, height, line_skip);
return CopyOut24Bit<GPUTexture::Format::BGRA8>(src_x, src_y, skip_x, skip_y, width, height, line_skip);
default:
UnreachableCode();
@ -420,7 +424,7 @@ bool GPU_SW::CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u3
}
}
void GPU_SW::UpdateDisplay()
void GPU_SW::UpdateDisplay(bool partial, u32 start_line, u32 end_line)
{
// fill display texture
m_backend.Sync(true);
@ -440,13 +444,14 @@ void GPU_SW::UpdateDisplay()
const u32 vram_offset_y =
m_crtc_state.display_vram_top + ((interlaced && m_GPUSTAT.vertical_resolution) ? field : 0);
const u32 skip_x = is_24bit ? (m_crtc_state.display_vram_left - m_crtc_state.regs.X) : 0;
const u32 skip_y = partial ? start_line : 0;
const u32 read_width = m_crtc_state.display_vram_width;
const u32 read_height = interlaced ? (m_crtc_state.display_vram_height / 2) : m_crtc_state.display_vram_height;
if (IsInterlacedDisplayEnabled())
{
const u32 line_skip = m_GPUSTAT.vertical_resolution;
if (CopyOut(vram_offset_x, vram_offset_y, skip_x, read_width, read_height, line_skip, is_24bit))
if (CopyOut(vram_offset_x, vram_offset_y, skip_x, skip_y, read_width, read_height, line_skip, is_24bit))
{
SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, read_width, read_height);
if (is_24bit && g_settings.display_24bit_chroma_smoothing)
@ -462,7 +467,7 @@ void GPU_SW::UpdateDisplay()
}
else
{
if (CopyOut(vram_offset_x, vram_offset_y, skip_x, read_width, read_height, 0, is_24bit))
if (CopyOut(vram_offset_x, vram_offset_y, skip_x, skip_y, read_width, read_height, 0, is_24bit))
{
SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, read_width, read_height);
if (is_24bit && g_settings.display_24bit_chroma_smoothing)
@ -472,7 +477,7 @@ void GPU_SW::UpdateDisplay()
}
else
{
if (CopyOut(0, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, 0, false))
if (CopyOut(0, 0, 0, 0, VRAM_WIDTH, VRAM_HEIGHT, 0, false))
SetDisplayTexture(m_upload_texture.get(), nullptr, 0, 0, VRAM_WIDTH, VRAM_HEIGHT);
}
}

View File

@ -44,14 +44,14 @@ protected:
void UpdateCLUT(GPUTexturePaletteReg reg, bool clut_is_8bit) override;
template<GPUTexture::Format display_format>
bool CopyOut15Bit(u32 src_x, u32 src_y, u32 width, u32 height, u32 line_skip);
bool CopyOut15Bit(u32 src_x, u32 src_y, u32 skip_y, u32 width, u32 height, u32 line_skip);
template<GPUTexture::Format display_format>
bool CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip);
bool CopyOut24Bit(u32 src_x, u32 src_y, u32 skip_x, u32 skip_y, u32 width, u32 height, u32 line_skip);
bool CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 width, u32 height, u32 line_skip, bool is_24bit);
bool CopyOut(u32 src_x, u32 src_y, u32 skip_x, u32 skip_y, u32 width, u32 height, u32 line_skip, bool is_24bit);
void UpdateDisplay() override;
void UpdateDisplay(bool partial, u32 start_line, u32 end_line) override;
void DispatchRenderCommand() override;

View File

@ -254,7 +254,7 @@ void Timers::CheckForIRQ(u32 timer, u32 old_counter)
if (!cs.irq_done || cs.mode.irq_repeat)
{
// this is actually low for a few cycles
DEBUG_LOG("Raising timer {} pulse IRQ", timer);
DEBUG_LOG("Raising timer {} pulse IRQ @ {}", timer, g_gpu->GetCRTCCurrentScanline());
InterruptController::SetLineState(irqnum, false);
InterruptController::SetLineState(irqnum, true);
}
@ -377,8 +377,8 @@ void Timers::WriteRegister(u32 offset, u32 value)
case 0x00:
{
const u32 old_counter = cs.counter;
DEBUG_LOG("Timer {} write counter {}", timer_index, value);
cs.counter = value & u32(0xFFFF);
DEBUG_LOG("Timer {} write counter {}", timer_index, ZeroExtend32(Truncate16(value)));
cs.counter = ZeroExtend32(Truncate16(value));
CheckForIRQ(timer_index, old_counter);
if (timer_index == 2 || !cs.external_counting_enabled)
UpdateSysClkEvent();
@ -388,28 +388,42 @@ void Timers::WriteRegister(u32 offset, u32 value)
case 0x04:
{
static constexpr u32 WRITE_MASK = 0b1110001111111111;
const bool prev_external_counting_enabled = cs.external_counting_enabled;
DEBUG_LOG("Timer {} write mode register 0x{:04X}", timer_index, value);
DEBUG_LOG("Timer {} write mode register 0x{:04X} @ scaline {}", timer_index, value, g_gpu->GetCRTCCurrentScanline());
cs.mode.bits = (value & WRITE_MASK) | (cs.mode.bits & ~WRITE_MASK);
cs.use_external_clock = (cs.mode.clock_source & (timer_index == 2 ? 2 : 1)) != 0;
UpdateCountingEnabled(cs);
// Need to re-sync GPU if ext counting changed, since we're resetting the counter.
if (timer_index < 2 && !prev_external_counting_enabled && cs.external_counting_enabled)
{
if (timer_index == 0 || g_gpu->IsCRTCScanlinePending())
g_gpu->SynchronizeCRTC();
}
cs.counter = 0;
cs.irq_done = false;
InterruptController::SetLineState(
static_cast<InterruptController::IRQ>(static_cast<u32>(InterruptController::IRQ::TMR0) + timer_index), false);
UpdateCountingEnabled(cs);
CheckForIRQ(timer_index, cs.counter);
UpdateSysClkEvent();
if (timer_index == 2 || !cs.external_counting_enabled)
UpdateSysClkEvent();
else if (timer_index < 2 && cs.external_counting_enabled)
g_gpu->UpdateCRTCTickEvent();
}
break;
case 0x08:
{
DEBUG_LOG("Timer {} write target 0x{:04X}", timer_index, ZeroExtend32(Truncate16(value)));
cs.target = value & u32(0xFFFF);
DEBUG_LOG("Timer {} write target {} @ {}", timer_index, ZeroExtend32(Truncate16(value)), g_gpu->GetCRTCCurrentScanline());
cs.target = ZeroExtend32(Truncate16(value));
CheckForIRQ(timer_index, cs.counter);
if (timer_index == 2 || !cs.external_counting_enabled)
UpdateSysClkEvent();
else if (timer_index < 2 && cs.external_counting_enabled)
g_gpu->UpdateCRTCTickEvent();
}
break;