GPU: Implement non-interleaved interlaced rendering

Fixes screen shaking in True Pinball.
This commit is contained in:
Connor McLaughlin 2020-05-26 03:18:04 +10:00
parent 0daea7c2fc
commit e368dbbadc
11 changed files with 76 additions and 43 deletions

View File

@ -497,7 +497,7 @@ void GPU::UpdateCRTCDisplayParameters()
}
}
const u8 height_shift = BoolToUInt8(m_GPUSTAT.In480iMode());
const u8 height_shift = BoolToUInt8(m_GPUSTAT.vertical_interlace);
// Determine screen size.
cs.display_width = (((cs.horizontal_active_end - cs.horizontal_active_start) / cs.dot_clock_divider) + 2u) & ~3u;
@ -711,16 +711,16 @@ void GPU::Execute(TickCount ticks)
}
// alternating even line bit in 240-line mode
if (m_GPUSTAT.In480iMode())
if (m_GPUSTAT.vertical_interlace)
{
m_crtc_state.displaying_odd_lines =
ConvertToBoolUnchecked((m_crtc_state.regs.Y + BoolToUInt32(m_crtc_state.displaying_odd_field)) & u32(1));
m_GPUSTAT.displaying_odd_line = m_crtc_state.displaying_odd_lines && !m_crtc_state.in_vblank;
m_GPUSTAT.drawing_odd_lines = !m_crtc_state.displaying_odd_lines && !m_crtc_state.in_vblank;
}
else
{
m_crtc_state.displaying_odd_lines = false;
m_GPUSTAT.displaying_odd_line =
m_GPUSTAT.drawing_odd_lines =
ConvertToBoolUnchecked((m_crtc_state.regs.Y + m_crtc_state.current_scanline) & u32(1));
}
@ -743,7 +743,8 @@ bool GPU::ConvertScreenCoordinatesToBeamTicksAndLines(s32 window_x, s32 window_y
return false;
}
*out_line = (static_cast<u32>(display_y) >> BoolToUInt8(m_GPUSTAT.In480iMode())) + m_crtc_state.vertical_active_start;
*out_line =
(static_cast<u32>(display_y) >> BoolToUInt8(m_GPUSTAT.vertical_interlace)) + m_crtc_state.vertical_active_start;
*out_tick = (static_cast<u32>(display_x) * m_crtc_state.dot_clock_divider) + m_crtc_state.horizontal_active_start;
return true;
}

View File

@ -345,7 +345,10 @@ protected:
}
/// Returns true if scanout should be interlaced.
ALWAYS_INLINE bool IsInterlacedDisplayEnabled() const { return (!m_force_progressive_scan) & m_GPUSTAT.In480iMode(); }
/// True when m_GPUSTAT.vertical_interlace is set and m_force_progressive_scan is false.
ALWAYS_INLINE bool IsInterlacedDisplayEnabled() const
{
// Bitwise & (not &&): both operands are always evaluated, there is no short-circuit.
return (!m_force_progressive_scan) & m_GPUSTAT.vertical_interlace;
}
/// Returns true if interlaced rendering is enabled and force progressive scan is disabled.
ALWAYS_INLINE bool IsInterlacedRenderingEnabled() const
@ -443,18 +446,13 @@ protected:
BitField<u32, bool, 27, 1> ready_to_send_vram;
BitField<u32, bool, 28, 1> ready_to_recieve_dma;
BitField<u32, DMADirection, 29, 2> dma_direction;
BitField<u32, bool, 31, 1> displaying_odd_line;
BitField<u32, bool, 31, 1> drawing_odd_lines;
bool IsMaskingEnabled() const
{
static constexpr u32 MASK = ((1 << 11) | (1 << 12));
return ((bits & MASK) != 0);
}
/// Returns true only when bit 19 and bit 22 of `bits` are both set.
bool In480iMode() const
{
// Both bits are required, hence the comparison against MASK itself rather than != 0.
static constexpr u32 MASK = (1 << 19) | (1 << 22);
return ((bits & MASK) == MASK);
}
bool SkipDrawingToActiveField() const
{
static constexpr u32 MASK = (1 << 19) | (1 << 22) | (1 << 10);

View File

@ -27,6 +27,13 @@ public:
OnlyTransparent
};
/// How the display pass treats the fields of an interlaced frame.
/// The enumerator values are cast to/from u8 and used directly as the second index of the
/// per-backend display shader/program arrays, which are sized for three entries.
enum class InterlacedRenderMode : u8
{
// Scanout is not interlaced (IsInterlacedDisplayEnabled() is false).
None,
// Interlaced scanout, selected when m_GPUSTAT.vertical_resolution is set.
InterleavedFields,
// Interlaced scanout, selected when m_GPUSTAT.vertical_resolution is clear.
SeparateFields
};
GPU_HW();
virtual ~GPU_HW();
@ -189,6 +196,20 @@ protected:
return m_batch.check_mask_before_draw || m_render_api != HostDisplay::RenderAPI::D3D11;
}
/// Chooses the field layout the scanout/display pass should use for the current GPU state.
ALWAYS_INLINE InterlacedRenderMode GetInterlacedRenderMode() const
{
  // No field handling is needed unless interlaced display is enabled.
  if (!IsInterlacedDisplayEnabled())
    return InterlacedRenderMode::None;

  // The vertical_resolution flag decides between the two interlaced layouts.
  if (m_GPUSTAT.vertical_resolution)
    return InterlacedRenderMode::InterleavedFields;

  return InterlacedRenderMode::SeparateFields;
}
void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override;
void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override;
void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override;

View File

@ -448,10 +448,10 @@ bool GPU_HW_D3D11::CompileShaders()
for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++)
{
for (u8 interlacing = 0; interlacing < 2; interlacing++)
for (u8 interlacing = 0; interlacing < 3; interlacing++)
{
const std::string ps = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit),
ConvertToBoolUnchecked(interlacing));
static_cast<InterlacedRenderMode>(interlacing));
m_display_pixel_shaders[depth_24bit][interlacing] = m_shader_cache.GetPixelShader(m_device.Get(), ps);
if (!m_display_pixel_shaders[depth_24bit][interlacing])
return false;
@ -590,13 +590,13 @@ void GPU_HW_D3D11::UpdateDisplay()
const u32 display_height = m_crtc_state.display_vram_height;
const u32 scaled_display_width = display_width * m_resolution_scale;
const u32 scaled_display_height = display_height * m_resolution_scale;
const bool interlaced = IsInterlacedDisplayEnabled();
const InterlacedRenderMode interlaced = GetInterlacedRenderMode();
if (IsDisplayDisabled())
{
m_host_display->ClearDisplayTexture();
}
else if (!m_GPUSTAT.display_area_color_depth_24 && !interlaced &&
else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None &&
(scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() &&
(scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight())
{
@ -616,9 +616,9 @@ void GPU_HW_D3D11::UpdateDisplay()
const u32 uniforms[4] = {reinterpret_start_x, scaled_vram_offset_y, reinterpret_crop_left,
reinterpret_field_offset};
ID3D11PixelShader* display_pixel_shader =
m_display_pixel_shaders[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)].Get();
m_display_pixel_shaders[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast<u8>(interlaced)].Get();
SetViewportAndScissor(0, reinterpret_field_offset, scaled_display_width, scaled_display_height);
SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height);
DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms));
m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(),

View File

@ -121,5 +121,5 @@ private:
ComPtr<ID3D11PixelShader> m_vram_write_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_copy_pixel_shader;
ComPtr<ID3D11PixelShader> m_vram_update_depth_pixel_shader;
std::array<std::array<ComPtr<ID3D11PixelShader>, 2>, 2> m_display_pixel_shaders; // [depth_24][interlaced]
std::array<std::array<ComPtr<ID3D11PixelShader>, 3>, 2> m_display_pixel_shaders; // [depth_24][interlaced]
};

View File

@ -403,11 +403,11 @@ bool GPU_HW_OpenGL::CompilePrograms()
for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++)
{
for (u8 interlaced = 0; interlaced < 2; interlaced++)
for (u8 interlaced = 0; interlaced < 3; interlaced++)
{
const std::string vs = shadergen.GenerateScreenQuadVertexShader();
const std::string fs = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit),
ConvertToBoolUnchecked(interlaced));
static_cast<InterlacedRenderMode>(interlaced));
std::optional<GL::Program> prog =
m_shader_cache.GetProgram(vs, {}, fs, [this, use_binding_layout](GL::Program& prog) {
@ -587,13 +587,13 @@ void GPU_HW_OpenGL::UpdateDisplay()
const u32 display_height = m_crtc_state.display_vram_height;
const u32 scaled_display_width = display_width * m_resolution_scale;
const u32 scaled_display_height = display_height * m_resolution_scale;
const bool interlaced = IsInterlacedDisplayEnabled();
const InterlacedRenderMode interlaced = GetInterlacedRenderMode();
if (IsDisplayDisabled())
{
m_host_display->ClearDisplayTexture();
}
else if (!m_GPUSTAT.display_area_color_depth_24 && !interlaced &&
else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == GPU_HW::InterlacedRenderMode::None &&
(scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() &&
(scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight())
{
@ -608,7 +608,7 @@ void GPU_HW_OpenGL::UpdateDisplay()
glDisable(GL_SCISSOR_TEST);
glDisable(GL_DEPTH_TEST);
m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)].Bind();
m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast<u8>(interlaced)].Bind();
m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER);
m_vram_texture.Bind();

View File

@ -85,7 +85,7 @@ private:
m_render_programs; // [render_mode][texture_mode][dithering][interlacing]
std::array<std::array<std::array<GL::Program, 2>, 2>, 4>
m_line_render_programs; // [render_mode][dithering][interlacing]
std::array<std::array<GL::Program, 2>, 2> m_display_programs; // [depth_24][interlaced]
std::array<std::array<GL::Program, 3>, 2> m_display_programs; // [depth_24][interlaced]
GL::Program m_vram_interlaced_fill_program;
GL::Program m_vram_read_program;
GL::Program m_vram_write_program;

View File

@ -1008,12 +1008,13 @@ std::string GPU_HW_ShaderGen::GenerateCopyFragmentShader()
return ss.str();
}
std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced)
std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode)
{
std::stringstream ss;
WriteHeader(ss);
DefineMacro(ss, "DEPTH_24BIT", depth_24bit);
DefineMacro(ss, "INTERLACED", interlaced);
DefineMacro(ss, "INTERLACED", interlace_mode != GPU_HW::InterlacedRenderMode::None);
DefineMacro(ss, "INTERLEAVED", interlace_mode == GPU_HW::InterlacedRenderMode::InterleavedFields);
WriteCommonFunctions(ss);
DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_crop_left", "uint u_field_offset"});
@ -1027,6 +1028,10 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo
#if INTERLACED
if (((icoords.y / RESOLUTION_SCALE) & 1u) != u_field_offset)
discard;
#if !INTERLEAVED
icoords.y /= 2u;
#endif
#endif
#if DEPTH_24BIT

View File

@ -21,7 +21,7 @@ public:
std::string GenerateFillFragmentShader();
std::string GenerateInterlacedFillFragmentShader();
std::string GenerateCopyFragmentShader();
std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced);
std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode);
std::string GenerateVRAMReadFragmentShader();
std::string GenerateVRAMWriteFragmentShader();
std::string GenerateVRAMCopyFragmentShader();

View File

@ -43,9 +43,11 @@ void GPU_SW::Reset()
m_vram.fill(0);
}
void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced)
void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced,
bool interleaved)
{
const u8 interlaced_shift = BoolToUInt8(interlaced);
const u8 interleaved_shift = BoolToUInt8(interleaved);
// Fast path when not wrapping around.
if ((src_x + width) <= VRAM_WIDTH && (src_y + height) <= VRAM_HEIGHT)
@ -54,7 +56,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u3
height >>= interlaced_shift;
const u16* src_ptr = &m_vram[src_y * VRAM_WIDTH + src_x];
const u32 src_stride = VRAM_WIDTH << interlaced_shift;
const u32 src_stride = VRAM_WIDTH << interleaved_shift;
for (u32 row = 0; row < height; row++)
{
const u16* src_row_ptr = src_ptr;
@ -80,15 +82,17 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u3
for (u32 col = src_x; col < end_x; col++)
*(dst_row_ptr++) = RGBA5551ToRGBA8888(src_row_ptr[col % VRAM_WIDTH]);
src_y += (1 << interlaced_shift);
src_y += (1 << interleaved_shift);
dst_ptr += dst_stride;
}
}
}
void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced)
void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced,
bool interleaved)
{
const u8 interlaced_shift = BoolToUInt8(interlaced);
const u8 interleaved_shift = BoolToUInt8(interleaved);
if ((src_x + width) <= VRAM_WIDTH && (src_y + height) <= VRAM_HEIGHT)
{
@ -96,7 +100,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u3
height >>= interlaced_shift;
const u8* src_ptr = reinterpret_cast<const u8*>(&m_vram[src_y * VRAM_WIDTH + src_x]);
const u32 src_stride = (VRAM_WIDTH << interlaced_shift) * sizeof(u16);
const u32 src_stride = (VRAM_WIDTH << interleaved_shift) * sizeof(u16);
for (u32 row = 0; row < height; row++)
{
const u8* src_row_ptr = src_ptr;
@ -133,7 +137,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u3
*(dst_row_ptr++) = (((ZeroExtend32(s1) << 16) | ZeroExtend32(s0)) >> shift) | 0xFF000000u;
}
src_y += (1 << interlaced_shift);
src_y += (1 << interleaved_shift);
dst_ptr += dst_stride;
}
}
@ -162,13 +166,15 @@ void GPU_SW::UpdateDisplay()
const u32 field = GetInterlacedDisplayLineOffset();
if (m_GPUSTAT.display_area_color_depth_24)
{
CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH,
VRAM_WIDTH, display_width + texture_offset_x, display_height, true);
CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y + (m_GPUSTAT.vertical_resolution ? field : 0u),
m_display_texture_buffer.data() + field * VRAM_WIDTH, VRAM_WIDTH, display_width + texture_offset_x,
display_height, true, m_GPUSTAT.vertical_resolution);
}
else
{
CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH,
VRAM_WIDTH, display_width + texture_offset_x, display_height, true);
CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y + (m_GPUSTAT.vertical_resolution ? field : 0u),
m_display_texture_buffer.data() + field * VRAM_WIDTH, VRAM_WIDTH, display_width + texture_offset_x,
display_height, true, m_GPUSTAT.vertical_resolution);
}
}
else
@ -176,12 +182,12 @@ void GPU_SW::UpdateDisplay()
if (m_GPUSTAT.display_area_color_depth_24)
{
CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH,
display_width + texture_offset_x, display_height, false);
display_width + texture_offset_x, display_height, false, false);
}
else
{
CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH,
display_width + texture_offset_x, display_height, false);
display_width + texture_offset_x, display_height, false, false);
}
}
@ -196,7 +202,7 @@ void GPU_SW::UpdateDisplay()
}
else
{
CopyOut15Bit(0, 0, m_display_texture_buffer.data(), VRAM_WIDTH, VRAM_WIDTH, VRAM_HEIGHT, false);
CopyOut15Bit(0, 0, m_display_texture_buffer.data(), VRAM_WIDTH, VRAM_WIDTH, VRAM_HEIGHT, false, false);
m_host_display->UpdateTexture(m_display_texture.get(), 0, 0, VRAM_WIDTH, VRAM_HEIGHT,
m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32));
m_host_display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH,

View File

@ -48,8 +48,10 @@ protected:
//////////////////////////////////////////////////////////////////////////
// Scanout
//////////////////////////////////////////////////////////////////////////
void CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced);
void CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced);
void CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced,
bool interleaved);
void CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced,
bool interleaved);
void UpdateDisplay() override;
//////////////////////////////////////////////////////////////////////////