From e368dbbadc8ea1ad840656688d0b986ea3c25cc7 Mon Sep 17 00:00:00 2001 From: Connor McLaughlin Date: Tue, 26 May 2020 03:18:04 +1000 Subject: [PATCH] GPU: Implement non-interleaved interlaced rendering Fixes screen shaking in True Pinball. --- src/core/gpu.cpp | 11 ++++++----- src/core/gpu.h | 12 +++++------- src/core/gpu_hw.h | 21 +++++++++++++++++++++ src/core/gpu_hw_d3d11.cpp | 12 ++++++------ src/core/gpu_hw_d3d11.h | 2 +- src/core/gpu_hw_opengl.cpp | 10 +++++----- src/core/gpu_hw_opengl.h | 2 +- src/core/gpu_hw_shadergen.cpp | 9 +++++++-- src/core/gpu_hw_shadergen.h | 2 +- src/core/gpu_sw.cpp | 32 +++++++++++++++++++------------- src/core/gpu_sw.h | 6 ++++-- 11 files changed, 76 insertions(+), 43 deletions(-) diff --git a/src/core/gpu.cpp b/src/core/gpu.cpp index adb8f46db..622762490 100644 --- a/src/core/gpu.cpp +++ b/src/core/gpu.cpp @@ -497,7 +497,7 @@ void GPU::UpdateCRTCDisplayParameters() } } - const u8 height_shift = BoolToUInt8(m_GPUSTAT.In480iMode()); + const u8 height_shift = BoolToUInt8(m_GPUSTAT.vertical_interlace); // Determine screen size. 
cs.display_width = (((cs.horizontal_active_end - cs.horizontal_active_start) / cs.dot_clock_divider) + 2u) & ~3u; @@ -711,16 +711,16 @@ void GPU::Execute(TickCount ticks) } // alternating even line bit in 240-line mode - if (m_GPUSTAT.In480iMode()) + if (m_GPUSTAT.vertical_interlace) { m_crtc_state.displaying_odd_lines = ConvertToBoolUnchecked((m_crtc_state.regs.Y + BoolToUInt32(m_crtc_state.displaying_odd_field)) & u32(1)); - m_GPUSTAT.displaying_odd_line = m_crtc_state.displaying_odd_lines && !m_crtc_state.in_vblank; + m_GPUSTAT.drawing_odd_lines = !m_crtc_state.displaying_odd_lines && !m_crtc_state.in_vblank; } else { m_crtc_state.displaying_odd_lines = false; - m_GPUSTAT.displaying_odd_line = + m_GPUSTAT.drawing_odd_lines = ConvertToBoolUnchecked((m_crtc_state.regs.Y + m_crtc_state.current_scanline) & u32(1)); } @@ -743,7 +743,8 @@ bool GPU::ConvertScreenCoordinatesToBeamTicksAndLines(s32 window_x, s32 window_y return false; } - *out_line = (static_cast(display_y) >> BoolToUInt8(m_GPUSTAT.In480iMode())) + m_crtc_state.vertical_active_start; + *out_line = + (static_cast(display_y) >> BoolToUInt8(m_GPUSTAT.vertical_interlace)) + m_crtc_state.vertical_active_start; *out_tick = (static_cast(display_x) * m_crtc_state.dot_clock_divider) + m_crtc_state.horizontal_active_start; return true; } diff --git a/src/core/gpu.h b/src/core/gpu.h index f114af8ea..c6479f7a9 100644 --- a/src/core/gpu.h +++ b/src/core/gpu.h @@ -345,7 +345,10 @@ protected: } /// Returns true if scanout should be interlaced. - ALWAYS_INLINE bool IsInterlacedDisplayEnabled() const { return (!m_force_progressive_scan) & m_GPUSTAT.In480iMode(); } + ALWAYS_INLINE bool IsInterlacedDisplayEnabled() const + { + return (!m_force_progressive_scan) & m_GPUSTAT.vertical_interlace; + } /// Returns true if interlaced rendering is enabled and force progressive scan is disabled. 
ALWAYS_INLINE bool IsInterlacedRenderingEnabled() const @@ -443,18 +446,13 @@ protected: BitField ready_to_send_vram; BitField ready_to_recieve_dma; BitField dma_direction; - BitField displaying_odd_line; + BitField drawing_odd_lines; bool IsMaskingEnabled() const { static constexpr u32 MASK = ((1 << 11) | (1 << 12)); return ((bits & MASK) != 0); } - bool In480iMode() const - { - static constexpr u32 MASK = (1 << 19) | (1 << 22); - return ((bits & MASK) == MASK); - } bool SkipDrawingToActiveField() const { static constexpr u32 MASK = (1 << 19) | (1 << 22) | (1 << 10); diff --git a/src/core/gpu_hw.h b/src/core/gpu_hw.h index 37af61398..197364443 100644 --- a/src/core/gpu_hw.h +++ b/src/core/gpu_hw.h @@ -27,6 +27,13 @@ public: OnlyTransparent }; + enum class InterlacedRenderMode : u8 + { + None, + InterleavedFields, + SeparateFields + }; + GPU_HW(); virtual ~GPU_HW(); @@ -189,6 +196,20 @@ protected: return m_batch.check_mask_before_draw || m_render_api != HostDisplay::RenderAPI::D3D11; } + /// Returns the interlaced mode to use when scanning out/displaying. + ALWAYS_INLINE InterlacedRenderMode GetInterlacedRenderMode() const + { + if (IsInterlacedDisplayEnabled()) + { + return m_GPUSTAT.vertical_resolution ? 
InterlacedRenderMode::InterleavedFields : + InterlacedRenderMode::SeparateFields; + } + else + { + return InterlacedRenderMode::None; + } + } + void FillVRAM(u32 x, u32 y, u32 width, u32 height, u32 color) override; void UpdateVRAM(u32 x, u32 y, u32 width, u32 height, const void* data) override; void CopyVRAM(u32 src_x, u32 src_y, u32 dst_x, u32 dst_y, u32 width, u32 height) override; diff --git a/src/core/gpu_hw_d3d11.cpp b/src/core/gpu_hw_d3d11.cpp index b4f516fcb..6f97ef282 100644 --- a/src/core/gpu_hw_d3d11.cpp +++ b/src/core/gpu_hw_d3d11.cpp @@ -448,10 +448,10 @@ bool GPU_HW_D3D11::CompileShaders() for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++) { - for (u8 interlacing = 0; interlacing < 2; interlacing++) + for (u8 interlacing = 0; interlacing < 3; interlacing++) { const std::string ps = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), - ConvertToBoolUnchecked(interlacing)); + static_cast<InterlacedRenderMode>(interlacing)); m_display_pixel_shaders[depth_24bit][interlacing] = m_shader_cache.GetPixelShader(m_device.Get(), ps); if (!m_display_pixel_shaders[depth_24bit][interlacing]) return false; @@ -590,13 +590,13 @@ void GPU_HW_D3D11::UpdateDisplay() const u32 display_height = m_crtc_state.display_vram_height; const u32 scaled_display_width = display_width * m_resolution_scale; const u32 scaled_display_height = display_height * m_resolution_scale; - const bool interlaced = IsInterlacedDisplayEnabled(); + const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); if (IsDisplayDisabled()) { m_host_display->ClearDisplayTexture(); } - else if (!m_GPUSTAT.display_area_color_depth_24 && !interlaced && + else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == InterlacedRenderMode::None && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) { @@ -616,9 +616,9 @@ void GPU_HW_D3D11::UpdateDisplay() const u32 uniforms[4] = 
{reinterpret_start_x, scaled_vram_offset_y, reinterpret_crop_left, reinterpret_field_offset}; ID3D11PixelShader* display_pixel_shader = - m_display_pixel_shaders[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)].Get(); + m_display_pixel_shaders[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)].Get(); - SetViewportAndScissor(0, reinterpret_field_offset, scaled_display_width, scaled_display_height); + SetViewportAndScissor(0, 0, scaled_display_width, scaled_display_height); DrawUtilityShader(display_pixel_shader, uniforms, sizeof(uniforms)); m_host_display->SetDisplayTexture(m_display_texture.GetD3DSRV(), m_display_texture.GetWidth(), diff --git a/src/core/gpu_hw_d3d11.h b/src/core/gpu_hw_d3d11.h index 38f9f1a98..4257a9b5d 100644 --- a/src/core/gpu_hw_d3d11.h +++ b/src/core/gpu_hw_d3d11.h @@ -121,5 +121,5 @@ private: ComPtr m_vram_write_pixel_shader; ComPtr m_vram_copy_pixel_shader; ComPtr m_vram_update_depth_pixel_shader; - std::array, 2>, 2> m_display_pixel_shaders; // [depth_24][interlaced] + std::array, 3>, 2> m_display_pixel_shaders; // [depth_24][interlaced] }; diff --git a/src/core/gpu_hw_opengl.cpp b/src/core/gpu_hw_opengl.cpp index 26c4a061f..86afe2ac4 100644 --- a/src/core/gpu_hw_opengl.cpp +++ b/src/core/gpu_hw_opengl.cpp @@ -403,11 +403,11 @@ bool GPU_HW_OpenGL::CompilePrograms() for (u8 depth_24bit = 0; depth_24bit < 2; depth_24bit++) { - for (u8 interlaced = 0; interlaced < 2; interlaced++) + for (u8 interlaced = 0; interlaced < 3; interlaced++) { const std::string vs = shadergen.GenerateScreenQuadVertexShader(); const std::string fs = shadergen.GenerateDisplayFragmentShader(ConvertToBoolUnchecked(depth_24bit), - ConvertToBoolUnchecked(interlaced)); + static_cast(interlaced)); std::optional prog = m_shader_cache.GetProgram(vs, {}, fs, [this, use_binding_layout](GL::Program& prog) { @@ -587,13 +587,13 @@ void GPU_HW_OpenGL::UpdateDisplay() const u32 display_height = m_crtc_state.display_vram_height; 
const u32 scaled_display_width = display_width * m_resolution_scale; const u32 scaled_display_height = display_height * m_resolution_scale; - const bool interlaced = IsInterlacedDisplayEnabled(); + const InterlacedRenderMode interlaced = GetInterlacedRenderMode(); if (IsDisplayDisabled()) { m_host_display->ClearDisplayTexture(); } - else if (!m_GPUSTAT.display_area_color_depth_24 && !interlaced && + else if (!m_GPUSTAT.display_area_color_depth_24 && interlaced == GPU_HW::InterlacedRenderMode::None && (scaled_vram_offset_x + scaled_display_width) <= m_vram_texture.GetWidth() && (scaled_vram_offset_y + scaled_display_height) <= m_vram_texture.GetHeight()) { @@ -608,7 +608,7 @@ void GPU_HW_OpenGL::UpdateDisplay() glDisable(GL_SCISSOR_TEST); glDisable(GL_DEPTH_TEST); - m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][BoolToUInt8(interlaced)].Bind(); + m_display_programs[BoolToUInt8(m_GPUSTAT.display_area_color_depth_24)][static_cast(interlaced)].Bind(); m_display_texture.BindFramebuffer(GL_DRAW_FRAMEBUFFER); m_vram_texture.Bind(); diff --git a/src/core/gpu_hw_opengl.h b/src/core/gpu_hw_opengl.h index 17a3fe8d2..0a1d0b607 100644 --- a/src/core/gpu_hw_opengl.h +++ b/src/core/gpu_hw_opengl.h @@ -85,7 +85,7 @@ private: m_render_programs; // [render_mode][texture_mode][dithering][interlacing] std::array, 2>, 4> m_line_render_programs; // [render_mode][dithering][interlacing] - std::array, 2> m_display_programs; // [depth_24][interlaced] + std::array, 2> m_display_programs; // [depth_24][interlaced] GL::Program m_vram_interlaced_fill_program; GL::Program m_vram_read_program; GL::Program m_vram_write_program; diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 488496a5d..63951cabe 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1008,12 +1008,13 @@ std::string GPU_HW_ShaderGen::GenerateCopyFragmentShader() return ss.str(); } -std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool 
depth_24bit, bool interlaced) +std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode) { std::stringstream ss; WriteHeader(ss); DefineMacro(ss, "DEPTH_24BIT", depth_24bit); - DefineMacro(ss, "INTERLACED", interlaced); + DefineMacro(ss, "INTERLACED", interlace_mode != GPU_HW::InterlacedRenderMode::None); + DefineMacro(ss, "INTERLEAVED", interlace_mode == GPU_HW::InterlacedRenderMode::InterleavedFields); WriteCommonFunctions(ss); DeclareUniformBuffer(ss, {"uint2 u_vram_offset", "uint u_crop_left", "uint u_field_offset"}); @@ -1027,6 +1028,10 @@ std::string GPU_HW_ShaderGen::GenerateDisplayFragmentShader(bool depth_24bit, bo #if INTERLACED if (((icoords.y / RESOLUTION_SCALE) & 1u) != u_field_offset) discard; + + #if !INTERLEAVED + icoords.y /= 2u; + #endif #endif #if DEPTH_24BIT diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index bf62c0447..3b2270dcc 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -21,7 +21,7 @@ public: std::string GenerateFillFragmentShader(); std::string GenerateInterlacedFillFragmentShader(); std::string GenerateCopyFragmentShader(); - std::string GenerateDisplayFragmentShader(bool depth_24bit, bool interlaced); + std::string GenerateDisplayFragmentShader(bool depth_24bit, GPU_HW::InterlacedRenderMode interlace_mode); std::string GenerateVRAMReadFragmentShader(); std::string GenerateVRAMWriteFragmentShader(); std::string GenerateVRAMCopyFragmentShader(); diff --git a/src/core/gpu_sw.cpp b/src/core/gpu_sw.cpp index de7a50640..d9b1ec7c5 100644 --- a/src/core/gpu_sw.cpp +++ b/src/core/gpu_sw.cpp @@ -43,9 +43,11 @@ void GPU_SW::Reset() m_vram.fill(0); } -void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced) +void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced, + bool interleaved) { const u8 
interlaced_shift = BoolToUInt8(interlaced); + const u8 interleaved_shift = BoolToUInt8(interleaved); // Fast path when not wrapping around. if ((src_x + width) <= VRAM_WIDTH && (src_y + height) <= VRAM_HEIGHT) @@ -54,7 +56,7 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u3 height >>= interlaced_shift; const u16* src_ptr = &m_vram[src_y * VRAM_WIDTH + src_x]; - const u32 src_stride = VRAM_WIDTH << interlaced_shift; + const u32 src_stride = VRAM_WIDTH << interleaved_shift; for (u32 row = 0; row < height; row++) { const u16* src_row_ptr = src_ptr; @@ -80,15 +82,17 @@ void GPU_SW::CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u3 for (u32 col = src_x; col < end_x; col++) *(dst_row_ptr++) = RGBA5551ToRGBA8888(src_row_ptr[col % VRAM_WIDTH]); - src_y += (1 << interlaced_shift); + src_y += (1 << interleaved_shift); dst_ptr += dst_stride; } } } -void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced) +void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced, + bool interleaved) { const u8 interlaced_shift = BoolToUInt8(interlaced); + const u8 interleaved_shift = BoolToUInt8(interleaved); if ((src_x + width) <= VRAM_WIDTH && (src_y + height) <= VRAM_HEIGHT) { @@ -96,7 +100,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u3 height >>= interlaced_shift; const u8* src_ptr = reinterpret_cast<const u8*>(&m_vram[src_y * VRAM_WIDTH + src_x]); - const u32 src_stride = (VRAM_WIDTH << interlaced_shift) * sizeof(u16); + const u32 src_stride = (VRAM_WIDTH << interleaved_shift) * sizeof(u16); for (u32 row = 0; row < height; row++) { const u8* src_row_ptr = src_ptr; @@ -133,7 +137,7 @@ void GPU_SW::CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u3 *(dst_row_ptr++) = (((ZeroExtend32(s1) << 16) | ZeroExtend32(s0)) >> shift) | 0xFF000000u; } - src_y += (1 << interlaced_shift); + 
src_y += (1 << interleaved_shift); dst_ptr += dst_stride; } } @@ -162,13 +166,15 @@ void GPU_SW::UpdateDisplay() const u32 field = GetInterlacedDisplayLineOffset(); if (m_GPUSTAT.display_area_color_depth_24) { - CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH, - VRAM_WIDTH, display_width + texture_offset_x, display_height, true); + CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y + (m_GPUSTAT.vertical_resolution ? field : 0u), + m_display_texture_buffer.data() + field * VRAM_WIDTH, VRAM_WIDTH, display_width + texture_offset_x, + display_height, true, m_GPUSTAT.vertical_resolution); } else { - CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y + field, m_display_texture_buffer.data() + field * VRAM_WIDTH, - VRAM_WIDTH, display_width + texture_offset_x, display_height, true); + CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y + (m_GPUSTAT.vertical_resolution ? field : 0u), + m_display_texture_buffer.data() + field * VRAM_WIDTH, VRAM_WIDTH, display_width + texture_offset_x, + display_height, true, m_GPUSTAT.vertical_resolution); } } else @@ -176,12 +182,12 @@ void GPU_SW::UpdateDisplay() if (m_GPUSTAT.display_area_color_depth_24) { CopyOut24Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH, - display_width + texture_offset_x, display_height, false); + display_width + texture_offset_x, display_height, false, false); } else { CopyOut15Bit(m_crtc_state.regs.X, vram_offset_y, m_display_texture_buffer.data(), VRAM_WIDTH, - display_width + texture_offset_x, display_height, false); + display_width + texture_offset_x, display_height, false, false); } } @@ -196,7 +202,7 @@ void GPU_SW::UpdateDisplay() } else { - CopyOut15Bit(0, 0, m_display_texture_buffer.data(), VRAM_WIDTH, VRAM_WIDTH, VRAM_HEIGHT, false); + CopyOut15Bit(0, 0, m_display_texture_buffer.data(), VRAM_WIDTH, VRAM_WIDTH, VRAM_HEIGHT, false, false); m_host_display->UpdateTexture(m_display_texture.get(), 0, 0, VRAM_WIDTH, 
VRAM_HEIGHT, m_display_texture_buffer.data(), VRAM_WIDTH * sizeof(u32)); m_host_display->SetDisplayTexture(m_display_texture->GetHandle(), VRAM_WIDTH, VRAM_HEIGHT, 0, 0, VRAM_WIDTH, diff --git a/src/core/gpu_sw.h b/src/core/gpu_sw.h index 6db9e792b..6381c2a11 100644 --- a/src/core/gpu_sw.h +++ b/src/core/gpu_sw.h @@ -48,8 +48,10 @@ protected: ////////////////////////////////////////////////////////////////////////// // Scanout ////////////////////////////////////////////////////////////////////////// - void CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced); - void CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced); + void CopyOut15Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced, + bool interleaved); + void CopyOut24Bit(u32 src_x, u32 src_y, u32* dst_ptr, u32 dst_stride, u32 width, u32 height, bool interlaced, + bool interleaved); void UpdateDisplay() override; //////////////////////////////////////////////////////////////////////////