From b285188de1a38565b9718bdac7fe698ee870d914 Mon Sep 17 00:00:00 2001 From: iwubcode Date: Sun, 2 Jul 2017 21:24:20 -0500 Subject: [PATCH] Video Backends: Implement vertical scaling for xfb copies. This fixes the display of PAL games that run in 50hz mode. --- .../VideoBackends/D3D/PSTextureEncoder.cpp | 7 ++++-- .../VideoBackends/OGL/TextureConverter.cpp | 11 ++++++---- .../VideoBackends/Vulkan/TextureConverter.cpp | 22 +++++++++++++++---- Source/Core/VideoCommon/BPStructs.cpp | 4 ++-- Source/Core/VideoCommon/RenderBase.cpp | 11 +++++----- Source/Core/VideoCommon/TextureCacheBase.cpp | 20 +++++++++-------- Source/Core/VideoCommon/TextureCacheBase.h | 15 ++++++++----- .../VideoCommon/TextureConversionShader.cpp | 9 ++++++-- 8 files changed, 65 insertions(+), 34 deletions(-) diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp index 7c05215948..2aafb299bf 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp @@ -26,6 +26,8 @@ struct EFBEncodeParams s32 SrcTop; u32 DestWidth; u32 ScaleFactor; + float y_scale; + u32 padding[3]; }; PSTextureEncoder::PSTextureEncoder() @@ -45,7 +47,7 @@ void PSTextureEncoder::Init() // EFB2RAM copies use max (EFB_WIDTH * 4) by (EFB_HEIGHT / 4) // XFB2RAM copies use max (EFB_WIDTH / 2) by (EFB_HEIGHT) D3D11_TEXTURE2D_DESC t2dd = CD3D11_TEXTURE2D_DESC(DXGI_FORMAT_B8G8R8A8_UNORM, EFB_WIDTH * 4, - EFB_HEIGHT, 1, 1, D3D11_BIND_RENDER_TARGET); + 1024, 1, 1, D3D11_BIND_RENDER_TARGET); hr = D3D::device->CreateTexture2D(&t2dd, nullptr, &m_out); CHECK(SUCCEEDED(hr), "create efb encode output texture"); D3D::SetDebugObjectName(m_out, "efb encoder output texture"); @@ -127,6 +129,7 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_w encode_params.SrcTop = src_rect.top; encode_params.DestWidth = native_width; encode_params.ScaleFactor = scale_by_half ? 2 : 1; + encode_params.y_scale = params.y_scale; D3D::context->UpdateSubresource(m_encodeParams, 0, nullptr, &encode_params, 0, 0); D3D::stateman->SetPixelConstants(m_encodeParams); @@ -134,7 +137,7 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_w // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more // complex down filtering to average all pixels and produce the correct result. // Also, box filtering won't be correct for anything other than 1x IR - if (scale_by_half || g_renderer->GetEFBScale() != 1) + if (scale_by_half || g_renderer->GetEFBScale() != 1 || params.y_scale > 1.0f) D3D::SetLinearCopySampler(); else D3D::SetPointCopySampler(); diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/TextureConverter.cpp index 67f1bcc28e..1fb55c8d24 100644 --- a/Source/Core/VideoBackends/OGL/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/TextureConverter.cpp @@ -50,6 +50,7 @@ struct EncodingProgram { SHADER program; GLint copy_position_uniform; + GLint y_scale_uniform; }; static std::map s_encoding_programs; @@ -166,6 +167,7 @@ static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params) PanicAlert("Failed to compile texture encoding shader."); program.copy_position_uniform = glGetUniformLocation(program.program.glprogid, "position"); + program.y_scale_uniform = glGetUniformLocation(program.program.glprogid, "y_scale"); return s_encoding_programs.emplace(params, program).first->second; } @@ -217,7 +219,7 @@ void Shutdown() // dst_line_size, writeStride in bytes static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line_size, - u32 dstHeight, u32 writeStride, bool linearFilter) + u32 dstHeight, u32 writeStride, bool linearFilter, float y_scale) { // switch to texture converter frame buffer // attach render buffer as color destination @@ -233,7 +235,7 @@ static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more // complex down filtering to average all pixels and produce the correct result. // Also, box filtering won't be correct for anything other than 1x IR - if (linearFilter || g_renderer->GetEFBScale() != 1) + if (linearFilter || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f) g_sampler_cache->BindLinearSampler(9); else g_sampler_cache->BindNearestSampler(9); @@ -282,13 +284,14 @@ void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 nativ texconv_shader.program.Bind(); glUniform4i(texconv_shader.copy_position_uniform, src_rect.left, src_rect.top, native_width, scale_by_half ? 2 : 1); + glUniform1f(texconv_shader.y_scale_uniform, params.y_scale); const GLuint read_texture = params.depth ? FramebufferManager::ResolveAndGetDepthTarget(src_rect) : FramebufferManager::ResolveAndGetRenderTarget(src_rect); EncodeToRamUsingShader(read_texture, dest_ptr, bytes_per_row, num_blocks_y, memory_stride, - scale_by_half && !params.depth); + scale_by_half && !params.depth, params.y_scale); FramebufferManager::SetFramebuffer(0); g_renderer->RestoreAPIState(); @@ -308,7 +311,7 @@ void EncodeToRamYUYV(GLuint srcTexture, const TargetRectangle& sourceRc, u8* des // We enable linear filtering, because the GameCube does filtering in the vertical direction when // yscale is enabled. // Otherwise we get jaggies when a game uses yscaling (most PAL games) - EncodeToRamUsingShader(srcTexture, destAddr, dstWidth * 2, dstHeight, dstStride, true); + EncodeToRamUsingShader(srcTexture, destAddr, dstWidth * 2, dstHeight, dstStride, true, 1.0f); FramebufferManager::SetFramebuffer(0); OGLTexture::DisableStage(0); g_renderer->RestoreAPIState(); diff --git a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp index e4d0e167f9..75f274f917 100644 --- a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp +++ b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp @@ -5,6 +5,7 @@ #include "VideoBackends/Vulkan/TextureConverter.h" #include +#include #include #include #include @@ -32,6 +33,14 @@ namespace Vulkan { + namespace + { + struct EFBEncodeParams + { + std::array position_uniform; + float y_scale; + }; + } TextureConverter::TextureConverter() { } @@ -243,14 +252,19 @@ void TextureConverter::EncodeTextureToMemory(VkImageView src_texture, u8* dest_p VK_NULL_HANDLE, shader); // Uniform - int4 of left,top,native_width,scale - s32 position_uniform[4] = {src_rect.left, src_rect.top, static_cast(native_width), - scale_by_half ? 2 : 1}; - draw.SetPushConstants(position_uniform, sizeof(position_uniform)); + EFBEncodeParams encoder_params; + encoder_params.position_uniform[0] = src_rect.left; + encoder_params.position_uniform[1] = src_rect.top; + encoder_params.position_uniform[2] = static_cast(native_width); + encoder_params.position_uniform[3] = scale_by_half ? 2 : 1; + encoder_params.y_scale = params.y_scale; + draw.SetPushConstants(&encoder_params, sizeof(encoder_params)); // We also linear filtering for both box filtering and downsampling higher resolutions to 1x // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more // complex down filtering to average all pixels and produce the correct result. - bool linear_filter = (scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1; + bool linear_filter = (scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || + params.y_scale > 1.0f; draw.SetPSSampler(0, src_texture, linear_filter ? g_object_cache->GetLinearSampler() : g_object_cache->GetPointSampler()); diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index e721831481..183f7d4f20 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -232,7 +232,7 @@ static void BPWritten(const BPCmd& bp) bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24; g_texture_cache->CopyRenderTargetToTexture(destAddr, PE_copy.tp_realFormat(), destStride, is_depth_copy, srcRect, !!PE_copy.intensity_fmt, - !!PE_copy.half_scale); + !!PE_copy.half_scale, 1.0f); } else { @@ -261,7 +261,7 @@ static void BPWritten(const BPCmd& bp) bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24; g_texture_cache->CopyRenderTargetToTexture(destAddr, EFBCopyFormat::XFB, destStride, is_depth_copy, srcRect, false, - false); + false, yScale); // This stays in to signal end of a "frame" g_renderer->RenderToXFB(destAddr, srcRect, destStride, height, s_gammaLUT[PE_copy.gamma]); diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 28e607f8cb..168aece684 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -668,15 +668,16 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const // Get the current XFB from texture cache auto* xfb_entry = g_texture_cache->GetTexture(xfbAddr, fbWidth, fbHeight, TextureFormat::XFB, force_safe_texture_cache_hash); - - // TODO, check if xfb_entry is a duplicate of the previous frame and skip SwapImpl - m_previous_xfb_texture = xfb_entry->texture.get(); + if (xfb_entry) + { + // TODO, check if xfb_entry is a duplicate of the previous frame and skip SwapImpl m_last_xfb_texture = xfb_entry->texture.get(); - // TODO: merge more generic parts into VideoCommon - g_renderer->SwapImpl(xfb_entry->texture.get(), rc, ticks, Gamma); + // TODO: merge more generic parts into VideoCommon + g_renderer->SwapImpl(xfb_entry->texture.get(), rc, ticks, Gamma); + } } if (m_xfb_written) diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index ed5a071177..beb5b584d8 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -376,7 +376,7 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale u32 copy_width = std::min(entry->native_width - src_x, entry_to_update->native_width - dst_x); u32 copy_height = - std::min(entry->native_height - src_y, entry_to_update->native_height - dst_y); + std::min((entry->native_height * entry->y_scale) - src_y, (entry_to_update->native_height * entry_to_update->y_scale) - dst_y); // If one of the textures is scaled, scale both with the current efb scaling factor if (entry_to_update->native_width != entry_to_update->GetWidth() || @@ -385,9 +385,9 @@ TextureCacheBase::DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* pale { ScaleTextureCacheEntryTo(entry_to_update, g_renderer->EFBToScaledX(entry_to_update->native_width), - g_renderer->EFBToScaledY(entry_to_update->native_height)); + g_renderer->EFBToScaledY(entry_to_update->native_height * entry_to_update->y_scale)); ScaleTextureCacheEntryTo(entry, g_renderer->EFBToScaledX(entry->native_width), - g_renderer->EFBToScaledY(entry->native_height)); + g_renderer->EFBToScaledY(entry->native_height * entry->y_scale)); src_x = g_renderer->EFBToScaledX(src_x); src_y = g_renderer->EFBToScaledY(src_y); @@ -794,7 +794,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::GetTexture(u32 address, u32 wid // Do not load strided EFB copies, they are not meant to be used directly. // Also do not directly load EFB copies, which were partly overwritten. - if (entry->IsCopy() && entry->native_width == nativeW && entry->native_height == nativeH && + if (entry->IsCopy() && entry->native_width == nativeW && static_cast(entry->native_height * entry->y_scale) == nativeH && entry->memory_stride == entry->BytesPerRow() && !entry->may_have_overlapping_textures) { // EFB copies have slightly different rules as EFB copy formats have different @@ -881,7 +881,8 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::GetTexture(u32 address, u32 wid TCacheEntry* entry = hash_iter->second; // All parameters, except the address, need to match here if (entry->format == full_format && entry->native_levels >= tex_levels && - entry->native_width == nativeW && entry->native_height == nativeH) + entry->native_width == nativeW && + static_cast(entry->native_height * entry->y_scale) == nativeH) { entry = DoPartialTextureUpdates(hash_iter->second, &texMem[tlutaddr], tlutfmt); @@ -1107,7 +1108,7 @@ TextureCacheBase::TCacheEntry* TextureCacheBase::GetTexture(u32 address, u32 wid void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 dstStride, bool is_depth_copy, const EFBRectangle& srcRect, bool isIntensity, - bool scaleByHalf) + bool scaleByHalf, float y_scale) { // Emulation methods: // @@ -1451,7 +1452,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF const u32 blockW = TexDecoder_GetBlockWidthInTexels(baseFormat); // Round up source height to multiple of block size - u32 actualHeight = Common::AlignUp(tex_h, blockH); + u32 actualHeight = Common::AlignUp(static_cast(tex_h * y_scale), blockH); const u32 actualWidth = Common::AlignUp(tex_w, blockW); u32 num_blocks_y = actualHeight / blockH; @@ -1465,7 +1466,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF if (copy_to_ram) { - EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity); + EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity, y_scale); CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf); } else @@ -1556,6 +1557,7 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF { entry->SetGeneralParameters(dstAddr, 0, baseFormat, is_xfb_copy); entry->SetDimensions(tex_w, tex_h, 1); + entry->y_scale = y_scale; entry->frameCount = FRAMECOUNT_INVALID; if (is_xfb_copy) @@ -1731,7 +1733,7 @@ u32 TextureCacheBase::TCacheEntry::NumBlocksY() const { u32 blockH = TexDecoder_GetBlockHeightInTexels(format.texfmt); // Round up source height to multiple of block size - u32 actualHeight = Common::AlignUp(native_height, blockH); + u32 actualHeight = Common::AlignUp(static_cast(native_height * y_scale), blockH); return actualHeight / blockH; } diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index 53ab41a836..96b36b6eba 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -45,21 +45,22 @@ struct TextureAndTLUTFormat struct EFBCopyParams { EFBCopyParams(PEControl::PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_, - bool yuv_) - : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_) + bool yuv_, float y_scale_) + : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_), y_scale(y_scale_) { } bool operator<(const EFBCopyParams& rhs) const { - return std::tie(efb_format, copy_format, depth, yuv) < - std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv); + return std::tie(efb_format, copy_format, depth, yuv, y_scale) < + std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.y_scale); } PEControl::PixelFormat efb_format; EFBCopyFormat copy_format; bool depth; bool yuv; + float y_scale; }; class TextureCacheBase @@ -86,6 +87,7 @@ public: // content, aren't just downscaled bool should_force_safe_hashing = false; // for XFB bool is_xfb_copy = false; + float y_scale = 1.0f; unsigned int native_width, native_height; // Texture dimensions from the GameCube's point of view @@ -188,7 +190,7 @@ public: virtual void BindTextures(); void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 dstStride, bool is_depth_copy, const EFBRectangle& srcRect, bool isIntensity, - bool scaleByHalf); + bool scaleByHalf, float y_scale); virtual void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette, TLUTFormat format) = 0; @@ -210,6 +212,8 @@ public: { } + void ScaleTextureCacheEntryTo(TCacheEntry* entry, u32 new_width, u32 new_height); + protected: TextureCacheBase(); @@ -235,7 +239,6 @@ private: TCacheEntry* ApplyPaletteToEntry(TCacheEntry* entry, u8* palette, TLUTFormat tlutfmt); - void ScaleTextureCacheEntryTo(TCacheEntry* entry, u32 new_width, u32 new_height); TCacheEntry* DoPartialTextureUpdates(TCacheEntry* entry_to_update, u8* palette, TLUTFormat tlutfmt); diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 74e50cd27f..04b7bb3a01 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -64,9 +64,12 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType) // left, top, of source rectangle within source texture // width of the destination rectangle, scale_factor (1 or 2) if (ApiType == APIType::Vulkan) - WRITE(p, "layout(std140, push_constant) uniform PCBlock { int4 position; } PC;\n"); + WRITE(p, "layout(std140, push_constant) uniform PCBlock { int4 position; float y_scale; } PC;\n"); else + { WRITE(p, "uniform int4 position;\n"); + WRITE(p, "uniform float y_scale;\n"); + } // Alpha channel in the copy is set to 1 the EFB format does not have an alpha channel. WRITE(p, "float4 RGBA8ToRGB8(float4 src)\n"); @@ -111,7 +114,8 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType) WRITE(p, "{\n" " int2 sampleUv;\n" " int2 uv1 = int2(gl_FragCoord.xy);\n" - " int4 position = PC.position;\n"); + " int4 position = PC.position;\n" + " float y_scale = PC.y_scale;\n"); } else // D3D { @@ -150,6 +154,7 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType) // pixel) WRITE(p, " uv0 += float2(position.xy);\n"); // move to copied rect WRITE(p, " uv0 /= float2(%d, %d);\n", EFB_WIDTH, EFB_HEIGHT); // normalize to [0:1] + WRITE(p, " uv0 /= float2(1, y_scale);\n"); // apply the y scaling if (ApiType == APIType::OpenGL) // ogl has to flip up and down { WRITE(p, " uv0.y = 1.0-uv0.y;\n");