diff --git a/Source/Core/VideoBackends/D3D/TextureCache.cpp b/Source/Core/VideoBackends/D3D/TextureCache.cpp
index 9c5f139502..e34858d5a4 100644
--- a/Source/Core/VideoBackends/D3D/TextureCache.cpp
+++ b/Source/Core/VideoBackends/D3D/TextureCache.cpp
@@ -240,7 +240,8 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
   }
 
   auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
-                                                      scale_by_half);
+                                                      scale_by_half,
+                                                      NeedsCopyFilterInShader(filter_coefficients));
   ID3D11PixelShader* pixel_shader = GetEFBToTexPixelShader(uid);
   if (!pixel_shader)
     return;
diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp
index 086d0f29f2..99a3576d55 100644
--- a/Source/Core/VideoBackends/OGL/TextureCache.cpp
+++ b/Source/Core/VideoBackends/OGL/TextureCache.cpp
@@ -515,7 +515,8 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
   glViewport(0, 0, destination_texture->GetConfig().width, destination_texture->GetConfig().height);
 
   auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
-                                                      scale_by_half);
+                                                      scale_by_half,
+                                                      NeedsCopyFilterInShader(filter_coefficients));
 
   auto it = m_efb_copy_programs.emplace(uid, EFBCopyShader());
   EFBCopyShader& shader = it.first->second;
diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp
index bc7a0d0f18..364569baa3 100644
--- a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp
+++ b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp
@@ -274,7 +274,8 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy,
                                     VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);
 
   auto uid = TextureConversionShaderGen::GetShaderUid(dst_format, is_depth_copy, is_intensity,
-                                                      scale_by_half);
+                                                      scale_by_half,
+                                                      NeedsCopyFilterInShader(filter_coefficients));
 
   auto it = m_efb_copy_to_tex_shaders.emplace(uid, VkShaderModule(VK_NULL_HANDLE));
   VkShaderModule& shader = it.first->second;
diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp
index f0b1392531..0a04a93168 100644
--- a/Source/Core/VideoCommon/TextureCacheBase.cpp
+++ b/Source/Core/VideoCommon/TextureCacheBase.cpp
@@ -1499,8 +1499,8 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda
   }
 }
 
-TextureCacheBase::CopyFilterCoefficientArray
-TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
+TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetRAMCopyFilterCoefficients(
+    const CopyFilterCoefficients::Values& coefficients) const
 {
   // To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1
   // are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below.
@@ -1510,8 +1510,8 @@ TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Val
       static_cast(coefficients[5]) + static_cast(coefficients[6])};
 }
 
-TextureCacheBase::CopyFilterCoefficientArray
-TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients)
+TextureCacheBase::CopyFilterCoefficientArray TextureCacheBase::GetVRAMCopyFilterCoefficients(
+    const CopyFilterCoefficients::Values& coefficients) const
 {
   // If the user disables the copy filter, only apply it to the VRAM copy.
   // This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected.
@@ -1528,6 +1528,12 @@ TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Va
   return res;
 }
 
+bool TextureCacheBase::NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const
+{
+  // If the top/bottom coefficients are zero, no point sampling/blending from these rows.
+  return coefficients[0] != 0 || coefficients[2] != 0;
+}
+
 void TextureCacheBase::CopyRenderTargetToTexture(
     u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, u32 dstStride, bool is_depth_copy,
     const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, float y_scale, float gamma,
@@ -1650,11 +1656,12 @@ void TextureCacheBase::CopyRenderTargetToTexture(
 
   if (copy_to_ram)
   {
+    CopyFilterCoefficientArray coefficients = GetRAMCopyFilterCoefficients(filter_coefficients);
     PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format;
-    EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity);
+    EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity,
+                         NeedsCopyFilterInShader(coefficients));
     CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf,
-            y_scale, gamma, clamp_top, clamp_bottom,
-            GetRAMCopyFilterCoefficients(filter_coefficients));
+            y_scale, gamma, clamp_top, clamp_bottom, coefficients);
   }
   else
   {
diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h
index 720e95b470..467fb34041 100644
--- a/Source/Core/VideoCommon/TextureCacheBase.h
+++ b/Source/Core/VideoCommon/TextureCacheBase.h
@@ -47,21 +47,23 @@ struct TextureAndTLUTFormat
 struct EFBCopyParams
 {
   EFBCopyParams(PEControl::PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_,
-                bool yuv_)
-      : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_)
+                bool yuv_, bool copy_filter_)
+      : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_),
+        copy_filter(copy_filter_)
   {
   }
 
   bool operator<(const EFBCopyParams& rhs) const
   {
-    return std::tie(efb_format, copy_format, depth, yuv) <
-           std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv);
+    return std::tie(efb_format, copy_format, depth, yuv, copy_filter) <
+           std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.copy_filter);
   }
 
   PEControl::PixelFormat efb_format;
   EFBCopyFormat copy_format;
   bool depth;
   bool yuv;
+  bool copy_filter;
 };
 
 struct TextureLookupInformation
@@ -106,6 +108,7 @@ private:
   static const int FRAMECOUNT_INVALID = 0;
 
 public:
+  // Reduced version of the full coefficient array, reduced to a single value for each row.
   using CopyFilterCoefficientArray = std::array;
 
   struct TCacheEntry
@@ -278,6 +281,9 @@ public:
 protected:
   TextureCacheBase();
 
+  // Returns false if the top/bottom row coefficients are zero.
+  bool NeedsCopyFilterInShader(const CopyFilterCoefficientArray& coefficients) const;
+
   alignas(16) u8* temp = nullptr;
   size_t temp_size = 0;
 
@@ -329,9 +335,9 @@ private:
 
   // Precomputing the coefficients for the previous, current, and next lines for the copy filter.
   CopyFilterCoefficientArray
-  GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
+  GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const;
   CopyFilterCoefficientArray
-  GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients);
+  GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) const;
 
   TexAddrCache textures_by_address;
   TexHashCache textures_by_hash;
diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp
index 2339558df1..a36f643693 100644
--- a/Source/Core/VideoCommon/TextureConversionShader.cpp
+++ b/Source/Core/VideoCommon/TextureConversionShader.cpp
@@ -180,21 +180,36 @@ static void WriteSampleFunction(char*& p, const EFBCopyParams& params, APIType A
   // The filter is only applied to the RGB channels, the alpha channel is left intact.
   WRITE(p, "float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n");
   WRITE(p, "{\n");
-  WRITE(p, " float4 prev_row = ");
-  WriteSampleOp(-1);
-  WRITE(p, ";\n");
-  WRITE(p, " float4 current_row = ");
-  WriteSampleOp(0);
-  WRITE(p, ";\n");
-  WRITE(p, " float4 next_row = ");
-  WriteSampleOp(1);
-  WRITE(p, ";\n");
-  WRITE(p,
+  if (params.copy_filter)
+  {
+    WRITE(p, " float4 prev_row = ");
+    WriteSampleOp(-1);
+    WRITE(p, ";\n");
+    WRITE(p, " float4 current_row = ");
+    WriteSampleOp(0);
+    WRITE(p, ";\n");
+    WRITE(p, " float4 next_row = ");
+    WriteSampleOp(1);
+    WRITE(p, ";\n");
+    WRITE(
+        p,
         " float3 col = float3(clamp((int3(prev_row.rgb * 255.0) * filter_coefficients[0] +\n"
         " int3(current_row.rgb * 255.0) * filter_coefficients[1] +\n"
         " int3(next_row.rgb * 255.0) * filter_coefficients[2]) >> 6,\n"
         " int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n");
-  WRITE(p, " return float4(col, current_row.a);\n");
+    WRITE(p, " return float4(col, current_row.a);\n");
+  }
+  else
+  {
+    // Only the current row contributes here, but the arithmetic must match the filtered path:
+    // shift the product right by 6, clamp to 0..255, then divide by 255.0 to normalize.
+    WRITE(p, " float4 current_row = ");
+    WriteSampleOp(0);
+    WRITE(p, ";\n");
+    WRITE(p, " return float4(float3(clamp((int3(current_row.rgb * 255.0) * "
+             "filter_coefficients[1]) >> 6, int3(0, 0, 0), int3(255, 255, 255))) / 255.0, "
+             "current_row.a);\n");
+  }
   WRITE(p, "}\n");
 }
 
diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp
index 8a3c3970d8..748afa29b6 100644
--- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp
+++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp
@@ -15,7 +15,7 @@ namespace TextureConversionShaderGen
 {
 
 TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
-                         bool scale_by_half)
+                         bool scale_by_half, bool copy_filter)
 {
   TCShaderUid out;
   UidData* uid_data = out.GetUidData();
@@ -26,6 +26,7 @@ TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_i
   uid_data->is_depth_copy = is_depth_copy;
   uid_data->is_intensity = is_intensity;
   uid_data->scale_by_half = scale_by_half;
+  uid_data->copy_filter = copy_filter;
 
   return out;
 }
@@ -91,12 +92,21 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data)
 
   // The copy filter applies to both color and depth copies. This has been verified on hardware.
   // The filter is only applied to the RGB channels, the alpha channel is left intact.
-  out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n"
-            " float4 current_row = SampleEFB(uv0, 0.0f);\n"
-            " float4 next_row = SampleEFB(uv0, 1.0f);\n"
-            " float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n"
-            " current_row.rgb * filter_coefficients[1] +\n"
-            " next_row.rgb * filter_coefficients[2], current_row.a);\n");
+  if (uid_data->copy_filter)
+  {
+    out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n"
+              " float4 current_row = SampleEFB(uv0, 0.0f);\n"
+              " float4 next_row = SampleEFB(uv0, 1.0f);\n"
+              " float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n"
+              " current_row.rgb * filter_coefficients[1] +\n"
+              " next_row.rgb * filter_coefficients[2], current_row.a);\n");
+  }
+  else
+  {
+    out.Write(
+        " float4 current_row = SampleEFB(uv0, 0.0f);\n"
+        " float4 texcol = float4(current_row.rgb * filter_coefficients[1], current_row.a);\n");
+  }
 
   if (uid_data->is_depth_copy)
   {
diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.h b/Source/Core/VideoCommon/TextureConverterShaderGen.h
index b55bf57065..1f231f53c3 100644
--- a/Source/Core/VideoCommon/TextureConverterShaderGen.h
+++ b/Source/Core/VideoCommon/TextureConverterShaderGen.h
@@ -22,6 +22,7 @@ struct UidData
   u32 is_depth_copy : 1;
   u32 is_intensity : 1;
   u32 scale_by_half : 1;
+  u32 copy_filter : 1;
 };
 #pragma pack()
 
@@ -30,6 +31,6 @@ using TCShaderUid = ShaderUid;
 
 ShaderCode GenerateShader(APIType api_type, const UidData* uid_data);
 TCShaderUid GetShaderUid(EFBCopyFormat dst_format, bool is_depth_copy, bool is_intensity,
-                         bool scale_by_half);
+                         bool scale_by_half, bool copy_filter);
 
 } // namespace TextureConversionShaderGen