From 9e798eec94a8ebe94c1ef8270b504b5698b2a0f5 Mon Sep 17 00:00:00 2001 From: Stenzek Date: Sun, 29 Apr 2018 18:52:30 +1000 Subject: [PATCH] Implement EFB copy filter and gamma in hardware backends Also makes y_scale a dynamic parameter for EFB copies, as it doesn't make sense to keep it as part of the uid, otherwise we're generating redundant shaders. --- Source/Core/Core/Config/GraphicsSettings.cpp | 2 + Source/Core/Core/Config/GraphicsSettings.h | 1 + .../Core/ConfigLoaders/IsSettingSaveable.cpp | 1 + .../VideoBackends/D3D/PSTextureEncoder.cpp | 24 +- .../Core/VideoBackends/D3D/PSTextureEncoder.h | 6 +- Source/Core/VideoBackends/D3D/Render.cpp | 17 +- Source/Core/VideoBackends/D3D/Render.h | 4 +- .../Core/VideoBackends/D3D/TextureCache.cpp | 47 +++- Source/Core/VideoBackends/D3D/TextureCache.h | 10 +- Source/Core/VideoBackends/Null/Render.cpp | 2 +- Source/Core/VideoBackends/Null/Render.h | 2 +- Source/Core/VideoBackends/Null/TextureCache.h | 7 +- Source/Core/VideoBackends/OGL/Render.cpp | 3 +- Source/Core/VideoBackends/OGL/Render.h | 2 +- .../Core/VideoBackends/OGL/TextureCache.cpp | 30 ++- Source/Core/VideoBackends/OGL/TextureCache.h | 12 +- .../VideoBackends/OGL/TextureConverter.cpp | 19 +- .../Core/VideoBackends/OGL/TextureConverter.h | 11 +- .../VideoBackends/Software/EfbInterface.cpp | 33 ++- .../VideoBackends/Software/EfbInterface.h | 3 +- .../VideoBackends/Software/SWRenderer.cpp | 3 +- .../Core/VideoBackends/Software/SWRenderer.h | 2 +- .../VideoBackends/Software/TextureCache.h | 9 +- .../VideoBackends/Software/TextureEncoder.cpp | 9 +- .../VideoBackends/Software/TextureEncoder.h | 6 +- Source/Core/VideoBackends/Vulkan/Renderer.cpp | 3 +- Source/Core/VideoBackends/Vulkan/Renderer.h | 2 +- .../VideoBackends/Vulkan/TextureCache.cpp | 38 ++- .../Core/VideoBackends/Vulkan/TextureCache.h | 8 +- .../VideoBackends/Vulkan/TextureConverter.cpp | 27 +- .../VideoBackends/Vulkan/TextureConverter.h | 9 +- Source/Core/VideoCommon/BPMemory.h | 4 +- 
Source/Core/VideoCommon/BPStructs.cpp | 12 +- Source/Core/VideoCommon/RenderBase.cpp | 2 +- Source/Core/VideoCommon/RenderBase.h | 3 +- Source/Core/VideoCommon/TextureCacheBase.cpp | 49 +++- Source/Core/VideoCommon/TextureCacheBase.h | 31 ++- .../VideoCommon/TextureConversionShader.cpp | 245 +++++++++++------- .../VideoCommon/TextureConverterShaderGen.cpp | 62 +++-- Source/Core/VideoCommon/VideoConfig.cpp | 1 + Source/Core/VideoCommon/VideoConfig.h | 1 + 41 files changed, 526 insertions(+), 236 deletions(-) diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index f46c7a585a..cfae1b14f2 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -106,6 +106,8 @@ const ConfigInfo GFX_ENHANCE_POST_SHADER{ {System::GFX, "Enhancements", "PostProcessingShader"}, ""}; const ConfigInfo GFX_ENHANCE_FORCE_TRUE_COLOR{{System::GFX, "Enhancements", "ForceTrueColor"}, true}; +const ConfigInfo GFX_ENHANCE_DISABLE_COPY_FILTER{ + {System::GFX, "Enhancements", "DisableCopyFilter"}, true}; // Graphics.Stereoscopy diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index 4f0e4f741c..6b4c900e01 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -80,6 +80,7 @@ extern const ConfigInfo GFX_ENHANCE_FORCE_FILTERING; extern const ConfigInfo GFX_ENHANCE_MAX_ANISOTROPY; // NOTE - this is x in (1 << x) extern const ConfigInfo GFX_ENHANCE_POST_SHADER; extern const ConfigInfo GFX_ENHANCE_FORCE_TRUE_COLOR; +extern const ConfigInfo GFX_ENHANCE_DISABLE_COPY_FILTER; // Graphics.Stereoscopy diff --git a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp index 7b92427749..56b3ec8c74 100644 --- a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp +++ b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp @@ -85,6 +85,7 @@ bool 
IsSettingSaveable(const Config::ConfigLocation& config_location) Config::GFX_ENHANCE_MAX_ANISOTROPY.location, Config::GFX_ENHANCE_POST_SHADER.location, Config::GFX_ENHANCE_FORCE_TRUE_COLOR.location, + Config::GFX_ENHANCE_DISABLE_COPY_FILTER.location, // Graphics.Stereoscopy diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp index 8d48332af7..de0d9716ec 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp @@ -31,7 +31,11 @@ struct EFBEncodeParams u32 DestWidth; u32 ScaleFactor; float y_scale; - u32 padding[3]; + float gamma_rcp; + float clamp_top; + float clamp_bottom; + s32 filter_coefficients[3]; + u32 padding; }; PSTextureEncoder::PSTextureEncoder() @@ -66,9 +70,11 @@ void PSTextureEncoder::Shutdown() SAFE_RELEASE(m_encode_params); } -void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half) +void PSTextureEncoder::Encode( + u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, + u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, + bool clamp_top, bool clamp_bottom, + const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) { // Resolve MSAA targets before copying. // FIXME: Instead of resolving EFB, it would be better to pick out a @@ -101,7 +107,13 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_w encode_params.SrcTop = src_rect.top; encode_params.DestWidth = native_width; encode_params.ScaleFactor = scale_by_half ? 2 : 1; - encode_params.y_scale = params.y_scale; + encode_params.y_scale = y_scale; + encode_params.gamma_rcp = 1.0f / gamma; + encode_params.clamp_top = clamp_top ? 
src_rect.top / float(EFB_HEIGHT) : 0.0f; + encode_params.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 0.0f; + for (size_t i = 0; i < filter_coefficients.size(); i++) + encode_params.filter_coefficients[i] = filter_coefficients[i]; + D3D::context->UpdateSubresource(m_encode_params, 0, nullptr, &encode_params, 0, 0); D3D::stateman->SetPixelConstants(m_encode_params); @@ -109,7 +121,7 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_w // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more // complex down filtering to average all pixels and produce the correct result. // Also, box filtering won't be correct for anything other than 1x IR - if (scale_by_half || g_renderer->GetEFBScale() != 1 || params.y_scale > 1.0f) + if (scale_by_half || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f) D3D::SetLinearCopySampler(); else D3D::SetPointCopySampler(); diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h index 7c45970d19..4054b07fbf 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h @@ -8,6 +8,7 @@ #include #include "Common/CommonTypes.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/TextureConversionShader.h" #include "VideoCommon/VideoCommon.h" @@ -38,8 +39,9 @@ public: void Init(); void Shutdown(); void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half); + u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, + float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); private: ID3D11PixelShader* GetEncodingPixelShader(const EFBCopyParams& params); diff --git a/Source/Core/VideoBackends/D3D/Render.cpp 
b/Source/Core/VideoBackends/D3D/Render.cpp index 85e81f8171..40e02f4e12 100644 --- a/Source/Core/VideoBackends/D3D/Render.cpp +++ b/Source/Core/VideoBackends/D3D/Render.cpp @@ -627,8 +627,7 @@ void Renderer::ReinterpretPixelData(unsigned int convtype) } // This function has the final picture. We adjust the aspect ratio here. -void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks, - float Gamma) +void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks) { ResetAPIState(); @@ -650,7 +649,7 @@ void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region auto* xfb_texture = static_cast(texture); BlitScreen(xfb_region, targetRc, xfb_texture->GetRawTexIdentifier(), - xfb_texture->GetConfig().width, xfb_texture->GetConfig().height, Gamma); + xfb_texture->GetConfig().width, xfb_texture->GetConfig().height); // Reset viewport for drawing text D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.0f, 0.0f, static_cast(m_backbuffer_width), @@ -854,7 +853,7 @@ void Renderer::BBoxWrite(int index, u16 _value) } void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D* src_texture, - u32 src_width, u32 src_height, float Gamma) + u32 src_width, u32 src_height) { if (g_ActiveConfig.stereo_mode == StereoMode::SBS || g_ActiveConfig.stereo_mode == StereoMode::TAB) @@ -871,13 +870,13 @@ void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D D3D::drawShadedTexQuad(src_texture->GetSRV(), src.AsRECT(), src_width, src_height, PixelShaderCache::GetColorCopyProgram(false), VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), nullptr, Gamma, 0); + VertexShaderCache::GetSimpleInputLayout(), nullptr, 1.0f, 0); D3D::context->RSSetViewports(1, &rightVp); D3D::drawShadedTexQuad(src_texture->GetSRV(), src.AsRECT(), src_width, src_height, PixelShaderCache::GetColorCopyProgram(false), VertexShaderCache::GetSimpleVertexShader(), - 
VertexShaderCache::GetSimpleInputLayout(), nullptr, Gamma, 1); + VertexShaderCache::GetSimpleInputLayout(), nullptr, 1.0f, 1); } else if (g_ActiveConfig.stereo_mode == StereoMode::Nvidia3DVision) { @@ -896,13 +895,13 @@ void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D D3D::drawShadedTexQuad(src_texture->GetSRV(), src.AsRECT(), src_width, src_height, PixelShaderCache::GetColorCopyProgram(false), VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), nullptr, Gamma, 0); + VertexShaderCache::GetSimpleInputLayout(), nullptr, 1.0f, 0); D3D::context->RSSetViewports(1, &rightVp); D3D::drawShadedTexQuad(src_texture->GetSRV(), src.AsRECT(), src_width, src_height, PixelShaderCache::GetColorCopyProgram(false), VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), nullptr, Gamma, 1); + VertexShaderCache::GetSimpleInputLayout(), nullptr, 1.0f, 1); // Copy the left eye to the backbuffer, if Nvidia 3D Vision is enabled it should // recognize the signature and automatically include the right eye frame. 
@@ -927,7 +926,7 @@ void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D nullptr; D3D::drawShadedTexQuad(src_texture->GetSRV(), src.AsRECT(), src_width, src_height, pixelShader, VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), geomShader, Gamma); + VertexShaderCache::GetSimpleInputLayout(), geomShader, 1.0f); } } diff --git a/Source/Core/VideoBackends/D3D/Render.h b/Source/Core/VideoBackends/D3D/Render.h index a7ccb0b9ae..0927f6c934 100644 --- a/Source/Core/VideoBackends/D3D/Render.h +++ b/Source/Core/VideoBackends/D3D/Render.h @@ -63,7 +63,7 @@ public: TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; - void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override; + void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override; void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) override; @@ -84,7 +84,7 @@ private: void UpdateBackbufferSize(); void BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D* src_texture, - u32 src_width, u32 src_height, float Gamma); + u32 src_width, u32 src_height); void UpdateUtilityUniformBuffer(const void* uniforms, u32 uniforms_size); void UpdateUtilityVertexBuffer(const void* vertices, u32 vertex_stride, u32 num_vertices); diff --git a/Source/Core/VideoBackends/D3D/TextureCache.cpp b/Source/Core/VideoBackends/D3D/TextureCache.cpp index b0974f3cc3..9c5f139502 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.cpp +++ b/Source/Core/VideoBackends/D3D/TextureCache.cpp @@ -33,10 +33,12 @@ static std::unique_ptr g_encoder; void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half) + const EFBRectangle& src_rect, bool scale_by_half, float y_scale, + float gamma, bool clamp_top, bool 
clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) { g_encoder->Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect, - scale_by_half); + scale_by_half, y_scale, gamma, clamp_top, clamp_bottom, filter_coefficients); } const char palette_shader[] = @@ -137,9 +139,9 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, D3D::stateman->SetTexture(1, palette_buf_srv); // TODO: Add support for C14X2 format. (Different multiplier, more palette entries.) - float params[4] = {source->format == TextureFormat::I4 ? 15.f : 255.f}; - D3D::context->UpdateSubresource(palette_uniform, 0, nullptr, ¶ms, 0, 0); - D3D::stateman->SetPixelConstants(palette_uniform); + float params[8] = {source->format == TextureFormat::I4 ? 15.f : 255.f}; + D3D::context->UpdateSubresource(uniform_buffer, 0, nullptr, ¶ms, 0, 0); + D3D::stateman->SetPixelConstants(uniform_buffer); const D3D11_RECT sourcerect = CD3D11_RECT(0, 0, source->GetWidth(), source->GetHeight()); @@ -180,7 +182,7 @@ TextureCache::TextureCache() palette_buf = nullptr; palette_buf_srv = nullptr; - palette_uniform = nullptr; + uniform_buffer = nullptr; palette_pixel_shader[static_cast(TLUTFormat::IA8)] = GetConvertShader("IA8"); palette_pixel_shader[static_cast(TLUTFormat::RGB565)] = GetConvertShader("RGB565"); palette_pixel_shader[static_cast(TLUTFormat::RGB5A3)] = GetConvertShader("RGB5A3"); @@ -195,10 +197,10 @@ TextureCache::TextureCache() CHECK(SUCCEEDED(hr), "create palette decoder lut srv"); D3D::SetDebugObjectName(palette_buf_srv, "texture decoder lut srv"); const D3D11_BUFFER_DESC cbdesc = - CD3D11_BUFFER_DESC(16, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); - hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &palette_uniform); + CD3D11_BUFFER_DESC(sizeof(float) * 8, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); + hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &uniform_buffer); CHECK(SUCCEEDED(hr), "Create palette decoder constant 
buffer"); - D3D::SetDebugObjectName(palette_uniform, + D3D::SetDebugObjectName(uniform_buffer, "a constant buffer used in TextureCache::CopyRenderTargetToTexture"); } @@ -209,7 +211,7 @@ TextureCache::~TextureCache() SAFE_RELEASE(palette_buf); SAFE_RELEASE(palette_buf_srv); - SAFE_RELEASE(palette_uniform); + SAFE_RELEASE(uniform_buffer); for (auto*& shader : palette_pixel_shader) SAFE_RELEASE(shader); for (auto& iter : m_efb_to_tex_pixel_shaders) @@ -218,7 +220,9 @@ TextureCache::~TextureCache() void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity) + EFBCopyFormat dst_format, bool is_intensity, float gamma, + bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) { auto* destination_texture = static_cast(entry->texture.get()); @@ -260,6 +264,27 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, else D3D::SetPointCopySampler(); + struct PixelConstants + { + float filter_coefficients[3]; + float gamma_rcp; + float clamp_top; + float clamp_bottom; + float pixel_height; + u32 padding; + }; + PixelConstants constants; + for (size_t i = 0; i < filter_coefficients.size(); i++) + constants.filter_coefficients[i] = filter_coefficients[i] / 64.0f; + constants.gamma_rcp = 1.0f / gamma; + constants.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f; + constants.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f; + constants.pixel_height = + g_ActiveConfig.bCopyEFBScaled ? 1.0f / g_renderer->GetTargetHeight() : 1.0f / EFB_HEIGHT; + constants.padding = 0; + D3D::context->UpdateSubresource(uniform_buffer, 0, nullptr, &constants, 0, 0); + D3D::stateman->SetPixelConstants(uniform_buffer); + // Make sure we don't draw with the texture set as both a source and target. // (This can happen because we don't unbind textures when we free them.) 
D3D::stateman->UnsetTexture(destination_texture->GetRawTexIdentifier()->GetSRV()); diff --git a/Source/Core/VideoBackends/D3D/TextureCache.h b/Source/Core/VideoBackends/D3D/TextureCache.h index 49332e2de3..24dda22d60 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.h +++ b/Source/Core/VideoBackends/D3D/TextureCache.h @@ -34,11 +34,13 @@ private: void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half) override; + bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override; void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, - bool is_intensity) override; + bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, + float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override; bool CompileShaders() override { return true; } void DeleteShaders() override {} @@ -46,7 +48,7 @@ private: ID3D11Buffer* palette_buf; ID3D11ShaderResourceView* palette_buf_srv; - ID3D11Buffer* palette_uniform; + ID3D11Buffer* uniform_buffer; ID3D11PixelShader* palette_pixel_shader[3]; std::map m_efb_to_tex_pixel_shaders; diff --git a/Source/Core/VideoBackends/Null/Render.cpp b/Source/Core/VideoBackends/Null/Render.cpp index 4e9cf655c2..c9c0e62325 100644 --- a/Source/Core/VideoBackends/Null/Render.cpp +++ b/Source/Core/VideoBackends/Null/Render.cpp @@ -92,7 +92,7 @@ TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) return result; } -void Renderer::SwapImpl(AbstractTexture*, const EFBRectangle&, u64, float) +void Renderer::SwapImpl(AbstractTexture*, const EFBRectangle&, u64) { UpdateActiveConfig(); } diff --git a/Source/Core/VideoBackends/Null/Render.h b/Source/Core/VideoBackends/Null/Render.h index 
0c0092554f..c1bf9c122e 100644 --- a/Source/Core/VideoBackends/Null/Render.h +++ b/Source/Core/VideoBackends/Null/Render.h @@ -34,7 +34,7 @@ public: void BBoxWrite(int index, u16 value) override {} TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; - void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override; + void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override; void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) override diff --git a/Source/Core/VideoBackends/Null/TextureCache.h b/Source/Core/VideoBackends/Null/TextureCache.h index cf9dfa84a2..25803344a4 100644 --- a/Source/Core/VideoBackends/Null/TextureCache.h +++ b/Source/Core/VideoBackends/Null/TextureCache.h @@ -27,12 +27,15 @@ public: void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half) override + bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override { } void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity) override + bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, + float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override { } }; diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index d5259aa651..f049ee9e3a 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -1365,8 +1365,7 @@ void Renderer::ApplyBlendingState(const BlendingState state, bool force) } // This function has the final picture. We adjust the aspect ratio here. 
-void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks, - float Gamma) +void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks) { if (g_ogl_config.bSupportsDebug) { diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h index d8882e2111..c27c06308c 100644 --- a/Source/Core/VideoBackends/OGL/Render.h +++ b/Source/Core/VideoBackends/OGL/Render.h @@ -126,7 +126,7 @@ public: TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; - void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override; + void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override; void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) override; diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp index 24f94e39db..086d0f29f2 100644 --- a/Source/Core/VideoBackends/OGL/TextureCache.cpp +++ b/Source/Core/VideoBackends/OGL/TextureCache.cpp @@ -68,10 +68,18 @@ constexpr const char* geometry_program = "layout(triangles) in;\n" void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half) + const EFBRectangle& src_rect, bool scale_by_half, float y_scale, + float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) { + // Flip top/bottom due to lower-left coordinate system. + float clamp_top_val = + clamp_bottom ? (1.0f - src_rect.bottom / static_cast(EFB_HEIGHT)) : 0.0f; + float clamp_bottom_val = + clamp_top ? 
(1.0f - src_rect.top / static_cast(EFB_HEIGHT)) : 0.0f; TextureConverter::EncodeToRamFromTexture(dst, params, native_width, bytes_per_row, num_blocks_y, - memory_stride, src_rect, scale_by_half); + memory_stride, src_rect, scale_by_half, y_scale, gamma, + clamp_top_val, clamp_bottom_val, filter_coefficients); } TextureCache::TextureCache() @@ -483,7 +491,9 @@ void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity) + EFBCopyFormat dst_format, bool is_intensity, float gamma, + bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) { auto* destination_texture = static_cast(entry->texture.get()); g_renderer->ResetAPIState(); // reset any game specific settings @@ -528,6 +538,11 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, code.GetBuffer(), geo_program); shader.position_uniform = glGetUniformLocation(shader.shader.glprogid, "copy_position"); + shader.pixel_height_uniform = glGetUniformLocation(shader.shader.glprogid, "pixel_height"); + shader.gamma_rcp_uniform = glGetUniformLocation(shader.shader.glprogid, "gamma_rcp"); + shader.clamp_tb_uniform = glGetUniformLocation(shader.shader.glprogid, "clamp_tb"); + shader.filter_coefficients_uniform = + glGetUniformLocation(shader.shader.glprogid, "filter_coefficients"); } shader.shader.Bind(); @@ -535,6 +550,15 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, TargetRectangle R = g_renderer->ConvertEFBRectangle(src_rect); glUniform4f(shader.position_uniform, static_cast(R.left), static_cast(R.top), static_cast(R.right), static_cast(R.bottom)); + glUniform1f(shader.pixel_height_uniform, g_ActiveConfig.bCopyEFBScaled ? 
+ 1.0f / g_renderer->GetTargetHeight() : + 1.0f / EFB_HEIGHT); + glUniform1f(shader.gamma_rcp_uniform, 1.0f / gamma); + glUniform2f(shader.clamp_tb_uniform, + clamp_bottom ? (1.0f - src_rect.bottom / static_cast(EFB_HEIGHT)) : 0.0f, + clamp_top ? (1.0f - src_rect.top / static_cast(EFB_HEIGHT)) : 0.0f); + glUniform3f(shader.filter_coefficients_uniform, filter_coefficients[0] / 64.0f, + filter_coefficients[1] / 64.0f, filter_coefficients[2] / 64.0f); ProgramShaderCache::BindVertexFormat(nullptr); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); diff --git a/Source/Core/VideoBackends/OGL/TextureCache.h b/Source/Core/VideoBackends/OGL/TextureCache.h index 3923919df6..54dc4afef8 100644 --- a/Source/Core/VideoBackends/OGL/TextureCache.h +++ b/Source/Core/VideoBackends/OGL/TextureCache.h @@ -65,11 +65,13 @@ private: void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half) override; + bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override; void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, - bool is_intensity) override; + bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, + float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override; bool CompileShaders() override; void DeleteShaders() override; @@ -84,6 +86,10 @@ private: { SHADER shader; GLuint position_uniform; + GLuint pixel_height_uniform; + GLuint gamma_rcp_uniform; + GLuint clamp_tb_uniform; + GLuint filter_coefficients_uniform; }; std::map m_efb_copy_programs; diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/TextureConverter.cpp index e1ec911a65..615cc9e1c7 100644 --- 
a/Source/Core/VideoBackends/OGL/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/TextureConverter.cpp @@ -39,6 +39,9 @@ struct EncodingProgram SHADER program; GLint copy_position_uniform; GLint y_scale_uniform; + GLint gamma_rcp_uniform; + GLint clamp_tb_uniform; + GLint filter_coefficients_uniform; }; std::map s_encoding_programs; @@ -81,6 +84,10 @@ static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params) program.copy_position_uniform = glGetUniformLocation(program.program.glprogid, "position"); program.y_scale_uniform = glGetUniformLocation(program.program.glprogid, "y_scale"); + program.gamma_rcp_uniform = glGetUniformLocation(program.program.glprogid, "gamma_rcp"); + program.clamp_tb_uniform = glGetUniformLocation(program.program.glprogid, "clamp_tb"); + program.filter_coefficients_uniform = + glGetUniformLocation(program.program.glprogid, "filter_coefficients"); return s_encoding_programs.emplace(params, program).first->second; } @@ -137,7 +144,9 @@ static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half) + const EFBRectangle& src_rect, bool scale_by_half, float y_scale, + float gamma, float clamp_top, float clamp_bottom, + const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) { g_renderer->ResetAPIState(); @@ -146,14 +155,18 @@ void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 nativ texconv_shader.program.Bind(); glUniform4i(texconv_shader.copy_position_uniform, src_rect.left, src_rect.top, native_width, scale_by_half ? 
2 : 1); - glUniform1f(texconv_shader.y_scale_uniform, params.y_scale); + glUniform1f(texconv_shader.y_scale_uniform, y_scale); + glUniform1f(texconv_shader.gamma_rcp_uniform, 1.0f / gamma); + glUniform2f(texconv_shader.clamp_tb_uniform, clamp_top, clamp_bottom); + glUniform3i(texconv_shader.filter_coefficients_uniform, filter_coefficients[0], + filter_coefficients[1], filter_coefficients[2]); const GLuint read_texture = params.depth ? FramebufferManager::ResolveAndGetDepthTarget(src_rect) : FramebufferManager::ResolveAndGetRenderTarget(src_rect); EncodeToRamUsingShader(read_texture, dest_ptr, bytes_per_row, num_blocks_y, memory_stride, - scale_by_half && !params.depth, params.y_scale); + scale_by_half && !params.depth, y_scale); g_renderer->RestoreAPIState(); } diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.h b/Source/Core/VideoBackends/OGL/TextureConverter.h index baed715a7e..0d7450e4b6 100644 --- a/Source/Core/VideoBackends/OGL/TextureConverter.h +++ b/Source/Core/VideoBackends/OGL/TextureConverter.h @@ -7,10 +7,9 @@ #include "Common/CommonTypes.h" #include "Common/GL/GLUtil.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoCommon.h" -struct EFBCopyParams; - namespace OGL { // Converts textures between formats using shaders @@ -21,9 +20,11 @@ void Init(); void Shutdown(); // returns size of the encoded data (in bytes) -void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half); +void EncodeToRamFromTexture( + u8* dest_ptr, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, + u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, + float y_scale, float gamma, float clamp_top, float clamp_bottom, + const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); } } // namespace OGL diff --git 
a/Source/Core/VideoBackends/Software/EfbInterface.cpp b/Source/Core/VideoBackends/Software/EfbInterface.cpp index 5d852e0ff9..a13feb6c50 100644 --- a/Source/Core/VideoBackends/Software/EfbInterface.cpp +++ b/Source/Core/VideoBackends/Software/EfbInterface.cpp @@ -502,6 +502,23 @@ static u32 VerticalFilter(const std::array& colors, return out_color32; } +static u32 GammaCorrection(u32 color, const float gamma_rcp) +{ + u8 in_colors[4]; + std::memcpy(&in_colors, &color, sizeof(in_colors)); + + u8 out_color[4]; + for (int i = BLU_C; i <= RED_C; i++) + { + out_color[i] = static_cast( + MathUtil::Clamp(std::pow(in_colors[i] / 255.0f, gamma_rcp) * 255.0f, 0.0f, 255.0f)); + } + + u32 out_color32; + std::memcpy(&out_color32, out_color, sizeof(out_color32)); + return out_color32; +} + // For internal used only, return a non-normalized value, which saves work later. static yuv444 ConvertColorToYUV(u32 color) { @@ -530,8 +547,7 @@ u8* GetPixelPointer(u16 x, u16 y, bool depth) } void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale, - bool clamp_top, bool clamp_bottom, float Gamma, - const std::array& filterCoefficients) + float gamma) { if (!xfb_in_ram) { @@ -539,8 +555,12 @@ void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rec return; } - int left = source_rect.left; - int right = source_rect.right; + const int left = source_rect.left; + const int right = source_rect.right; + const bool clamp_top = bpmem.triggerEFBCopy.clamp_top; + const bool clamp_bottom = bpmem.triggerEFBCopy.clamp_bottom; + const float gamma_rcp = 1.0f / gamma; + const auto filter_coefficients = bpmem.copyfilter.GetCoefficients(); // this assumes copies will always start on an even (YU) pixel and the // copy always has an even width, which might not be true. 
@@ -575,9 +595,10 @@ void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rec std::array colors = {{GetColor(x, y_prev), GetColor(x, y), GetColor(x, y_next)}}; // Vertical Filter (Multisampling resolve, deflicker, brightness) - u32 filtered = VerticalFilter(colors, filterCoefficients); + u32 filtered = VerticalFilter(colors, filter_coefficients); - // TODO: Gamma correction happens here. + // Gamma correction happens here. + filtered = GammaCorrection(filtered, gamma_rcp); scanline[i] = ConvertColorToYUV(filtered); } diff --git a/Source/Core/VideoBackends/Software/EfbInterface.h b/Source/Core/VideoBackends/Software/EfbInterface.h index 9d0706a83c..7f7c0ec608 100644 --- a/Source/Core/VideoBackends/Software/EfbInterface.h +++ b/Source/Core/VideoBackends/Software/EfbInterface.h @@ -59,8 +59,7 @@ u32 GetDepth(u16 x, u16 y); u8* GetPixelPointer(u16 x, u16 y, bool depth); void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale, - bool clamp_top, bool clamp_bottom, float Gamma, - const std::array& filterCoefficients); + float gamma); extern u32 perf_values[PQ_NUM_MEMBERS]; inline void IncPerfCounterQuadCount(PerfQueryType type) diff --git a/Source/Core/VideoBackends/Software/SWRenderer.cpp b/Source/Core/VideoBackends/Software/SWRenderer.cpp index 51b035dd82..f4e322619d 100644 --- a/Source/Core/VideoBackends/Software/SWRenderer.cpp +++ b/Source/Core/VideoBackends/Software/SWRenderer.cpp @@ -87,8 +87,7 @@ std::unique_ptr SWRenderer::CreatePipeline(const AbstractPipel } // Called on the GPU thread -void SWRenderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks, - float Gamma) +void SWRenderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks) { OSD::DoCallbacks(OSD::CallbackType::OnFrame); diff --git a/Source/Core/VideoBackends/Software/SWRenderer.h b/Source/Core/VideoBackends/Software/SWRenderer.h index 2c4a5aeef6..3c274edda7 100644 --- 
a/Source/Core/VideoBackends/Software/SWRenderer.h +++ b/Source/Core/VideoBackends/Software/SWRenderer.h @@ -34,7 +34,7 @@ public: TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; - void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override; + void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override; void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) override; diff --git a/Source/Core/VideoBackends/Software/TextureCache.h b/Source/Core/VideoBackends/Software/TextureCache.h index 13a70002f2..f3d9c91938 100644 --- a/Source/Core/VideoBackends/Software/TextureCache.h +++ b/Source/Core/VideoBackends/Software/TextureCache.h @@ -18,15 +18,18 @@ public: } void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half) override + bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override { TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, - src_rect, scale_by_half); + src_rect, scale_by_half, y_scale, gamma); } private: void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity) override + bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, + float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override { // TODO: If we ever want to "fake" vram textures, we would need to implement this } diff --git a/Source/Core/VideoBackends/Software/TextureEncoder.cpp b/Source/Core/VideoBackends/Software/TextureEncoder.cpp index 5308dca705..a31888e544 100644 --- a/Source/Core/VideoBackends/Software/TextureEncoder.cpp +++ 
b/Source/Core/VideoBackends/Software/TextureEncoder.cpp @@ -1469,15 +1469,12 @@ void EncodeEfbCopy(u8* dst, const EFBCopyParams& params, u32 native_width, u32 b } void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half) + u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, + float y_scale, float gamma) { if (params.copy_format == EFBCopyFormat::XFB) { - static constexpr std::array gamma_LUT = {1.0f, 1.7f, 2.2f, 1.0f}; - EfbInterface::EncodeXFB(dst, native_width, src_rect, params.y_scale, - !!bpmem.triggerEFBCopy.clamp_top, !!bpmem.triggerEFBCopy.clamp_bottom, - gamma_LUT[bpmem.triggerEFBCopy.gamma], - bpmem.copyfilter.GetCoefficients()); + EfbInterface::EncodeXFB(dst, native_width, src_rect, y_scale, gamma); } else { diff --git a/Source/Core/VideoBackends/Software/TextureEncoder.h b/Source/Core/VideoBackends/Software/TextureEncoder.h index ec21c97c42..20aa05605a 100644 --- a/Source/Core/VideoBackends/Software/TextureEncoder.h +++ b/Source/Core/VideoBackends/Software/TextureEncoder.h @@ -5,12 +5,12 @@ #pragma once #include "Common/CommonTypes.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoCommon.h" -struct EFBCopyParams; - namespace TextureEncoder { void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half); + u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, + float y_scale, float gamma); } diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.cpp b/Source/Core/VideoBackends/Vulkan/Renderer.cpp index c69ad396d6..38d7bd0992 100644 --- a/Source/Core/VideoBackends/Vulkan/Renderer.cpp +++ b/Source/Core/VideoBackends/Vulkan/Renderer.cpp @@ -697,8 +697,7 @@ void Renderer::ReinterpretPixelData(unsigned int convtype) BindEFBToStateTracker(); } 
-void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks, - float Gamma) +void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks) { // Pending/batched EFB pokes should be included in the final image. FramebufferManager::GetInstance()->FlushEFBPokes(); diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.h b/Source/Core/VideoBackends/Vulkan/Renderer.h index b2662a3d4b..ce15d12592 100644 --- a/Source/Core/VideoBackends/Vulkan/Renderer.h +++ b/Source/Core/VideoBackends/Vulkan/Renderer.h @@ -59,7 +59,7 @@ public: void BBoxWrite(int index, u16 value) override; TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; - void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override; + void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override; void ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha_enable, bool z_enable, u32 color, u32 z) override; diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp index cb61cd4de4..bc7a0d0f18 100644 --- a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp +++ b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp @@ -100,7 +100,9 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half) + const EFBRectangle& src_rect, bool scale_by_half, float y_scale, + float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) { // Flush EFB pokes first, as they're expected to be included. 
FramebufferManager::GetInstance()->FlushEFBPokes(); @@ -131,9 +133,9 @@ void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_widt src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_texture_converter->EncodeTextureToMemory(src_texture->GetView(), dst, params, native_width, - bytes_per_row, num_blocks_y, memory_stride, src_rect, - scale_by_half); + m_texture_converter->EncodeTextureToMemory( + src_texture->GetView(), dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, + src_rect, scale_by_half, y_scale, gamma, clamp_top, clamp_bottom, filter_coefficients); // Transition back to original state src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), original_layout); @@ -209,7 +211,9 @@ void TextureCache::DeleteShaders() void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity) + EFBCopyFormat dst_format, bool is_intensity, float gamma, + bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) { VKTexture* texture = static_cast(entry->texture.get()); @@ -228,6 +232,26 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); StateTracker::GetInstance()->EndRenderPass(); + // Fill uniform buffer. + struct PixelUniforms + { + float filter_coefficients[3]; + float gamma_rcp; + float clamp_top; + float clamp_bottom; + float pixel_height; + u32 padding; + }; + PixelUniforms uniforms; + for (size_t i = 0; i < filter_coefficients.size(); i++) + uniforms.filter_coefficients[i] = filter_coefficients[i] / 64.0f; + uniforms.gamma_rcp = 1.0f / gamma; + uniforms.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f; + uniforms.clamp_bottom = clamp_bottom ? 
src_rect.bottom / float(EFB_HEIGHT) : 1.0f; + uniforms.pixel_height = + g_ActiveConfig.bCopyEFBScaled ? 1.0f / g_renderer->GetTargetHeight() : 1.0f / EFB_HEIGHT; + uniforms.padding = 0; + // Transition EFB to shader resource before binding. // An out-of-bounds source region is valid here, and fine for the draw (since it is converted // to texture coordinates), but it's not valid to resolve an out-of-range rectangle. @@ -274,6 +298,10 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, g_shader_cache->GetPassthroughVertexShader(), g_shader_cache->GetPassthroughGeometryShader(), shader); + u8* ubo_ptr = draw.AllocatePSUniforms(sizeof(PixelUniforms)); + std::memcpy(ubo_ptr, &uniforms, sizeof(PixelUniforms)); + draw.CommitPSUniforms(sizeof(PixelUniforms)); + draw.SetPSSampler(0, src_texture->GetView(), src_sampler); VkRect2D dest_region = {{0, 0}, {texture->GetConfig().width, texture->GetConfig().height}}; diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.h b/Source/Core/VideoBackends/Vulkan/TextureCache.h index b27f9ad0e7..846761d1d5 100644 --- a/Source/Core/VideoBackends/Vulkan/TextureCache.h +++ b/Source/Core/VideoBackends/Vulkan/TextureCache.h @@ -38,7 +38,8 @@ public: void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half) override; + bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override; bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) override; @@ -52,8 +53,9 @@ public: private: void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, - bool is_intensity) override; + bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, + float gamma, bool clamp_top, bool clamp_bottom, + const 
CopyFilterCoefficientArray& filter_coefficients) override; std::unique_ptr m_texture_upload_buffer; diff --git a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp index 6c584fcc95..cf5d7075f7 100644 --- a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp +++ b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp @@ -38,6 +38,11 @@ struct EFBEncodeParams { std::array position_uniform; float y_scale; + float gamma_rcp; + float clamp_top; + float clamp_bottom; + s32 filter_coefficients[3]; + u32 padding; }; } TextureConverter::TextureConverter() @@ -201,10 +206,11 @@ void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry, draw.EndRenderPass(); } -void TextureConverter::EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, - const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half) +void TextureConverter::EncodeTextureToMemory( + VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params, u32 native_width, + u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, + bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) { VkShaderModule shader = GetEncodingShader(params); if (shader == VK_NULL_HANDLE) @@ -236,14 +242,21 @@ void TextureConverter::EncodeTextureToMemory(VkImageView src_texture, u8* dest_p encoder_params.position_uniform[1] = src_rect.top; encoder_params.position_uniform[2] = static_cast(native_width); encoder_params.position_uniform[3] = scale_by_half ? 2 : 1; - encoder_params.y_scale = params.y_scale; - draw.SetPushConstants(&encoder_params, sizeof(encoder_params)); + encoder_params.y_scale = y_scale; + encoder_params.gamma_rcp = 1.0f / gamma; + encoder_params.clamp_top = clamp_top ? 
src_rect.top / float(EFB_HEIGHT) : 0.0f; + encoder_params.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f; + for (size_t i = 0; i < filter_coefficients.size(); i++) + encoder_params.filter_coefficients[i] = filter_coefficients[i]; + u8* ubo_ptr = draw.AllocatePSUniforms(sizeof(EFBEncodeParams)); + std::memcpy(ubo_ptr, &encoder_params, sizeof(EFBEncodeParams)); + draw.CommitPSUniforms(sizeof(EFBEncodeParams)); // We also linear filtering for both box filtering and downsampling higher resolutions to 1x // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more // complex down filtering to average all pixels and produce the correct result. bool linear_filter = - (scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || params.y_scale > 1.0f; + (scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f; draw.SetPSSampler(0, src_texture, linear_filter ? g_object_cache->GetLinearSampler() : g_object_cache->GetPointSampler()); diff --git a/Source/Core/VideoBackends/Vulkan/TextureConverter.h b/Source/Core/VideoBackends/Vulkan/TextureConverter.h index f277c2d597..f85efc4d5c 100644 --- a/Source/Core/VideoBackends/Vulkan/TextureConverter.h +++ b/Source/Core/VideoBackends/Vulkan/TextureConverter.h @@ -40,9 +40,12 @@ public: // Uses an encoding shader to copy src_texture to dest_ptr. // NOTE: Executes the current command buffer. 
- void EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params, - u32 native_width, u32 bytes_per_row, u32 num_blocks_y, - u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half); + void + EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params, + u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, + const EFBRectangle& src_rect, bool scale_by_half, float y_scale, + float gamma, bool clamp_top, bool clamp_bottom, + const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); bool SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format); void DecodeTexture(VkCommandBuffer command_buffer, TextureCache::TCacheEntry* entry, diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index 4a1cea9d48..0f287b4f99 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -970,6 +970,8 @@ union UPE_Copy union CopyFilterCoefficients { + using Values = std::array; + u64 Hex; BitField<0, 6, u64> w0; @@ -980,7 +982,7 @@ union CopyFilterCoefficients BitField<38, 6, u64> w5; BitField<44, 6, u64> w6; - std::array GetCoefficients() const + Values GetCoefficients() const { return { static_cast(w0), static_cast(w1), static_cast(w2), static_cast(w3), diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 143132ecaf..0e0d639a63 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -229,10 +229,13 @@ static void BPWritten(const BPCmd& bp) { // bpmem.zcontrol.pixel_format to PEControl::Z24 is when the game wants to copy from ZBuffer // (Zbuffer uses 24-bit Format) + static constexpr CopyFilterCoefficients::Values filter_coefficients = { + {0, 0, 21, 22, 21, 0, 0}}; bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24; g_texture_cache->CopyRenderTargetToTexture( destAddr, PE_copy.tp_realFormat(), 
srcRect.GetWidth(), srcRect.GetHeight(), destStride, - is_depth_copy, srcRect, !!PE_copy.intensity_fmt, !!PE_copy.half_scale, 1.0f, 1.0f); + is_depth_copy, srcRect, !!PE_copy.intensity_fmt, !!PE_copy.half_scale, 1.0f, 1.0f, + bpmem.triggerEFBCopy.clamp_top, bpmem.triggerEFBCopy.clamp_bottom, filter_coefficients); } else { @@ -260,9 +263,10 @@ static void BPWritten(const BPCmd& bp) bpmem.copyTexSrcWH.x + 1, destStride, height, yScale); bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24; - g_texture_cache->CopyRenderTargetToTexture(destAddr, EFBCopyFormat::XFB, srcRect.GetWidth(), - height, destStride, is_depth_copy, srcRect, false, - false, yScale, s_gammaLUT[PE_copy.gamma]); + g_texture_cache->CopyRenderTargetToTexture( + destAddr, EFBCopyFormat::XFB, srcRect.GetWidth(), height, destStride, is_depth_copy, + srcRect, false, false, yScale, s_gammaLUT[PE_copy.gamma], bpmem.triggerEFBCopy.clamp_top, + bpmem.triggerEFBCopy.clamp_bottom, bpmem.copyfilter.GetCoefficients()); // This stays in to signal end of a "frame" g_renderer->RenderToXFB(destAddr, srcRect, destStride, height, s_gammaLUT[PE_copy.gamma]); diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 1598fe0a00..76d40f5aa3 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -680,7 +680,7 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const // TODO: merge more generic parts into VideoCommon { std::lock_guard guard(m_swap_mutex); - g_renderer->SwapImpl(xfb_entry->texture.get(), xfb_rect, ticks, xfb_entry->gamma); + g_renderer->SwapImpl(xfb_entry->texture.get(), xfb_rect, ticks); } // Update the window size based on the frame that was just rendered. 
diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h index e7f7668f22..7e4f4e00f7 100644 --- a/Source/Core/VideoCommon/RenderBase.h +++ b/Source/Core/VideoCommon/RenderBase.h @@ -175,8 +175,7 @@ public: // Finish up the current frame, print some stats void Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc, u64 ticks); - virtual void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, - float Gamma = 1.0f) = 0; + virtual void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) = 0; PEControl::PixelFormat GetPrevPixelFormat() const { return m_prev_efb_format; } void StorePixelFormat(PEControl::PixelFormat new_format) { m_prev_efb_format = new_format; } diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 9ac75d31f0..f0b1392531 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -1499,10 +1499,39 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda } } -void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width, - u32 height, u32 dstStride, bool is_depth_copy, - const EFBRectangle& srcRect, bool isIntensity, - bool scaleByHalf, float y_scale, float gamma) +TextureCacheBase::CopyFilterCoefficientArray +TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) +{ + // To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1 + // are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below. 
+ return {static_cast(coefficients[0]) + static_cast(coefficients[1]), + static_cast(coefficients[2]) + static_cast(coefficients[3]) + + static_cast(coefficients[4]), + static_cast(coefficients[5]) + static_cast(coefficients[6])}; +} + +TextureCacheBase::CopyFilterCoefficientArray +TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) +{ + // If the user disables the copy filter, only apply it to the VRAM copy. + // This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected. + CopyFilterCoefficientArray res = GetRAMCopyFilterCoefficients(coefficients); + if (!g_ActiveConfig.bDisableCopyFilter) + return res; + + // Disabling the copy filter in options should not ignore the values the game sets completely, + // as some games use the filter coefficients to control the brightness of the screen. Instead, + // add all coefficients to the middle sample, so the deflicker/vertical filter has no effect. + res[1] += res[0] + res[2]; + res[0] = 0; + res[2] = 0; + return res; +} + +void TextureCacheBase::CopyRenderTargetToTexture( + u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, u32 dstStride, bool is_depth_copy, + const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, float y_scale, float gamma, + bool clamp_top, bool clamp_bottom, const CopyFilterCoefficients::Values& filter_coefficients) { // Emulation methods: // @@ -1622,8 +1651,10 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF if (copy_to_ram) { PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format; - EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity, y_scale); - CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf); + EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity); + CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf, + y_scale, gamma, clamp_top, 
clamp_bottom, + GetRAMCopyFilterCoefficients(filter_coefficients)); } else { @@ -1742,8 +1773,6 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF { entry->SetGeneralParameters(dstAddr, 0, baseFormat, is_xfb_copy); entry->SetDimensions(tex_w, tex_h, 1); - entry->gamma = gamma; - entry->frameCount = FRAMECOUNT_INVALID; if (is_xfb_copy) { @@ -1757,7 +1786,9 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF entry->may_have_overlapping_textures = false; entry->is_custom_tex = false; - CopyEFBToCacheEntry(entry, is_depth_copy, srcRect, scaleByHalf, dstFormat, isIntensity); + CopyEFBToCacheEntry(entry, is_depth_copy, srcRect, scaleByHalf, dstFormat, isIntensity, gamma, + clamp_top, clamp_bottom, + GetVRAMCopyFilterCoefficients(filter_coefficients)); u64 hash = entry->CalculateHash(); entry->SetHashes(hash, hash); diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index 6dce346c24..720e95b470 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -47,23 +47,21 @@ struct TextureAndTLUTFormat struct EFBCopyParams { EFBCopyParams(PEControl::PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_, - bool yuv_, float y_scale_) - : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_), - y_scale(y_scale_) + bool yuv_) + : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_) { } bool operator<(const EFBCopyParams& rhs) const { - return std::tie(efb_format, copy_format, depth, yuv, y_scale) < - std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.y_scale); + return std::tie(efb_format, copy_format, depth, yuv) < + std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv); } PEControl::PixelFormat efb_format; EFBCopyFormat copy_format; bool depth; bool yuv; - float y_scale; }; struct TextureLookupInformation @@ -108,6 +106,8 @@ private: 
static const int FRAMECOUNT_INVALID = 0; public: + using CopyFilterCoefficientArray = std::array; + struct TCacheEntry { // common members @@ -126,7 +126,6 @@ public: // content, aren't just downscaled bool should_force_safe_hashing = false; // for XFB bool is_xfb_copy = false; - float gamma = 1.0f; u64 id; bool reference_changed = false; // used by xfb to determine when a reference xfb changed @@ -216,7 +215,9 @@ public: virtual void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half) = 0; + bool scale_by_half, float y_scale, float gamma, bool clamp_top, + bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) = 0; virtual bool CompileShaders() = 0; virtual void DeleteShaders() = 0; @@ -248,7 +249,9 @@ public: virtual void BindTextures(); void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, u32 dstStride, bool is_depth_copy, const EFBRectangle& srcRect, - bool isIntensity, bool scaleByHalf, float y_scale, float gamma); + bool isIntensity, bool scaleByHalf, float y_scale, float gamma, + bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficients::Values& filter_coefficients); virtual void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette, TLUTFormat format) = 0; @@ -315,13 +318,21 @@ private: virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity) = 0; + EFBCopyFormat dst_format, bool is_intensity, float gamma, + bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) = 0; // Removes and unlinks texture from texture cache and returns it to the pool TexAddrCache::iterator InvalidateTexture(TexAddrCache::iterator t_iter); void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y); 
+ // Precomputing the coefficients for the previous, current, and next lines for the copy filter. + CopyFilterCoefficientArray + GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); + CopyFilterCoefficientArray + GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); + TexAddrCache textures_by_address; TexHashCache textures_by_hash; TexPool texture_pool; diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 4d1539572c..1912f2aee5 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -57,19 +57,44 @@ u16 GetEncodedSampleCount(EFBCopyFormat format) } } -// block dimensions : widthStride, heightStride -// texture dims : width, height, x offset, y offset -static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType) +static void WriteHeader(char*& p, APIType ApiType) { - // left, top, of source rectangle within source texture - // width of the destination rectangle, scale_factor (1 or 2) - if (ApiType == APIType::Vulkan) - WRITE(p, - "layout(std140, push_constant) uniform PCBlock { int4 position; float y_scale; } PC;\n"); - else + if (ApiType == APIType::OpenGL) { + // left, top, of source rectangle within source texture + // width of the destination rectangle, scale_factor (1 or 2) WRITE(p, "uniform int4 position;\n"); WRITE(p, "uniform float y_scale;\n"); + WRITE(p, "uniform float gamma_rcp;\n"); + WRITE(p, "uniform float2 clamp_tb;\n"); + WRITE(p, "uniform int3 filter_coefficients;\n"); + WRITE(p, "#define samp0 samp9\n"); + WRITE(p, "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n"); + WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); + } + else if (ApiType == APIType::Vulkan) + { + WRITE(p, "UBO_BINDING(std140, 1) uniform PSBlock {\n"); + WRITE(p, " int4 position;\n"); + WRITE(p, " float y_scale;\n"); + WRITE(p, " float gamma_rcp;\n"); + 
WRITE(p, " float2 clamp_tb;\n"); + WRITE(p, " int3 filter_coefficients;\n"); + WRITE(p, "};\n"); + WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); + WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); + } + else // D3D + { + WRITE(p, "cbuffer PSBlock : register(b0) {\n"); + WRITE(p, " int4 position;\n"); + WRITE(p, " float y_scale;\n"); + WRITE(p, " float gamma_rcp;\n"); + WRITE(p, " float2 clamp_tb;\n"); + WRITE(p, " int3 filter_coefficients;\n"); + WRITE(p, "};\n"); + WRITE(p, "sampler samp0 : register(s0);\n"); + WRITE(p, "Texture2DArray Tex0 : register(t0);\n"); } // D3D does not have roundEven(), only round(), which is specified "to the nearest integer". @@ -96,39 +121,100 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType) WRITE(p, " val = int4(val.r >> 3, val.g >> 2, val.b >> 3, 1);\n"); WRITE(p, " return float4(val) / float4(31.0, 63.0, 31.0, 1.0);\n"); WRITE(p, "}\n"); +} - int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(format); - int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(format); - int samples = GetEncodedSampleCount(format); +static void WriteSampleFunction(char*& p, const EFBCopyParams& params, APIType ApiType) +{ + auto WriteSampleOp = [&](int yoffset) { + if (!params.depth) + { + switch (params.efb_format) + { + case PEControl::RGB8_Z24: + WRITE(p, "RGBA8ToRGB8("); + break; + case PEControl::RGBA6_Z24: + WRITE(p, "RGBA8ToRGBA6("); + break; + case PEControl::RGB565_Z16: + WRITE(p, "RGBA8ToRGB565("); + break; + default: + WRITE(p, "("); + break; + } + } + else + { + // Handle D3D depth inversion. 
+ if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) + WRITE(p, "1.0 - ("); + else + WRITE(p, "("); + } - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) + WRITE(p, "texture(samp0, float3("); + else + WRITE(p, "Tex0.Sample(samp0, float3("); + + WRITE(p, "uv.x + xoffset * pixel_size.x, "); + + // Reverse the direction for OpenGL, since positive numbers are distance from the bottom row. + if (yoffset != 0) + { + if (ApiType == APIType::OpenGL) + WRITE(p, "clamp(uv.y - float(%d) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset); + else + WRITE(p, "clamp(uv.y + float(%d) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset); + } + else + { + WRITE(p, "uv.y"); + } + + WRITE(p, ", 0.0)))"); + }; + + // The copy filter applies to both color and depth copies. This has been verified on hardware. + // The filter is only applied to the RGB channels, the alpha channel is left intact. + WRITE(p, "float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n"); + WRITE(p, "{\n"); + WRITE(p, " float4 prev_row = "); + WriteSampleOp(-1); + WRITE(p, ";\n"); + WRITE(p, " float4 current_row = "); + WriteSampleOp(0); + WRITE(p, ";\n"); + WRITE(p, " float4 next_row = "); + WriteSampleOp(1); + WRITE(p, ";\n"); + WRITE(p, + " float3 col = float3(clamp((int3(prev_row.rgb * 255.0) * filter_coefficients[0] +\n" + " int3(current_row.rgb * 255.0) * filter_coefficients[1] +\n" + " int3(next_row.rgb * 255.0) * filter_coefficients[2]) >> 6,\n" + " int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n"); + WRITE(p, " return float4(col, current_row.a);\n"); + WRITE(p, "}\n"); +} + +// block dimensions : widthStride, heightStride +// texture dims : width, height, x offset, y offset +static void WriteSwizzler(char*& p, const EFBCopyParams& params, EFBCopyFormat format, + APIType ApiType) +{ + WriteHeader(p, ApiType); + WriteSampleFunction(p, params, ApiType); + + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { - WRITE(p, "#define samp0 
samp9\n"); - WRITE(p, "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n"); - - WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); WRITE(p, "void main()\n"); WRITE(p, "{\n" " int2 sampleUv;\n" " int2 uv1 = int2(gl_FragCoord.xy);\n"); } - else if (ApiType == APIType::Vulkan) - { - WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); - WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); - - WRITE(p, "void main()\n"); - WRITE(p, "{\n" - " int2 sampleUv;\n" - " int2 uv1 = int2(gl_FragCoord.xy);\n" - " int4 position = PC.position;\n" - " float y_scale = PC.y_scale;\n"); - } else // D3D { - WRITE(p, "sampler samp0 : register(s0);\n"); - WRITE(p, "Texture2DArray Tex0 : register(t0);\n"); - WRITE(p, "void main(\n"); WRITE(p, " out float4 ocol0 : SV_Target, in float4 rawpos : SV_Position)\n"); WRITE(p, "{\n" @@ -136,6 +222,10 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType) " int2 uv1 = int2(rawpos.xy);\n"); } + int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(format); + int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(format); + int samples = GetEncodedSampleCount(format); + WRITE(p, " int x_block_position = (uv1.x >> %d) << %d;\n", IntLog2(blkH * blkW / samples), IntLog2(blkW)); WRITE(p, " int y_block_position = uv1.y << %d;\n", IntLog2(blkH)); @@ -167,51 +257,13 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType) WRITE(p, " uv0.y = 1.0-uv0.y;\n"); } - WRITE(p, " float sample_offset = float(position.w) / float(%d);\n", EFB_WIDTH); + WRITE(p, " float2 pixel_size = position.ww / float2(%d, %d);\n", EFB_WIDTH, EFB_HEIGHT); } static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, APIType ApiType, const EFBCopyParams& params) { - WRITE(p, " %s = ", dest); - - if (!params.depth) - { - switch (params.efb_format) - { - case PEControl::RGB8_Z24: - WRITE(p, "RGBA8ToRGB8("); - break; - case PEControl::RGBA6_Z24: - WRITE(p, "RGBA8ToRGBA6("); - break; - case 
PEControl::RGB565_Z16: - WRITE(p, "RGBA8ToRGB565("); - break; - default: - WRITE(p, "("); - break; - } - } - else - { - // Handle D3D depth inversion. - if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) - WRITE(p, "1.0 - ("); - else - WRITE(p, "("); - } - - if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) - { - WRITE(p, "texture(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0))).%s;\n", xoffset, - colorComp); - } - else - { - WRITE(p, "Tex0.Sample(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0))).%s;\n", xoffset, - colorComp); - } + WRITE(p, " %s = SampleEFB(uv0, pixel_size, %d).%s;\n", dest, xoffset, colorComp); } static void WriteColorToIntensity(char*& p, const char* src, const char* dest) @@ -239,7 +291,7 @@ static void WriteEncoderEnd(char*& p) static void WriteI8Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::R8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::R8, ApiType); WRITE(p, " float3 texSample;\n"); WriteSampleColor(p, "rgb", "texSample", 0, ApiType, params); @@ -261,7 +313,7 @@ static void WriteI8Encoder(char*& p, APIType ApiType, const EFBCopyParams& param static void WriteI4Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::R4, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::R4, ApiType); WRITE(p, " float3 texSample;\n"); WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); @@ -302,7 +354,7 @@ static void WriteI4Encoder(char*& p, APIType ApiType, const EFBCopyParams& param static void WriteIA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RA8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RA8, ApiType); WRITE(p, " float4 texSample;\n"); WriteSampleColor(p, "rgba", "texSample", 0, ApiType, params); @@ -320,7 +372,7 @@ static void WriteIA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& para static void WriteIA4Encoder(char*& p, 
APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RA4, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RA4, ApiType); WRITE(p, " float4 texSample;\n"); WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); @@ -352,7 +404,7 @@ static void WriteIA4Encoder(char*& p, APIType ApiType, const EFBCopyParams& para static void WriteRGB565Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RGB565, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RGB565, ApiType); WRITE(p, " float3 texSample0;\n"); WRITE(p, " float3 texSample1;\n"); @@ -377,7 +429,7 @@ static void WriteRGB565Encoder(char*& p, APIType ApiType, const EFBCopyParams& p static void WriteRGB5A3Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RGB5A3, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RGB5A3, ApiType); WRITE(p, " float4 texSample;\n"); WRITE(p, " float color0;\n"); @@ -441,7 +493,7 @@ static void WriteRGB5A3Encoder(char*& p, APIType ApiType, const EFBCopyParams& p static void WriteRGBA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RGBA8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RGBA8, ApiType); WRITE(p, " float4 texSample;\n"); WRITE(p, " float4 color0;\n"); @@ -466,7 +518,7 @@ static void WriteRGBA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& pa static void WriteC4Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::R4, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::R4, ApiType); WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); @@ -488,7 +540,7 @@ static void WriteC4Encoder(char*& p, const char* comp, APIType ApiType, const EF static void WriteC8Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::R8, ApiType); + WriteSwizzler(p, 
params, EFBCopyFormat::R8, ApiType); WriteSampleColor(p, comp, "ocol0.b", 0, ApiType, params); WriteSampleColor(p, comp, "ocol0.g", 1, ApiType, params); @@ -501,7 +553,7 @@ static void WriteC8Encoder(char*& p, const char* comp, APIType ApiType, const EF static void WriteCC4Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RA4, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RA4, ApiType); WRITE(p, " float2 texSample;\n"); WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); @@ -532,7 +584,7 @@ static void WriteCC4Encoder(char*& p, const char* comp, APIType ApiType, static void WriteCC8Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RA8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RA8, ApiType); WriteSampleColor(p, comp, "ocol0.bg", 0, ApiType, params); WriteSampleColor(p, comp, "ocol0.ra", 1, ApiType, params); @@ -543,7 +595,7 @@ static void WriteCC8Encoder(char*& p, const char* comp, APIType ApiType, static void WriteZ8Encoder(char*& p, const char* multiplier, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::G8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::G8, ApiType); WRITE(p, " float depth;\n"); @@ -564,7 +616,7 @@ static void WriteZ8Encoder(char*& p, const char* multiplier, APIType ApiType, static void WriteZ16Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RA8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RA8, ApiType); WRITE(p, " float depth;\n"); WRITE(p, " float3 expanded;\n"); @@ -596,7 +648,7 @@ static void WriteZ16Encoder(char*& p, APIType ApiType, const EFBCopyParams& para static void WriteZ16LEncoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::GB8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::GB8, ApiType); WRITE(p, " float depth;\n"); WRITE(p, 
" float3 expanded;\n"); @@ -632,7 +684,7 @@ static void WriteZ16LEncoder(char*& p, APIType ApiType, const EFBCopyParams& par static void WriteZ24Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RGBA8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RGBA8, ApiType); WRITE(p, " float depth0;\n"); WRITE(p, " float depth1;\n"); @@ -672,18 +724,21 @@ static void WriteZ24Encoder(char*& p, APIType ApiType, const EFBCopyParams& para static void WriteXFBEncoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::XFB, ApiType); - - WRITE(p, " float3 y_const = float3(0.257, 0.504, 0.098);\n"); - WRITE(p, " float3 u_const = float3(-0.148, -0.291, 0.439);\n"); - WRITE(p, " float3 v_const = float3(0.439, -0.368, -0.071);\n"); - WRITE(p, " float3 color0;\n"); - WRITE(p, " float3 color1;\n"); + WriteSwizzler(p, params, EFBCopyFormat::XFB, ApiType); + WRITE(p, "float3 color0, color1;\n"); WriteSampleColor(p, "rgb", "color0", 0, ApiType, params); WriteSampleColor(p, "rgb", "color1", 1, ApiType, params); - WRITE(p, " float3 average = (color0 + color1) * 0.5;\n"); + // Gamma is only applied to XFB copies. + WRITE(p, " color0 = pow(color0, gamma_rcp.xxx);\n"); + WRITE(p, " color1 = pow(color1, gamma_rcp.xxx);\n"); + + // Convert to YUV. 
+ WRITE(p, " const float3 y_const = float3(0.257, 0.504, 0.098);\n"); + WRITE(p, " const float3 u_const = float3(-0.148, -0.291, 0.439);\n"); + WRITE(p, " const float3 v_const = float3(0.439, -0.368, -0.071);\n"); + WRITE(p, " float3 average = (color0 + color1) * 0.5;\n"); WRITE(p, " ocol0.b = dot(color0, y_const) + 0.0625;\n"); WRITE(p, " ocol0.g = dot(average, u_const) + 0.5;\n"); WRITE(p, " ocol0.r = dot(color1, y_const) + 0.0625;\n"); diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp index 74ab4b38d2..dce823ba5a 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp @@ -38,34 +38,66 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) if (api_type == APIType::OpenGL) { out.Write("SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n" - "#define samp0 samp9\n" - "#define uv0 f_uv0\n" + "uniform float3 filter_coefficients;\n" + "uniform float gamma_rcp;\n" + "uniform float2 clamp_tb;\n" + "uniform float pixel_height;\n"); + out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n" + " return texture(samp9, float3(uv.x, clamp(uv.y - (y_offset * pixel_height), " + "clamp_tb.x, clamp_tb.y), %s));\n" + "}\n", + mono_depth ? "0.0" : "uv.z"); + out.Write("#define uv0 f_uv0\n" "in vec3 uv0;\n" "out vec4 ocol0;\n" - "void main(){\n" - " vec4 texcol = texture(samp0, %s);\n", - mono_depth ? 
"vec3(uv0.xy, 0.0)" : "uv0"); + "void main(){\n"); } else if (api_type == APIType::Vulkan) { - out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n" - "layout(location = 0) in vec3 uv0;\n" + out.Write("UBO_BINDING(std140, 1) uniform PSBlock {\n" + " float3 filter_coefficients;\n" + " float gamma_rcp;\n" + " float2 clamp_tb;\n" + " float pixel_height;\n" + "};\n"); + out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); + out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n" + " return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " + "clamp_tb.x, clamp_tb.y), %s));\n" + "}\n", + mono_depth ? "0.0" : "uv.z"); + out.Write("layout(location = 0) in vec3 uv0;\n" "layout(location = 1) in vec4 col0;\n" "layout(location = 0) out vec4 ocol0;" - "void main(){\n" - " vec4 texcol = texture(samp0, %s);\n", - mono_depth ? "vec3(uv0.xy, 0.0)" : "uv0"); + "void main(){\n"); } else if (api_type == APIType::D3D) { out.Write("Texture2DArray tex0 : register(t0);\n" "SamplerState samp0 : register(s0);\n" - "void main(out float4 ocol0 : SV_Target,\n" + "uniform float3 filter_coefficients;\n" + "uniform float gamma_rcp;\n" + "uniform float2 clamp_tb;\n" + "uniform float pixel_height;\n\n"); + out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n" + " return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " + "clamp_tb.x, clamp_tb.y), %s));\n" + "}\n", + mono_depth ? "0.0" : "uv.z"); + out.Write("void main(out float4 ocol0 : SV_Target,\n" " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0) {\n" - " float4 texcol = tex0.Sample(samp0, uv0);\n"); + " in float3 uv0 : TEXCOORD0) {\n"); } + // The copy filter applies to both color and depth copies. This has been verified on hardware. + // The filter is only applied to the RGB channels, the alpha channel is left intact. 
+ out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n" + " float4 current_row = SampleEFB(uv0, 0.0f);\n" + " float4 next_row = SampleEFB(uv0, 1.0f);\n" + " float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n" + " current_row.rgb * filter_coefficients[1] +\n" + " next_row.rgb * filter_coefficients[2], current_row.a);\n"); + if (uid_data->is_depth_copy) { if (api_type == APIType::D3D || api_type == APIType::Vulkan) @@ -223,8 +255,8 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) out.Write(" ocol0 = texcol;\n"); break; - case EFBCopyFormat::XFB: // XFB copy, we just pretend it's an RGBX copy - out.Write(" ocol0 = float4(texcol.rgb, 1.0);\n"); + case EFBCopyFormat::XFB: + out.Write(" ocol0 = float4(pow(texcol.rgb, gamma_rcp.xxx), texcol.a);\n"); break; default: diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index d08d5b793a..abdf27dcd1 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -120,6 +120,7 @@ void VideoConfig::Refresh() iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY); sPostProcessingShader = Config::Get(Config::GFX_ENHANCE_POST_SHADER); bForceTrueColor = Config::Get(Config::GFX_ENHANCE_FORCE_TRUE_COLOR); + bDisableCopyFilter = Config::Get(Config::GFX_ENHANCE_DISABLE_COPY_FILTER); stereo_mode = static_cast(Config::Get(Config::GFX_STEREO_MODE)); iStereoDepth = Config::Get(Config::GFX_STEREO_DEPTH); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index cc8e4cde88..03ccfca3ba 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -73,6 +73,7 @@ struct VideoConfig final int iMaxAnisotropy; std::string sPostProcessingShader; bool bForceTrueColor; + bool bDisableCopyFilter; // Information bool bShowFPS;