diff --git a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/ui/settings/SettingsFragmentPresenter.java b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/ui/settings/SettingsFragmentPresenter.java index 3caeaa514b..92e413e9fa 100644 --- a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/ui/settings/SettingsFragmentPresenter.java +++ b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/ui/settings/SettingsFragmentPresenter.java @@ -296,8 +296,9 @@ public final class SettingsFragmentPresenter Setting perPixel = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_PER_PIXEL); Setting forceFilter = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_ENHANCEMENTS).getSetting(SettingsFile.KEY_FORCE_FILTERING); Setting disableFog = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_DISABLE_FOG); + Setting disableCopyFilter = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_ENHANCEMENTS).getSetting(SettingsFile.KEY_DISABLE_COPY_FILTER); /* core declares DisableCopyFilter under "Enhancements" (GraphicsSettings.cpp) */ Setting shaderCompilationMode = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_SHADER_COMPILATION_MODE); - Setting waitForShaders = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_WAIT_FOR_SHADERS); + Setting waitForShaders = mSettings.get(SettingsFile.SETTINGS_GFX).get(SettingsFile.SECTION_GFX_SETTINGS).getSetting(SettingsFile.KEY_WAIT_FOR_SHADERS); sl.add(new SingleChoiceSetting(SettingsFile.KEY_INTERNAL_RES, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.internal_resolution, R.string.internal_resolution_descrip, R.array.internalResolutionEntries, R.array.internalResolutionValues, 0, resolution)); sl.add(new SingleChoiceSetting(SettingsFile.KEY_FSAA, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, 
R.string.FSAA, R.string.FSAA_descrip, R.array.FSAAEntries, R.array.FSAAValues, 0, fsaa)); @@ -311,6 +312,7 @@ public final class SettingsFragmentPresenter sl.add(new CheckBoxSetting(SettingsFile.KEY_PER_PIXEL, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.per_pixel_lighting, R.string.per_pixel_lighting_descrip, false, perPixel)); sl.add(new CheckBoxSetting(SettingsFile.KEY_FORCE_FILTERING, SettingsFile.SECTION_GFX_ENHANCEMENTS, SettingsFile.SETTINGS_GFX, R.string.force_texture_filtering, R.string.force_texture_filtering_descrip, false, forceFilter)); sl.add(new CheckBoxSetting(SettingsFile.KEY_DISABLE_FOG, SettingsFile.SECTION_GFX_SETTINGS, SettingsFile.SETTINGS_GFX, R.string.disable_fog, R.string.disable_fog_descrip, false, disableFog)); + sl.add(new CheckBoxSetting(SettingsFile.KEY_DISABLE_COPY_FILTER, SettingsFile.SECTION_GFX_ENHANCEMENTS, SettingsFile.SETTINGS_GFX, R.string.disable_copy_filter, R.string.disable_copy_filter_descrip, true, disableCopyFilter)); /* default mirrors GFX_ENHANCE_DISABLE_COPY_FILTER (true) */ /* Check if we support stereo diff --git a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/utils/SettingsFile.java b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/utils/SettingsFile.java index 34d4e2fa71..0d1946bb64 100644 --- a/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/utils/SettingsFile.java +++ b/Source/Android/app/src/main/java/org/dolphinemu/dolphinemu/utils/SettingsFile.java @@ -92,6 +92,7 @@ public final class SettingsFile public static final String KEY_PER_PIXEL = "EnablePixelLighting"; public static final String KEY_FORCE_FILTERING = "ForceFiltering"; public static final String KEY_DISABLE_FOG = "DisableFog"; + public static final String KEY_DISABLE_COPY_FILTER = "DisableCopyFilter"; public static final String KEY_STEREO_MODE = "StereoMode"; public static final String KEY_STEREO_DEPTH = "StereoDepth"; diff --git a/Source/Android/app/src/main/res/values/strings.xml b/Source/Android/app/src/main/res/values/strings.xml index 
ea7edcfbcd..4ee7985427 100644 --- a/Source/Android/app/src/main/res/values/strings.xml +++ b/Source/Android/app/src/main/res/values/strings.xml @@ -149,6 +149,8 @@ Force texture filtering even if the emulated game explicitly disabled it. Improves texture quality slightly but causes glitches in some games. Disable Fog Makes distant objects more visible by removing fog, thus increasing the overall detail. Disabling fog will break some games which rely on proper fog emulation. + Disable Copy Filter + Disables the blending of adjacent rows when copying the EFB. This is known in some games as \"deflickering\" or \"smoothing\". Disabling the filter is usually safe, and may result in a sharper image. Stereoscopy Stereoscopy allows you to get a better feeling of depth if you have the necessary hardware.\nHeavily decreases emulation speed and sometimes causes issues Stereoscopy Mode diff --git a/Source/Core/Core/Config/GraphicsSettings.cpp b/Source/Core/Core/Config/GraphicsSettings.cpp index f46c7a585a..cfae1b14f2 100644 --- a/Source/Core/Core/Config/GraphicsSettings.cpp +++ b/Source/Core/Core/Config/GraphicsSettings.cpp @@ -106,6 +106,8 @@ const ConfigInfo GFX_ENHANCE_POST_SHADER{ {System::GFX, "Enhancements", "PostProcessingShader"}, ""}; const ConfigInfo GFX_ENHANCE_FORCE_TRUE_COLOR{{System::GFX, "Enhancements", "ForceTrueColor"}, true}; +const ConfigInfo GFX_ENHANCE_DISABLE_COPY_FILTER{ + {System::GFX, "Enhancements", "DisableCopyFilter"}, true}; // Graphics.Stereoscopy diff --git a/Source/Core/Core/Config/GraphicsSettings.h b/Source/Core/Core/Config/GraphicsSettings.h index 4f0e4f741c..6b4c900e01 100644 --- a/Source/Core/Core/Config/GraphicsSettings.h +++ b/Source/Core/Core/Config/GraphicsSettings.h @@ -80,6 +80,7 @@ extern const ConfigInfo GFX_ENHANCE_FORCE_FILTERING; extern const ConfigInfo GFX_ENHANCE_MAX_ANISOTROPY; // NOTE - this is x in (1 << x) extern const ConfigInfo GFX_ENHANCE_POST_SHADER; extern const ConfigInfo GFX_ENHANCE_FORCE_TRUE_COLOR; +extern const 
ConfigInfo GFX_ENHANCE_DISABLE_COPY_FILTER; // Graphics.Stereoscopy diff --git a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp index 7b92427749..56b3ec8c74 100644 --- a/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp +++ b/Source/Core/Core/ConfigLoaders/IsSettingSaveable.cpp @@ -85,6 +85,7 @@ bool IsSettingSaveable(const Config::ConfigLocation& config_location) Config::GFX_ENHANCE_MAX_ANISOTROPY.location, Config::GFX_ENHANCE_POST_SHADER.location, Config::GFX_ENHANCE_FORCE_TRUE_COLOR.location, + Config::GFX_ENHANCE_DISABLE_COPY_FILTER.location, // Graphics.Stereoscopy diff --git a/Source/Core/DolphinQt2/Config/Graphics/EnhancementsWidget.cpp b/Source/Core/DolphinQt2/Config/Graphics/EnhancementsWidget.cpp index 7776e82ce6..872b05101b 100644 --- a/Source/Core/DolphinQt2/Config/Graphics/EnhancementsWidget.cpp +++ b/Source/Core/DolphinQt2/Config/Graphics/EnhancementsWidget.cpp @@ -74,6 +74,8 @@ void EnhancementsWidget::CreateWidgets() m_disable_fog = new GraphicsBool(tr("Disable Fog"), Config::GFX_DISABLE_FOG); m_force_24bit_color = new GraphicsBool(tr("Force 24-Bit Color"), Config::GFX_ENHANCE_FORCE_TRUE_COLOR); + m_disable_copy_filter = + new GraphicsBool(tr("Disable Copy Filter"), Config::GFX_ENHANCE_DISABLE_COPY_FILTER); enhancements_layout->addWidget(new QLabel(tr("Internal Resolution:")), 0, 0); enhancements_layout->addWidget(m_ir_combo, 0, 1, 1, -1); @@ -92,6 +94,7 @@ void EnhancementsWidget::CreateWidgets() enhancements_layout->addWidget(m_widescreen_hack, 6, 1); enhancements_layout->addWidget(m_disable_fog, 7, 0); enhancements_layout->addWidget(m_force_24bit_color, 7, 1); + enhancements_layout->addWidget(m_disable_copy_filter, 8, 0); // Stereoscopy auto* stereoscopy_box = new QGroupBox(tr("Stereoscopy")); @@ -269,7 +272,6 @@ void EnhancementsWidget::AddDescriptions() QT_TR_NOOP("Makes distant objects more visible by removing fog, thus increasing the overall " "detail.\nDisabling fog will break 
some games which rely on proper fog " "emulation.\n\nIf unsure, leave this unchecked."); - static const char* TR_3D_MODE_DESCRIPTION = QT_TR_NOOP( "Selects the stereoscopic 3D mode. Stereoscopy allows you to get a better feeling " "of depth if you have the necessary hardware.\nSide-by-Side and Top-and-Bottom are " @@ -294,6 +296,11 @@ void EnhancementsWidget::AddDescriptions() QT_TR_NOOP("Filter all textures, including any that the game explicitly set as " "unfiltered.\nMay improve quality of certain textures in some games, but will " "cause issues in others.\n\nIf unsure, leave this unchecked."); + static const char* TR_DISABLE_COPY_FILTER_DESCRIPTION = + QT_TR_NOOP("Disables the blending of adjacent rows when copying the EFB. This is known in " + "some games as \"deflickering\" or \"smoothing\". Disabling the filter has no " + "effect on performance, but may result in a sharper image, and causes few " + "graphical issues.\n\n\nIf unsure, leave this checked."); AddDescription(m_ir_combo, TR_INTERNAL_RESOLUTION_DESCRIPTION); AddDescription(m_aa_combo, TR_ANTIALIAS_DESCRIPTION); @@ -305,6 +312,7 @@ void EnhancementsWidget::AddDescriptions() AddDescription(m_disable_fog, TR_REMOVE_FOG_DESCRIPTION); AddDescription(m_force_24bit_color, TR_FORCE_24BIT_DESCRIPTION); AddDescription(m_force_texture_filtering, TR_FORCE_TEXTURE_FILTERING_DESCRIPTION); + AddDescription(m_disable_copy_filter, TR_DISABLE_COPY_FILTER_DESCRIPTION); AddDescription(m_3d_mode, TR_3D_MODE_DESCRIPTION); AddDescription(m_3d_depth, TR_3D_DEPTH_DESCRIPTION); AddDescription(m_3d_convergence, TR_3D_CONVERGENCE_DESCRIPTION); diff --git a/Source/Core/DolphinQt2/Config/Graphics/EnhancementsWidget.h b/Source/Core/DolphinQt2/Config/Graphics/EnhancementsWidget.h index ac23ed84e9..84001a9633 100644 --- a/Source/Core/DolphinQt2/Config/Graphics/EnhancementsWidget.h +++ b/Source/Core/DolphinQt2/Config/Graphics/EnhancementsWidget.h @@ -38,6 +38,7 @@ private: QCheckBox* m_widescreen_hack; QCheckBox* m_disable_fog; 
QCheckBox* m_force_24bit_color; + QCheckBox* m_disable_copy_filter; // Stereoscopy QComboBox* m_3d_mode; diff --git a/Source/Core/DolphinWX/VideoConfigDiag.cpp b/Source/Core/DolphinWX/VideoConfigDiag.cpp index e9dea2c863..cea5f4921a 100644 --- a/Source/Core/DolphinWX/VideoConfigDiag.cpp +++ b/Source/Core/DolphinWX/VideoConfigDiag.cpp @@ -287,6 +287,11 @@ static wxString true_color_desc = wxTRANSLATE("Forces the game to render the RGB color channels in 24-bit, thereby increasing " "quality by reducing color banding.\nIt has no impact on performance and causes " "few graphical issues.\n\n\nIf unsure, leave this checked."); +static wxString disable_copy_filter_desc = + wxTRANSLATE("Disables the blending of adjacent rows when copying the EFB. This is known in " + "some games as \"deflickering\" or \"smoothing\". Disabling the filter has no " + "effect on performance, but may result in a sharper image, and causes few " + "graphical issues.\n\n\nIf unsure, leave this checked."); static wxString vertex_rounding_desc = wxTRANSLATE("Rounds 2D vertices to whole pixels. Fixes graphical problems in some games at " "higher internal resolutions. 
This setting has no effect when native internal " @@ -602,6 +607,9 @@ VideoConfigDiag::VideoConfigDiag(wxWindow* parent, const std::string& title) Config::GFX_DISABLE_FOG)); cb_szr->Add(CreateCheckBox(page_enh, _("Force 24-Bit Color"), wxGetTranslation(true_color_desc), Config::GFX_ENHANCE_FORCE_TRUE_COLOR)); + cb_szr->Add(CreateCheckBox(page_enh, _("Disable Copy Filter"), + wxGetTranslation(disable_copy_filter_desc), + Config::GFX_ENHANCE_DISABLE_COPY_FILTER)); szr_enh->Add(cb_szr, wxGBPosition(row, 0), wxGBSpan(1, 3)); row += 1; diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp index 8d48332af7..de0d9716ec 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.cpp @@ -31,7 +31,11 @@ struct EFBEncodeParams u32 DestWidth; u32 ScaleFactor; float y_scale; - u32 padding[3]; + float gamma_rcp; + float clamp_top; + float clamp_bottom; + s32 filter_coefficients[3]; + u32 padding; }; PSTextureEncoder::PSTextureEncoder() @@ -66,9 +70,11 @@ void PSTextureEncoder::Shutdown() SAFE_RELEASE(m_encode_params); } -void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half) +void PSTextureEncoder::Encode( + u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, + u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, float y_scale, float gamma, + bool clamp_top, bool clamp_bottom, + const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) { // Resolve MSAA targets before copying. 
// FIXME: Instead of resolving EFB, it would be better to pick out a @@ -101,7 +107,13 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_w encode_params.SrcTop = src_rect.top; encode_params.DestWidth = native_width; encode_params.ScaleFactor = scale_by_half ? 2 : 1; - encode_params.y_scale = params.y_scale; + encode_params.y_scale = y_scale; + encode_params.gamma_rcp = 1.0f / gamma; + encode_params.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f; + encode_params.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f; /* 1.0f when not clamping, as in TextureCache::CopyEFBToCacheEntry */ + for (size_t i = 0; i < filter_coefficients.size(); i++) + encode_params.filter_coefficients[i] = filter_coefficients[i]; + D3D::context->UpdateSubresource(m_encode_params, 0, nullptr, &encode_params, 0, 0); D3D::stateman->SetPixelConstants(m_encode_params); @@ -109,7 +121,7 @@ void PSTextureEncoder::Encode(u8* dst, const EFBCopyParams& params, u32 native_w // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more // complex down filtering to average all pixels and produce the correct result. 
// Also, box filtering won't be correct for anything other than 1x IR - if (scale_by_half || g_renderer->GetEFBScale() != 1 || params.y_scale > 1.0f) + if (scale_by_half || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f) D3D::SetLinearCopySampler(); else D3D::SetPointCopySampler(); diff --git a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h index 7c45970d19..4054b07fbf 100644 --- a/Source/Core/VideoBackends/D3D/PSTextureEncoder.h +++ b/Source/Core/VideoBackends/D3D/PSTextureEncoder.h @@ -8,6 +8,7 @@ #include #include "Common/CommonTypes.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/TextureConversionShader.h" #include "VideoCommon/VideoCommon.h" @@ -38,8 +39,9 @@ public: void Init(); void Shutdown(); void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half); + u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, + float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); private: ID3D11PixelShader* GetEncodingPixelShader(const EFBCopyParams& params); diff --git a/Source/Core/VideoBackends/D3D/Render.cpp b/Source/Core/VideoBackends/D3D/Render.cpp index 85e81f8171..40e02f4e12 100644 --- a/Source/Core/VideoBackends/D3D/Render.cpp +++ b/Source/Core/VideoBackends/D3D/Render.cpp @@ -627,8 +627,7 @@ void Renderer::ReinterpretPixelData(unsigned int convtype) } // This function has the final picture. We adjust the aspect ratio here. 
-void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks, - float Gamma) +void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks) { ResetAPIState(); @@ -650,7 +649,7 @@ void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region auto* xfb_texture = static_cast(texture); BlitScreen(xfb_region, targetRc, xfb_texture->GetRawTexIdentifier(), - xfb_texture->GetConfig().width, xfb_texture->GetConfig().height, Gamma); + xfb_texture->GetConfig().width, xfb_texture->GetConfig().height); // Reset viewport for drawing text D3D11_VIEWPORT vp = CD3D11_VIEWPORT(0.0f, 0.0f, static_cast(m_backbuffer_width), @@ -854,7 +853,7 @@ void Renderer::BBoxWrite(int index, u16 _value) } void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D* src_texture, - u32 src_width, u32 src_height, float Gamma) + u32 src_width, u32 src_height) { if (g_ActiveConfig.stereo_mode == StereoMode::SBS || g_ActiveConfig.stereo_mode == StereoMode::TAB) @@ -871,13 +870,13 @@ void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D D3D::drawShadedTexQuad(src_texture->GetSRV(), src.AsRECT(), src_width, src_height, PixelShaderCache::GetColorCopyProgram(false), VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), nullptr, Gamma, 0); + VertexShaderCache::GetSimpleInputLayout(), nullptr, 1.0f, 0); D3D::context->RSSetViewports(1, &rightVp); D3D::drawShadedTexQuad(src_texture->GetSRV(), src.AsRECT(), src_width, src_height, PixelShaderCache::GetColorCopyProgram(false), VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), nullptr, Gamma, 1); + VertexShaderCache::GetSimpleInputLayout(), nullptr, 1.0f, 1); } else if (g_ActiveConfig.stereo_mode == StereoMode::Nvidia3DVision) { @@ -896,13 +895,13 @@ void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D 
D3D::drawShadedTexQuad(src_texture->GetSRV(), src.AsRECT(), src_width, src_height, PixelShaderCache::GetColorCopyProgram(false), VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), nullptr, Gamma, 0); + VertexShaderCache::GetSimpleInputLayout(), nullptr, 1.0f, 0); D3D::context->RSSetViewports(1, &rightVp); D3D::drawShadedTexQuad(src_texture->GetSRV(), src.AsRECT(), src_width, src_height, PixelShaderCache::GetColorCopyProgram(false), VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), nullptr, Gamma, 1); + VertexShaderCache::GetSimpleInputLayout(), nullptr, 1.0f, 1); // Copy the left eye to the backbuffer, if Nvidia 3D Vision is enabled it should // recognize the signature and automatically include the right eye frame. @@ -927,7 +926,7 @@ void Renderer::BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D nullptr; D3D::drawShadedTexQuad(src_texture->GetSRV(), src.AsRECT(), src_width, src_height, pixelShader, VertexShaderCache::GetSimpleVertexShader(), - VertexShaderCache::GetSimpleInputLayout(), geomShader, Gamma); + VertexShaderCache::GetSimpleInputLayout(), geomShader, 1.0f); } } diff --git a/Source/Core/VideoBackends/D3D/Render.h b/Source/Core/VideoBackends/D3D/Render.h index a7ccb0b9ae..0927f6c934 100644 --- a/Source/Core/VideoBackends/D3D/Render.h +++ b/Source/Core/VideoBackends/D3D/Render.h @@ -63,7 +63,7 @@ public: TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; - void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override; + void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override; void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) override; @@ -84,7 +84,7 @@ private: void UpdateBackbufferSize(); void BlitScreen(TargetRectangle src, TargetRectangle dst, D3DTexture2D* src_texture, - u32 src_width, u32 src_height, float Gamma); + u32 
src_width, u32 src_height); void UpdateUtilityUniformBuffer(const void* uniforms, u32 uniforms_size); void UpdateUtilityVertexBuffer(const void* vertices, u32 vertex_stride, u32 num_vertices); diff --git a/Source/Core/VideoBackends/D3D/TextureCache.cpp b/Source/Core/VideoBackends/D3D/TextureCache.cpp index b0974f3cc3..9c5f139502 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.cpp +++ b/Source/Core/VideoBackends/D3D/TextureCache.cpp @@ -33,10 +33,12 @@ static std::unique_ptr g_encoder; void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half) + const EFBRectangle& src_rect, bool scale_by_half, float y_scale, + float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) { g_encoder->Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, src_rect, - scale_by_half); + scale_by_half, y_scale, gamma, clamp_top, clamp_bottom, filter_coefficients); } const char palette_shader[] = @@ -137,9 +139,9 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, D3D::stateman->SetTexture(1, palette_buf_srv); // TODO: Add support for C14X2 format. (Different multiplier, more palette entries.) - float params[4] = {source->format == TextureFormat::I4 ? 15.f : 255.f}; - D3D::context->UpdateSubresource(palette_uniform, 0, nullptr, &params, 0, 0); - D3D::stateman->SetPixelConstants(palette_uniform); + float params[8] = {source->format == TextureFormat::I4 ? 
15.f : 255.f}; + D3D::context->UpdateSubresource(uniform_buffer, 0, nullptr, &params, 0, 0); + D3D::stateman->SetPixelConstants(uniform_buffer); const D3D11_RECT sourcerect = CD3D11_RECT(0, 0, source->GetWidth(), source->GetHeight()); @@ -180,7 +182,7 @@ TextureCache::TextureCache() palette_buf = nullptr; palette_buf_srv = nullptr; - palette_uniform = nullptr; + uniform_buffer = nullptr; palette_pixel_shader[static_cast(TLUTFormat::IA8)] = GetConvertShader("IA8"); palette_pixel_shader[static_cast(TLUTFormat::RGB565)] = GetConvertShader("RGB565"); palette_pixel_shader[static_cast(TLUTFormat::RGB5A3)] = GetConvertShader("RGB5A3"); @@ -195,10 +197,10 @@ TextureCache::TextureCache() CHECK(SUCCEEDED(hr), "create palette decoder lut srv"); D3D::SetDebugObjectName(palette_buf_srv, "texture decoder lut srv"); const D3D11_BUFFER_DESC cbdesc = - CD3D11_BUFFER_DESC(16, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); - hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &palette_uniform); + CD3D11_BUFFER_DESC(sizeof(float) * 8, D3D11_BIND_CONSTANT_BUFFER, D3D11_USAGE_DEFAULT); + hr = D3D::device->CreateBuffer(&cbdesc, nullptr, &uniform_buffer); CHECK(SUCCEEDED(hr), "Create palette decoder constant buffer"); - D3D::SetDebugObjectName(palette_uniform, + D3D::SetDebugObjectName(uniform_buffer, "a constant buffer used in TextureCache::CopyRenderTargetToTexture"); } @@ -209,7 +211,7 @@ TextureCache::~TextureCache() SAFE_RELEASE(palette_buf); SAFE_RELEASE(palette_buf_srv); - SAFE_RELEASE(palette_uniform); + SAFE_RELEASE(uniform_buffer); for (auto*& shader : palette_pixel_shader) SAFE_RELEASE(shader); for (auto& iter : m_efb_to_tex_pixel_shaders) @@ -218,7 +220,9 @@ TextureCache::~TextureCache() void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity) + EFBCopyFormat dst_format, bool is_intensity, float gamma, + bool clamp_top, bool clamp_bottom, + const 
CopyFilterCoefficientArray& filter_coefficients) { auto* destination_texture = static_cast(entry->texture.get()); @@ -260,6 +264,27 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, else D3D::SetPointCopySampler(); + struct PixelConstants + { + float filter_coefficients[3]; + float gamma_rcp; + float clamp_top; + float clamp_bottom; + float pixel_height; + u32 padding; + }; + PixelConstants constants; + for (size_t i = 0; i < filter_coefficients.size(); i++) + constants.filter_coefficients[i] = filter_coefficients[i] / 64.0f; + constants.gamma_rcp = 1.0f / gamma; + constants.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f; + constants.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 1.0f; + constants.pixel_height = + g_ActiveConfig.bCopyEFBScaled ? 1.0f / g_renderer->GetTargetHeight() : 1.0f / EFB_HEIGHT; + constants.padding = 0; + D3D::context->UpdateSubresource(uniform_buffer, 0, nullptr, &constants, 0, 0); + D3D::stateman->SetPixelConstants(uniform_buffer); + // Make sure we don't draw with the texture set as both a source and target. // (This can happen because we don't unbind textures when we free them.) 
D3D::stateman->UnsetTexture(destination_texture->GetRawTexIdentifier()->GetSRV()); diff --git a/Source/Core/VideoBackends/D3D/TextureCache.h b/Source/Core/VideoBackends/D3D/TextureCache.h index 49332e2de3..24dda22d60 100644 --- a/Source/Core/VideoBackends/D3D/TextureCache.h +++ b/Source/Core/VideoBackends/D3D/TextureCache.h @@ -34,11 +34,13 @@ private: void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half) override; + bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override; void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, - bool is_intensity) override; + bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, + float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override; bool CompileShaders() override { return true; } void DeleteShaders() override {} @@ -46,7 +48,7 @@ private: ID3D11Buffer* palette_buf; ID3D11ShaderResourceView* palette_buf_srv; - ID3D11Buffer* palette_uniform; + ID3D11Buffer* uniform_buffer; ID3D11PixelShader* palette_pixel_shader[3]; std::map m_efb_to_tex_pixel_shaders; diff --git a/Source/Core/VideoBackends/Null/Render.cpp b/Source/Core/VideoBackends/Null/Render.cpp index 4e9cf655c2..c9c0e62325 100644 --- a/Source/Core/VideoBackends/Null/Render.cpp +++ b/Source/Core/VideoBackends/Null/Render.cpp @@ -92,7 +92,7 @@ TargetRectangle Renderer::ConvertEFBRectangle(const EFBRectangle& rc) return result; } -void Renderer::SwapImpl(AbstractTexture*, const EFBRectangle&, u64, float) +void Renderer::SwapImpl(AbstractTexture*, const EFBRectangle&, u64) { UpdateActiveConfig(); } diff --git a/Source/Core/VideoBackends/Null/Render.h b/Source/Core/VideoBackends/Null/Render.h index 
0c0092554f..c1bf9c122e 100644 --- a/Source/Core/VideoBackends/Null/Render.h +++ b/Source/Core/VideoBackends/Null/Render.h @@ -34,7 +34,7 @@ public: void BBoxWrite(int index, u16 value) override {} TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; - void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override; + void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override; void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) override diff --git a/Source/Core/VideoBackends/Null/TextureCache.h b/Source/Core/VideoBackends/Null/TextureCache.h index cf9dfa84a2..25803344a4 100644 --- a/Source/Core/VideoBackends/Null/TextureCache.h +++ b/Source/Core/VideoBackends/Null/TextureCache.h @@ -27,12 +27,15 @@ public: void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half) override + bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override { } void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity) override + bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, + float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override { } }; diff --git a/Source/Core/VideoBackends/OGL/Render.cpp b/Source/Core/VideoBackends/OGL/Render.cpp index d5259aa651..f049ee9e3a 100644 --- a/Source/Core/VideoBackends/OGL/Render.cpp +++ b/Source/Core/VideoBackends/OGL/Render.cpp @@ -1365,8 +1365,7 @@ void Renderer::ApplyBlendingState(const BlendingState state, bool force) } // This function has the final picture. We adjust the aspect ratio here. 
-void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks, - float Gamma) +void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks) { if (g_ogl_config.bSupportsDebug) { diff --git a/Source/Core/VideoBackends/OGL/Render.h b/Source/Core/VideoBackends/OGL/Render.h index d8882e2111..c27c06308c 100644 --- a/Source/Core/VideoBackends/OGL/Render.h +++ b/Source/Core/VideoBackends/OGL/Render.h @@ -126,7 +126,7 @@ public: TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; - void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override; + void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override; void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) override; diff --git a/Source/Core/VideoBackends/OGL/TextureCache.cpp b/Source/Core/VideoBackends/OGL/TextureCache.cpp index 24f94e39db..086d0f29f2 100644 --- a/Source/Core/VideoBackends/OGL/TextureCache.cpp +++ b/Source/Core/VideoBackends/OGL/TextureCache.cpp @@ -68,10 +68,18 @@ constexpr const char* geometry_program = "layout(triangles) in;\n" void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half) + const EFBRectangle& src_rect, bool scale_by_half, float y_scale, + float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) { + // Flip top/bottom due to lower-left coordinate system. + float clamp_top_val = + clamp_bottom ? (1.0f - src_rect.bottom / static_cast(EFB_HEIGHT)) : 0.0f; + float clamp_bottom_val = + clamp_top ? 
(1.0f - src_rect.top / static_cast(EFB_HEIGHT)) : 1.0f; /* not clamping: leave the upper bound at 1.0f (cf. D3D CopyEFBToCacheEntry) */ TextureConverter::EncodeToRamFromTexture(dst, params, native_width, bytes_per_row, num_blocks_y, - memory_stride, src_rect, scale_by_half); + memory_stride, src_rect, scale_by_half, y_scale, gamma, + clamp_top_val, clamp_bottom_val, filter_coefficients); } TextureCache::TextureCache() @@ -483,7 +491,9 @@ void TextureCache::DecodeTextureOnGPU(TCacheEntry* entry, u32 dst_level, const u void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity) + EFBCopyFormat dst_format, bool is_intensity, float gamma, + bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) { auto* destination_texture = static_cast(entry->texture.get()); g_renderer->ResetAPIState(); // reset any game specific settings @@ -528,6 +538,11 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, code.GetBuffer(), geo_program); shader.position_uniform = glGetUniformLocation(shader.shader.glprogid, "copy_position"); + shader.pixel_height_uniform = glGetUniformLocation(shader.shader.glprogid, "pixel_height"); + shader.gamma_rcp_uniform = glGetUniformLocation(shader.shader.glprogid, "gamma_rcp"); + shader.clamp_tb_uniform = glGetUniformLocation(shader.shader.glprogid, "clamp_tb"); + shader.filter_coefficients_uniform = + glGetUniformLocation(shader.shader.glprogid, "filter_coefficients"); } shader.shader.Bind(); @@ -535,6 +550,15 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, TargetRectangle R = g_renderer->ConvertEFBRectangle(src_rect); glUniform4f(shader.position_uniform, static_cast(R.left), static_cast(R.top), static_cast(R.right), static_cast(R.bottom)); + glUniform1f(shader.pixel_height_uniform, g_ActiveConfig.bCopyEFBScaled ? 
+ 1.0f / g_renderer->GetTargetHeight() : + 1.0f / EFB_HEIGHT); + glUniform1f(shader.gamma_rcp_uniform, 1.0f / gamma); + glUniform2f(shader.clamp_tb_uniform, + clamp_bottom ? (1.0f - src_rect.bottom / static_cast(EFB_HEIGHT)) : 0.0f, + clamp_top ? (1.0f - src_rect.top / static_cast(EFB_HEIGHT)) : 1.0f); /* not clamping: upper bound stays 1.0f (cf. D3D CopyEFBToCacheEntry) */ + glUniform3f(shader.filter_coefficients_uniform, filter_coefficients[0] / 64.0f, + filter_coefficients[1] / 64.0f, filter_coefficients[2] / 64.0f); ProgramShaderCache::BindVertexFormat(nullptr); glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); diff --git a/Source/Core/VideoBackends/OGL/TextureCache.h b/Source/Core/VideoBackends/OGL/TextureCache.h index 3923919df6..54dc4afef8 100644 --- a/Source/Core/VideoBackends/OGL/TextureCache.h +++ b/Source/Core/VideoBackends/OGL/TextureCache.h @@ -65,11 +65,13 @@ private: void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half) override; + bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override; void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, - bool is_intensity) override; + bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, + float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override; bool CompileShaders() override; void DeleteShaders() override; @@ -84,6 +86,10 @@ private: { SHADER shader; GLuint position_uniform; + GLuint pixel_height_uniform; + GLuint gamma_rcp_uniform; + GLuint clamp_tb_uniform; + GLuint filter_coefficients_uniform; }; std::map m_efb_copy_programs; diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.cpp b/Source/Core/VideoBackends/OGL/TextureConverter.cpp index e1ec911a65..615cc9e1c7 100644 --- 
a/Source/Core/VideoBackends/OGL/TextureConverter.cpp +++ b/Source/Core/VideoBackends/OGL/TextureConverter.cpp @@ -39,6 +39,9 @@ struct EncodingProgram SHADER program; GLint copy_position_uniform; GLint y_scale_uniform; + GLint gamma_rcp_uniform; + GLint clamp_tb_uniform; + GLint filter_coefficients_uniform; }; std::map s_encoding_programs; @@ -81,6 +84,10 @@ static EncodingProgram& GetOrCreateEncodingShader(const EFBCopyParams& params) program.copy_position_uniform = glGetUniformLocation(program.program.glprogid, "position"); program.y_scale_uniform = glGetUniformLocation(program.program.glprogid, "y_scale"); + program.gamma_rcp_uniform = glGetUniformLocation(program.program.glprogid, "gamma_rcp"); + program.clamp_tb_uniform = glGetUniformLocation(program.program.glprogid, "clamp_tb"); + program.filter_coefficients_uniform = + glGetUniformLocation(program.program.glprogid, "filter_coefficients"); return s_encoding_programs.emplace(params, program).first->second; } @@ -137,7 +144,9 @@ static void EncodeToRamUsingShader(GLuint srcTexture, u8* destAddr, u32 dst_line void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half) + const EFBRectangle& src_rect, bool scale_by_half, float y_scale, + float gamma, float clamp_top, float clamp_bottom, + const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) { g_renderer->ResetAPIState(); @@ -146,14 +155,18 @@ void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 nativ texconv_shader.program.Bind(); glUniform4i(texconv_shader.copy_position_uniform, src_rect.left, src_rect.top, native_width, scale_by_half ? 
2 : 1); - glUniform1f(texconv_shader.y_scale_uniform, params.y_scale); + glUniform1f(texconv_shader.y_scale_uniform, y_scale); + glUniform1f(texconv_shader.gamma_rcp_uniform, 1.0f / gamma); + glUniform2f(texconv_shader.clamp_tb_uniform, clamp_top, clamp_bottom); + glUniform3i(texconv_shader.filter_coefficients_uniform, filter_coefficients[0], + filter_coefficients[1], filter_coefficients[2]); const GLuint read_texture = params.depth ? FramebufferManager::ResolveAndGetDepthTarget(src_rect) : FramebufferManager::ResolveAndGetRenderTarget(src_rect); EncodeToRamUsingShader(read_texture, dest_ptr, bytes_per_row, num_blocks_y, memory_stride, - scale_by_half && !params.depth, params.y_scale); + scale_by_half && !params.depth, y_scale); g_renderer->RestoreAPIState(); } diff --git a/Source/Core/VideoBackends/OGL/TextureConverter.h b/Source/Core/VideoBackends/OGL/TextureConverter.h index baed715a7e..0d7450e4b6 100644 --- a/Source/Core/VideoBackends/OGL/TextureConverter.h +++ b/Source/Core/VideoBackends/OGL/TextureConverter.h @@ -7,10 +7,9 @@ #include "Common/CommonTypes.h" #include "Common/GL/GLUtil.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoCommon.h" -struct EFBCopyParams; - namespace OGL { // Converts textures between formats using shaders @@ -21,9 +20,11 @@ void Init(); void Shutdown(); // returns size of the encoded data (in bytes) -void EncodeToRamFromTexture(u8* dest_ptr, const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half); +void EncodeToRamFromTexture( + u8* dest_ptr, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, + u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, + float y_scale, float gamma, float clamp_top, float clamp_bottom, + const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); } } // namespace OGL diff --git 
a/Source/Core/VideoBackends/Software/EfbInterface.cpp b/Source/Core/VideoBackends/Software/EfbInterface.cpp index a94ac467b8..a13feb6c50 100644 --- a/Source/Core/VideoBackends/Software/EfbInterface.cpp +++ b/Source/Core/VideoBackends/Software/EfbInterface.cpp @@ -469,10 +469,59 @@ u32 GetColor(u16 x, u16 y) return GetPixelColor(offset); } -// For internal used only, return a non-normalized value, which saves work later. -yuv444 GetColorYUV(u16 x, u16 y) +static u32 VerticalFilter(const std::array& colors, + const std::array& filterCoefficients) +{ + u8 in_colors[3][4]; + std::memcpy(&in_colors, colors.data(), sizeof(in_colors)); + + // Alpha channel is not used + u8 out_color[4]; + out_color[ALP_C] = 0; + + // All Coefficients should sum to 64, otherwise the total brightness will change, which many games + // do on purpose to implement a brightness filter across the whole copy. + for (int i = BLU_C; i <= RED_C; i++) + { + // TODO: implement support for multisampling. + // In non-multisampling mode: + // * Coefficients 2, 3 and 4 sample from the current pixel. + // * Coefficients 0 and 1 sample from the pixel above this one + // * Coefficients 5 and 6 sample from the pixel below this one + int sum = + in_colors[0][i] * (filterCoefficients[0] + filterCoefficients[1]) + + in_colors[1][i] * (filterCoefficients[2] + filterCoefficients[3] + filterCoefficients[4]) + + in_colors[2][i] * (filterCoefficients[5] + filterCoefficients[6]); + + // TODO: this clamping behavior appears to be correct, but isn't confirmed on hardware. 
+ out_color[i] = std::min(255, sum >> 6); // clamp larger values to 255 + } + + u32 out_color32; + std::memcpy(&out_color32, out_color, sizeof(out_color32)); + return out_color32; +} + +static u32 GammaCorrection(u32 color, const float gamma_rcp) +{ + u8 in_colors[4]; + std::memcpy(&in_colors, &color, sizeof(in_colors)); + + u8 out_color[4]; + for (int i = BLU_C; i <= RED_C; i++) + { + out_color[i] = static_cast( + MathUtil::Clamp(std::pow(in_colors[i] / 255.0f, gamma_rcp) * 255.0f, 0.0f, 255.0f)); + } + + u32 out_color32; + std::memcpy(&out_color32, out_color, sizeof(out_color32)); + return out_color32; +} + +// For internal used only, return a non-normalized value, which saves work later. +static yuv444 ConvertColorToYUV(u32 color) { - const u32 color = GetColor(x, y); const u8 red = static_cast(color >> 24); const u8 green = static_cast(color >> 16); const u8 blue = static_cast(color >> 8); @@ -497,7 +546,8 @@ u8* GetPixelPointer(u16 x, u16 y, bool depth) return &efb[GetColorOffset(x, y)]; } -void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale) +void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale, + float gamma) { if (!xfb_in_ram) { @@ -505,8 +555,12 @@ void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rec return; } - int left = source_rect.left; - int right = source_rect.right; + const int left = source_rect.left; + const int right = source_rect.right; + const bool clamp_top = bpmem.triggerEFBCopy.clamp_top; + const bool clamp_bottom = bpmem.triggerEFBCopy.clamp_bottom; + const float gamma_rcp = 1.0f / gamma; + const auto filter_coefficients = bpmem.copyfilter.GetCoefficients(); // this assumes copies will always start on an even (YU) pixel and the // copy always has an even width, which might not be true. 
@@ -523,13 +577,30 @@ void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rec source.resize(EFB_WIDTH * EFB_HEIGHT); yuv422_packed* src_ptr = &source[0]; - for (float y = source_rect.top; y < source_rect.bottom; y++) + for (int y = source_rect.top; y < source_rect.bottom; y++) { - // Get a scanline of YUV pixels in 4:4:4 format + // Clamping behavior + // NOTE: when the clamp bits aren't set, the hardware will happily read beyond the EFB, + // which returns random garbage from the empty bus (confirmed by hardware tests). + // + // In our implementation, the garbage just so happens to be the top or bottom row. + // Statistically, that could happen. + u16 y_prev = static_cast(std::max(clamp_top ? source_rect.top : 0, y - 1)); + u16 y_next = static_cast(std::min(clamp_bottom ? source_rect.bottom : EFB_HEIGHT, y + 1)); + // Get a scanline of YUV pixels in 4:4:4 format for (int i = 1, x = left; x < right; i++, x++) { - scanline[i] = GetColorYUV(x, y); + // Get RGB colors + std::array colors = {{GetColor(x, y_prev), GetColor(x, y), GetColor(x, y_next)}}; + + // Vertical Filter (Multisampling resolve, deflicker, brightness) + u32 filtered = VerticalFilter(colors, filter_coefficients); + + // Gamma correction happens here. 
+ filtered = GammaCorrection(filtered, gamma_rcp); + + scanline[i] = ConvertColorToYUV(filtered); } // Flipper clamps the border colors @@ -549,7 +620,7 @@ void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rec src_ptr[x + 1].Y = scanline[i + 1].Y + 16; // V[i] = 1/4 * V[i-1] + 1/2 * V[i] + 1/4 * V[i+1] src_ptr[x + 1].UV = - 128 + ((scanline[i].V + (scanline[i + 1].V << 1) + scanline[i + 2].V) >> 2); + 128 + ((scanline[i - 1].V + (scanline[i].V << 1) + scanline[i + 1].V) >> 2); } src_ptr += memory_stride; } diff --git a/Source/Core/VideoBackends/Software/EfbInterface.h b/Source/Core/VideoBackends/Software/EfbInterface.h index a95d6b8aeb..7f7c0ec608 100644 --- a/Source/Core/VideoBackends/Software/EfbInterface.h +++ b/Source/Core/VideoBackends/Software/EfbInterface.h @@ -4,6 +4,8 @@ #pragma once +#include + #include "Common/CommonTypes.h" #include "VideoCommon/PerfQueryBase.h" #include "VideoCommon/VideoCommon.h" @@ -52,12 +54,12 @@ void SetColor(u16 x, u16 y, u8* color); void SetDepth(u16 x, u16 y, u32 depth); u32 GetColor(u16 x, u16 y); -yuv444 GetColorYUV(u16 x, u16 y); u32 GetDepth(u16 x, u16 y); u8* GetPixelPointer(u16 x, u16 y, bool depth); -void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale); +void EncodeXFB(u8* xfb_in_ram, u32 memory_stride, const EFBRectangle& source_rect, float y_scale, + float gamma); extern u32 perf_values[PQ_NUM_MEMBERS]; inline void IncPerfCounterQuadCount(PerfQueryType type) diff --git a/Source/Core/VideoBackends/Software/SWRenderer.cpp b/Source/Core/VideoBackends/Software/SWRenderer.cpp index 51b035dd82..f4e322619d 100644 --- a/Source/Core/VideoBackends/Software/SWRenderer.cpp +++ b/Source/Core/VideoBackends/Software/SWRenderer.cpp @@ -87,8 +87,7 @@ std::unique_ptr SWRenderer::CreatePipeline(const AbstractPipel } // Called on the GPU thread -void SWRenderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks, - float Gamma) +void 
SWRenderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks) { OSD::DoCallbacks(OSD::CallbackType::OnFrame); diff --git a/Source/Core/VideoBackends/Software/SWRenderer.h b/Source/Core/VideoBackends/Software/SWRenderer.h index 2c4a5aeef6..3c274edda7 100644 --- a/Source/Core/VideoBackends/Software/SWRenderer.h +++ b/Source/Core/VideoBackends/Software/SWRenderer.h @@ -34,7 +34,7 @@ public: TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; - void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override; + void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override; void ClearScreen(const EFBRectangle& rc, bool colorEnable, bool alphaEnable, bool zEnable, u32 color, u32 z) override; diff --git a/Source/Core/VideoBackends/Software/TextureCache.h b/Source/Core/VideoBackends/Software/TextureCache.h index 13a70002f2..f3d9c91938 100644 --- a/Source/Core/VideoBackends/Software/TextureCache.h +++ b/Source/Core/VideoBackends/Software/TextureCache.h @@ -18,15 +18,18 @@ public: } void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half) override + bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override { TextureEncoder::Encode(dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, - src_rect, scale_by_half); + src_rect, scale_by_half, y_scale, gamma); } private: void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity) override + bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, + float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override { // TODO: If we ever want to "fake" vram 
textures, we would need to implement this } diff --git a/Source/Core/VideoBackends/Software/TextureEncoder.cpp b/Source/Core/VideoBackends/Software/TextureEncoder.cpp index 751b148e18..a31888e544 100644 --- a/Source/Core/VideoBackends/Software/TextureEncoder.cpp +++ b/Source/Core/VideoBackends/Software/TextureEncoder.cpp @@ -1469,11 +1469,12 @@ void EncodeEfbCopy(u8* dst, const EFBCopyParams& params, u32 native_width, u32 b } void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half) + u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, + float y_scale, float gamma) { if (params.copy_format == EFBCopyFormat::XFB) { - EfbInterface::EncodeXFB(dst, native_width, src_rect, params.y_scale); + EfbInterface::EncodeXFB(dst, native_width, src_rect, y_scale, gamma); } else { diff --git a/Source/Core/VideoBackends/Software/TextureEncoder.h b/Source/Core/VideoBackends/Software/TextureEncoder.h index ec21c97c42..20aa05605a 100644 --- a/Source/Core/VideoBackends/Software/TextureEncoder.h +++ b/Source/Core/VideoBackends/Software/TextureEncoder.h @@ -5,12 +5,12 @@ #pragma once #include "Common/CommonTypes.h" +#include "VideoCommon/TextureCacheBase.h" #include "VideoCommon/VideoCommon.h" -struct EFBCopyParams; - namespace TextureEncoder { void Encode(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, - u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half); + u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half, + float y_scale, float gamma); } diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.cpp b/Source/Core/VideoBackends/Vulkan/Renderer.cpp index c69ad396d6..38d7bd0992 100644 --- a/Source/Core/VideoBackends/Vulkan/Renderer.cpp +++ b/Source/Core/VideoBackends/Vulkan/Renderer.cpp @@ -697,8 +697,7 @@ void 
Renderer::ReinterpretPixelData(unsigned int convtype) BindEFBToStateTracker(); } -void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks, - float Gamma) +void Renderer::SwapImpl(AbstractTexture* texture, const EFBRectangle& xfb_region, u64 ticks) { // Pending/batched EFB pokes should be included in the final image. FramebufferManager::GetInstance()->FlushEFBPokes(); diff --git a/Source/Core/VideoBackends/Vulkan/Renderer.h b/Source/Core/VideoBackends/Vulkan/Renderer.h index b2662a3d4b..ce15d12592 100644 --- a/Source/Core/VideoBackends/Vulkan/Renderer.h +++ b/Source/Core/VideoBackends/Vulkan/Renderer.h @@ -59,7 +59,7 @@ public: void BBoxWrite(int index, u16 value) override; TargetRectangle ConvertEFBRectangle(const EFBRectangle& rc) override; - void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, float Gamma) override; + void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) override; void ClearScreen(const EFBRectangle& rc, bool color_enable, bool alpha_enable, bool z_enable, u32 color, u32 z) override; diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp index cb61cd4de4..bc7a0d0f18 100644 --- a/Source/Core/VideoBackends/Vulkan/TextureCache.cpp +++ b/Source/Core/VideoBackends/Vulkan/TextureCache.cpp @@ -100,7 +100,9 @@ void TextureCache::ConvertTexture(TCacheEntry* destination, TCacheEntry* source, void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half) + const EFBRectangle& src_rect, bool scale_by_half, float y_scale, + float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) { // Flush EFB pokes first, as they're expected to be included. 
FramebufferManager::GetInstance()->FlushEFBPokes(); @@ -131,9 +133,9 @@ void TextureCache::CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_widt src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - m_texture_converter->EncodeTextureToMemory(src_texture->GetView(), dst, params, native_width, - bytes_per_row, num_blocks_y, memory_stride, src_rect, - scale_by_half); + m_texture_converter->EncodeTextureToMemory( + src_texture->GetView(), dst, params, native_width, bytes_per_row, num_blocks_y, memory_stride, + src_rect, scale_by_half, y_scale, gamma, clamp_top, clamp_bottom, filter_coefficients); // Transition back to original state src_texture->TransitionToLayout(g_command_buffer_mgr->GetCurrentCommandBuffer(), original_layout); @@ -209,7 +211,9 @@ void TextureCache::DeleteShaders() void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity) + EFBCopyFormat dst_format, bool is_intensity, float gamma, + bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) { VKTexture* texture = static_cast(entry->texture.get()); @@ -228,6 +232,26 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, VkCommandBuffer command_buffer = g_command_buffer_mgr->GetCurrentCommandBuffer(); StateTracker::GetInstance()->EndRenderPass(); + // Fill uniform buffer. + struct PixelUniforms + { + float filter_coefficients[3]; + float gamma_rcp; + float clamp_top; + float clamp_bottom; + float pixel_height; + u32 padding; + }; + PixelUniforms uniforms; + for (size_t i = 0; i < filter_coefficients.size(); i++) + uniforms.filter_coefficients[i] = filter_coefficients[i] / 64.0f; + uniforms.gamma_rcp = 1.0f / gamma; + uniforms.clamp_top = clamp_top ? src_rect.top / float(EFB_HEIGHT) : 0.0f; + uniforms.clamp_bottom = clamp_bottom ? 
src_rect.bottom / float(EFB_HEIGHT) : 1.0f; + uniforms.pixel_height = + g_ActiveConfig.bCopyEFBScaled ? 1.0f / g_renderer->GetTargetHeight() : 1.0f / EFB_HEIGHT; + uniforms.padding = 0; + // Transition EFB to shader resource before binding. // An out-of-bounds source region is valid here, and fine for the draw (since it is converted // to texture coordinates), but it's not valid to resolve an out-of-range rectangle. @@ -274,6 +298,10 @@ void TextureCache::CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, g_shader_cache->GetPassthroughVertexShader(), g_shader_cache->GetPassthroughGeometryShader(), shader); + u8* ubo_ptr = draw.AllocatePSUniforms(sizeof(PixelUniforms)); + std::memcpy(ubo_ptr, &uniforms, sizeof(PixelUniforms)); + draw.CommitPSUniforms(sizeof(PixelUniforms)); + draw.SetPSSampler(0, src_texture->GetView(), src_sampler); VkRect2D dest_region = {{0, 0}, {texture->GetConfig().width, texture->GetConfig().height}}; diff --git a/Source/Core/VideoBackends/Vulkan/TextureCache.h b/Source/Core/VideoBackends/Vulkan/TextureCache.h index b27f9ad0e7..846761d1d5 100644 --- a/Source/Core/VideoBackends/Vulkan/TextureCache.h +++ b/Source/Core/VideoBackends/Vulkan/TextureCache.h @@ -38,7 +38,8 @@ public: void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half) override; + bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) override; bool SupportsGPUTextureDecode(TextureFormat format, TLUTFormat palette_format) override; @@ -52,8 +53,9 @@ public: private: void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, - bool scale_by_half, EFBCopyFormat dst_format, - bool is_intensity) override; + bool scale_by_half, EFBCopyFormat dst_format, bool is_intensity, + float gamma, bool clamp_top, bool clamp_bottom, + const 
CopyFilterCoefficientArray& filter_coefficients) override; std::unique_ptr m_texture_upload_buffer; diff --git a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp index 6c584fcc95..cf5d7075f7 100644 --- a/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp +++ b/Source/Core/VideoBackends/Vulkan/TextureConverter.cpp @@ -38,6 +38,11 @@ struct EFBEncodeParams { std::array position_uniform; float y_scale; + float gamma_rcp; + float clamp_top; + float clamp_bottom; + s32 filter_coefficients[3]; + u32 padding; }; } TextureConverter::TextureConverter() @@ -201,10 +206,11 @@ void TextureConverter::ConvertTexture(TextureCacheBase::TCacheEntry* dst_entry, draw.EndRenderPass(); } -void TextureConverter::EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, - const EFBCopyParams& params, u32 native_width, - u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, - const EFBRectangle& src_rect, bool scale_by_half) +void TextureConverter::EncodeTextureToMemory( + VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params, u32 native_width, + u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, + bool scale_by_half, float y_scale, float gamma, bool clamp_top, bool clamp_bottom, + const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients) { VkShaderModule shader = GetEncodingShader(params); if (shader == VK_NULL_HANDLE) @@ -236,14 +242,21 @@ void TextureConverter::EncodeTextureToMemory(VkImageView src_texture, u8* dest_p encoder_params.position_uniform[1] = src_rect.top; encoder_params.position_uniform[2] = static_cast(native_width); encoder_params.position_uniform[3] = scale_by_half ? 2 : 1; - encoder_params.y_scale = params.y_scale; - draw.SetPushConstants(&encoder_params, sizeof(encoder_params)); + encoder_params.y_scale = y_scale; + encoder_params.gamma_rcp = 1.0f / gamma; + encoder_params.clamp_top = clamp_top ? 
src_rect.top / float(EFB_HEIGHT) : 0.0f; + encoder_params.clamp_bottom = clamp_bottom ? src_rect.bottom / float(EFB_HEIGHT) : 0.0f; + for (size_t i = 0; i < filter_coefficients.size(); i++) + encoder_params.filter_coefficients[i] = filter_coefficients[i]; + u8* ubo_ptr = draw.AllocatePSUniforms(sizeof(EFBEncodeParams)); + std::memcpy(ubo_ptr, &encoder_params, sizeof(EFBEncodeParams)); + draw.CommitPSUniforms(sizeof(EFBEncodeParams)); // We also linear filtering for both box filtering and downsampling higher resolutions to 1x // TODO: This only produces perfect downsampling for 2x IR, other resolutions will need more // complex down filtering to average all pixels and produce the correct result. bool linear_filter = - (scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || params.y_scale > 1.0f; + (scale_by_half && !params.depth) || g_renderer->GetEFBScale() != 1 || y_scale > 1.0f; draw.SetPSSampler(0, src_texture, linear_filter ? g_object_cache->GetLinearSampler() : g_object_cache->GetPointSampler()); diff --git a/Source/Core/VideoBackends/Vulkan/TextureConverter.h b/Source/Core/VideoBackends/Vulkan/TextureConverter.h index f277c2d597..f85efc4d5c 100644 --- a/Source/Core/VideoBackends/Vulkan/TextureConverter.h +++ b/Source/Core/VideoBackends/Vulkan/TextureConverter.h @@ -40,9 +40,12 @@ public: // Uses an encoding shader to copy src_texture to dest_ptr. // NOTE: Executes the current command buffer. 
- void EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params, - u32 native_width, u32 bytes_per_row, u32 num_blocks_y, - u32 memory_stride, const EFBRectangle& src_rect, bool scale_by_half); + void + EncodeTextureToMemory(VkImageView src_texture, u8* dest_ptr, const EFBCopyParams& params, + u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, + const EFBRectangle& src_rect, bool scale_by_half, float y_scale, + float gamma, bool clamp_top, bool clamp_bottom, + const TextureCacheBase::CopyFilterCoefficientArray& filter_coefficients); bool SupportsTextureDecoding(TextureFormat format, TLUTFormat palette_format); void DecodeTexture(VkCommandBuffer command_buffer, TextureCache::TCacheEntry* entry, diff --git a/Source/Core/VideoCommon/BPMemory.h b/Source/Core/VideoCommon/BPMemory.h index 0157b5aed3..0f287b4f99 100644 --- a/Source/Core/VideoCommon/BPMemory.h +++ b/Source/Core/VideoCommon/BPMemory.h @@ -4,6 +4,7 @@ #pragma once +#include #include #include "Common/BitField.h" @@ -944,8 +945,8 @@ union UPE_Copy { u32 Hex; - BitField<0, 1, u32> clamp0; // if set clamp top - BitField<1, 1, u32> clamp1; // if set clamp bottom + BitField<0, 1, u32> clamp_top; // if set clamp top + BitField<1, 1, u32> clamp_bottom; // if set clamp bottom BitField<2, 1, u32> yuv; // if set, color conversion from RGB to YUV BitField<3, 4, u32> target_pixel_format; // realformat is (fmt/2)+((fmt&1)*8).... 
for some reason // the msb is the lsb (pattern: cycling right shift) @@ -967,6 +968,29 @@ union UPE_Copy } }; +union CopyFilterCoefficients +{ + using Values = std::array; + + u64 Hex; + + BitField<0, 6, u64> w0; + BitField<6, 6, u64> w1; + BitField<12, 6, u64> w2; + BitField<18, 6, u64> w3; + BitField<32, 6, u64> w4; + BitField<38, 6, u64> w5; + BitField<44, 6, u64> w6; + + Values GetCoefficients() const + { + return { + static_cast(w0), static_cast(w1), static_cast(w2), static_cast(w3), + static_cast(w4), static_cast(w5), static_cast(w6), + }; + } +}; + union BPU_PreloadTileInfo { u32 hex; @@ -1036,29 +1060,29 @@ struct BPMemory // 2 channel, 16 when dest is RGBA // also, doubles whenever mipmap box filter option is set (excent on RGBA). Probably to do with // number of bytes to look at when smoothing - u32 dispcopyyscale; // 4e - u32 clearcolorAR; // 4f - u32 clearcolorGB; // 50 - u32 clearZValue; // 51 - UPE_Copy triggerEFBCopy; // 52 - u32 copyfilter[2]; // 53,54 - u32 boundbox0; // 55 - u32 boundbox1; // 56 - u32 unknown7[2]; // 57,58 - X10Y10 scissorOffset; // 59 - u32 unknown8[6]; // 5a,5b,5c,5d, 5e,5f - BPS_TmemConfig tmem_config; // 60-66 - u32 metric; // 67 - FieldMode fieldmode; // 68 - u32 unknown10[7]; // 69-6F - u32 unknown11[16]; // 70-7F - FourTexUnits tex[2]; // 80-bf - TevStageCombiner combiners[16]; // 0xC0-0xDF - TevReg tevregs[4]; // 0xE0 - FogRangeParams fogRange; // 0xE8 - FogParams fog; // 0xEE,0xEF,0xF0,0xF1,0xF2 - AlphaTest alpha_test; // 0xF3 - ZTex1 ztex1; // 0xf4,0xf5 + u32 dispcopyyscale; // 4e + u32 clearcolorAR; // 4f + u32 clearcolorGB; // 50 + u32 clearZValue; // 51 + UPE_Copy triggerEFBCopy; // 52 + CopyFilterCoefficients copyfilter; // 53,54 + u32 boundbox0; // 55 + u32 boundbox1; // 56 + u32 unknown7[2]; // 57,58 + X10Y10 scissorOffset; // 59 + u32 unknown8[6]; // 5a,5b,5c,5d, 5e,5f + BPS_TmemConfig tmem_config; // 60-66 + u32 metric; // 67 + FieldMode fieldmode; // 68 + u32 unknown10[7]; // 69-6F + u32 unknown11[16]; // 70-7F 
+ FourTexUnits tex[2]; // 80-bf + TevStageCombiner combiners[16]; // 0xC0-0xDF + TevReg tevregs[4]; // 0xE0 + FogRangeParams fogRange; // 0xE8 + FogParams fog; // 0xEE,0xEF,0xF0,0xF1,0xF2 + AlphaTest alpha_test; // 0xF3 + ZTex1 ztex1; // 0xf4,0xf5 ZTex2 ztex2; TevKSel tevksel[8]; // 0xf6,0xf7,f8,f9,fa,fb,fc,fd u32 bpMask; // 0xFE diff --git a/Source/Core/VideoCommon/BPStructs.cpp b/Source/Core/VideoCommon/BPStructs.cpp index 4647c3de4e..0e0d639a63 100644 --- a/Source/Core/VideoCommon/BPStructs.cpp +++ b/Source/Core/VideoCommon/BPStructs.cpp @@ -229,10 +229,13 @@ static void BPWritten(const BPCmd& bp) { // bpmem.zcontrol.pixel_format to PEControl::Z24 is when the game wants to copy from ZBuffer // (Zbuffer uses 24-bit Format) + static constexpr CopyFilterCoefficients::Values filter_coefficients = { + {0, 0, 21, 22, 21, 0, 0}}; bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24; g_texture_cache->CopyRenderTargetToTexture( destAddr, PE_copy.tp_realFormat(), srcRect.GetWidth(), srcRect.GetHeight(), destStride, - is_depth_copy, srcRect, !!PE_copy.intensity_fmt, !!PE_copy.half_scale, 1.0f, 1.0f); + is_depth_copy, srcRect, !!PE_copy.intensity_fmt, !!PE_copy.half_scale, 1.0f, 1.0f, + bpmem.triggerEFBCopy.clamp_top, bpmem.triggerEFBCopy.clamp_bottom, filter_coefficients); } else { @@ -260,9 +263,10 @@ static void BPWritten(const BPCmd& bp) bpmem.copyTexSrcWH.x + 1, destStride, height, yScale); bool is_depth_copy = bpmem.zcontrol.pixel_format == PEControl::Z24; - g_texture_cache->CopyRenderTargetToTexture(destAddr, EFBCopyFormat::XFB, srcRect.GetWidth(), - height, destStride, is_depth_copy, srcRect, false, - false, yScale, s_gammaLUT[PE_copy.gamma]); + g_texture_cache->CopyRenderTargetToTexture( + destAddr, EFBCopyFormat::XFB, srcRect.GetWidth(), height, destStride, is_depth_copy, + srcRect, false, false, yScale, s_gammaLUT[PE_copy.gamma], bpmem.triggerEFBCopy.clamp_top, + bpmem.triggerEFBCopy.clamp_bottom, bpmem.copyfilter.GetCoefficients()); // This stays 
in to signal end of a "frame" g_renderer->RenderToXFB(destAddr, srcRect, destStride, height, s_gammaLUT[PE_copy.gamma]); @@ -1015,9 +1019,9 @@ void GetBPRegInfo(const u8* data, std::string* name, std::string* desc) "Copy to XFB: %s\n" "Intensity format: %s\n" "Automatic color conversion: %s", - (copy.clamp0 && copy.clamp1) ? + (copy.clamp_top && copy.clamp_bottom) ? "Top and Bottom" : - (copy.clamp0) ? "Top only" : (copy.clamp1) ? "Bottom only" : "None", + (copy.clamp_top) ? "Top only" : (copy.clamp_bottom) ? "Bottom only" : "None", no_yes[copy.yuv], static_cast(copy.tp_realFormat()), (copy.gamma == 0) ? "1.0" : diff --git a/Source/Core/VideoCommon/RenderBase.cpp b/Source/Core/VideoCommon/RenderBase.cpp index 1598fe0a00..76d40f5aa3 100644 --- a/Source/Core/VideoCommon/RenderBase.cpp +++ b/Source/Core/VideoCommon/RenderBase.cpp @@ -680,7 +680,7 @@ void Renderer::Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const // TODO: merge more generic parts into VideoCommon { std::lock_guard guard(m_swap_mutex); - g_renderer->SwapImpl(xfb_entry->texture.get(), xfb_rect, ticks, xfb_entry->gamma); + g_renderer->SwapImpl(xfb_entry->texture.get(), xfb_rect, ticks); } // Update the window size based on the frame that was just rendered. 
diff --git a/Source/Core/VideoCommon/RenderBase.h b/Source/Core/VideoCommon/RenderBase.h index e7f7668f22..7e4f4e00f7 100644 --- a/Source/Core/VideoCommon/RenderBase.h +++ b/Source/Core/VideoCommon/RenderBase.h @@ -175,8 +175,7 @@ public: // Finish up the current frame, print some stats void Swap(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, const EFBRectangle& rc, u64 ticks); - virtual void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks, - float Gamma = 1.0f) = 0; + virtual void SwapImpl(AbstractTexture* texture, const EFBRectangle& rc, u64 ticks) = 0; PEControl::PixelFormat GetPrevPixelFormat() const { return m_prev_efb_format; } void StorePixelFormat(PEControl::PixelFormat new_format) { m_prev_efb_format = new_format; } diff --git a/Source/Core/VideoCommon/TextureCacheBase.cpp b/Source/Core/VideoCommon/TextureCacheBase.cpp index 9ac75d31f0..f0b1392531 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.cpp +++ b/Source/Core/VideoCommon/TextureCacheBase.cpp @@ -1499,10 +1499,39 @@ void TextureCacheBase::LoadTextureLevelZeroFromMemory(TCacheEntry* entry_to_upda } } -void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width, - u32 height, u32 dstStride, bool is_depth_copy, - const EFBRectangle& srcRect, bool isIntensity, - bool scaleByHalf, float y_scale, float gamma) +TextureCacheBase::CopyFilterCoefficientArray +TextureCacheBase::GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) +{ + // To simplify the backend, we precalculate the three coefficients in common. Coefficients 0, 1 + // are for the row above, 2, 3, 4 are for the current pixel, and 5, 6 are for the row below. 
+ return {static_cast(coefficients[0]) + static_cast(coefficients[1]), + static_cast(coefficients[2]) + static_cast(coefficients[3]) + + static_cast(coefficients[4]), + static_cast(coefficients[5]) + static_cast(coefficients[6])}; +} + +TextureCacheBase::CopyFilterCoefficientArray +TextureCacheBase::GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients) +{ + // If the user disables the copy filter, only apply it to the VRAM copy. + // This way games which are sensitive to changes to the RAM copy of the XFB will be unaffected. + CopyFilterCoefficientArray res = GetRAMCopyFilterCoefficients(coefficients); + if (!g_ActiveConfig.bDisableCopyFilter) + return res; + + // Disabling the copy filter in options should not ignore the values the game sets completely, + // as some games use the filter coefficients to control the brightness of the screen. Instead, + // add all coefficients to the middle sample, so the deflicker/vertical filter has no effect. + res[1] += res[0] + res[2]; + res[0] = 0; + res[2] = 0; + return res; +} + +void TextureCacheBase::CopyRenderTargetToTexture( + u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, u32 dstStride, bool is_depth_copy, + const EFBRectangle& srcRect, bool isIntensity, bool scaleByHalf, float y_scale, float gamma, + bool clamp_top, bool clamp_bottom, const CopyFilterCoefficients::Values& filter_coefficients) { // Emulation methods: // @@ -1622,8 +1651,10 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF if (copy_to_ram) { PEControl::PixelFormat srcFormat = bpmem.zcontrol.pixel_format; - EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity, y_scale); - CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf); + EFBCopyParams format(srcFormat, dstFormat, is_depth_copy, isIntensity); + CopyEFB(dst, format, tex_w, bytes_per_row, num_blocks_y, dstStride, srcRect, scaleByHalf, + y_scale, gamma, clamp_top, 
clamp_bottom, + GetRAMCopyFilterCoefficients(filter_coefficients)); } else { @@ -1742,8 +1773,6 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF { entry->SetGeneralParameters(dstAddr, 0, baseFormat, is_xfb_copy); entry->SetDimensions(tex_w, tex_h, 1); - entry->gamma = gamma; - entry->frameCount = FRAMECOUNT_INVALID; if (is_xfb_copy) { @@ -1757,7 +1786,9 @@ void TextureCacheBase::CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstF entry->may_have_overlapping_textures = false; entry->is_custom_tex = false; - CopyEFBToCacheEntry(entry, is_depth_copy, srcRect, scaleByHalf, dstFormat, isIntensity); + CopyEFBToCacheEntry(entry, is_depth_copy, srcRect, scaleByHalf, dstFormat, isIntensity, gamma, + clamp_top, clamp_bottom, + GetVRAMCopyFilterCoefficients(filter_coefficients)); u64 hash = entry->CalculateHash(); entry->SetHashes(hash, hash); diff --git a/Source/Core/VideoCommon/TextureCacheBase.h b/Source/Core/VideoCommon/TextureCacheBase.h index 6dce346c24..720e95b470 100644 --- a/Source/Core/VideoCommon/TextureCacheBase.h +++ b/Source/Core/VideoCommon/TextureCacheBase.h @@ -47,23 +47,21 @@ struct TextureAndTLUTFormat struct EFBCopyParams { EFBCopyParams(PEControl::PixelFormat efb_format_, EFBCopyFormat copy_format_, bool depth_, - bool yuv_, float y_scale_) - : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_), - y_scale(y_scale_) + bool yuv_) + : efb_format(efb_format_), copy_format(copy_format_), depth(depth_), yuv(yuv_) { } bool operator<(const EFBCopyParams& rhs) const { - return std::tie(efb_format, copy_format, depth, yuv, y_scale) < - std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv, rhs.y_scale); + return std::tie(efb_format, copy_format, depth, yuv) < + std::tie(rhs.efb_format, rhs.copy_format, rhs.depth, rhs.yuv); } PEControl::PixelFormat efb_format; EFBCopyFormat copy_format; bool depth; bool yuv; - float y_scale; }; struct TextureLookupInformation @@ -108,6 +106,8 @@ private: 
static const int FRAMECOUNT_INVALID = 0; public: + using CopyFilterCoefficientArray = std::array; + struct TCacheEntry { // common members @@ -126,7 +126,6 @@ public: // content, aren't just downscaled bool should_force_safe_hashing = false; // for XFB bool is_xfb_copy = false; - float gamma = 1.0f; u64 id; bool reference_changed = false; // used by xfb to determine when a reference xfb changed @@ -216,7 +215,9 @@ public: virtual void CopyEFB(u8* dst, const EFBCopyParams& params, u32 native_width, u32 bytes_per_row, u32 num_blocks_y, u32 memory_stride, const EFBRectangle& src_rect, - bool scale_by_half) = 0; + bool scale_by_half, float y_scale, float gamma, bool clamp_top, + bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) = 0; virtual bool CompileShaders() = 0; virtual void DeleteShaders() = 0; @@ -248,7 +249,9 @@ public: virtual void BindTextures(); void CopyRenderTargetToTexture(u32 dstAddr, EFBCopyFormat dstFormat, u32 width, u32 height, u32 dstStride, bool is_depth_copy, const EFBRectangle& srcRect, - bool isIntensity, bool scaleByHalf, float y_scale, float gamma); + bool isIntensity, bool scaleByHalf, float y_scale, float gamma, + bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficients::Values& filter_coefficients); virtual void ConvertTexture(TCacheEntry* entry, TCacheEntry* unconverted, const void* palette, TLUTFormat format) = 0; @@ -315,13 +318,21 @@ private: virtual void CopyEFBToCacheEntry(TCacheEntry* entry, bool is_depth_copy, const EFBRectangle& src_rect, bool scale_by_half, - EFBCopyFormat dst_format, bool is_intensity) = 0; + EFBCopyFormat dst_format, bool is_intensity, float gamma, + bool clamp_top, bool clamp_bottom, + const CopyFilterCoefficientArray& filter_coefficients) = 0; // Removes and unlinks texture from texture cache and returns it to the pool TexAddrCache::iterator InvalidateTexture(TexAddrCache::iterator t_iter); void UninitializeXFBMemory(u8* dst, u32 stride, u32 bytes_per_row, u32 num_blocks_y); 
+ // Precomputing the coefficients for the previous, current, and next lines for the copy filter. + CopyFilterCoefficientArray + GetRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); + CopyFilterCoefficientArray + GetVRAMCopyFilterCoefficients(const CopyFilterCoefficients::Values& coefficients); + TexAddrCache textures_by_address; TexHashCache textures_by_hash; TexPool texture_pool; diff --git a/Source/Core/VideoCommon/TextureConversionShader.cpp b/Source/Core/VideoCommon/TextureConversionShader.cpp index 4d1539572c..1912f2aee5 100644 --- a/Source/Core/VideoCommon/TextureConversionShader.cpp +++ b/Source/Core/VideoCommon/TextureConversionShader.cpp @@ -57,19 +57,44 @@ u16 GetEncodedSampleCount(EFBCopyFormat format) } } -// block dimensions : widthStride, heightStride -// texture dims : width, height, x offset, y offset -static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType) +static void WriteHeader(char*& p, APIType ApiType) { - // left, top, of source rectangle within source texture - // width of the destination rectangle, scale_factor (1 or 2) - if (ApiType == APIType::Vulkan) - WRITE(p, - "layout(std140, push_constant) uniform PCBlock { int4 position; float y_scale; } PC;\n"); - else + if (ApiType == APIType::OpenGL) { + // left, top, of source rectangle within source texture + // width of the destination rectangle, scale_factor (1 or 2) WRITE(p, "uniform int4 position;\n"); WRITE(p, "uniform float y_scale;\n"); + WRITE(p, "uniform float gamma_rcp;\n"); + WRITE(p, "uniform float2 clamp_tb;\n"); + WRITE(p, "uniform int3 filter_coefficients;\n"); + WRITE(p, "#define samp0 samp9\n"); + WRITE(p, "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n"); + WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); + } + else if (ApiType == APIType::Vulkan) + { + WRITE(p, "UBO_BINDING(std140, 1) uniform PSBlock {\n"); + WRITE(p, " int4 position;\n"); + WRITE(p, " float y_scale;\n"); + WRITE(p, " float gamma_rcp;\n"); + 
WRITE(p, " float2 clamp_tb;\n"); + WRITE(p, " int3 filter_coefficients;\n"); + WRITE(p, "};\n"); + WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); + WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); + } + else // D3D + { + WRITE(p, "cbuffer PSBlock : register(b0) {\n"); + WRITE(p, " int4 position;\n"); + WRITE(p, " float y_scale;\n"); + WRITE(p, " float gamma_rcp;\n"); + WRITE(p, " float2 clamp_tb;\n"); + WRITE(p, " int3 filter_coefficients;\n"); + WRITE(p, "};\n"); + WRITE(p, "sampler samp0 : register(s0);\n"); + WRITE(p, "Texture2DArray Tex0 : register(t0);\n"); } // D3D does not have roundEven(), only round(), which is specified "to the nearest integer". @@ -96,39 +121,100 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType) WRITE(p, " val = int4(val.r >> 3, val.g >> 2, val.b >> 3, 1);\n"); WRITE(p, " return float4(val) / float4(31.0, 63.0, 31.0, 1.0);\n"); WRITE(p, "}\n"); +} - int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(format); - int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(format); - int samples = GetEncodedSampleCount(format); +static void WriteSampleFunction(char*& p, const EFBCopyParams& params, APIType ApiType) +{ + auto WriteSampleOp = [&](int yoffset) { + if (!params.depth) + { + switch (params.efb_format) + { + case PEControl::RGB8_Z24: + WRITE(p, "RGBA8ToRGB8("); + break; + case PEControl::RGBA6_Z24: + WRITE(p, "RGBA8ToRGBA6("); + break; + case PEControl::RGB565_Z16: + WRITE(p, "RGBA8ToRGB565("); + break; + default: + WRITE(p, "("); + break; + } + } + else + { + // Handle D3D depth inversion. 
+ if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) + WRITE(p, "1.0 - ("); + else + WRITE(p, "("); + } - if (ApiType == APIType::OpenGL) + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) + WRITE(p, "texture(samp0, float3("); + else + WRITE(p, "Tex0.Sample(samp0, float3("); + + WRITE(p, "uv.x + xoffset * pixel_size.x, "); + + // Reverse the direction for OpenGL, since positive numbers are distance from the bottom row. + if (yoffset != 0) + { + if (ApiType == APIType::OpenGL) + WRITE(p, "clamp(uv.y - float(%d) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset); + else + WRITE(p, "clamp(uv.y + float(%d) * pixel_size.y, clamp_tb.x, clamp_tb.y)", yoffset); + } + else + { + WRITE(p, "uv.y"); + } + + WRITE(p, ", 0.0)))"); + }; + + // The copy filter applies to both color and depth copies. This has been verified on hardware. + // The filter is only applied to the RGB channels, the alpha channel is left intact. + WRITE(p, "float4 SampleEFB(float2 uv, float2 pixel_size, int xoffset)\n"); + WRITE(p, "{\n"); + WRITE(p, " float4 prev_row = "); + WriteSampleOp(-1); + WRITE(p, ";\n"); + WRITE(p, " float4 current_row = "); + WriteSampleOp(0); + WRITE(p, ";\n"); + WRITE(p, " float4 next_row = "); + WriteSampleOp(1); + WRITE(p, ";\n"); + WRITE(p, + " float3 col = float3(clamp((int3(prev_row.rgb * 255.0) * filter_coefficients[0] +\n" + " int3(current_row.rgb * 255.0) * filter_coefficients[1] +\n" + " int3(next_row.rgb * 255.0) * filter_coefficients[2]) >> 6,\n" + " int3(0, 0, 0), int3(255, 255, 255))) / 255.0;\n"); + WRITE(p, " return float4(col, current_row.a);\n"); + WRITE(p, "}\n"); +} + +// block dimensions : widthStride, heightStride +// texture dims : width, height, x offset, y offset +static void WriteSwizzler(char*& p, const EFBCopyParams& params, EFBCopyFormat format, + APIType ApiType) +{ + WriteHeader(p, ApiType); + WriteSampleFunction(p, params, ApiType); + + if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) { - WRITE(p, "#define samp0 
samp9\n"); - WRITE(p, "SAMPLER_BINDING(9) uniform sampler2DArray samp0;\n"); - - WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); WRITE(p, "void main()\n"); WRITE(p, "{\n" " int2 sampleUv;\n" " int2 uv1 = int2(gl_FragCoord.xy);\n"); } - else if (ApiType == APIType::Vulkan) - { - WRITE(p, "SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); - WRITE(p, "FRAGMENT_OUTPUT_LOCATION(0) out vec4 ocol0;\n"); - - WRITE(p, "void main()\n"); - WRITE(p, "{\n" - " int2 sampleUv;\n" - " int2 uv1 = int2(gl_FragCoord.xy);\n" - " int4 position = PC.position;\n" - " float y_scale = PC.y_scale;\n"); - } else // D3D { - WRITE(p, "sampler samp0 : register(s0);\n"); - WRITE(p, "Texture2DArray Tex0 : register(t0);\n"); - WRITE(p, "void main(\n"); WRITE(p, " out float4 ocol0 : SV_Target, in float4 rawpos : SV_Position)\n"); WRITE(p, "{\n" @@ -136,6 +222,10 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType) " int2 uv1 = int2(rawpos.xy);\n"); } + int blkW = TexDecoder_GetEFBCopyBlockWidthInTexels(format); + int blkH = TexDecoder_GetEFBCopyBlockHeightInTexels(format); + int samples = GetEncodedSampleCount(format); + WRITE(p, " int x_block_position = (uv1.x >> %d) << %d;\n", IntLog2(blkH * blkW / samples), IntLog2(blkW)); WRITE(p, " int y_block_position = uv1.y << %d;\n", IntLog2(blkH)); @@ -167,51 +257,13 @@ static void WriteSwizzler(char*& p, EFBCopyFormat format, APIType ApiType) WRITE(p, " uv0.y = 1.0-uv0.y;\n"); } - WRITE(p, " float sample_offset = float(position.w) / float(%d);\n", EFB_WIDTH); + WRITE(p, " float2 pixel_size = position.ww / float2(%d, %d);\n", EFB_WIDTH, EFB_HEIGHT); } static void WriteSampleColor(char*& p, const char* colorComp, const char* dest, int xoffset, APIType ApiType, const EFBCopyParams& params) { - WRITE(p, " %s = ", dest); - - if (!params.depth) - { - switch (params.efb_format) - { - case PEControl::RGB8_Z24: - WRITE(p, "RGBA8ToRGB8("); - break; - case PEControl::RGBA6_Z24: - WRITE(p, "RGBA8ToRGBA6("); - break; - case 
PEControl::RGB565_Z16: - WRITE(p, "RGBA8ToRGB565("); - break; - default: - WRITE(p, "("); - break; - } - } - else - { - // Handle D3D depth inversion. - if (ApiType == APIType::D3D || ApiType == APIType::Vulkan) - WRITE(p, "1.0 - ("); - else - WRITE(p, "("); - } - - if (ApiType == APIType::OpenGL || ApiType == APIType::Vulkan) - { - WRITE(p, "texture(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0))).%s;\n", xoffset, - colorComp); - } - else - { - WRITE(p, "Tex0.Sample(samp0, float3(uv0 + float2(%d, 0) * sample_offset, 0.0))).%s;\n", xoffset, - colorComp); - } + WRITE(p, " %s = SampleEFB(uv0, pixel_size, %d).%s;\n", dest, xoffset, colorComp); } static void WriteColorToIntensity(char*& p, const char* src, const char* dest) @@ -239,7 +291,7 @@ static void WriteEncoderEnd(char*& p) static void WriteI8Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::R8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::R8, ApiType); WRITE(p, " float3 texSample;\n"); WriteSampleColor(p, "rgb", "texSample", 0, ApiType, params); @@ -261,7 +313,7 @@ static void WriteI8Encoder(char*& p, APIType ApiType, const EFBCopyParams& param static void WriteI4Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::R4, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::R4, ApiType); WRITE(p, " float3 texSample;\n"); WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); @@ -302,7 +354,7 @@ static void WriteI4Encoder(char*& p, APIType ApiType, const EFBCopyParams& param static void WriteIA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RA8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RA8, ApiType); WRITE(p, " float4 texSample;\n"); WriteSampleColor(p, "rgba", "texSample", 0, ApiType, params); @@ -320,7 +372,7 @@ static void WriteIA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& para static void WriteIA4Encoder(char*& p, 
APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RA4, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RA4, ApiType); WRITE(p, " float4 texSample;\n"); WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); @@ -352,7 +404,7 @@ static void WriteIA4Encoder(char*& p, APIType ApiType, const EFBCopyParams& para static void WriteRGB565Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RGB565, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RGB565, ApiType); WRITE(p, " float3 texSample0;\n"); WRITE(p, " float3 texSample1;\n"); @@ -377,7 +429,7 @@ static void WriteRGB565Encoder(char*& p, APIType ApiType, const EFBCopyParams& p static void WriteRGB5A3Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RGB5A3, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RGB5A3, ApiType); WRITE(p, " float4 texSample;\n"); WRITE(p, " float color0;\n"); @@ -441,7 +493,7 @@ static void WriteRGB5A3Encoder(char*& p, APIType ApiType, const EFBCopyParams& p static void WriteRGBA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RGBA8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RGBA8, ApiType); WRITE(p, " float4 texSample;\n"); WRITE(p, " float4 color0;\n"); @@ -466,7 +518,7 @@ static void WriteRGBA8Encoder(char*& p, APIType ApiType, const EFBCopyParams& pa static void WriteC4Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::R4, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::R4, ApiType); WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); @@ -488,7 +540,7 @@ static void WriteC4Encoder(char*& p, const char* comp, APIType ApiType, const EF static void WriteC8Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::R8, ApiType); + WriteSwizzler(p, 
params, EFBCopyFormat::R8, ApiType); WriteSampleColor(p, comp, "ocol0.b", 0, ApiType, params); WriteSampleColor(p, comp, "ocol0.g", 1, ApiType, params); @@ -501,7 +553,7 @@ static void WriteC8Encoder(char*& p, const char* comp, APIType ApiType, const EF static void WriteCC4Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RA4, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RA4, ApiType); WRITE(p, " float2 texSample;\n"); WRITE(p, " float4 color0;\n"); WRITE(p, " float4 color1;\n"); @@ -532,7 +584,7 @@ static void WriteCC4Encoder(char*& p, const char* comp, APIType ApiType, static void WriteCC8Encoder(char*& p, const char* comp, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RA8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RA8, ApiType); WriteSampleColor(p, comp, "ocol0.bg", 0, ApiType, params); WriteSampleColor(p, comp, "ocol0.ra", 1, ApiType, params); @@ -543,7 +595,7 @@ static void WriteCC8Encoder(char*& p, const char* comp, APIType ApiType, static void WriteZ8Encoder(char*& p, const char* multiplier, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::G8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::G8, ApiType); WRITE(p, " float depth;\n"); @@ -564,7 +616,7 @@ static void WriteZ8Encoder(char*& p, const char* multiplier, APIType ApiType, static void WriteZ16Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RA8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RA8, ApiType); WRITE(p, " float depth;\n"); WRITE(p, " float3 expanded;\n"); @@ -596,7 +648,7 @@ static void WriteZ16Encoder(char*& p, APIType ApiType, const EFBCopyParams& para static void WriteZ16LEncoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::GB8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::GB8, ApiType); WRITE(p, " float depth;\n"); WRITE(p, 
" float3 expanded;\n"); @@ -632,7 +684,7 @@ static void WriteZ16LEncoder(char*& p, APIType ApiType, const EFBCopyParams& par static void WriteZ24Encoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::RGBA8, ApiType); + WriteSwizzler(p, params, EFBCopyFormat::RGBA8, ApiType); WRITE(p, " float depth0;\n"); WRITE(p, " float depth1;\n"); @@ -672,18 +724,21 @@ static void WriteZ24Encoder(char*& p, APIType ApiType, const EFBCopyParams& para static void WriteXFBEncoder(char*& p, APIType ApiType, const EFBCopyParams& params) { - WriteSwizzler(p, EFBCopyFormat::XFB, ApiType); - - WRITE(p, " float3 y_const = float3(0.257, 0.504, 0.098);\n"); - WRITE(p, " float3 u_const = float3(-0.148, -0.291, 0.439);\n"); - WRITE(p, " float3 v_const = float3(0.439, -0.368, -0.071);\n"); - WRITE(p, " float3 color0;\n"); - WRITE(p, " float3 color1;\n"); + WriteSwizzler(p, params, EFBCopyFormat::XFB, ApiType); + WRITE(p, "float3 color0, color1;\n"); WriteSampleColor(p, "rgb", "color0", 0, ApiType, params); WriteSampleColor(p, "rgb", "color1", 1, ApiType, params); - WRITE(p, " float3 average = (color0 + color1) * 0.5;\n"); + // Gamma is only applied to XFB copies. + WRITE(p, " color0 = pow(color0, gamma_rcp.xxx);\n"); + WRITE(p, " color1 = pow(color1, gamma_rcp.xxx);\n"); + + // Convert to YUV. 
+ WRITE(p, " const float3 y_const = float3(0.257, 0.504, 0.098);\n"); + WRITE(p, " const float3 u_const = float3(-0.148, -0.291, 0.439);\n"); + WRITE(p, " const float3 v_const = float3(0.439, -0.368, -0.071);\n"); + WRITE(p, " float3 average = (color0 + color1) * 0.5;\n"); WRITE(p, " ocol0.b = dot(color0, y_const) + 0.0625;\n"); WRITE(p, " ocol0.g = dot(average, u_const) + 0.5;\n"); WRITE(p, " ocol0.r = dot(color1, y_const) + 0.0625;\n"); diff --git a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp index 74ab4b38d2..dce823ba5a 100644 --- a/Source/Core/VideoCommon/TextureConverterShaderGen.cpp +++ b/Source/Core/VideoCommon/TextureConverterShaderGen.cpp @@ -38,34 +38,66 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) if (api_type == APIType::OpenGL) { out.Write("SAMPLER_BINDING(9) uniform sampler2DArray samp9;\n" - "#define samp0 samp9\n" - "#define uv0 f_uv0\n" + "uniform float3 filter_coefficients;\n" + "uniform float gamma_rcp;\n" + "uniform float2 clamp_tb;\n" + "uniform float pixel_height;\n"); + out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n" + " return texture(samp9, float3(uv.x, clamp(uv.y - (y_offset * pixel_height), " + "clamp_tb.x, clamp_tb.y), %s));\n" + "}\n", + mono_depth ? "0.0" : "uv.z"); + out.Write("#define uv0 f_uv0\n" "in vec3 uv0;\n" "out vec4 ocol0;\n" - "void main(){\n" - " vec4 texcol = texture(samp0, %s);\n", - mono_depth ? 
"vec3(uv0.xy, 0.0)" : "uv0"); + "void main(){\n"); } else if (api_type == APIType::Vulkan) { - out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n" - "layout(location = 0) in vec3 uv0;\n" + out.Write("UBO_BINDING(std140, 1) uniform PSBlock {\n" + " float3 filter_coefficients;\n" + " float gamma_rcp;\n" + " float2 clamp_tb;\n" + " float pixel_height;\n" + "};\n"); + out.Write("SAMPLER_BINDING(0) uniform sampler2DArray samp0;\n"); + out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n" + " return texture(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " + "clamp_tb.x, clamp_tb.y), %s));\n" + "}\n", + mono_depth ? "0.0" : "uv.z"); + out.Write("layout(location = 0) in vec3 uv0;\n" "layout(location = 1) in vec4 col0;\n" "layout(location = 0) out vec4 ocol0;" - "void main(){\n" - " vec4 texcol = texture(samp0, %s);\n", - mono_depth ? "vec3(uv0.xy, 0.0)" : "uv0"); + "void main(){\n"); } else if (api_type == APIType::D3D) { out.Write("Texture2DArray tex0 : register(t0);\n" "SamplerState samp0 : register(s0);\n" - "void main(out float4 ocol0 : SV_Target,\n" + "uniform float3 filter_coefficients;\n" + "uniform float gamma_rcp;\n" + "uniform float2 clamp_tb;\n" + "uniform float pixel_height;\n\n"); + out.Write("float4 SampleEFB(float3 uv, float y_offset) {\n" + " return tex0.Sample(samp0, float3(uv.x, clamp(uv.y + (y_offset * pixel_height), " + "clamp_tb.x, clamp_tb.y), %s));\n" + "}\n", + mono_depth ? "0.0" : "uv.z"); + out.Write("void main(out float4 ocol0 : SV_Target,\n" " in float4 pos : SV_Position,\n" - " in float3 uv0 : TEXCOORD0) {\n" - " float4 texcol = tex0.Sample(samp0, uv0);\n"); + " in float3 uv0 : TEXCOORD0) {\n"); } + // The copy filter applies to both color and depth copies. This has been verified on hardware. + // The filter is only applied to the RGB channels, the alpha channel is left intact. 
+ out.Write(" float4 prev_row = SampleEFB(uv0, -1.0f);\n" + " float4 current_row = SampleEFB(uv0, 0.0f);\n" + " float4 next_row = SampleEFB(uv0, 1.0f);\n" + " float4 texcol = float4(prev_row.rgb * filter_coefficients[0] +\n" + " current_row.rgb * filter_coefficients[1] +\n" + " next_row.rgb * filter_coefficients[2], current_row.a);\n"); + if (uid_data->is_depth_copy) { if (api_type == APIType::D3D || api_type == APIType::Vulkan) @@ -223,8 +255,8 @@ ShaderCode GenerateShader(APIType api_type, const UidData* uid_data) out.Write(" ocol0 = texcol;\n"); break; - case EFBCopyFormat::XFB: // XFB copy, we just pretend it's an RGBX copy - out.Write(" ocol0 = float4(texcol.rgb, 1.0);\n"); + case EFBCopyFormat::XFB: + out.Write(" ocol0 = float4(pow(texcol.rgb, gamma_rcp.xxx), texcol.a);\n"); break; default: diff --git a/Source/Core/VideoCommon/VideoConfig.cpp b/Source/Core/VideoCommon/VideoConfig.cpp index d08d5b793a..abdf27dcd1 100644 --- a/Source/Core/VideoCommon/VideoConfig.cpp +++ b/Source/Core/VideoCommon/VideoConfig.cpp @@ -120,6 +120,7 @@ void VideoConfig::Refresh() iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY); sPostProcessingShader = Config::Get(Config::GFX_ENHANCE_POST_SHADER); bForceTrueColor = Config::Get(Config::GFX_ENHANCE_FORCE_TRUE_COLOR); + bDisableCopyFilter = Config::Get(Config::GFX_ENHANCE_DISABLE_COPY_FILTER); stereo_mode = static_cast(Config::Get(Config::GFX_STEREO_MODE)); iStereoDepth = Config::Get(Config::GFX_STEREO_DEPTH); diff --git a/Source/Core/VideoCommon/VideoConfig.h b/Source/Core/VideoCommon/VideoConfig.h index cc8e4cde88..03ccfca3ba 100644 --- a/Source/Core/VideoCommon/VideoConfig.h +++ b/Source/Core/VideoCommon/VideoConfig.h @@ -73,6 +73,7 @@ struct VideoConfig final int iMaxAnisotropy; std::string sPostProcessingShader; bool bForceTrueColor; + bool bDisableCopyFilter; // Information bool bShowFPS;