diff --git a/src/core/gpu_hw_shadergen.cpp b/src/core/gpu_hw_shadergen.cpp index 6047d7b77..e2c18d01b 100644 --- a/src/core/gpu_hw_shadergen.cpp +++ b/src/core/gpu_hw_shadergen.cpp @@ -1752,19 +1752,54 @@ std::string GPU_HW_ShaderGen::GenerateBoxSampleDownsampleFragmentShader(u32 fact return ss.str(); } -std::string GPU_HW_ShaderGen::GenerateReplacementMergeFragmentShader(bool semitransparent) +std::string GPU_HW_ShaderGen::GenerateReplacementMergeFragmentShader(bool semitransparent, bool bilinear_filter) { std::stringstream ss; WriteHeader(ss); DefineMacro(ss, "SEMITRANSPARENT", semitransparent); - DeclareUniformBuffer(ss, {"float4 u_src_rect"}, true); + DefineMacro(ss, "BILINEAR_FILTER", bilinear_filter); + DeclareUniformBuffer(ss, {"float4 u_texture_size"}, true); DeclareTexture(ss, "samp0", 0); DeclareFragmentEntryPoint(ss, 0, 1); ss << R"( { - float2 coords = u_src_rect.xy + v_tex0 * u_src_rect.zw; - float4 color = SAMPLE_TEXTURE(samp0, coords); +#if BILINEAR_FILTER + // Compute the coordinates of the four texels we will be interpolating between. + // Clamp this to the triangle texture coordinates. + float2 coords = v_tex0 * u_texture_size.xy; + float2 texel_top_left = frac(coords) - float2(0.5, 0.5); + float2 texel_offset = sign(texel_top_left); + float4 fcoords = max(coords.xyxy + float4(0.0, 0.0, texel_offset.x, texel_offset.y), + float4(0.0, 0.0, 0.0, 0.0)) * u_texture_size.zwzw; + + // Load four texels. + float4 s00 = SAMPLE_TEXTURE_LEVEL(samp0, fcoords.xy, 0.0); + float4 s10 = SAMPLE_TEXTURE_LEVEL(samp0, fcoords.zy, 0.0); + float4 s01 = SAMPLE_TEXTURE_LEVEL(samp0, fcoords.xw, 0.0); + float4 s11 = SAMPLE_TEXTURE_LEVEL(samp0, fcoords.zw, 0.0); + + // Bilinearly interpolate. + float2 weights = abs(texel_top_left); + float4 color = lerp(lerp(s00, s10, weights.x), lerp(s01, s11, weights.x), weights.y); + + #if !SEMITRANSPARENT + // Compute alpha from how many texels aren't pixel color 0000h. + float a00 = float(VECTOR_NEQ(s00, float4(0.0, 0.0, 0.0, 0.0))); + float a10 = float(VECTOR_NEQ(s10, float4(0.0, 0.0, 0.0, 0.0))); + float a01 = float(VECTOR_NEQ(s01, float4(0.0, 0.0, 0.0, 0.0))); + float a11 = float(VECTOR_NEQ(s11, float4(0.0, 0.0, 0.0, 0.0))); + color.a = lerp(lerp(a00, a10, weights.x), lerp(a01, a11, weights.x), weights.y); + + // Compensate for partially transparent sampling. + color.rgb /= (color.a != 0.0) ? color.a : 1.0; + + // Use binary alpha. + color.a = (color.a >= 0.5) ? 1.0 : 0.0; + #endif +#else + float4 color = SAMPLE_TEXTURE_LEVEL(samp0, v_tex0, 0.0); +#endif o_col0.rgb = color.rgb; // Alpha processing. diff --git a/src/core/gpu_hw_shadergen.h b/src/core/gpu_hw_shadergen.h index 9ebbbc193..9cba042c9 100644 --- a/src/core/gpu_hw_shadergen.h +++ b/src/core/gpu_hw_shadergen.h @@ -36,7 +36,7 @@ public: std::string GenerateAdaptiveDownsampleCompositeFragmentShader(); std::string GenerateBoxSampleDownsampleFragmentShader(u32 factor); - std::string GenerateReplacementMergeFragmentShader(bool semitransparent); + std::string GenerateReplacementMergeFragmentShader(bool semitransparent, bool bilinear_filter); private: ALWAYS_INLINE bool UsingMSAA() const { return m_multisamples > 1; } diff --git a/src/core/gpu_hw_texture_cache.cpp b/src/core/gpu_hw_texture_cache.cpp index 873a0b729..b29be92a7 100644 --- a/src/core/gpu_hw_texture_cache.cpp +++ b/src/core/gpu_hw_texture_cache.cpp @@ -514,7 +514,6 @@ static std::vector> s_hash_cache_purge_list; static std::vector s_temp_vram_write_list; static std::unique_ptr s_replacement_texture_render_target; -static std::unique_ptr s_replacement_init_pipeline; static std::unique_ptr s_replacement_draw_pipeline; // copies alpha as-is static std::unique_ptr s_replacement_semitransparent_draw_pipeline; // inverts alpha (i.e. semitransparent) @@ -554,6 +553,7 @@ bool GPUTextureCache::IsDumpingVRAMWriteTextures() bool GPUTextureCache::Initialize() { + LoadLocalConfiguration(false, false); UpdateVRAMTrackingState(); if (!CompilePipelines()) return false; @@ -571,13 +571,22 @@ void GPUTextureCache::UpdateSettings(const Settings& old_settings) Invalidate(); DestroyPipelines(); - if (!CompilePipelines()) + if (!CompilePipelines()) [[unlikely]] Panic("Failed to compile pipelines on TC settings change"); } // Reload textures if configuration changes. + const bool old_replacement_scale_linear_filter = s_config.replacement_scale_linear_filter; if (LoadLocalConfiguration(false, false)) + { + if (s_config.replacement_scale_linear_filter != old_replacement_scale_linear_filter) + { + if (!CompilePipelines()) [[unlikely]] + Panic("Failed to compile pipelines on TC replacement settings change"); + } + ReloadTextureReplacements(false); + } } bool GPUTextureCache::DoState(StateWrapper& sw, bool skip) @@ -755,24 +764,18 @@ bool GPUTextureCache::CompilePipelines() plconfig.vertex_shader = fullscreen_quad_vertex_shader.get(); - std::unique_ptr fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateCopyFragmentShader()); - if (!fs) - return false; - plconfig.fragment_shader = fs.get(); - if (!(s_replacement_init_pipeline = g_gpu_device->CreatePipeline(plconfig))) - return false; - - g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateReplacementMergeFragmentShader(false)); + std::unique_ptr fs = g_gpu_device->CreateShader( + GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateReplacementMergeFragmentShader(false, s_config.replacement_scale_linear_filter)); if (!fs) return false; plconfig.fragment_shader = fs.get(); if (!(s_replacement_draw_pipeline = g_gpu_device->CreatePipeline(plconfig))) return false; - fs = g_gpu_device->CreateShader(GPUShaderStage::Fragment, shadergen.GetLanguage(), - shadergen.GenerateReplacementMergeFragmentShader(true)); + fs = g_gpu_device->CreateShader( + GPUShaderStage::Fragment, shadergen.GetLanguage(), + shadergen.GenerateReplacementMergeFragmentShader(true, s_config.replacement_scale_linear_filter)); if (!fs) return false; plconfig.blend = GPUPipeline::BlendState::GetNoBlendingState(); @@ -785,7 +788,6 @@ bool GPUTextureCache::CompilePipelines() void GPUTextureCache::DestroyPipelines() { - s_replacement_init_pipeline.reset(); s_replacement_draw_pipeline.reset(); s_replacement_semitransparent_draw_pipeline.reset(); } @@ -3232,9 +3234,6 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, max_scale_y = std::min(max_scale_y, max_possible_scale); const GSVector4 max_scale_v = GSVector4(max_scale_x, max_scale_y).xyxy(); - GPUSampler* filter = - s_config.replacement_scale_linear_filter ? g_gpu_device->GetLinearSampler() : g_gpu_device->GetNearestSampler(); - const u32 new_width = static_cast(std::ceil(static_cast(TEXTURE_PAGE_WIDTH) * max_scale_x)); const u32 new_height = static_cast(std::ceil(static_cast(TEXTURE_PAGE_HEIGHT) * max_scale_y)); if (!s_replacement_texture_render_target || s_replacement_texture_render_target->GetWidth() < new_width || @@ -3259,16 +3258,17 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, return; } - // TODO: This is AWFUL. Need a better way. - // Linear filtering is also wrong, it should do hard edges for 0000 pixels. - // We could just copy this from the original image... - static constexpr const float u_src_rect[4] = {0.0f, 0.0f, 1.0f, 1.0f}; + // TODO: Use rects instead of fullscreen tris, maybe avoid the copy.. + alignas(VECTOR_ALIGNMENT) float uniforms[4]; + GSVector2 texture_size = GSVector2(GSVector2i(entry->texture->GetWidth(), entry->texture->GetHeight())); + GSVector2::store(&uniforms[0], texture_size); + GSVector2::store(&uniforms[2], GSVector2::cxpr(1.0f) / texture_size); g_gpu_device->InvalidateRenderTarget(s_replacement_texture_render_target.get()); g_gpu_device->SetRenderTarget(s_replacement_texture_render_target.get()); g_gpu_device->SetViewportAndScissor(0, 0, new_width, new_height); - g_gpu_device->SetPipeline(s_replacement_init_pipeline.get()); - g_gpu_device->PushUniformBuffer(u_src_rect, sizeof(u_src_rect)); - g_gpu_device->SetTextureSampler(0, entry->texture.get(), filter); + g_gpu_device->SetPipeline(s_replacement_draw_pipeline.get()); + g_gpu_device->PushUniformBuffer(uniforms, sizeof(uniforms)); + g_gpu_device->SetTextureSampler(0, entry->texture.get(), g_gpu_device->GetNearestSampler()); g_gpu_device->Draw(3, 0); for (const TextureReplacementSubImage& si : subimages) @@ -3280,8 +3280,11 @@ void GPUTextureCache::ApplyTextureReplacements(SourceKey key, HashType tex_hash, continue; const GSVector4i dst_rect = GSVector4i(GSVector4(si.dst_rect) * max_scale_v); + texture_size = GSVector2(GSVector2i(temp_texture->GetWidth(), temp_texture->GetHeight())); + GSVector2::store(&uniforms[0], texture_size); + GSVector2::store(&uniforms[2], GSVector2::cxpr(1.0f) / texture_size); g_gpu_device->SetViewportAndScissor(dst_rect); - g_gpu_device->SetTextureSampler(0, temp_texture.get(), filter); + g_gpu_device->SetTextureSampler(0, temp_texture.get(), g_gpu_device->GetNearestSampler()); g_gpu_device->SetPipeline(si.invert_alpha ? s_replacement_semitransparent_draw_pipeline.get() : s_replacement_draw_pipeline.get()); g_gpu_device->Draw(3, 0); diff --git a/src/core/settings.cpp b/src/core/settings.cpp index 3ddc2adf6..1886c64e5 100644 --- a/src/core/settings.cpp +++ b/src/core/settings.cpp @@ -458,7 +458,7 @@ void Settings::Load(SettingsInterface& si, SettingsInterface& controller_si) texture_replacements.config.convert_copies_to_writes = si.GetBoolValue("TextureReplacements", "ConvertCopiesToWrites", false); texture_replacements.config.replacement_scale_linear_filter = - si.GetBoolValue("TextureReplacements", "ReplacementScaleLinearFilter", true); + si.GetBoolValue("TextureReplacements", "ReplacementScaleLinearFilter", false); texture_replacements.config.max_vram_write_splits = si.GetUIntValue("TextureReplacements", "MaxVRAMWriteSplits", 0u); texture_replacements.config.max_vram_write_coalesce_width = diff --git a/src/core/settings.h b/src/core/settings.h index 19b4b82a7..32daba581 100644 --- a/src/core/settings.h +++ b/src/core/settings.h @@ -252,7 +252,7 @@ struct Settings bool dump_c16_textures : 1 = false; bool reduce_palette_range : 1 = true; bool convert_copies_to_writes : 1 = false; - bool replacement_scale_linear_filter = true; + bool replacement_scale_linear_filter = false; u32 max_vram_write_splits = 0; u32 max_vram_write_coalesce_width = 0; diff --git a/src/duckstation-qt/graphicssettingswidget.cpp b/src/duckstation-qt/graphicssettingswidget.cpp index 3f7f3edee..a18900491 100644 --- a/src/duckstation-qt/graphicssettingswidget.cpp +++ b/src/duckstation-qt/graphicssettingswidget.cpp @@ -1183,6 +1183,9 @@ void GraphicsSettingsWidget::onTextureReplacementOptionsClicked() SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.convertCopiesToWrites, "TextureReplacements", "ConvertCopiesToWrites", default_replacement_config.convert_copies_to_writes); + SettingWidgetBinder::BindWidgetToBoolSetting(sif, dlgui.replacementScaleLinearFilter, "TextureReplacements", + "ReplacementScaleLinearFilter", + default_replacement_config.replacement_scale_linear_filter); SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.maxVRAMWriteSplits, "TextureReplacements", "MaxVRAMWriteSplits", default_replacement_config.max_vram_write_splits); SettingWidgetBinder::BindWidgetToIntSetting(sif, dlgui.maxVRAMWriteCoalesceWidth, "TextureReplacements", @@ -1225,6 +1228,7 @@ void GraphicsSettingsWidget::onTextureReplacementOptionsClicked() config.dump_c16_textures = dlgui.dumpC16Textures->isChecked(); config.reduce_palette_range = dlgui.reducePaletteRange->isChecked(); config.convert_copies_to_writes = dlgui.convertCopiesToWrites->isChecked(); + config.replacement_scale_linear_filter = dlgui.replacementScaleLinearFilter->isChecked(); config.max_vram_write_splits = dlgui.maxVRAMWriteSplits->value(); config.max_vram_write_coalesce_width = dlgui.maxVRAMWriteCoalesceWidth->value(); config.max_vram_write_coalesce_height = dlgui.maxVRAMWriteCoalesceHeight->value(); diff --git a/src/duckstation-qt/texturereplacementsettingsdialog.ui b/src/duckstation-qt/texturereplacementsettingsdialog.ui index ac42db411..7aa63aba2 100644 --- a/src/duckstation-qt/texturereplacementsettingsdialog.ui +++ b/src/duckstation-qt/texturereplacementsettingsdialog.ui @@ -6,8 +6,8 @@ 0 0 - 646 - 587 + 587 + 597 @@ -52,6 +52,9 @@ true + + true + @@ -177,11 +180,22 @@ - - - Convert Copies To Writes - - + + + + + Convert Copies To Writes + + + + + + + Bilinear Replacement Scaling + + + +