GS/HW: Use box filtering for source downsampling

This commit is contained in:
Stenzek 2024-06-26 14:09:10 +10:00 committed by Connor McLaughlin
parent 1627bd6cc7
commit 5b45913a6a
18 changed files with 218 additions and 3 deletions

View File

@ -75,6 +75,25 @@ float ps_depth_copy(PS_INPUT input) : SV_Depth
return sample_c(input.t).r;
}
// Box-filter downsample: every output pixel is the sum of a
// DownsampleFactor x DownsampleFactor block of source texels divided by Weight.
//
// The constants come in through the shared merge constant buffer. The host side
// packs them as { weight, pad[3], clamp_min.x, clamp_min.y, downsample_factor, pad },
// so after the float4 BGColor slot the three integers hold, in order,
// clamp_min.x, clamp_min.y and downsample_factor.
// NOTE(review): assumes the cbuffer declares EMODA, EMODC, DOFFSET in that order
// directly after BGColor - confirm against the cbuffer declaration.
PS_OUTPUT ps_downsample_copy(PS_INPUT input)
{
	const int2 ClampMin = int2(EMODA, EMODC);
	const int DownsampleFactor = DOFFSET;
	const float Weight = BGColor.x;

	// Top-left source texel of the block, never below ClampMin.
	const int2 coord = max(int2(input.p.xy) * DownsampleFactor, ClampMin);

	PS_OUTPUT output;
	output.c = (float4)0;
	for (int yoff = 0; yoff < DownsampleFactor; yoff++)
	{
		for (int xoff = 0; xoff < DownsampleFactor; xoff++)
			output.c += Texture.Load(int3(coord + int2(xoff, yoff), 0));
	}

	// Normalise the accumulated sum into an average.
	output.c /= Weight;
	return output;
}
PS_OUTPUT ps_filter_transparency(PS_INPUT input)
{
PS_OUTPUT output;

View File

@ -66,6 +66,24 @@ void ps_depth_copy()
}
#endif
#ifdef ps_downsample_copy
uniform ivec2 ClampMin;
uniform int DownsampleFactor;
uniform float Weight;

// Box-filter downsample: sums a DownsampleFactor x DownsampleFactor block of
// source texels and divides the sum by Weight.
void ps_downsample_copy()
{
	// Top-left source texel of this fragment's block, never below ClampMin.
	ivec2 base = max(ivec2(gl_FragCoord.xy) * DownsampleFactor, ClampMin);

	vec4 sum = vec4(0);
	for (int dy = 0; dy < DownsampleFactor; dy++)
	{
		for (int dx = 0; dx < DownsampleFactor; dx++)
		{
			ivec2 texel = base + ivec2(dx, dy);
			sum += texelFetch(TextureSampler, texel, 0);
		}
	}

	SV_Target0 = sum / Weight;
}
#endif
#ifdef ps_convert_rgba8_16bits
// Need to be careful with precision here, it can break games like Spider-Man 3 and Dogs Life
void ps_convert_rgba8_16bits()

View File

@ -59,6 +59,28 @@ void ps_depth_copy()
}
#endif
#ifdef ps_downsample_copy
// Push constants supplied by the host for the downsample pass.
layout(push_constant) uniform cb10
{
	ivec2 ClampMin;
	int DownsampleFactor;
	int pad0;
	float Weight;
	vec3 pad1;
};

// Box-filter downsample: sums a DownsampleFactor x DownsampleFactor block of
// source texels and divides the sum by Weight.
void ps_downsample_copy()
{
	// Top-left source texel of this fragment's block, never below ClampMin.
	ivec2 base = max(ivec2(gl_FragCoord.xy) * DownsampleFactor, ClampMin);

	vec4 sum = vec4(0);
	for (int dy = 0; dy < DownsampleFactor; dy++)
	{
		for (int dx = 0; dx < DownsampleFactor; dx++)
		{
			ivec2 texel = base + ivec2(dx, dy);
			sum += texelFetch(samp0, texel, 0);
		}
	}

	o_col0 = sum / Weight;
}
#endif
#ifdef ps_filter_transparency
void ps_filter_transparency()
{

View File

@ -65,6 +65,7 @@ const char* shaderName(ShaderConvert value)
case ShaderConvert::RGBA8_TO_FLOAT16_BILN: return "ps_convert_rgba8_float16_biln";
case ShaderConvert::RGB5A1_TO_FLOAT16_BILN: return "ps_convert_rgb5a1_float16_biln";
case ShaderConvert::DEPTH_COPY: return "ps_depth_copy";
case ShaderConvert::DOWNSAMPLE_COPY: return "ps_downsample_copy";
case ShaderConvert::RGBA_TO_8I: return "ps_convert_rgba_8i";
case ShaderConvert::CLUT_4: return "ps_convert_clut_4";
case ShaderConvert::CLUT_8: return "ps_convert_clut_8";

View File

@ -40,6 +40,7 @@ enum class ShaderConvert
RGBA8_TO_FLOAT16_BILN,
RGB5A1_TO_FLOAT16_BILN,
DEPTH_COPY,
DOWNSAMPLE_COPY,
RGBA_TO_8I,
CLUT_4,
CLUT_8,
@ -986,6 +987,9 @@ public:
/// Converts a colour format to an indexed format texture.
virtual void ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM) = 0;
/// Uses box downsampling to resize a texture.
/// sTex is read and the result is drawn over the full rectangle of dTex.
/// downsample_factor is the integer edge length of the source block averaged per destination pixel.
/// clamp_min is the lowest source texel coordinate a block is allowed to start at.
virtual void FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 downsample_factor, const GSVector2i& clamp_min) = 0;
virtual void RenderHW(GSHWDrawConfig& config) = 0;
virtual void ClearSamplerCache() = 0;

View File

@ -1458,6 +1458,26 @@ void GSDevice11::ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offs
StretchRect(sTex, GSVector4::zero(), dTex, dRect, m_convert.ps[static_cast<int>(shader)].get(), m_merge.cb.get(), nullptr, false);
}
/// Box-downsamples sTex into the full rectangle of dTex with the DOWNSAMPLE_COPY convert shader.
void GSDevice11::FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 downsample_factor, const GSVector2i& clamp_min)
{
	// Constant block uploaded into the merge constant buffer for the downsample shader.
	struct DownsampleConstants
	{
		float weight;
		float pad0[3];
		GSVector2i clamp_min;
		int downsample_factor;
		int pad1;
	};

	// The shader divides the accumulated sum by this: one unit per texel in the block.
	const float texel_count = static_cast<float>(downsample_factor * downsample_factor);
	const DownsampleConstants constants = {
		texel_count,
		{},
		clamp_min,
		static_cast<int>(downsample_factor),
		0,
	};
	m_ctx->UpdateSubresource(m_merge.cb.get(), 0, nullptr, &constants, 0, 0);

	// Source rect is left as zero; the destination rect covers the whole target.
	const GSVector4 target_rect = GSVector4(dTex->GetRect());
	StretchRect(sTex, GSVector4::zero(), dTex, target_rect,
		m_convert.ps[static_cast<int>(ShaderConvert::DOWNSAMPLE_COPY)].get(), m_merge.cb.get(), nullptr, false);
}
void GSDevice11::DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader)
{
IASetInputLayout(m_convert.il.get());

View File

@ -303,6 +303,7 @@ public:
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override;
void UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM) override;
void FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 downsample_factor, const GSVector2i& clamp_min) override;
void DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader) override;
void DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, const GSVector2& ds);

View File

@ -1480,6 +1480,28 @@ void GSDevice12::ConvertToIndexedTexture(
m_convert[static_cast<int>(shader)].get(), false, true);
}
/// Box-downsamples sTex into the full rectangle of dTex with the DOWNSAMPLE_COPY convert pipeline.
void GSDevice12::FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 downsample_factor, const GSVector2i& clamp_min)
{
	// Constant block handed to the shader through the utility push constants.
	struct DownsampleConstants
	{
		float weight;
		float pad0[3];
		GSVector2i clamp_min;
		int downsample_factor;
		int pad1;
	};

	// The shader divides the accumulated sum by this: one unit per texel in the block.
	const float texel_count = static_cast<float>(downsample_factor * downsample_factor);
	const DownsampleConstants constants = {
		texel_count,
		{},
		clamp_min,
		static_cast<int>(downsample_factor),
		0,
	};

	SetUtilityRootSignature();
	SetUtilityPushConstants(&constants, sizeof(constants));

	// Source rect is left as zero; the destination rect covers the whole target.
	const GSVector4 target_rect = GSVector4(dTex->GetRect());
	GSTexture12* const source = static_cast<GSTexture12*>(sTex);
	GSTexture12* const target = static_cast<GSTexture12*>(dTex);
	DoStretchRect(source, GSVector4::zero(), target, target_rect,
		m_convert[static_cast<int>(ShaderConvert::DOWNSAMPLE_COPY)].get(), false, true);
}
void GSDevice12::DrawMultiStretchRects(
const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader)
{

View File

@ -438,6 +438,7 @@ public:
GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM,
GSTexture* dTex, u32 DBW, u32 DPSM) override;
void FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 downsample_factor, const GSVector2i& clamp_min) override;
void DrawMultiStretchRects(
const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader) override;

View File

@ -5283,8 +5283,25 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
}
if (m_downscale_source)
{
const GSVector4 dst_rect = GSVector4(0, 0, src_unscaled_size.x, src_unscaled_size.y);
g_gs_device->StretchRect(src_target->m_texture, GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f), src_copy.get(), dst_rect, src_target->m_texture->IsDepthStencil() ? ShaderConvert::DEPTH_COPY : ShaderConvert::COPY, false);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
// Can't use box filtering on depth (yet), or fractional scales.
if (src_target->m_texture->IsDepthStencil() || std::floor(src_target->GetScale()) != src_target->GetScale())
{
const GSVector4 dst_rect = GSVector4(GSVector4i::loadh(src_unscaled_size));
g_gs_device->StretchRect(src_target->m_texture, GSVector4::cxpr(0.0f, 0.0f, 1.0f, 1.0f), src_copy.get(), dst_rect,
src_target->m_texture->IsDepthStencil() ? ShaderConvert::DEPTH_COPY : ShaderConvert::COPY, false);
}
else
{
// When using native HPO, the top-left column/row of pixels are often not drawn. Clamp these away to avoid sampling black,
// causing bleeding into the edges of the downsampled texture.
const u32 downsample_factor = static_cast<u32>(src_target->GetScale());
const GSVector2i clamp_min = (GSConfig.UserHacks_HalfPixelOffset != GSHalfPixelOffset::Native) ?
GSVector2i(0, 0) :
GSVector2i(downsample_factor, downsample_factor);
g_gs_device->FilteredDownsampleTexture(src_target->m_texture, src_copy.get(), downsample_factor, clamp_min);
}
}
else
{
@ -7135,7 +7152,6 @@ bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Sourc
const GSVector4i r_full(0, 0, tw, th);
g_gs_device->CopyRect(tex->m_texture, rt, r_full, 0, 0);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
g_gs_device->StretchRect(tex->m_texture, sRect, rt, dRect, ShaderConvert::COPY, m_vt.IsRealLinear());
g_perfmon.Put(GSPerfMon::TextureCopies, 1);

View File

@ -414,6 +414,7 @@ public:
void DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader) override;
void UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM) override;
void FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 downsample_factor, const GSVector2i& clamp_min) override;
void FlushClears(GSTexture* tex);

View File

@ -1116,6 +1116,7 @@ bool GSDeviceMTL::Create(GSVSyncMode vsync_mode, bool allow_present_throttle)
pdesc.depthAttachmentPixelFormat = ConvertPixelFormat(GSTexture::Format::DepthStencil);
break;
case ShaderConvert::COPY:
case ShaderConvert::DOWNSAMPLE_COPY:
case ShaderConvert::RGBA_TO_8I: // Yes really
case ShaderConvert::RTA_CORRECTION:
case ShaderConvert::RTA_DECORRECTION:
@ -1695,6 +1696,20 @@ void GSDeviceMTL::ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 off
DoStretchRect(sTex, GSVector4::zero(), dTex, dRect, pipeline, false, LoadAction::DontCareIfFull, &uniform, sizeof(uniform));
}}
/// Box-downsamples sTex into the full rectangle of dTex with the DOWNSAMPLE_COPY convert pipeline.
/// Raises an NSException if no pipeline exists for that shader.
void GSDeviceMTL::FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 downsample_factor, const GSVector2i& clamp_min)
{ @autoreleasepool {
	const ShaderConvert shader = ShaderConvert::DOWNSAMPLE_COPY;
	id<MTLRenderPipelineState> pipeline = m_convert_pipeline[static_cast<int>(shader)];
	if (!pipeline)
		[NSException raise:@"StretchRect Missing Pipeline" format:@"No pipeline for %d", static_cast<int>(shader)];

	// Both clamp components are forwarded (x and y), followed by the block edge length
	// and the number of texels per block that the shader divides the sum by.
	GSMTLDownsamplePSUniform uniform = { {static_cast<uint>(clamp_min.x), static_cast<uint>(clamp_min.y)}, downsample_factor,
		static_cast<float>(downsample_factor * downsample_factor) };

	// Source rect is left as zero; the destination rect covers the whole target.
	const GSVector4 dRect = GSVector4(dTex->GetRect());
	DoStretchRect(sTex, GSVector4::zero(), dTex, dRect, pipeline, false, LoadAction::DontCareIfFull, &uniform, sizeof(uniform));
}}
void GSDeviceMTL::FlushClears(GSTexture* tex)
{
if (tex)

View File

@ -66,6 +66,13 @@ struct GSMTLIndexedConvertPSUniform
uint dbw;
};
/// Uniforms consumed by the ps_downsample_copy fragment shader.
struct GSMTLDownsamplePSUniform
{
	// Lowest source texel coordinate a sampled block may start at.
	vector_uint2 clamp_min;
	// Edge length, in source texels, of the block summed for each output pixel.
	uint downsample_factor;
	// Divisor applied to the summed block.
	float weight;
};
struct GSMTLMainVertex
{
vector_float2 st;

View File

@ -182,6 +182,22 @@ fragment DepthOut ps_depth_copy(ConvertShaderData data [[stage_in]], ConvertPSDe
return res.sample(data.t);
}
// Box-filter downsample: sums a downsample_factor x downsample_factor block of
// source texels and divides the sum by uniform.weight.
fragment float4 ps_downsample_copy(ConvertShaderData data [[stage_in]],
	texture2d<float> texture [[texture(GSMTLTextureIndexNonHW)]],
	constant GSMTLDownsamplePSUniform& uniform [[buffer(GSMTLBufferIndexUniforms)]])
{
	const uint factor = uniform.downsample_factor;

	// Top-left source texel of this fragment's block, never below clamp_min.
	const uint2 base = max(uint2(data.p.xy) * factor, uniform.clamp_min);

	float4 sum = float4(0.0, 0.0, 0.0, 0.0);
	for (uint dy = 0; dy < factor; dy++)
	{
		for (uint dx = 0; dx < factor; dx++)
		{
			const uint2 texel = base + uint2(dx, dy);
			sum += texture.read(texel, 0);
		}
	}

	sum /= uniform.weight;
	return sum;
}
static float rgba8_to_depth32(half4 unorm)
{
return float(as_type<uint>(uchar4(unorm * 255.5h))) * 0x1p-32f;

View File

@ -362,6 +362,12 @@ bool GSDeviceOGL::Create(GSVSyncMode vsync_mode, bool allow_present_throttle)
m_convert.ps[i].RegisterUniform("offset");
m_convert.ps[i].RegisterUniform("scale");
}
else if (static_cast<ShaderConvert>(i) == ShaderConvert::DOWNSAMPLE_COPY)
{
m_convert.ps[i].RegisterUniform("ClampMin");
m_convert.ps[i].RegisterUniform("DownsampleFactor");
m_convert.ps[i].RegisterUniform("Weight");
}
}
const PSSamplerSelector point;
@ -1601,6 +1607,29 @@ void GSDeviceOGL::ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 off
DrawStretchRect(GSVector4::zero(), dRect, dTex->GetSize());
}
/// Box-downsamples sTex into the full rectangle of dTex with the DOWNSAMPLE_COPY convert program.
void GSDeviceOGL::FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 downsample_factor, const GSVector2i& clamp_min)
{
CommitClear(sTex, false);
constexpr ShaderConvert shader = ShaderConvert::DOWNSAMPLE_COPY;
GLProgram& prog = m_convert.ps[static_cast<int>(shader)];
prog.Bind();
// Uniform slots 0..2 are used for the clamp minimum, the block edge length and the divisor.
// NOTE(review): presumably these indices follow the order the uniforms were registered in
// (ClampMin, DownsampleFactor, Weight) - confirm against the program setup.
prog.Uniform2iv(0, clamp_min.v);
prog.Uniform1i(1, downsample_factor);
// The shader divides the summed block by this: one unit per texel in the block.
prog.Uniform1f(2, static_cast<float>(downsample_factor * downsample_factor));
OMSetDepthStencilState(m_convert.dss);
OMSetBlendState(false);
OMSetColorMaskState();
OMSetRenderTargets(dTex, nullptr);
PSSetShaderResource(0, sTex);
PSSetSamplerState(m_convert.pt);
// Source rect is left as zero; the destination rect covers the whole target.
const GSVector4 dRect = GSVector4(dTex->GetRect());
DrawStretchRect(GSVector4::zero(), dRect, dTex->GetSize());
}
void GSDeviceOGL::DrawStretchRect(const GSVector4& sRect, const GSVector4& dRect, const GSVector2i& ds)
{
// Original code from DX

View File

@ -324,6 +324,7 @@ public:
void PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, PresentShader shader, float shaderTime, bool linear) override;
void UpdateCLUTTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM, GSTexture* dTex, u32 DBW, u32 DPSM) override;
void FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 downsample_factor, const GSVector2i& clamp_min) override;
void DrawMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, GSTexture* dTex, ShaderConvert shader) override;
void DoMultiStretchRects(const MultiStretchRect* rects, u32 num_rects, const GSVector2& ds);

View File

@ -3130,6 +3130,27 @@ void GSDeviceVK::ConvertToIndexedTexture(
m_convert[static_cast<int>(shader)], false, true);
}
/// Box-downsamples sTex into the full rectangle of dTex with the DOWNSAMPLE_COPY convert pipeline.
void GSDeviceVK::FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 downsample_factor, const GSVector2i& clamp_min)
{
	// Constant block handed to the shader through the utility push constants.
	struct DownsampleConstants
	{
		GSVector2i clamp_min;
		int downsample_factor;
		int pad0;
		float weight;
		float pad1[3];
	};

	// The shader divides the accumulated sum by this: one unit per texel in the block.
	const float texel_count = static_cast<float>(downsample_factor * downsample_factor);
	const DownsampleConstants constants = {
		clamp_min,
		static_cast<int>(downsample_factor),
		0,
		texel_count,
		{},
	};
	SetUtilityPushConstants(&constants, sizeof(constants));

	// Source rect is left as zero; the destination rect covers the whole target.
	const GSVector4 target_rect = GSVector4(dTex->GetRect());
	GSTextureVK* const source = static_cast<GSTextureVK*>(sTex);
	GSTextureVK* const target = static_cast<GSTextureVK*>(dTex);
	DoStretchRect(source, GSVector4::zero(), target, target_rect,
		m_convert[static_cast<int>(ShaderConvert::DOWNSAMPLE_COPY)], false, true);
}
void GSDeviceVK::DoMerge(GSTexture* sTex[3], GSVector4* sRect, GSTexture* dTex, GSVector4* dRect,
const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, u32 c, const bool linear)
{

View File

@ -543,6 +543,7 @@ public:
GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, GSTexture* dTex, u32 dOffset, u32 dSize) override;
void ConvertToIndexedTexture(GSTexture* sTex, float sScale, u32 offsetX, u32 offsetY, u32 SBW, u32 SPSM,
GSTexture* dTex, u32 DBW, u32 DPSM) override;
void FilteredDownsampleTexture(GSTexture* sTex, GSTexture* dTex, u32 downsample_factor, const GSVector2i& clamp_min) override;
void SetupDATE(GSTexture* rt, GSTexture* ds, SetDATM datm, const GSVector4i& bbox);
GSTextureVK* SetupPrimitiveTrackingDATE(GSHWDrawConfig& config);