GS/HW: First pass having downscaled post processing

This commit is contained in:
refractionpcsx2 2024-06-05 08:29:47 +01:00
parent 55e0464cd3
commit c077d9e2e1
5 changed files with 59 additions and 19 deletions

View File

@ -30,6 +30,7 @@ protected:
bool m_process_texture = false;
bool m_copy_16bit_to_target_shuffle = false;
bool m_same_group_texture_shuffle = false;
bool m_downscale_source = false;
virtual GSTexture* GetOutput(int i, float& scale, int& y_offset) = 0;
virtual GSTexture* GetFeedbackOutput(float& scale) { return nullptr; }

View File

@ -740,7 +740,7 @@ GSVector4 GSRendererHW::RealignTargetTextureCoordinate(const GSTextureCache::Sou
{
if (GSConfig.UserHacks_HalfPixelOffset <= GSHalfPixelOffset::Normal ||
GSConfig.UserHacks_HalfPixelOffset == GSHalfPixelOffset::Native ||
GetUpscaleMultiplier() == 1.0f)
GetUpscaleMultiplier() == 1.0f || m_downscale_source || tex->GetScale() == 1.0f)
{
return GSVector4(0.0f);
}
@ -2614,7 +2614,14 @@ void GSRendererHW::Draw()
m_r = m_r.rintersect(t_size_rect);
float target_scale = GetTextureScaleFactor();
if (isDownscaleDraw(src, no_gaps) || (IsPossibleChannelShuffle() && src && src->m_from_target && src->m_from_target->GetScale() == 1.0f))
{
target_scale = 1.0f;
m_downscale_source = src->m_from_target->GetScale() > 1.0f;
}
else
m_downscale_source = false;
// This upscaling hack is for games which construct P8 textures by drawing a bunch of small sprites in C32,
// then reinterpreting it as P8. We need to keep the off-screen intermediate textures at native resolution,
// but not propagate that through to the normal render targets. Test Case: Crash Wrath of Cortex.
@ -2655,7 +2662,7 @@ void GSRendererHW::Draw()
GSLocalMemory::m_psm[m_cached_ctx.TEX0.PSM].bpp == 16 && GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].bpp == 16) ||
IsPossibleChannelShuffle());
rt = g_texture_cache->LookupTarget(FRAME_TEX0, t_size, target_scale, GSTextureCache::RenderTarget, true,
fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block());
fm, false, force_preload, preserve_rt_rgb, preserve_rt_alpha, unclamped_draw_rect, possible_shuffle, is_possible_mem_clear && FRAME_TEX0.TBP0 != m_cached_ctx.ZBUF.Block(), src);
// Draw skipped because it was a clear and there was no target.
if (!rt)
@ -2686,6 +2693,8 @@ void GSRendererHW::Draw()
return;
}
}
else
target_scale = rt->GetScale();
// The target might have previously been a C32 format with valid alpha. If we're switching to C24, we need to preserve it.
preserve_rt_alpha |= (GSLocalMemory::m_psm[m_cached_ctx.FRAME.PSM].trbpp == 24 && rt->HasValidAlpha());
@ -3251,7 +3260,9 @@ void GSRendererHW::Draw()
// A couple of hack to avoid upscaling issue. So far it seems to impacts mostly sprite
// Note: first hack corrects both position and texture coordinate
// Note: second hack corrects only the texture coordinate
if (CanUpscale() && (m_vt.m_primclass == GS_SPRITE_CLASS))
// Be careful to not correct downscaled targets, this can get messy and break post processing
// but it still needs to adjust native stuff from memory as it's not been compensated for upscaling (Dragon Quest 8 font for example).
if (CanUpscale() && (m_vt.m_primclass == GS_SPRITE_CLASS) && rt && rt->GetScale() > 1.0f)
{
const u32 count = m_vertex.next;
GSVertex* v = &m_vertex.buff[0];
@ -4968,7 +4979,7 @@ __ri void GSRendererHW::EmulateTextureSampler(const GSTextureCache::Target* rt,
// Apply a small offset (based on upscale amount) for edges of textures to avoid reading garbage during a clamp+stscale down
// Bigger problem when WH is 1024x1024 and the target is only small.
// This "fixes" a lot of the rainbow garbage in games when upscaling (and xenosaga shadows + VP2 forest seem quite happy).
const GSVector4 region_clamp_offset = GSVector4::cxpr(0.5f, 0.5f, 0.1f, 0.1f) + (GSVector4::cxpr(0.1f, 0.1f, 0.0f, 0.0f) * tex->GetScale());
const GSVector4 region_clamp_offset = GSVector4::cxpr(0.5f, 0.5f, 0.1f, 0.1f) + (GSVector4::cxpr(0.1f, 0.1f, 0.0f, 0.0f) * scale);
const GSVector4 region_clamp = (GSVector4(clamp) + region_clamp_offset) / WH.xyxy();
if (wms >= CLAMP_REGION_CLAMP)
@ -5079,11 +5090,13 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
GL_CACHE("Source is depth buffer, unsafe to read, taking copy.");
src_target = ds;
}
else
else if (!m_downscale_source)
{
// No match.
return;
}
else
src_target = tex->m_from_target;
// We need to copy. Try to cut down the source range as much as possible so we don't copy texels we're not reading.
const GSVector2i& src_unscaled_size = src_target->GetUnscaledSize();
@ -5095,7 +5108,7 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
// Shuffles take the whole target. This should've already been halved.
// We can't partially copy depth targets in DirectX, and GL/Vulkan should use the direct read above.
// Restricting it also breaks Tom and Jerry...
if (m_channel_shuffle || tex->m_texture->GetType() == GSTexture::Type::DepthStencil)
if (m_downscale_source || m_channel_shuffle || tex->m_texture->GetType() == GSTexture::Type::DepthStencil)
{
copy_range = src_bounds;
copy_size = src_unscaled_size;
@ -5167,22 +5180,25 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
}
unscaled_size = copy_size;
scale = src_target->GetScale();
scale = m_downscale_source ? 1 : src_target->GetScale();
const float src_scale = src_target->GetScale();
GL_CACHE("Copy size: %dx%d, range: %d,%d -> %d,%d (%dx%d) @ %.1f", copy_size.x, copy_size.y, copy_range.x,
copy_range.y, copy_range.z, copy_range.w, copy_range.width(), copy_range.height(), scale);
const GSVector2i scaled_copy_size = GSVector2i(static_cast<int>(std::ceil(static_cast<float>(copy_size.x) * scale)),
static_cast<int>(std::ceil(static_cast<float>(copy_size.y) * scale)));
const GSVector4i scaled_copy_range = GSVector4i((GSVector4(copy_range) * GSVector4(scale)).ceil());
const GSVector4i scaled_copy_range = GSVector4i((GSVector4(copy_range) * GSVector4(src_scale)).ceil());
const GSVector2i scaled_copy_dst_offset =
GSVector2i(static_cast<int>(std::ceil(static_cast<float>(copy_dst_offset.x) * scale)),
static_cast<int>(std::ceil(static_cast<float>(copy_dst_offset.y) * scale)));
src_copy.reset(src_target->m_texture->IsDepthStencil() ?
g_gs_device->CreateDepthStencil(
scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) :
scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false) :
(m_downscale_source ? g_gs_device->CreateRenderTarget(scaled_copy_size.x, scaled_copy_size.y, src_target->m_texture->GetFormat(), false,
true) :
g_gs_device->CreateTexture(
scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true));
scaled_copy_size.x, scaled_copy_size.y, 1, src_target->m_texture->GetFormat(), true)));
if (!src_copy) [[unlikely]]
{
Console.Error("Failed to allocate %dx%d texture for hazard copy", scaled_copy_size.x, scaled_copy_size.y);
@ -5190,9 +5206,18 @@ __ri void GSRendererHW::HandleTextureHazards(const GSTextureCache::Target* rt, c
m_conf.ps.tfx = 4;
return;
}
g_gs_device->CopyRect(
src_target->m_texture, src_copy.get(), scaled_copy_range, scaled_copy_dst_offset.x, scaled_copy_dst_offset.y);
if (m_downscale_source)
{
DevCon.Warning("Resizing to scale %f", scale);
const GSVector4 dst_rect = GSVector4(0, 0, src_unscaled_size.x, src_unscaled_size.y);
const GSVector4 src_rect = GSVector4(scaled_copy_range) / GSVector4(src_unscaled_size * src_scale).xyxy();
g_gs_device->StretchRect(src_target->m_texture, GSVector4(0.0f,0.0f,1.0f,1.0f), src_copy.get(), dst_rect, src_target->m_texture->IsDepthStencil() ? ShaderConvert::DEPTH_COPY : ShaderConvert::COPY, true);
}
else
{
g_gs_device->CopyRect(
src_target->m_texture, src_copy.get(), scaled_copy_range, scaled_copy_dst_offset.x, scaled_copy_dst_offset.y);
}
m_conf.tex = src_copy.get();
}
@ -5911,7 +5936,7 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
float sx, sy, ox2, oy2;
const float ox = static_cast<float>(static_cast<int>(m_context->XYOFFSET.OFX));
const float oy = static_cast<float>(static_cast<int>(m_context->XYOFFSET.OFY));
if (GSConfig.UserHacks_HalfPixelOffset != GSHalfPixelOffset::Native)
if (GSConfig.UserHacks_HalfPixelOffset != GSHalfPixelOffset::Native && rt->GetScale() > 1.0f)
{
sx = 2.0f * rtscale / (rtsize.x << 4);
sy = 2.0f * rtscale / (rtsize.y << 4);
@ -7063,7 +7088,7 @@ bool GSRendererHW::OI_BlitFMV(GSTextureCache::Target* _rt, GSTextureCache::Sourc
bool GSRendererHW::AreAnyPixelsDiscarded() const
{
return ((m_draw_env->SCANMSK.MSK & 2) || // skipping rows
m_cached_ctx.TEST.ATE || // testing alpha (might discard some pixels)
(m_cached_ctx.TEST.ATE && m_cached_ctx.TEST.AFAIL != AFAIL_FB_ONLY) || // testing alpha (might discard some pixels)
m_cached_ctx.TEST.DATE); // reading alpha
}
@ -7158,6 +7183,19 @@ bool GSRendererHW::TextureCoversWithoutGapsNotEqual()
return false;
}
bool GSRendererHW::isDownscaleDraw(GSTextureCache::Source* src, bool no_gaps)
{
if (no_gaps && m_vt.m_primclass >= GS_TRIANGLE_CLASS && m_cached_ctx.FRAME.Block() != m_cached_ctx.TEX0.TBP0 && !IsMipMapDraw() && IsDiscardingDstColor() &&
src && src->m_from_target && m_context->TEX1.MMAG == 1 &&
((m_vt.m_max.t.x - m_vt.m_min.t.x) / 2.0f) >= (m_vt.m_max.p.x - m_vt.m_min.p.x) && ((m_vt.m_max.t.y - m_vt.m_min.t.y) / 2.0f) >= (m_vt.m_max.p.y - m_vt.m_min.p.y))
{
DevCon.Warning("Draw %d is a downscale draw", s_n);
return true;
}
return false;
}
bool GSRendererHW::IsConstantDirectWriteMemClear()
{
const bool direct_draw = (m_vt.m_primclass == GS_SPRITE_CLASS) || (((m_index.tail % 6) == 0 && TrianglesAreQuads()) && m_vt.m_primclass == GS_TRIANGLE_CLASS);

View File

@ -48,6 +48,7 @@ private:
float alpha1(int L, int X0, int X1);
void SwSpriteRender();
bool CanUseSwSpriteRender();
bool isDownscaleDraw(GSTextureCache::Source* src, bool no_gaps);
bool IsConstantDirectWriteMemClear();
u32 GetConstantDirectWriteMemClearColor() const;
u32 GetConstantDirectWriteMemClearDepth() const;

View File

@ -1797,7 +1797,7 @@ GSVector2i GSTextureCache::ScaleRenderTargetSize(const GSVector2i& sz, float sca
}
GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type,
bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect, bool is_shuffle, bool possible_clear)
bool used, u32 fbmask, bool is_frame, bool preload, bool preserve_rgb, bool preserve_alpha, const GSVector4i draw_rect, bool is_shuffle, bool possible_clear, Source* src)
{
const GSLocalMemory::psm_t& psm_s = GSLocalMemory::m_psm[TEX0.PSM];
const u32 bp = TEX0.TBP0;
@ -2002,7 +2002,7 @@ GSTextureCache::Target* GSTextureCache::LookupTarget(GIFRegTEX0 TEX0, const GSVe
if (!tex)
return nullptr;
g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, false);
g_gs_device->StretchRect(dst->m_texture, sRect, tex, dRect, (type == RenderTarget) ? ShaderConvert::COPY : ShaderConvert::DEPTH_COPY, dst->m_scale == 1.0f);
g_perfmon.Put(GSPerfMon::TextureCopies, 1);
m_target_memory_usage = (m_target_memory_usage - dst->m_texture->GetMemUsage()) + tex->GetMemUsage();

View File

@ -490,7 +490,7 @@ public:
Target* FindTargetOverlap(Target* target, int type, int psm);
Target* LookupTarget(GIFRegTEX0 TEX0, const GSVector2i& size, float scale, int type, bool used = true, u32 fbmask = 0,
bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_rgb = true, bool preserve_alpha = true,
const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false);
const GSVector4i draw_rc = GSVector4i::zero(), bool is_shuffle = false, bool possible_clear = false, Source* src = nullptr);
Target* CreateTarget(GIFRegTEX0 TEX0, const GSVector2i& size, const GSVector2i& valid_size,float scale, int type, bool used = true, u32 fbmask = 0,
bool is_frame = false, bool preload = GSConfig.PreloadFrameWithGSData, bool preserve_target = true,
const GSVector4i draw_rc = GSVector4i::zero(), GSTextureCache::Source* src = nullptr);