GS: Get rid of extra binding for channel shuffle

Having this binding was redundant, as there's no "normal" texture
sampled when we're doing a channel shuffle, and it caused issues in
Vulkan when the render target or depth buffer was the source.

Also fixes the Urban Chaos HLE shader.

Fixes validation errors in GT4, NFS: Carbon, Urban Chaos, probably
others too.
This commit is contained in:
Connor McLaughlin 2022-01-30 17:10:10 +10:00 committed by refractionpcsx2
parent 5444b575f3
commit f33ee27f56
11 changed files with 91 additions and 61 deletions

View File

@ -56,6 +56,7 @@
#define PS_SCANMSK 0
#define PS_AUTOMATIC_LOD 0
#define PS_MANUAL_LOD 0
#define PS_TEX_IS_FB 0
#endif
#define SW_BLEND (PS_BLEND_A || PS_BLEND_B || PS_BLEND_D)
@ -107,8 +108,7 @@ struct PS_OUTPUT
Texture2D<float4> Texture : register(t0);
Texture2D<float4> Palette : register(t1);
Texture2D<float4> RtSampler : register(t3);
Texture2D<float4> RawTexture : register(t4);
Texture2D<float4> RtTexture : register(t2);
SamplerState TextureSampler : register(s0);
SamplerState PaletteSampler : register(s1);
@ -143,6 +143,9 @@ cbuffer cb1
float4 sample_c(float2 uv, float uv_w)
{
#if PS_TEX_IS_FB == 1
return RtTexture.Load(int3(int2(uv * WH.zw), 0));
#else
if (PS_POINT_SAMPLER)
{
// Weird issue with ATI/AMD cards,
@ -172,6 +175,7 @@ float4 sample_c(float2 uv, float uv_w)
#else
return Texture.SampleLevel(TextureSampler, uv, 0); // No lod
#endif
#endif
}
float4 sample_p(float u)
@ -287,13 +291,21 @@ float4x4 sample_4p(float4 u)
int fetch_raw_depth(int2 xy)
{
float4 col = RawTexture.Load(int3(xy, 0));
#if PS_TEX_IS_FB == 1
float4 col = RtTexture.Load(int3(xy, 0));
#else
float4 col = Texture.Load(int3(xy, 0));
#endif
return (int)(col.r * exp2(32.0f));
}
float4 fetch_raw_color(int2 xy)
{
return RawTexture.Load(int3(xy, 0));
#if PS_TEX_IS_FB == 1
return RtTexture.Load(int3(xy, 0));
#else
return Texture.Load(int3(xy, 0));
#endif
}
float4 fetch_c(int2 uv)
@ -680,7 +692,7 @@ void ps_fbmask(inout float4 C, float2 pos_xy)
{
if (PS_FBMASK)
{
float4 RT = trunc(RtSampler.Load(int3(pos_xy, 0)) * 255.0f + 0.1f);
float4 RT = trunc(RtTexture.Load(int3(pos_xy, 0)) * 255.0f + 0.1f);
C = (float4)(((uint4)C & ~FbMask) | ((uint4)RT & FbMask));
}
}
@ -730,7 +742,7 @@ void ps_blend(inout float4 Color, float As, float2 pos_xy)
return;
}
float4 RT = trunc(RtSampler.Load(int3(pos_xy, 0)) * 255.0f + 0.1f);
float4 RT = trunc(RtTexture.Load(int3(pos_xy, 0)) * 255.0f + 0.1f);
float Ad = (PS_DFMT == FMT_24) ? 1.0f : RT.a / 128.0f;

View File

@ -43,8 +43,7 @@ layout(location = 0, index = 0) out vec4 SV_Target0;
layout(location = 0, index = 1) out vec4 SV_Target1;
layout(binding = 1) uniform sampler2D PaletteSampler;
layout(binding = 3) uniform sampler2D RtSampler; // note 2 already use by the image below
layout(binding = 4) uniform sampler2D RawTextureSampler;
layout(binding = 2) uniform sampler2D RtSampler; // note 2 already use by the image below
#ifndef DISABLE_GL42_image
#if PS_DATE > 0
@ -52,7 +51,7 @@ layout(binding = 4) uniform sampler2D RawTextureSampler;
// require extra shader validation.
// FIXME how to declare memory access
layout(r32i, binding = 2) uniform iimage2D img_prim_min;
layout(r32i, binding = 3) uniform iimage2D img_prim_min;
// WARNING:
// You can't enable it if you discard the fragment. The depth is still
// updated (shadow in Shin Megami Tensei Nocturne)
@ -233,12 +232,20 @@ mat4 sample_4p(vec4 u)
int fetch_raw_depth()
{
return int(texelFetch(RawTextureSampler, ivec2(gl_FragCoord.xy), 0).r * exp2(32.0f));
#if PS_TEX_IS_FB == 1
return int(texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0).r * exp2(32.0f));
#else
return int(texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0).r * exp2(32.0f));
#endif
}
vec4 fetch_raw_color()
{
return texelFetch(RawTextureSampler, ivec2(gl_FragCoord.xy), 0);
#if PS_TEX_IS_FB == 1
return texelFetch(RtSampler, ivec2(gl_FragCoord.xy), 0);
#else
return texelFetch(TextureSampler, ivec2(gl_FragCoord.xy), 0);
#endif
}
vec4 fetch_c(ivec2 uv)

View File

@ -387,14 +387,13 @@ layout(location = 0) out vec4 o_col0;
layout(set = 1, binding = 0) uniform sampler2D Texture;
layout(set = 1, binding = 1) uniform sampler2D Palette;
layout(set = 2, binding = 0) uniform texture2D RawTexture;
#if PS_FEEDBACK_LOOP_IS_NEEDED
layout(input_attachment_index = 0, set = 2, binding = 1) uniform subpassInput RtSampler;
layout(input_attachment_index = 0, set = 2, binding = 0) uniform subpassInput RtSampler;
#endif
#if PS_DATE > 0
layout(set = 2, binding = 2) uniform texture2D PrimMinTexture;
layout(set = 2, binding = 1) uniform texture2D PrimMinTexture;
#endif
vec4 sample_c(vec2 uv)
@ -548,13 +547,21 @@ mat4 sample_4p(vec4 u)
int fetch_raw_depth(ivec2 xy)
{
vec4 col = texelFetch(RawTexture, xy, 0);
#if PS_TEX_IS_FB
vec4 col = subpassLoad(RtSampler);
#else
vec4 col = texelFetch(Texture, xy, 0);
#endif
return int(col.r * exp2(32.0f));
}
vec4 fetch_raw_color(ivec2 xy)
{
return texelFetch(RawTexture, xy, 0);
#if PS_TEX_IS_FB
return subpassLoad(RtSampler);
#else
return texelFetch(Texture, xy, 0);
#endif
}
vec4 fetch_c(ivec2 uv)
@ -633,7 +640,7 @@ vec4 sample_depth(vec2 st, ivec2 pos)
int depth = fetch_raw_depth(pos);
// Convert lsb based on the palette
t = Palette.Load(ivec3(depth & 0xFF, 0, 0)) * 255.0f;
t = texelFetch(Palette, ivec2(depth & 0xFF, 0), 0) * 255.0f;
// Msb is easier
float green = float(((depth >> 8) & 0xFF) * 36.0f);

View File

@ -489,7 +489,6 @@ struct alignas(16) GSHWDrawConfig
GSTexture* ds; ///< Depth stencil
GSTexture* tex; ///< Source texture
GSTexture* pal; ///< Palette texture
GSTexture* raw_tex; ///< Used by channel shuffles
GSVertex* verts; ///< Vertices to draw
u32* indices; ///< Indices to draw
u32 nverts; ///< Number of vertices

View File

@ -1360,8 +1360,9 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
}
IASetPrimitiveTopology(topology);
OMSetRenderTargets(hdr_rt ? hdr_rt : config.rt, config.ds, &config.scissor);
PSSetShaderResources(config.tex, config.pal);
PSSetShaderResource(4, config.raw_tex);
if (config.require_one_barrier) // Used as "bind rt" flag when texture barrier is unsupported
{
@ -1369,7 +1370,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
// Do not always bind the rt when it's not needed,
// only bind it when effects use it such as fbmask emulation currently
// because we copy the frame buffer and it is quite slow.
PSSetShaderResource(3, config.rt);
PSSetShaderResource(2, config.rt);
}
SetupOM(config.depth, convertSel(config.colormask, config.blend), config.blend.factor);
@ -1377,8 +1378,6 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
SetupGS(config.gs);
SetupPS(config.ps, &config.cb_ps, config.sampler);
OMSetRenderTargets(hdr_rt ? hdr_rt : config.rt, config.ds, &config.scissor);
DrawIndexedPrimitive();
if (config.alpha_second_pass.enable)

View File

@ -194,6 +194,7 @@ void GSDevice11::SetupPS(PSSelector sel, const GSHWDrawConfig::PSConstantBuffer*
sm.AddMacro("PS_SCANMSK", sel.scanmsk);
sm.AddMacro("PS_AUTOMATIC_LOD", sel.automatic_lod);
sm.AddMacro("PS_MANUAL_LOD", sel.manual_lod);
sm.AddMacro("PS_TEX_IS_FB", sel.tex_is_fb);
wil::com_ptr_nothrow<ID3D11PixelShader> ps = m_shader_cache.GetPixelShader(m_dev.get(), m_tfx_source, sm.GetPtr(), "ps_main");
i = m_ps.try_emplace(sel.key, std::move(ps)).first;

View File

@ -346,7 +346,7 @@ void GSRendererNew::EmulateTextureShuffleAndFbmask()
}
}
void GSRendererNew::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::Source* tex)
void GSRendererNew::EmulateChannelShuffle(const GSTextureCache::Source* tex)
{
// Uncomment to disable HLE emulation (allow to trace the draw call)
// m_channel_shuffle = false;
@ -359,7 +359,7 @@ void GSRendererNew::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::
GL_INS("Gran Turismo RGB Channel");
m_conf.ps.channel = ChannelFetch_RGB;
m_context->TEX0.TFX = TFX_DECAL;
*rt = tex->m_from_target;
m_conf.rt = tex->m_from_target;
}
else if (m_game.title == CRC::Tekken5)
{
@ -372,7 +372,7 @@ void GSRendererNew::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::
// 12 pages: 2 calls by channel, 3 channels, 1 blit
// Minus current draw call
m_skip = 12 * (3 + 3 + 1) - 1;
*rt = tex->m_from_target;
m_conf.rt = tex->m_from_target;
}
else
{
@ -478,9 +478,19 @@ void GSRendererNew::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::
// Effect is really a channel shuffle effect so let's cheat a little
if (m_channel_shuffle)
{
m_conf.raw_tex = tex->m_from_target;
if (g_gs_device->Features().texture_barrier)
m_conf.tex = tex->m_from_target;
if (m_conf.tex == m_conf.rt)
{
// sample from fb instead
m_conf.tex = nullptr;
m_conf.ps.tex_is_fb = true;
m_conf.require_one_barrier = true;
}
else if (m_conf.tex == m_conf.ds)
{
// using the current depth buffer. make sure it's not bound (needed for not-GL).
m_conf.ds = nullptr;
}
// Replace current draw with a fullscreen sprite
//
@ -496,10 +506,6 @@ void GSRendererNew::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::
m_vertex.head = m_vertex.tail = m_vertex.next = 2;
m_index.tail = 2;
}
else
{
m_conf.raw_tex = nullptr;
}
}
void GSRendererNew::EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER)
@ -1066,7 +1072,9 @@ void GSRendererNew::EmulateTextureSampler(const GSTextureCache::Source* tex)
// manual trilinear causes the chain to be uploaded, auto causes it to be generated
m_conf.sampler.lodclamp = !(trilinear_manual || trilinear_auto);
m_conf.tex = tex->m_texture;
// don't overwrite the texture when using channel shuffle, but keep the palette
if (!m_channel_shuffle)
m_conf.tex = tex->m_texture;
m_conf.pal = tex->m_palette;
}
@ -1149,6 +1157,8 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
ResetStates();
m_conf.cb_vs.texture_offset = GSVector2(0, 0);
m_conf.ps.scanmsk = m_env.SCANMSK.MSK;
m_conf.rt = rt;
m_conf.ds = ds;
ASSERT(g_gs_device != NULL);
@ -1157,7 +1167,7 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// Warning it must be done at the begining because it will change the
// vertex list (it will interact with PrimitiveOverlap and accurate
// blending)
EmulateChannelShuffle(&rt, tex);
EmulateChannelShuffle(tex);
// Upscaling hack to avoid various line/grid issues
MergeSprite(tex);
@ -1304,7 +1314,8 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// om
EmulateZbuffer(); // will update VS depth mask
if (!m_channel_shuffle)
EmulateZbuffer(); // will update VS depth mask
// vs
@ -1456,7 +1467,7 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
// Extract the depth as palette index
m_conf.ps.depth_fmt = 1;
m_conf.ps.channel = ChannelFetch_BLUE;
m_conf.raw_tex = ds;
m_conf.tex = ds;
// We need the palette to convert the depth to the correct alpha value.
if (!tex->m_palette)
@ -1469,10 +1480,10 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
}
// rs
const GSVector4& hacked_scissor = m_channel_shuffle ? GSVector4(0, 0, 1024, 1024) : m_context->scissor.in;
const GSVector4i scissor = GSVector4i(GSVector4(rtscale).xyxy() * hacked_scissor).rintersect(GSVector4i(rtsize).zwxy());
const GSVector4 hacked_scissor(m_channel_shuffle ? GSVector4(0, 0, 1024, 1024) : m_context->scissor.in);
const GSVector4i scissor(GSVector4i(GSVector4(rtscale).xyxy() * hacked_scissor).rintersect(GSVector4i(rtsize).zwxy()));
m_conf.drawarea = scissor.rintersect(ComputeBoundingBox(rtscale, rtsize));
m_conf.drawarea = m_channel_shuffle ? scissor : scissor.rintersect(ComputeBoundingBox(rtscale, rtsize));
m_conf.scissor = (DATE && !DATE_BARRIER) ? m_conf.drawarea : scissor;
SetupIA(sx, sy);
@ -1570,8 +1581,6 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
m_conf.drawlist = (m_conf.require_full_barrier && m_vt.m_primclass == GS_SPRITE_CLASS) ? &m_drawlist : nullptr;
m_conf.rt = rt;
m_conf.ds = ds;
g_gs_device->RenderHW(m_conf);
}

View File

@ -27,7 +27,7 @@ private:
inline void ResetStates();
inline void SetupIA(const float& sx, const float& sy);
inline void EmulateTextureShuffleAndFbmask();
inline void EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::Source* tex);
inline void EmulateChannelShuffle(const GSTextureCache::Source* tex);
inline void EmulateBlending(bool& DATE_PRIMID, bool& DATE_BARRIER);
inline void EmulateTextureSampler(const GSTextureCache::Source* tex);
inline void EmulateZbuffer();

View File

@ -949,10 +949,10 @@ void GSDeviceOGL::InitPrimDateTexture(GSTexture* rt, const GSVector4i& area)
const int max_int = 0x7FFFFFFF;
static_cast<GSTextureOGL*>(m_date.t)->Clear(&max_int, area);
glBindImageTexture(2, static_cast<GSTextureOGL*>(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32I);
glBindImageTexture(3, static_cast<GSTextureOGL*>(m_date.t)->GetID(), 0, false, 0, GL_READ_WRITE, GL_R32I);
#ifdef ENABLE_OGL_DEBUG
// Help to see the texture in apitrace
PSSetShaderResource(2, m_date.t);
PSSetShaderResource(3, m_date.t);
#endif
}
@ -1880,9 +1880,8 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
IASetPrimitiveTopology(topology);
PSSetShaderResources(config.tex, config.pal);
PSSetShaderResource(4, config.raw_tex);
// Always bind the RT. This way special effect can use it.
PSSetShaderResource(3, config.rt);
PSSetShaderResource(2, config.rt);
SetupSampler(config.sampler);
OMSetBlendState(config.blend.index, config.blend.factor, config.blend.is_constant, config.blend.is_accumulation, config.blend.is_mixed_hw_sw);

View File

@ -1160,9 +1160,8 @@ bool GSDeviceVK::CreatePipelineLayouts()
if ((m_tfx_sampler_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE)
return false;
Vulkan::Util::SetObjectName(dev, m_tfx_sampler_ds_layout, "TFX sampler descriptor layout");
dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
dslb.AddBinding(1, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
dslb.AddBinding(2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
dslb.AddBinding(1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
if ((m_tfx_rt_texture_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE)
return false;
Vulkan::Util::SetObjectName(dev, m_tfx_rt_texture_ds_layout, "TFX RT texture descriptor layout");
@ -2072,13 +2071,13 @@ void GSDeviceVK::SetBlendConstants(u8 color)
m_dirty_flags |= DIRTY_FLAG_BLEND_CONSTANTS;
}
void GSDeviceVK::PSSetShaderResource(int i, GSTexture* sr)
void GSDeviceVK::PSSetShaderResource(int i, GSTexture* sr, bool check_state)
{
VkImageView view;
if (sr)
{
GSTextureVK* vkTex = static_cast<GSTextureVK*>(sr);
if (i < 3)
if (check_state)
{
if (vkTex->GetTexture().GetLayout() != VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL && InRenderPass())
{
@ -2402,9 +2401,8 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed)
return ApplyTFXState(true);
}
dsub.AddImageDescriptorWrite(ds, 0, m_tfx_textures[NUM_TFX_SAMPLERS]);
dsub.AddInputAttachmentDescriptorWrite(ds, 1, m_tfx_textures[NUM_TFX_SAMPLERS + 1]);
dsub.AddImageDescriptorWrite(ds, 2, m_tfx_textures[NUM_TFX_SAMPLERS + 2]);
dsub.AddInputAttachmentDescriptorWrite(ds, 0, m_tfx_textures[NUM_TFX_SAMPLERS]);
dsub.AddImageDescriptorWrite(ds, 1, m_tfx_textures[NUM_TFX_SAMPLERS + 1]);
dsub.Update(dev);
m_tfx_descriptor_sets[2] = ds;
@ -2631,7 +2629,7 @@ GSTextureVK* GSDeviceVK::SetupPrimitiveTrackingDATE(GSHWDrawConfig& config, Pipe
// and bind the image to the primitive sampler
image->TransitionToLayout(VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
PSSetShaderResource(4, image);
PSSetShaderResource(3, image, false);
return image;
}
@ -2676,12 +2674,11 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
PipelineSelector& pipe = m_pipeline_selector;
if (config.tex)
{
PSSetShaderResource(0, config.tex);
PSSetShaderResource(1, config.pal);
PSSetShaderResource(0, config.tex, config.tex != config.rt);
PSSetSampler(0, config.sampler);
}
if (config.raw_tex)
PSSetShaderResource(2, config.raw_tex);
if (config.pal)
PSSetShaderResource(1, config.pal, true);
if (config.blend.is_constant)
SetBlendConstants(config.blend.factor);
@ -2750,7 +2747,7 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
pipe.feedback_loop |= render_area_okay && same_framebuffer && CurrentFramebufferHasFeedbackLoop();
OMSetRenderTargets(draw_rt, draw_ds, config.scissor, pipe.feedback_loop);
if (pipe.feedback_loop)
PSSetShaderResource(3, draw_rt);
PSSetShaderResource(2, draw_rt, false);
// Begin render pass if new target or out of the area.
if (!render_area_okay || !InRenderPass())

View File

@ -82,7 +82,7 @@ public:
NUM_TFX_DESCRIPTOR_SETS = 3,
NUM_TFX_DYNAMIC_OFFSETS = 2,
NUM_TFX_SAMPLERS = 2,
NUM_TFX_RT_TEXTURES = 3,
NUM_TFX_RT_TEXTURES = 2,
NUM_TFX_TEXTURES = NUM_TFX_SAMPLERS + NUM_TFX_RT_TEXTURES,
NUM_CONVERT_TEXTURES = 1,
NUM_CONVERT_SAMPLERS = 1,
@ -253,7 +253,7 @@ public:
void IAUnmapVertexBuffer();
void IASetIndexBuffer(const void* index, size_t count);
void PSSetShaderResource(int i, GSTexture* sr);
void PSSetShaderResource(int i, GSTexture* sr, bool check_state);
void PSSetSampler(u32 index, GSHWDrawConfig::SamplerSelector sel);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, bool feedback_loop);