GS/HW: Use texel fetch/load instead of sampler for palettes

Saves a multiply in the shaders in some cases, and frees up one
descriptor slot.
This commit is contained in:
Stenzek 2023-01-03 21:13:50 +10:00 committed by refractionpcsx2
parent 3ffa5eb613
commit 0619555232
12 changed files with 120 additions and 141 deletions

View File

@ -130,7 +130,6 @@ Texture2D<float4> Palette : register(t1);
Texture2D<float4> RtTexture : register(t2); Texture2D<float4> RtTexture : register(t2);
Texture2D<float> PrimMinTexture : register(t3); Texture2D<float> PrimMinTexture : register(t3);
SamplerState TextureSampler : register(s0); SamplerState TextureSampler : register(s0);
SamplerState PaletteSampler : register(s1);
#ifdef DX12 #ifdef DX12
cbuffer cb0 : register(b0) cbuffer cb0 : register(b0)
@ -207,9 +206,14 @@ float4 sample_c(float2 uv, float uv_w)
#endif #endif
} }
float4 sample_p(float u) float4 sample_p(uint u)
{ {
return Palette.Sample(PaletteSampler, u); return Palette.Load(int3(int(u), 0, 0));
}
float4 sample_p_norm(float u)
{
return sample_p(uint(u * 255.5f));
} }
float4 clamp_wrap_uv(float4 uv) float4 clamp_wrap_uv(float4 uv)
@ -278,7 +282,7 @@ float4x4 sample_4c(float4 uv, float uv_w)
return c; return c;
} }
float4 sample_4_index(float4 uv, float uv_w) uint4 sample_4_index(float4 uv, float uv_w)
{ {
float4 c; float4 c;
@ -288,25 +292,26 @@ float4 sample_4_index(float4 uv, float uv_w)
c.w = sample_c(uv.zw, uv_w).a; c.w = sample_c(uv.zw, uv_w).a;
// Denormalize value // Denormalize value
uint4 i = uint4(c * 255.0f + 0.5f); uint4 i = uint4(c * 255.5f);
if (PS_PAL_FMT == 1) if (PS_PAL_FMT == 1)
{ {
// 4HL // 4HL
c = float4(i & 0xFu) / 255.0f; return i & 0xFu;
} }
else if (PS_PAL_FMT == 2) else if (PS_PAL_FMT == 2)
{ {
// 4HH // 4HH
c = float4(i >> 4u) / 255.0f; return i >> 4u;
}
else
{
// 8
return i;
}
} }
// Most of texture will hit this code so keep normalized float value float4x4 sample_4p(uint4 u)
// 8 bits
return c * 255./256 + 0.5/256;
}
float4x4 sample_4p(float4 u)
{ {
float4x4 c; float4x4 c;
@ -468,7 +473,7 @@ float4 fetch_red(int2 xy)
rt = fetch_raw_color(xy); rt = fetch_raw_color(xy);
} }
return sample_p(rt.r) * 255.0f; return sample_p_norm(rt.r) * 255.0f;
} }
float4 fetch_green(int2 xy) float4 fetch_green(int2 xy)
@ -485,7 +490,7 @@ float4 fetch_green(int2 xy)
rt = fetch_raw_color(xy); rt = fetch_raw_color(xy);
} }
return sample_p(rt.g) * 255.0f; return sample_p_norm(rt.g) * 255.0f;
} }
float4 fetch_blue(int2 xy) float4 fetch_blue(int2 xy)
@ -502,19 +507,19 @@ float4 fetch_blue(int2 xy)
rt = fetch_raw_color(xy); rt = fetch_raw_color(xy);
} }
return sample_p(rt.b) * 255.0f; return sample_p_norm(rt.b) * 255.0f;
} }
float4 fetch_alpha(int2 xy) float4 fetch_alpha(int2 xy)
{ {
float4 rt = fetch_raw_color(xy); float4 rt = fetch_raw_color(xy);
return sample_p(rt.a) * 255.0f; return sample_p_norm(rt.a) * 255.0f;
} }
float4 fetch_rgb(int2 xy) float4 fetch_rgb(int2 xy)
{ {
float4 rt = fetch_raw_color(xy); float4 rt = fetch_raw_color(xy);
float4 c = float4(sample_p(rt.r).r, sample_p(rt.g).g, sample_p(rt.b).b, 1.0); float4 c = float4(sample_p_norm(rt.r).r, sample_p_norm(rt.g).g, sample_p_norm(rt.b).b, 1.0);
return c * 255.0f; return c * 255.0f;
} }

View File

@ -133,9 +133,14 @@ vec4 sample_c(vec2 uv)
#endif #endif
} }
vec4 sample_p(float idx) vec4 sample_p(uint idx)
{ {
return texture(PaletteSampler, vec2(idx, 0.0f)); return texelFetch(PaletteSampler, ivec2(int(idx), 0), 0);
}
vec4 sample_p_norm(float u)
{
return sample_p(uint(u * 255.5f));
} }
vec4 clamp_wrap_uv(vec4 uv) vec4 clamp_wrap_uv(vec4 uv)
@ -202,7 +207,7 @@ mat4 sample_4c(vec4 uv)
return c; return c;
} }
vec4 sample_4_index(vec4 uv) uvec4 sample_4_index(vec4 uv)
{ {
vec4 c; vec4 c;
@ -218,26 +223,22 @@ vec4 sample_4_index(vec4 uv)
c.z = sample_c(uv.xw).a; c.z = sample_c(uv.xw).a;
c.w = sample_c(uv.zw).a; c.w = sample_c(uv.zw).a;
uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value uvec4 i = uvec4(c * 255.5f); // Denormalize value
#if PS_PAL_FMT == 1 #if PS_PAL_FMT == 1
// 4HL // 4HL
return vec4(i & 0xFu) / 255.0f; return i & 0xFu;
#elif PS_PAL_FMT == 2 #elif PS_PAL_FMT == 2
// 4HH // 4HH
return vec4(i >> 4u) / 255.0f; return i >> 4u;
#else #else
// Most of texture will hit this code so keep normalized float value // 8
return i;
// 8 bits
return c;
#endif #endif
} }
mat4 sample_4p(vec4 u) mat4 sample_4p(uvec4 u)
{ {
mat4 c; mat4 c;
@ -398,7 +399,7 @@ vec4 fetch_red()
#else #else
vec4 rt = fetch_raw_color(); vec4 rt = fetch_raw_color();
#endif #endif
return sample_p(rt.r) * 255.0f; return sample_p_norm(rt.r) * 255.0f;
} }
vec4 fetch_green() vec4 fetch_green()
@ -409,7 +410,7 @@ vec4 fetch_green()
#else #else
vec4 rt = fetch_raw_color(); vec4 rt = fetch_raw_color();
#endif #endif
return sample_p(rt.g) * 255.0f; return sample_p_norm(rt.g) * 255.0f;
} }
vec4 fetch_blue() vec4 fetch_blue()
@ -420,19 +421,19 @@ vec4 fetch_blue()
#else #else
vec4 rt = fetch_raw_color(); vec4 rt = fetch_raw_color();
#endif #endif
return sample_p(rt.b) * 255.0f; return sample_p_norm(rt.b) * 255.0f;
} }
vec4 fetch_alpha() vec4 fetch_alpha()
{ {
vec4 rt = fetch_raw_color(); vec4 rt = fetch_raw_color();
return sample_p(rt.a) * 255.0f; return sample_p_norm(rt.a) * 255.0f;
} }
vec4 fetch_rgb() vec4 fetch_rgb()
{ {
vec4 rt = fetch_raw_color(); vec4 rt = fetch_raw_color();
vec4 c = vec4(sample_p(rt.r).r, sample_p(rt.g).g, sample_p(rt.b).b, 1.0f); vec4 c = vec4(sample_p_norm(rt.r).r, sample_p_norm(rt.g).g, sample_p_norm(rt.b).b, 1.0f);
return c * 255.0f; return c * 255.0f;
} }

View File

@ -390,7 +390,7 @@ layout(location = 0) out vec4 o_col0;
#endif #endif
layout(set = 1, binding = 0) uniform sampler2D Texture; layout(set = 1, binding = 0) uniform sampler2D Texture;
layout(set = 1, binding = 1) uniform sampler2D Palette; layout(set = 1, binding = 1) uniform texture2D Palette;
#if PS_FEEDBACK_LOOP_IS_NEEDED #if PS_FEEDBACK_LOOP_IS_NEEDED
#ifndef DISABLE_TEXTURE_BARRIER #ifndef DISABLE_TEXTURE_BARRIER
@ -443,9 +443,14 @@ vec4 sample_c(vec2 uv)
#endif #endif
} }
vec4 sample_p(float u) vec4 sample_p(uint idx)
{ {
return texture(Palette, vec2(u, 0.0f)); return texelFetch(Palette, ivec2(int(idx), 0), 0);
}
vec4 sample_p_norm(float u)
{
return sample_p(uint(u * 255.5f));
} }
vec4 clamp_wrap_uv(vec4 uv) vec4 clamp_wrap_uv(vec4 uv)
@ -519,7 +524,7 @@ mat4 sample_4c(vec4 uv)
return c; return c;
} }
vec4 sample_4_index(vec4 uv) uvec4 sample_4_index(vec4 uv)
{ {
vec4 c; vec4 c;
@ -533,18 +538,17 @@ vec4 sample_4_index(vec4 uv)
#if PS_PAL_FMT == 1 #if PS_PAL_FMT == 1
// 4HL // 4HL
c = vec4(i & 0xFu) / 255.0f; return i & 0xFu;
#elif PS_PAL_FMT == 2 #elif PS_PAL_FMT == 2
// 4HH // 4HH
c = vec4(i >> 4u) / 255.0f; return i >> 4u;
#else
// 8
return i;
#endif #endif
// Most of texture will hit this code so keep normalized float value
// 8 bits
return c * 255./256 + 0.5/256;
} }
mat4 sample_4p(vec4 u) mat4 sample_4p(uvec4 u)
{ {
mat4 c; mat4 c;
@ -709,7 +713,7 @@ vec4 fetch_red(ivec2 xy)
rt = fetch_raw_color(xy); rt = fetch_raw_color(xy);
#endif #endif
return sample_p(rt.r) * 255.0f; return sample_p_norm(rt.r) * 255.0f;
} }
vec4 fetch_green(ivec2 xy) vec4 fetch_green(ivec2 xy)
@ -723,7 +727,7 @@ vec4 fetch_green(ivec2 xy)
rt = fetch_raw_color(xy); rt = fetch_raw_color(xy);
#endif #endif
return sample_p(rt.g) * 255.0f; return sample_p_norm(rt.g) * 255.0f;
} }
vec4 fetch_blue(ivec2 xy) vec4 fetch_blue(ivec2 xy)
@ -737,19 +741,19 @@ vec4 fetch_blue(ivec2 xy)
rt = fetch_raw_color(xy); rt = fetch_raw_color(xy);
#endif #endif
return sample_p(rt.b) * 255.0f; return sample_p_norm(rt.b) * 255.0f;
} }
vec4 fetch_alpha(ivec2 xy) vec4 fetch_alpha(ivec2 xy)
{ {
vec4 rt = fetch_raw_color(xy); vec4 rt = fetch_raw_color(xy);
return sample_p(rt.a) * 255.0f; return sample_p_norm(rt.a) * 255.0f;
} }
vec4 fetch_rgb(ivec2 xy) vec4 fetch_rgb(ivec2 xy)
{ {
vec4 rt = fetch_raw_color(xy); vec4 rt = fetch_raw_color(xy);
vec4 c = vec4(sample_p(rt.r).r, sample_p(rt.g).g, sample_p(rt.b).b, 1.0); vec4 c = vec4(sample_p_norm(rt.r).r, sample_p_norm(rt.g).g, sample_p_norm(rt.b).b, 1.0);
return c * 255.0f; return c * 255.0f;
} }

View File

@ -682,7 +682,7 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
// ps // ps
PSSetShaderResources(sTex, nullptr); PSSetShaderResources(sTex, nullptr);
PSSetSamplerState(linear ? m_convert.ln.get() : m_convert.pt.get(), nullptr); PSSetSamplerState(linear ? m_convert.ln.get() : m_convert.pt.get());
PSSetShader(ps, ps_cb); PSSetShader(ps, ps_cb);
// //
@ -759,7 +759,7 @@ void GSDevice11::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
// ps // ps
PSSetShaderResources(sTex, nullptr); PSSetShaderResources(sTex, nullptr);
PSSetSamplerState(linear ? m_convert.ln.get() : m_convert.pt.get(), nullptr); PSSetSamplerState(linear ? m_convert.ln.get() : m_convert.pt.get());
PSSetShader(m_present.ps[static_cast<u32>(shader)].get(), m_present.ps_cb.get()); PSSetShader(m_present.ps[static_cast<u32>(shader)].get(), m_present.ps_cb.get());
// //
@ -958,7 +958,7 @@ void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vert
// ps // ps
PSSetShaderResources(rt, nullptr); PSSetShaderResources(rt, nullptr);
PSSetSamplerState(m_convert.pt.get(), nullptr); PSSetSamplerState(m_convert.pt.get());
PSSetShader(m_convert.ps[static_cast<int>(datm ? ShaderConvert::DATM_1 : ShaderConvert::DATM_0)].get(), nullptr); PSSetShader(m_convert.ps[static_cast<int>(datm ? ShaderConvert::DATM_1 : ShaderConvert::DATM_0)].get(), nullptr);
// //
@ -1184,10 +1184,9 @@ void GSDevice11::PSSetShaderResource(int i, GSTexture* sr)
m_state.ps_sr_views[i] = sr ? static_cast<ID3D11ShaderResourceView*>(*static_cast<GSTexture11*>(sr)) : nullptr; m_state.ps_sr_views[i] = sr ? static_cast<ID3D11ShaderResourceView*>(*static_cast<GSTexture11*>(sr)) : nullptr;
} }
void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1) void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0)
{ {
m_state.ps_ss[0] = ss0; m_state.ps_ss[0] = ss0;
m_state.ps_ss[1] = ss1;
} }
void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb) void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)

View File

@ -110,7 +110,7 @@ public:
private: private:
static constexpr u32 MAX_TEXTURES = 4; static constexpr u32 MAX_TEXTURES = 4;
static constexpr u32 MAX_SAMPLERS = 2; static constexpr u32 MAX_SAMPLERS = 1;
int m_d3d_texsize; int m_d3d_texsize;
@ -221,7 +221,6 @@ private:
std::unordered_map<PSSelector, wil::com_ptr_nothrow<ID3D11PixelShader>, GSHWDrawConfig::PSSelectorHash> m_ps; std::unordered_map<PSSelector, wil::com_ptr_nothrow<ID3D11PixelShader>, GSHWDrawConfig::PSSelectorHash> m_ps;
wil::com_ptr_nothrow<ID3D11Buffer> m_ps_cb; wil::com_ptr_nothrow<ID3D11Buffer> m_ps_cb;
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11SamplerState>> m_ps_ss; std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11SamplerState>> m_ps_ss;
wil::com_ptr_nothrow<ID3D11SamplerState> m_palette_ss;
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11DepthStencilState>> m_om_dss; std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11DepthStencilState>> m_om_dss;
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11BlendState>> m_om_bs; std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11BlendState>> m_om_bs;
wil::com_ptr_nothrow<ID3D11RasterizerState> m_rs; wil::com_ptr_nothrow<ID3D11RasterizerState> m_rs;
@ -287,7 +286,7 @@ public:
void PSSetShaderResource(int i, GSTexture* sr); void PSSetShaderResource(int i, GSTexture* sr);
void PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb); void PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb);
void PSUpdateShaderState(); void PSUpdateShaderState();
void PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1); void PSSetSamplerState(ID3D11SamplerState* ss0);
void OMSetDepthStencilState(ID3D11DepthStencilState* dss, u8 sref); void OMSetDepthStencilState(ID3D11DepthStencilState* dss, u8 sref);
void OMSetBlendState(ID3D11BlendState* bs, float bf); void OMSetBlendState(ID3D11BlendState* bs, float bf);

View File

@ -43,24 +43,6 @@ bool GSDevice11::CreateTextureFX()
hr = m_dev->CreateBuffer(&bd, nullptr, m_ps_cb.put()); hr = m_dev->CreateBuffer(&bd, nullptr, m_ps_cb.put());
if (FAILED(hr))
return false;
D3D11_SAMPLER_DESC sd;
memset(&sd, 0, sizeof(sd));
sd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT;
sd.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
sd.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
sd.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
sd.MinLOD = -FLT_MAX;
sd.MaxLOD = FLT_MAX;
sd.MaxAnisotropy = 1;
sd.ComparisonFunc = D3D11_COMPARISON_NEVER;
hr = m_dev->CreateSamplerState(&sd, m_palette_ss.put());
if (FAILED(hr)) if (FAILED(hr))
return false; return false;
@ -212,7 +194,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
m_ctx->UpdateSubresource(m_ps_cb.get(), 0, NULL, cb, 0, 0); m_ctx->UpdateSubresource(m_ps_cb.get(), 0, NULL, cb, 0, 0);
} }
wil::com_ptr_nothrow<ID3D11SamplerState> ss0, ss1; wil::com_ptr_nothrow<ID3D11SamplerState> ss0;
if (sel.tfx != 4) if (sel.tfx != 4)
{ {
@ -267,14 +249,9 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
m_ps_ss[ssel.key] = ss0; m_ps_ss[ssel.key] = ss0;
} }
if (sel.pal_fmt)
{
ss1 = m_palette_ss;
}
} }
PSSetSamplerState(ss0.get(), ss1.get()); PSSetSamplerState(ss0.get());
PSSetShader(i->second.get(), m_ps_cb.get()); PSSetShader(i->second.get(), m_ps_cb.get());
} }

View File

@ -1143,7 +1143,7 @@ bool GSDevice12::CreateRootSignatures()
rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL); rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
rsb.AddCBVParameter(1, D3D12_SHADER_VISIBILITY_PIXEL); rsb.AddCBVParameter(1, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL); rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL); rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, NUM_TFX_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 2, D3D12_SHADER_VISIBILITY_PIXEL); rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 2, D3D12_SHADER_VISIBILITY_PIXEL);
if (!(m_tfx_root_signature = rsb.Create())) if (!(m_tfx_root_signature = rsb.Create()))
return false; return false;
@ -1806,8 +1806,7 @@ void GSDevice12::InitializeState()
{ {
for (u32 i = 0; i < NUM_TOTAL_TFX_TEXTURES; i++) for (u32 i = 0; i < NUM_TOTAL_TFX_TEXTURES; i++)
m_tfx_textures[i] = m_null_texture.GetSRVDescriptor(); m_tfx_textures[i] = m_null_texture.GetSRVDescriptor();
for (u32 i = 0; i < NUM_TFX_SAMPLERS; i++) m_tfx_sampler_sel = GSHWDrawConfig::SamplerSelector::Point().key;
m_tfx_sampler_sel[i] = GSHWDrawConfig::SamplerSelector::Point().key;
InvalidateCachedState(); InvalidateCachedState();
} }
@ -1816,9 +1815,7 @@ void GSDevice12::InitializeSamplers()
{ {
bool result = GetSampler(&m_point_sampler_cpu, GSHWDrawConfig::SamplerSelector::Point()); bool result = GetSampler(&m_point_sampler_cpu, GSHWDrawConfig::SamplerSelector::Point());
result = result && GetSampler(&m_linear_sampler_cpu, GSHWDrawConfig::SamplerSelector::Linear()); result = result && GetSampler(&m_linear_sampler_cpu, GSHWDrawConfig::SamplerSelector::Linear());
result = result && GetSampler(&m_tfx_sampler, m_tfx_sampler_sel);
for (u32 i = 0; i < NUM_TFX_SAMPLERS; i++)
result = result && GetSampler(&m_tfx_samplers[i], m_tfx_sampler_sel[i]);
if (!result) if (!result)
pxFailRel("Failed to initialize samplers"); pxFailRel("Failed to initialize samplers");
@ -1970,13 +1967,13 @@ void GSDevice12::PSSetShaderResource(int i, GSTexture* sr, bool check_state)
m_dirty_flags |= (i < 2) ? DIRTY_FLAG_TFX_TEXTURES : DIRTY_FLAG_TFX_RT_TEXTURES; m_dirty_flags |= (i < 2) ? DIRTY_FLAG_TFX_TEXTURES : DIRTY_FLAG_TFX_RT_TEXTURES;
} }
void GSDevice12::PSSetSampler(u32 index, GSHWDrawConfig::SamplerSelector sel) void GSDevice12::PSSetSampler(GSHWDrawConfig::SamplerSelector sel)
{ {
if (m_tfx_sampler_sel[index] == sel.key) if (m_tfx_sampler_sel == sel.key)
return; return;
GetSampler(&m_tfx_samplers[index], sel); GetSampler(&m_tfx_sampler, sel);
m_tfx_sampler_sel[index] = sel.key; m_tfx_sampler_sel = sel.key;
m_dirty_flags |= DIRTY_FLAG_TFX_SAMPLERS; m_dirty_flags |= DIRTY_FLAG_TFX_SAMPLERS;
} }
@ -2330,7 +2327,7 @@ bool GSDevice12::ApplyTFXState(bool already_execed)
if (flags & DIRTY_FLAG_TFX_SAMPLERS) if (flags & DIRTY_FLAG_TFX_SAMPLERS)
{ {
if (!g_d3d12_context->GetSamplerAllocator().LookupGroup(&m_tfx_samplers_handle_gpu, m_tfx_samplers.data())) if (!g_d3d12_context->GetSamplerAllocator().LookupSingle(&m_tfx_samplers_handle_gpu, m_tfx_sampler))
{ {
ExecuteCommandListAndRestartRenderPass(false, "Ran out of sampler groups"); ExecuteCommandListAndRestartRenderPass(false, "Ran out of sampler groups");
return ApplyTFXState(true); return ApplyTFXState(true);
@ -2555,7 +2552,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
if (config.tex) if (config.tex)
{ {
PSSetShaderResource(0, config.tex, config.tex != config.rt); PSSetShaderResource(0, config.tex, config.tex != config.rt);
PSSetSampler(0, config.sampler); PSSetSampler(config.sampler);
} }
if (config.pal) if (config.pal)
PSSetShaderResource(1, config.pal, true); PSSetShaderResource(1, config.pal, true);

View File

@ -112,7 +112,7 @@ public:
NUM_TFX_TEXTURES = 2, NUM_TFX_TEXTURES = 2,
NUM_TFX_RT_TEXTURES = 2, NUM_TFX_RT_TEXTURES = 2,
NUM_TOTAL_TFX_TEXTURES = NUM_TFX_TEXTURES + NUM_TFX_RT_TEXTURES, NUM_TOTAL_TFX_TEXTURES = NUM_TFX_TEXTURES + NUM_TFX_RT_TEXTURES,
NUM_TFX_SAMPLERS = 2, NUM_TFX_SAMPLERS = 1,
NUM_UTILITY_TEXTURES = 1, NUM_UTILITY_TEXTURES = 1,
NUM_UTILITY_SAMPLERS = 1, NUM_UTILITY_SAMPLERS = 1,
CONVERT_PUSH_CONSTANTS_SIZE = 96, CONVERT_PUSH_CONSTANTS_SIZE = 96,
@ -279,7 +279,7 @@ public:
void IASetIndexBuffer(const void* index, size_t count); void IASetIndexBuffer(const void* index, size_t count);
void PSSetShaderResource(int i, GSTexture* sr, bool check_state); void PSSetShaderResource(int i, GSTexture* sr, bool check_state);
void PSSetSampler(u32 index, GSHWDrawConfig::SamplerSelector sel); void PSSetSampler(GSHWDrawConfig::SamplerSelector sel);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor); void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor);
@ -404,8 +404,8 @@ private:
std::array<D3D12_GPU_VIRTUAL_ADDRESS, NUM_TFX_CONSTANT_BUFFERS> m_tfx_constant_buffers{}; std::array<D3D12_GPU_VIRTUAL_ADDRESS, NUM_TFX_CONSTANT_BUFFERS> m_tfx_constant_buffers{};
std::array<D3D12::DescriptorHandle, NUM_TOTAL_TFX_TEXTURES> m_tfx_textures{}; std::array<D3D12::DescriptorHandle, NUM_TOTAL_TFX_TEXTURES> m_tfx_textures{};
std::array<D3D12::DescriptorHandle, NUM_TFX_SAMPLERS> m_tfx_samplers{}; D3D12::DescriptorHandle m_tfx_sampler;
std::array<u32, NUM_TFX_SAMPLERS> m_tfx_sampler_sel{}; u32 m_tfx_sampler_sel = 0;
D3D12::DescriptorHandle m_tfx_textures_handle_gpu; D3D12::DescriptorHandle m_tfx_textures_handle_gpu;
D3D12::DescriptorHandle m_tfx_samplers_handle_gpu; D3D12::DescriptorHandle m_tfx_samplers_handle_gpu;
D3D12::DescriptorHandle m_tfx_rt_textures_handle_gpu; D3D12::DescriptorHandle m_tfx_rt_textures_handle_gpu;

View File

@ -3244,7 +3244,7 @@ void GSTextureCache::Palette::InitializeTexture()
// sampling such texture are always normalized by 255. // sampling such texture are always normalized by 255.
// This is because indexes are stored as normalized values of an RGBA texture (e.g. index 15 will be read as (15/255), // This is because indexes are stored as normalized values of an RGBA texture (e.g. index 15 will be read as (15/255),
// and therefore will read texel 15/255 * texture size). // and therefore will read texel 15/255 * texture size).
m_tex_palette = g_gs_device->CreateTexture(256, 1, 1, GSTexture::Format::Color); m_tex_palette = g_gs_device->CreateTexture(m_pal, 1, 1, GSTexture::Format::Color);
m_tex_palette->Update(GSVector4i(0, 0, m_pal, 1), m_clut, m_pal * sizeof(m_clut[0])); m_tex_palette->Update(GSVector4i(0, 0, m_pal, 1), m_clut, m_pal * sizeof(m_clut[0]));
} }
} }

View File

@ -283,8 +283,6 @@ vertex MainVSOut vs_main_expand(
// MARK: - Fragment functions // MARK: - Fragment functions
constexpr sampler palette_sampler(filter::nearest, address::clamp_to_edge);
struct PSMain struct PSMain
{ {
texture2d<float> tex; texture2d<float> tex;
@ -349,9 +347,14 @@ struct PSMain
} }
} }
float4 sample_p(float idx) float4 sample_p(uint idx)
{ {
return palette.sample(palette_sampler, float2(idx, 0)); return palette.read(uint2(idx, 0));
}
float4 sample_p_norm(float u)
{
return sample_p(uint(u * 255.5f));
} }
float4 clamp_wrap_uv(float4 uv) float4 clamp_wrap_uv(float4 uv)
@ -415,7 +418,7 @@ struct PSMain
}; };
} }
float4 sample_4_index(float4 uv) uint4 sample_4_index(float4 uv)
{ {
float4 c; float4 c;
@ -432,15 +435,14 @@ struct PSMain
uint4 i = uint4(c * 255.5f); // Denormalize value uint4 i = uint4(c * 255.5f); // Denormalize value
if (PS_PAL_FMT == 1) if (PS_PAL_FMT == 1)
return float4(i & 0xF) / 255.f; return i & 0xF;
if (PS_PAL_FMT == 2) if (PS_PAL_FMT == 2)
return float4(i >> 4) / 255.f; return i >> 4;
// Most textures will hit this code so keep normalized float value return i;
return c;
} }
float4x4 sample_4p(float4 u) float4x4 sample_4p(uint4 u)
{ {
return { return {
sample_p(u.x), sample_p(u.x),
@ -559,30 +561,30 @@ struct PSMain
float4 fetch_red() float4 fetch_red()
{ {
float rt = PS_TEX_IS_DEPTH ? float(fetch_raw_depth() & 0xFF) / 255.f : fetch_raw_color().r; float rt = PS_TEX_IS_DEPTH ? float(fetch_raw_depth() & 0xFF) / 255.f : fetch_raw_color().r;
return sample_p(rt) * 255.f; return sample_p_norm(rt) * 255.f;
} }
float4 fetch_green() float4 fetch_green()
{ {
float rt = PS_TEX_IS_DEPTH ? float((fetch_raw_depth() >> 8) & 0xFF) / 255.f : fetch_raw_color().g; float rt = PS_TEX_IS_DEPTH ? float((fetch_raw_depth() >> 8) & 0xFF) / 255.f : fetch_raw_color().g;
return sample_p(rt) * 255.f; return sample_p_norm(rt) * 255.f;
} }
float4 fetch_blue() float4 fetch_blue()
{ {
float rt = PS_TEX_IS_DEPTH ? float((fetch_raw_depth() >> 16) & 0xFF) / 255.f : fetch_raw_color().b; float rt = PS_TEX_IS_DEPTH ? float((fetch_raw_depth() >> 16) & 0xFF) / 255.f : fetch_raw_color().b;
return sample_p(rt) * 255.f; return sample_p_norm(rt) * 255.f;
} }
float4 fetch_alpha() float4 fetch_alpha()
{ {
return sample_p(fetch_raw_color().a) * 255.f; return sample_p_norm(fetch_raw_color().a) * 255.f;
} }
float4 fetch_rgb() float4 fetch_rgb()
{ {
float4 rt = fetch_raw_color(); float4 rt = fetch_raw_color();
return float4(sample_p(rt.r).r, sample_p(rt.g).g, sample_p(rt.b).b, 1) * 255.f; return float4(sample_p_norm(rt.r).r, sample_p_norm(rt.g).g, sample_p_norm(rt.b).b, 1) * 255.f;
} }
float4 fetch_gXbY() float4 fetch_gXbY()

View File

@ -1084,9 +1084,7 @@ void GSDeviceVK::ClearSamplerCache()
m_point_sampler = GetSampler(GSHWDrawConfig::SamplerSelector::Point()); m_point_sampler = GetSampler(GSHWDrawConfig::SamplerSelector::Point());
m_linear_sampler = GetSampler(GSHWDrawConfig::SamplerSelector::Linear()); m_linear_sampler = GetSampler(GSHWDrawConfig::SamplerSelector::Linear());
m_utility_sampler = m_point_sampler; m_utility_sampler = m_point_sampler;
m_tfx_sampler = m_point_sampler;
for (u32 i = 0; i < std::size(m_tfx_samplers); i++)
m_tfx_samplers[i] = GetSampler(m_tfx_sampler_sel[i]);
} }
static void AddMacro(std::stringstream& ss, const char* name, const char* value) static void AddMacro(std::stringstream& ss, const char* name, const char* value)
@ -1236,8 +1234,8 @@ bool GSDeviceVK::CreatePipelineLayouts()
if ((m_tfx_ubo_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE) if ((m_tfx_ubo_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE)
return false; return false;
Vulkan::Util::SetObjectName(dev, m_tfx_ubo_ds_layout, "TFX UBO descriptor layout"); Vulkan::Util::SetObjectName(dev, m_tfx_ubo_ds_layout, "TFX UBO descriptor layout");
for (u32 i = 0; i < NUM_TFX_SAMPLERS; i++) dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT); dslb.AddBinding(1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
if ((m_tfx_sampler_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE) if ((m_tfx_sampler_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE)
return false; return false;
Vulkan::Util::SetObjectName(dev, m_tfx_sampler_ds_layout, "TFX sampler descriptor layout"); Vulkan::Util::SetObjectName(dev, m_tfx_sampler_ds_layout, "TFX sampler descriptor layout");
@ -2308,11 +2306,8 @@ void GSDeviceVK::InitializeState()
if (m_linear_sampler) if (m_linear_sampler)
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_linear_sampler, "Linear sampler"); Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_linear_sampler, "Linear sampler");
for (u32 i = 0; i < NUM_TFX_SAMPLERS; i++) m_tfx_sampler_sel = GSHWDrawConfig::SamplerSelector::Point().key;
{ m_tfx_sampler = m_point_sampler;
m_tfx_sampler_sel[i] = GSHWDrawConfig::SamplerSelector::Point().key;
m_tfx_samplers[i] = m_point_sampler;
}
InvalidateCachedState(); InvalidateCachedState();
} }
@ -2463,13 +2458,13 @@ void GSDeviceVK::PSSetShaderResource(int i, GSTexture* sr, bool check_state)
m_dirty_flags |= (i < 2) ? DIRTY_FLAG_TFX_SAMPLERS_DS : DIRTY_FLAG_TFX_RT_TEXTURE_DS; m_dirty_flags |= (i < 2) ? DIRTY_FLAG_TFX_SAMPLERS_DS : DIRTY_FLAG_TFX_RT_TEXTURE_DS;
} }
void GSDeviceVK::PSSetSampler(u32 index, GSHWDrawConfig::SamplerSelector sel) void GSDeviceVK::PSSetSampler(GSHWDrawConfig::SamplerSelector sel)
{ {
if (m_tfx_sampler_sel[index] == sel.key) if (m_tfx_sampler_sel == sel.key)
return; return;
m_tfx_sampler_sel[index] = sel.key; m_tfx_sampler_sel = sel.key;
m_tfx_samplers[index] = GetSampler(sel); m_tfx_sampler = GetSampler(sel);
m_dirty_flags |= DIRTY_FLAG_TFX_SAMPLERS_DS; m_dirty_flags |= DIRTY_FLAG_TFX_SAMPLERS_DS;
} }
@ -2739,8 +2734,8 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed)
return ApplyTFXState(true); return ApplyTFXState(true);
} }
dsub.AddCombinedImageSamplerDescriptorWrites( dsub.AddCombinedImageSamplerDescriptorWrite(ds, 0, m_tfx_textures[0], m_tfx_sampler);
ds, 0, m_tfx_textures.data(), m_tfx_samplers.data(), NUM_TFX_SAMPLERS); dsub.AddImageDescriptorWrite(ds, 1, m_tfx_textures[1]);
dsub.Update(dev); dsub.Update(dev);
m_tfx_descriptor_sets[1] = ds; m_tfx_descriptor_sets[1] = ds;
@ -2764,10 +2759,10 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed)
} }
if (m_features.texture_barrier) if (m_features.texture_barrier)
dsub.AddInputAttachmentDescriptorWrite(ds, 0, m_tfx_textures[NUM_TFX_SAMPLERS]); dsub.AddInputAttachmentDescriptorWrite(ds, 0, m_tfx_textures[NUM_TFX_DRAW_TEXTURES]);
else else
dsub.AddImageDescriptorWrite(ds, 0, m_tfx_textures[NUM_TFX_SAMPLERS]); dsub.AddImageDescriptorWrite(ds, 0, m_tfx_textures[NUM_TFX_DRAW_TEXTURES]);
dsub.AddImageDescriptorWrite(ds, 1, m_tfx_textures[NUM_TFX_SAMPLERS + 1]); dsub.AddImageDescriptorWrite(ds, 1, m_tfx_textures[NUM_TFX_DRAW_TEXTURES + 1]);
dsub.Update(dev); dsub.Update(dev);
m_tfx_descriptor_sets[2] = ds; m_tfx_descriptor_sets[2] = ds;
@ -3028,7 +3023,7 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
if (config.tex) if (config.tex)
{ {
PSSetShaderResource(0, config.tex, config.tex != config.rt); PSSetShaderResource(0, config.tex, config.tex != config.rt);
PSSetSampler(0, config.sampler); PSSetSampler(config.sampler);
} }
if (config.pal) if (config.pal)
PSSetShaderResource(1, config.pal, true); PSSetShaderResource(1, config.pal, true);

View File

@ -72,9 +72,9 @@ public:
{ {
NUM_TFX_DESCRIPTOR_SETS = 3, NUM_TFX_DESCRIPTOR_SETS = 3,
NUM_TFX_DYNAMIC_OFFSETS = 2, NUM_TFX_DYNAMIC_OFFSETS = 2,
NUM_TFX_SAMPLERS = 2, NUM_TFX_DRAW_TEXTURES = 2,
NUM_TFX_RT_TEXTURES = 2, NUM_TFX_RT_TEXTURES = 2,
NUM_TFX_TEXTURES = NUM_TFX_SAMPLERS + NUM_TFX_RT_TEXTURES, NUM_TFX_TEXTURES = NUM_TFX_DRAW_TEXTURES + NUM_TFX_RT_TEXTURES,
NUM_CONVERT_TEXTURES = 1, NUM_CONVERT_TEXTURES = 1,
NUM_CONVERT_SAMPLERS = 1, NUM_CONVERT_SAMPLERS = 1,
CONVERT_PUSH_CONSTANTS_SIZE = 96, CONVERT_PUSH_CONSTANTS_SIZE = 96,
@ -260,7 +260,7 @@ public:
void IASetIndexBuffer(const void* index, size_t count); void IASetIndexBuffer(const void* index, size_t count);
void PSSetShaderResource(int i, GSTexture* sr, bool check_state); void PSSetShaderResource(int i, GSTexture* sr, bool check_state);
void PSSetSampler(u32 index, GSHWDrawConfig::SamplerSelector sel); void PSSetSampler(GSHWDrawConfig::SamplerSelector sel);
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, bool feedback_loop); void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, bool feedback_loop);
@ -372,8 +372,8 @@ private:
u8 m_blend_constant_color = 0; u8 m_blend_constant_color = 0;
std::array<VkImageView, NUM_TFX_TEXTURES> m_tfx_textures{}; std::array<VkImageView, NUM_TFX_TEXTURES> m_tfx_textures{};
std::array<VkSampler, NUM_TFX_SAMPLERS> m_tfx_samplers{}; VkSampler m_tfx_sampler = VK_NULL_HANDLE;
std::array<u32, NUM_TFX_SAMPLERS> m_tfx_sampler_sel{}; u32 m_tfx_sampler_sel = 0;
std::array<VkDescriptorSet, NUM_TFX_DESCRIPTOR_SETS> m_tfx_descriptor_sets{}; std::array<VkDescriptorSet, NUM_TFX_DESCRIPTOR_SETS> m_tfx_descriptor_sets{};
std::array<u32, NUM_TFX_DYNAMIC_OFFSETS> m_tfx_dynamic_offsets{}; std::array<u32, NUM_TFX_DYNAMIC_OFFSETS> m_tfx_dynamic_offsets{};