mirror of https://github.com/PCSX2/pcsx2.git
GS/HW: Use texel fetch/load instead of sampler for palettes
Saves a multiply in the shaders in some cases, and frees up one descriptor slot.
This commit is contained in:
parent
3ffa5eb613
commit
0619555232
|
@ -130,7 +130,6 @@ Texture2D<float4> Palette : register(t1);
|
|||
Texture2D<float4> RtTexture : register(t2);
|
||||
Texture2D<float> PrimMinTexture : register(t3);
|
||||
SamplerState TextureSampler : register(s0);
|
||||
SamplerState PaletteSampler : register(s1);
|
||||
|
||||
#ifdef DX12
|
||||
cbuffer cb0 : register(b0)
|
||||
|
@ -207,9 +206,14 @@ float4 sample_c(float2 uv, float uv_w)
|
|||
#endif
|
||||
}
|
||||
|
||||
float4 sample_p(float u)
|
||||
float4 sample_p(uint u)
|
||||
{
|
||||
return Palette.Sample(PaletteSampler, u);
|
||||
return Palette.Load(int3(int(u), 0, 0));
|
||||
}
|
||||
|
||||
float4 sample_p_norm(float u)
|
||||
{
|
||||
return sample_p(uint(u * 255.5f));
|
||||
}
|
||||
|
||||
float4 clamp_wrap_uv(float4 uv)
|
||||
|
@ -278,7 +282,7 @@ float4x4 sample_4c(float4 uv, float uv_w)
|
|||
return c;
|
||||
}
|
||||
|
||||
float4 sample_4_index(float4 uv, float uv_w)
|
||||
uint4 sample_4_index(float4 uv, float uv_w)
|
||||
{
|
||||
float4 c;
|
||||
|
||||
|
@ -288,25 +292,26 @@ float4 sample_4_index(float4 uv, float uv_w)
|
|||
c.w = sample_c(uv.zw, uv_w).a;
|
||||
|
||||
// Denormalize value
|
||||
uint4 i = uint4(c * 255.0f + 0.5f);
|
||||
uint4 i = uint4(c * 255.5f);
|
||||
|
||||
if (PS_PAL_FMT == 1)
|
||||
{
|
||||
// 4HL
|
||||
c = float4(i & 0xFu) / 255.0f;
|
||||
return i & 0xFu;
|
||||
}
|
||||
else if (PS_PAL_FMT == 2)
|
||||
{
|
||||
// 4HH
|
||||
c = float4(i >> 4u) / 255.0f;
|
||||
return i >> 4u;
|
||||
}
|
||||
else
|
||||
{
|
||||
// 8
|
||||
return i;
|
||||
}
|
||||
|
||||
// Most of texture will hit this code so keep normalized float value
|
||||
// 8 bits
|
||||
return c * 255./256 + 0.5/256;
|
||||
}
|
||||
|
||||
float4x4 sample_4p(float4 u)
|
||||
float4x4 sample_4p(uint4 u)
|
||||
{
|
||||
float4x4 c;
|
||||
|
||||
|
@ -468,7 +473,7 @@ float4 fetch_red(int2 xy)
|
|||
rt = fetch_raw_color(xy);
|
||||
}
|
||||
|
||||
return sample_p(rt.r) * 255.0f;
|
||||
return sample_p_norm(rt.r) * 255.0f;
|
||||
}
|
||||
|
||||
float4 fetch_green(int2 xy)
|
||||
|
@ -485,7 +490,7 @@ float4 fetch_green(int2 xy)
|
|||
rt = fetch_raw_color(xy);
|
||||
}
|
||||
|
||||
return sample_p(rt.g) * 255.0f;
|
||||
return sample_p_norm(rt.g) * 255.0f;
|
||||
}
|
||||
|
||||
float4 fetch_blue(int2 xy)
|
||||
|
@ -502,19 +507,19 @@ float4 fetch_blue(int2 xy)
|
|||
rt = fetch_raw_color(xy);
|
||||
}
|
||||
|
||||
return sample_p(rt.b) * 255.0f;
|
||||
return sample_p_norm(rt.b) * 255.0f;
|
||||
}
|
||||
|
||||
float4 fetch_alpha(int2 xy)
|
||||
{
|
||||
float4 rt = fetch_raw_color(xy);
|
||||
return sample_p(rt.a) * 255.0f;
|
||||
return sample_p_norm(rt.a) * 255.0f;
|
||||
}
|
||||
|
||||
float4 fetch_rgb(int2 xy)
|
||||
{
|
||||
float4 rt = fetch_raw_color(xy);
|
||||
float4 c = float4(sample_p(rt.r).r, sample_p(rt.g).g, sample_p(rt.b).b, 1.0);
|
||||
float4 c = float4(sample_p_norm(rt.r).r, sample_p_norm(rt.g).g, sample_p_norm(rt.b).b, 1.0);
|
||||
return c * 255.0f;
|
||||
}
|
||||
|
||||
|
|
|
@ -133,9 +133,14 @@ vec4 sample_c(vec2 uv)
|
|||
#endif
|
||||
}
|
||||
|
||||
vec4 sample_p(float idx)
|
||||
vec4 sample_p(uint idx)
|
||||
{
|
||||
return texture(PaletteSampler, vec2(idx, 0.0f));
|
||||
return texelFetch(PaletteSampler, ivec2(int(idx), 0), 0);
|
||||
}
|
||||
|
||||
vec4 sample_p_norm(float u)
|
||||
{
|
||||
return sample_p(uint(u * 255.5f));
|
||||
}
|
||||
|
||||
vec4 clamp_wrap_uv(vec4 uv)
|
||||
|
@ -202,7 +207,7 @@ mat4 sample_4c(vec4 uv)
|
|||
return c;
|
||||
}
|
||||
|
||||
vec4 sample_4_index(vec4 uv)
|
||||
uvec4 sample_4_index(vec4 uv)
|
||||
{
|
||||
vec4 c;
|
||||
|
||||
|
@ -218,26 +223,22 @@ vec4 sample_4_index(vec4 uv)
|
|||
c.z = sample_c(uv.xw).a;
|
||||
c.w = sample_c(uv.zw).a;
|
||||
|
||||
uvec4 i = uvec4(c * 255.0f + 0.5f); // Denormalize value
|
||||
uvec4 i = uvec4(c * 255.5f); // Denormalize value
|
||||
|
||||
#if PS_PAL_FMT == 1
|
||||
// 4HL
|
||||
return vec4(i & 0xFu) / 255.0f;
|
||||
|
||||
// 4HL: the low nibble is the palette index (statement terminator was missing).
return i & 0xFu;
|
||||
#elif PS_PAL_FMT == 2
|
||||
// 4HH
|
||||
return vec4(i >> 4u) / 255.0f;
|
||||
|
||||
return i >> 4u;
|
||||
#else
|
||||
// Most of texture will hit this code so keep normalized float value
|
||||
|
||||
// 8 bits
|
||||
return c;
|
||||
// 8
|
||||
return i;
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
mat4 sample_4p(vec4 u)
|
||||
mat4 sample_4p(uvec4 u)
|
||||
{
|
||||
mat4 c;
|
||||
|
||||
|
@ -398,7 +399,7 @@ vec4 fetch_red()
|
|||
#else
|
||||
vec4 rt = fetch_raw_color();
|
||||
#endif
|
||||
return sample_p(rt.r) * 255.0f;
|
||||
return sample_p_norm(rt.r) * 255.0f;
|
||||
}
|
||||
|
||||
vec4 fetch_green()
|
||||
|
@ -409,7 +410,7 @@ vec4 fetch_green()
|
|||
#else
|
||||
vec4 rt = fetch_raw_color();
|
||||
#endif
|
||||
return sample_p(rt.g) * 255.0f;
|
||||
return sample_p_norm(rt.g) * 255.0f;
|
||||
}
|
||||
|
||||
vec4 fetch_blue()
|
||||
|
@ -420,19 +421,19 @@ vec4 fetch_blue()
|
|||
#else
|
||||
vec4 rt = fetch_raw_color();
|
||||
#endif
|
||||
return sample_p(rt.b) * 255.0f;
|
||||
return sample_p_norm(rt.b) * 255.0f;
|
||||
}
|
||||
|
||||
vec4 fetch_alpha()
|
||||
{
|
||||
vec4 rt = fetch_raw_color();
|
||||
return sample_p(rt.a) * 255.0f;
|
||||
return sample_p_norm(rt.a) * 255.0f;
|
||||
}
|
||||
|
||||
vec4 fetch_rgb()
|
||||
{
|
||||
vec4 rt = fetch_raw_color();
|
||||
vec4 c = vec4(sample_p(rt.r).r, sample_p(rt.g).g, sample_p(rt.b).b, 1.0f);
|
||||
vec4 c = vec4(sample_p_norm(rt.r).r, sample_p_norm(rt.g).g, sample_p_norm(rt.b).b, 1.0f);
|
||||
return c * 255.0f;
|
||||
}
|
||||
|
||||
|
|
|
@ -390,7 +390,7 @@ layout(location = 0) out vec4 o_col0;
|
|||
#endif
|
||||
|
||||
layout(set = 1, binding = 0) uniform sampler2D Texture;
|
||||
layout(set = 1, binding = 1) uniform sampler2D Palette;
|
||||
layout(set = 1, binding = 1) uniform texture2D Palette;
|
||||
|
||||
#if PS_FEEDBACK_LOOP_IS_NEEDED
|
||||
#ifndef DISABLE_TEXTURE_BARRIER
|
||||
|
@ -443,9 +443,14 @@ vec4 sample_c(vec2 uv)
|
|||
#endif
|
||||
}
|
||||
|
||||
vec4 sample_p(float u)
|
||||
vec4 sample_p(uint idx)
|
||||
{
|
||||
return texture(Palette, vec2(u, 0.0f));
|
||||
return texelFetch(Palette, ivec2(int(idx), 0), 0);
|
||||
}
|
||||
|
||||
vec4 sample_p_norm(float u)
|
||||
{
|
||||
return sample_p(uint(u * 255.5f));
|
||||
}
|
||||
|
||||
vec4 clamp_wrap_uv(vec4 uv)
|
||||
|
@ -519,7 +524,7 @@ mat4 sample_4c(vec4 uv)
|
|||
return c;
|
||||
}
|
||||
|
||||
vec4 sample_4_index(vec4 uv)
|
||||
uvec4 sample_4_index(vec4 uv)
|
||||
{
|
||||
vec4 c;
|
||||
|
||||
|
@ -533,18 +538,17 @@ vec4 sample_4_index(vec4 uv)
|
|||
|
||||
#if PS_PAL_FMT == 1
|
||||
// 4HL
|
||||
c = vec4(i & 0xFu) / 255.0f;
|
||||
// Return the integer index directly: this function yields uvec4, so assigning
// to the float temporary would leave this branch without a return value.
return i & 0xFu;
|
||||
#elif PS_PAL_FMT == 2
|
||||
// 4HH
|
||||
c = vec4(i >> 4u) / 255.0f;
|
||||
// Return the integer index directly: this function yields uvec4, so assigning
// to the float temporary would leave this branch without a return value.
return i >> 4u;
|
||||
#else
|
||||
// 8
|
||||
return i;
|
||||
#endif
|
||||
|
||||
// Most of texture will hit this code so keep normalized float value
|
||||
// 8 bits
|
||||
return c * 255./256 + 0.5/256;
|
||||
}
|
||||
|
||||
mat4 sample_4p(vec4 u)
|
||||
mat4 sample_4p(uvec4 u)
|
||||
{
|
||||
mat4 c;
|
||||
|
||||
|
@ -709,7 +713,7 @@ vec4 fetch_red(ivec2 xy)
|
|||
rt = fetch_raw_color(xy);
|
||||
#endif
|
||||
|
||||
return sample_p(rt.r) * 255.0f;
|
||||
return sample_p_norm(rt.r) * 255.0f;
|
||||
}
|
||||
|
||||
vec4 fetch_green(ivec2 xy)
|
||||
|
@ -723,7 +727,7 @@ vec4 fetch_green(ivec2 xy)
|
|||
rt = fetch_raw_color(xy);
|
||||
#endif
|
||||
|
||||
return sample_p(rt.g) * 255.0f;
|
||||
return sample_p_norm(rt.g) * 255.0f;
|
||||
}
|
||||
|
||||
vec4 fetch_blue(ivec2 xy)
|
||||
|
@ -737,19 +741,19 @@ vec4 fetch_blue(ivec2 xy)
|
|||
rt = fetch_raw_color(xy);
|
||||
#endif
|
||||
|
||||
return sample_p(rt.b) * 255.0f;
|
||||
return sample_p_norm(rt.b) * 255.0f;
|
||||
}
|
||||
|
||||
vec4 fetch_alpha(ivec2 xy)
|
||||
{
|
||||
vec4 rt = fetch_raw_color(xy);
|
||||
return sample_p(rt.a) * 255.0f;
|
||||
return sample_p_norm(rt.a) * 255.0f;
|
||||
}
|
||||
|
||||
vec4 fetch_rgb(ivec2 xy)
|
||||
{
|
||||
vec4 rt = fetch_raw_color(xy);
|
||||
vec4 c = vec4(sample_p(rt.r).r, sample_p(rt.g).g, sample_p(rt.b).b, 1.0);
|
||||
vec4 c = vec4(sample_p_norm(rt.r).r, sample_p_norm(rt.g).g, sample_p_norm(rt.b).b, 1.0);
|
||||
return c * 255.0f;
|
||||
}
|
||||
|
||||
|
|
|
@ -682,7 +682,7 @@ void GSDevice11::StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
|
|||
// ps
|
||||
|
||||
PSSetShaderResources(sTex, nullptr);
|
||||
PSSetSamplerState(linear ? m_convert.ln.get() : m_convert.pt.get(), nullptr);
|
||||
PSSetSamplerState(linear ? m_convert.ln.get() : m_convert.pt.get());
|
||||
PSSetShader(ps, ps_cb);
|
||||
|
||||
//
|
||||
|
@ -759,7 +759,7 @@ void GSDevice11::PresentRect(GSTexture* sTex, const GSVector4& sRect, GSTexture*
|
|||
// ps
|
||||
|
||||
PSSetShaderResources(sTex, nullptr);
|
||||
PSSetSamplerState(linear ? m_convert.ln.get() : m_convert.pt.get(), nullptr);
|
||||
PSSetSamplerState(linear ? m_convert.ln.get() : m_convert.pt.get());
|
||||
PSSetShader(m_present.ps[static_cast<u32>(shader)].get(), m_present.ps_cb.get());
|
||||
|
||||
//
|
||||
|
@ -958,7 +958,7 @@ void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vert
|
|||
|
||||
// ps
|
||||
PSSetShaderResources(rt, nullptr);
|
||||
PSSetSamplerState(m_convert.pt.get(), nullptr);
|
||||
PSSetSamplerState(m_convert.pt.get());
|
||||
PSSetShader(m_convert.ps[static_cast<int>(datm ? ShaderConvert::DATM_1 : ShaderConvert::DATM_0)].get(), nullptr);
|
||||
|
||||
//
|
||||
|
@ -1184,10 +1184,9 @@ void GSDevice11::PSSetShaderResource(int i, GSTexture* sr)
|
|||
m_state.ps_sr_views[i] = sr ? static_cast<ID3D11ShaderResourceView*>(*static_cast<GSTexture11*>(sr)) : nullptr;
|
||||
}
|
||||
|
||||
void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1)
|
||||
void GSDevice11::PSSetSamplerState(ID3D11SamplerState* ss0)
|
||||
{
|
||||
m_state.ps_ss[0] = ss0;
|
||||
m_state.ps_ss[1] = ss1;
|
||||
}
|
||||
|
||||
void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
|
||||
|
|
|
@ -110,7 +110,7 @@ public:
|
|||
|
||||
private:
|
||||
static constexpr u32 MAX_TEXTURES = 4;
|
||||
static constexpr u32 MAX_SAMPLERS = 2;
|
||||
static constexpr u32 MAX_SAMPLERS = 1;
|
||||
|
||||
int m_d3d_texsize;
|
||||
|
||||
|
@ -221,7 +221,6 @@ private:
|
|||
std::unordered_map<PSSelector, wil::com_ptr_nothrow<ID3D11PixelShader>, GSHWDrawConfig::PSSelectorHash> m_ps;
|
||||
wil::com_ptr_nothrow<ID3D11Buffer> m_ps_cb;
|
||||
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11SamplerState>> m_ps_ss;
|
||||
wil::com_ptr_nothrow<ID3D11SamplerState> m_palette_ss;
|
||||
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11DepthStencilState>> m_om_dss;
|
||||
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11BlendState>> m_om_bs;
|
||||
wil::com_ptr_nothrow<ID3D11RasterizerState> m_rs;
|
||||
|
@ -287,7 +286,7 @@ public:
|
|||
void PSSetShaderResource(int i, GSTexture* sr);
|
||||
void PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb);
|
||||
void PSUpdateShaderState();
|
||||
void PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1);
|
||||
void PSSetSamplerState(ID3D11SamplerState* ss0);
|
||||
|
||||
void OMSetDepthStencilState(ID3D11DepthStencilState* dss, u8 sref);
|
||||
void OMSetBlendState(ID3D11BlendState* bs, float bf);
|
||||
|
|
|
@ -43,24 +43,6 @@ bool GSDevice11::CreateTextureFX()
|
|||
|
||||
hr = m_dev->CreateBuffer(&bd, nullptr, m_ps_cb.put());
|
||||
|
||||
if (FAILED(hr))
|
||||
return false;
|
||||
|
||||
D3D11_SAMPLER_DESC sd;
|
||||
|
||||
memset(&sd, 0, sizeof(sd));
|
||||
|
||||
sd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT;
|
||||
sd.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
|
||||
sd.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
|
||||
sd.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
|
||||
sd.MinLOD = -FLT_MAX;
|
||||
sd.MaxLOD = FLT_MAX;
|
||||
sd.MaxAnisotropy = 1;
|
||||
sd.ComparisonFunc = D3D11_COMPARISON_NEVER;
|
||||
|
||||
hr = m_dev->CreateSamplerState(&sd, m_palette_ss.put());
|
||||
|
||||
if (FAILED(hr))
|
||||
return false;
|
||||
|
||||
|
@ -212,7 +194,7 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
|
|||
m_ctx->UpdateSubresource(m_ps_cb.get(), 0, NULL, cb, 0, 0);
|
||||
}
|
||||
|
||||
wil::com_ptr_nothrow<ID3D11SamplerState> ss0, ss1;
|
||||
wil::com_ptr_nothrow<ID3D11SamplerState> ss0;
|
||||
|
||||
if (sel.tfx != 4)
|
||||
{
|
||||
|
@ -267,14 +249,9 @@ void GSDevice11::SetupPS(const PSSelector& sel, const GSHWDrawConfig::PSConstant
|
|||
|
||||
m_ps_ss[ssel.key] = ss0;
|
||||
}
|
||||
|
||||
if (sel.pal_fmt)
|
||||
{
|
||||
ss1 = m_palette_ss;
|
||||
}
|
||||
}
|
||||
|
||||
PSSetSamplerState(ss0.get(), ss1.get());
|
||||
PSSetSamplerState(ss0.get());
|
||||
|
||||
PSSetShader(i->second.get(), m_ps_cb.get());
|
||||
}
|
||||
|
|
|
@ -1143,7 +1143,7 @@ bool GSDevice12::CreateRootSignatures()
|
|||
rsb.AddCBVParameter(0, D3D12_SHADER_VISIBILITY_ALL);
|
||||
rsb.AddCBVParameter(1, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, 2, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, 0, NUM_TFX_SAMPLERS, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
rsb.AddDescriptorTable(D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 2, 2, D3D12_SHADER_VISIBILITY_PIXEL);
|
||||
if (!(m_tfx_root_signature = rsb.Create()))
|
||||
return false;
|
||||
|
@ -1806,8 +1806,7 @@ void GSDevice12::InitializeState()
|
|||
{
|
||||
for (u32 i = 0; i < NUM_TOTAL_TFX_TEXTURES; i++)
|
||||
m_tfx_textures[i] = m_null_texture.GetSRVDescriptor();
|
||||
for (u32 i = 0; i < NUM_TFX_SAMPLERS; i++)
|
||||
m_tfx_sampler_sel[i] = GSHWDrawConfig::SamplerSelector::Point().key;
|
||||
m_tfx_sampler_sel = GSHWDrawConfig::SamplerSelector::Point().key;
|
||||
|
||||
InvalidateCachedState();
|
||||
}
|
||||
|
@ -1816,9 +1815,7 @@ void GSDevice12::InitializeSamplers()
|
|||
{
|
||||
bool result = GetSampler(&m_point_sampler_cpu, GSHWDrawConfig::SamplerSelector::Point());
|
||||
result = result && GetSampler(&m_linear_sampler_cpu, GSHWDrawConfig::SamplerSelector::Linear());
|
||||
|
||||
for (u32 i = 0; i < NUM_TFX_SAMPLERS; i++)
|
||||
result = result && GetSampler(&m_tfx_samplers[i], m_tfx_sampler_sel[i]);
|
||||
result = result && GetSampler(&m_tfx_sampler, m_tfx_sampler_sel);
|
||||
|
||||
if (!result)
|
||||
pxFailRel("Failed to initialize samplers");
|
||||
|
@ -1970,13 +1967,13 @@ void GSDevice12::PSSetShaderResource(int i, GSTexture* sr, bool check_state)
|
|||
m_dirty_flags |= (i < 2) ? DIRTY_FLAG_TFX_TEXTURES : DIRTY_FLAG_TFX_RT_TEXTURES;
|
||||
}
|
||||
|
||||
void GSDevice12::PSSetSampler(u32 index, GSHWDrawConfig::SamplerSelector sel)
|
||||
void GSDevice12::PSSetSampler(GSHWDrawConfig::SamplerSelector sel)
|
||||
{
|
||||
if (m_tfx_sampler_sel[index] == sel.key)
|
||||
if (m_tfx_sampler_sel == sel.key)
|
||||
return;
|
||||
|
||||
GetSampler(&m_tfx_samplers[index], sel);
|
||||
m_tfx_sampler_sel[index] = sel.key;
|
||||
GetSampler(&m_tfx_sampler, sel);
|
||||
m_tfx_sampler_sel = sel.key;
|
||||
m_dirty_flags |= DIRTY_FLAG_TFX_SAMPLERS;
|
||||
}
|
||||
|
||||
|
@ -2330,7 +2327,7 @@ bool GSDevice12::ApplyTFXState(bool already_execed)
|
|||
|
||||
if (flags & DIRTY_FLAG_TFX_SAMPLERS)
|
||||
{
|
||||
if (!g_d3d12_context->GetSamplerAllocator().LookupGroup(&m_tfx_samplers_handle_gpu, m_tfx_samplers.data()))
|
||||
if (!g_d3d12_context->GetSamplerAllocator().LookupSingle(&m_tfx_samplers_handle_gpu, m_tfx_sampler))
|
||||
{
|
||||
ExecuteCommandListAndRestartRenderPass(false, "Ran out of sampler groups");
|
||||
return ApplyTFXState(true);
|
||||
|
@ -2555,7 +2552,7 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)
|
|||
if (config.tex)
|
||||
{
|
||||
PSSetShaderResource(0, config.tex, config.tex != config.rt);
|
||||
PSSetSampler(0, config.sampler);
|
||||
PSSetSampler(config.sampler);
|
||||
}
|
||||
if (config.pal)
|
||||
PSSetShaderResource(1, config.pal, true);
|
||||
|
|
|
@ -112,7 +112,7 @@ public:
|
|||
NUM_TFX_TEXTURES = 2,
|
||||
NUM_TFX_RT_TEXTURES = 2,
|
||||
NUM_TOTAL_TFX_TEXTURES = NUM_TFX_TEXTURES + NUM_TFX_RT_TEXTURES,
|
||||
NUM_TFX_SAMPLERS = 2,
|
||||
NUM_TFX_SAMPLERS = 1,
|
||||
NUM_UTILITY_TEXTURES = 1,
|
||||
NUM_UTILITY_SAMPLERS = 1,
|
||||
CONVERT_PUSH_CONSTANTS_SIZE = 96,
|
||||
|
@ -279,7 +279,7 @@ public:
|
|||
void IASetIndexBuffer(const void* index, size_t count);
|
||||
|
||||
void PSSetShaderResource(int i, GSTexture* sr, bool check_state);
|
||||
void PSSetSampler(u32 index, GSHWDrawConfig::SamplerSelector sel);
|
||||
void PSSetSampler(GSHWDrawConfig::SamplerSelector sel);
|
||||
|
||||
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor);
|
||||
|
||||
|
@ -404,8 +404,8 @@ private:
|
|||
|
||||
std::array<D3D12_GPU_VIRTUAL_ADDRESS, NUM_TFX_CONSTANT_BUFFERS> m_tfx_constant_buffers{};
|
||||
std::array<D3D12::DescriptorHandle, NUM_TOTAL_TFX_TEXTURES> m_tfx_textures{};
|
||||
std::array<D3D12::DescriptorHandle, NUM_TFX_SAMPLERS> m_tfx_samplers{};
|
||||
std::array<u32, NUM_TFX_SAMPLERS> m_tfx_sampler_sel{};
|
||||
D3D12::DescriptorHandle m_tfx_sampler;
|
||||
u32 m_tfx_sampler_sel = 0;
|
||||
D3D12::DescriptorHandle m_tfx_textures_handle_gpu;
|
||||
D3D12::DescriptorHandle m_tfx_samplers_handle_gpu;
|
||||
D3D12::DescriptorHandle m_tfx_rt_textures_handle_gpu;
|
||||
|
|
|
@ -3244,7 +3244,7 @@ void GSTextureCache::Palette::InitializeTexture()
|
|||
// sampling such texture are always normalized by 255.
|
||||
// This is because indexes are stored as normalized values of an RGBA texture (e.g. index 15 will be read as (15/255),
|
||||
// and therefore will read texel 15/255 * texture size).
|
||||
m_tex_palette = g_gs_device->CreateTexture(256, 1, 1, GSTexture::Format::Color);
|
||||
m_tex_palette = g_gs_device->CreateTexture(m_pal, 1, 1, GSTexture::Format::Color);
|
||||
m_tex_palette->Update(GSVector4i(0, 0, m_pal, 1), m_clut, m_pal * sizeof(m_clut[0]));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -283,8 +283,6 @@ vertex MainVSOut vs_main_expand(
|
|||
|
||||
// MARK: - Fragment functions
|
||||
|
||||
constexpr sampler palette_sampler(filter::nearest, address::clamp_to_edge);
|
||||
|
||||
struct PSMain
|
||||
{
|
||||
texture2d<float> tex;
|
||||
|
@ -349,9 +347,14 @@ struct PSMain
|
|||
}
|
||||
}
|
||||
|
||||
float4 sample_p(float idx)
|
||||
float4 sample_p(uint idx)
|
||||
{
|
||||
return palette.sample(palette_sampler, float2(idx, 0));
|
||||
return palette.read(uint2(idx, 0));
|
||||
}
|
||||
|
||||
float4 sample_p_norm(float u)
|
||||
{
|
||||
return sample_p(uint(u * 255.5f));
|
||||
}
|
||||
|
||||
float4 clamp_wrap_uv(float4 uv)
|
||||
|
@ -415,7 +418,7 @@ struct PSMain
|
|||
};
|
||||
}
|
||||
|
||||
float4 sample_4_index(float4 uv)
|
||||
uint4 sample_4_index(float4 uv)
|
||||
{
|
||||
float4 c;
|
||||
|
||||
|
@ -432,15 +435,14 @@ struct PSMain
|
|||
uint4 i = uint4(c * 255.5f); // Denormalize value
|
||||
|
||||
if (PS_PAL_FMT == 1)
|
||||
return float4(i & 0xF) / 255.f;
|
||||
return i & 0xF;
|
||||
if (PS_PAL_FMT == 2)
|
||||
return float4(i >> 4) / 255.f;
|
||||
return i >> 4;
|
||||
|
||||
// Most textures will hit this code so keep normalized float value
|
||||
return c;
|
||||
return i;
|
||||
}
|
||||
|
||||
float4x4 sample_4p(float4 u)
|
||||
float4x4 sample_4p(uint4 u)
|
||||
{
|
||||
return {
|
||||
sample_p(u.x),
|
||||
|
@ -559,30 +561,30 @@ struct PSMain
|
|||
float4 fetch_red()
|
||||
{
|
||||
float rt = PS_TEX_IS_DEPTH ? float(fetch_raw_depth() & 0xFF) / 255.f : fetch_raw_color().r;
|
||||
return sample_p(rt) * 255.f;
|
||||
return sample_p_norm(rt) * 255.f;
|
||||
}
|
||||
|
||||
float4 fetch_green()
|
||||
{
|
||||
float rt = PS_TEX_IS_DEPTH ? float((fetch_raw_depth() >> 8) & 0xFF) / 255.f : fetch_raw_color().g;
|
||||
return sample_p(rt) * 255.f;
|
||||
return sample_p_norm(rt) * 255.f;
|
||||
}
|
||||
|
||||
float4 fetch_blue()
|
||||
{
|
||||
float rt = PS_TEX_IS_DEPTH ? float((fetch_raw_depth() >> 16) & 0xFF) / 255.f : fetch_raw_color().b;
|
||||
return sample_p(rt) * 255.f;
|
||||
return sample_p_norm(rt) * 255.f;
|
||||
}
|
||||
|
||||
float4 fetch_alpha()
|
||||
{
|
||||
return sample_p(fetch_raw_color().a) * 255.f;
|
||||
return sample_p_norm(fetch_raw_color().a) * 255.f;
|
||||
}
|
||||
|
||||
float4 fetch_rgb()
|
||||
{
|
||||
float4 rt = fetch_raw_color();
|
||||
return float4(sample_p(rt.r).r, sample_p(rt.g).g, sample_p(rt.b).b, 1) * 255.f;
|
||||
return float4(sample_p_norm(rt.r).r, sample_p_norm(rt.g).g, sample_p_norm(rt.b).b, 1) * 255.f;
|
||||
}
|
||||
|
||||
float4 fetch_gXbY()
|
||||
|
|
|
@ -1084,9 +1084,7 @@ void GSDeviceVK::ClearSamplerCache()
|
|||
m_point_sampler = GetSampler(GSHWDrawConfig::SamplerSelector::Point());
|
||||
m_linear_sampler = GetSampler(GSHWDrawConfig::SamplerSelector::Linear());
|
||||
m_utility_sampler = m_point_sampler;
|
||||
|
||||
for (u32 i = 0; i < std::size(m_tfx_samplers); i++)
|
||||
m_tfx_samplers[i] = GetSampler(m_tfx_sampler_sel[i]);
|
||||
// Rebuild from the cached selector so m_tfx_sampler stays consistent with
// m_tfx_sampler_sel; PSSetSampler() early-outs when the key is unchanged and
// would otherwise never restore the correct sampler.
m_tfx_sampler = GetSampler(m_tfx_sampler_sel);
|
||||
}
|
||||
|
||||
static void AddMacro(std::stringstream& ss, const char* name, const char* value)
|
||||
|
@ -1236,8 +1234,8 @@ bool GSDeviceVK::CreatePipelineLayouts()
|
|||
if ((m_tfx_ubo_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::Util::SetObjectName(dev, m_tfx_ubo_ds_layout, "TFX UBO descriptor layout");
|
||||
for (u32 i = 0; i < NUM_TFX_SAMPLERS; i++)
|
||||
dslb.AddBinding(i, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
dslb.AddBinding(0, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
dslb.AddBinding(1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, 1, VK_SHADER_STAGE_FRAGMENT_BIT);
|
||||
if ((m_tfx_sampler_ds_layout = dslb.Create(dev)) == VK_NULL_HANDLE)
|
||||
return false;
|
||||
Vulkan::Util::SetObjectName(dev, m_tfx_sampler_ds_layout, "TFX sampler descriptor layout");
|
||||
|
@ -2308,11 +2306,8 @@ void GSDeviceVK::InitializeState()
|
|||
if (m_linear_sampler)
|
||||
// Name the linear sampler itself (the guard checks m_linear_sampler).
Vulkan::Util::SetObjectName(g_vulkan_context->GetDevice(), m_linear_sampler, "Linear sampler");
|
||||
|
||||
for (u32 i = 0; i < NUM_TFX_SAMPLERS; i++)
|
||||
{
|
||||
m_tfx_sampler_sel[i] = GSHWDrawConfig::SamplerSelector::Point().key;
|
||||
m_tfx_samplers[i] = m_point_sampler;
|
||||
}
|
||||
m_tfx_sampler_sel = GSHWDrawConfig::SamplerSelector::Point().key;
|
||||
m_tfx_sampler = m_point_sampler;
|
||||
|
||||
InvalidateCachedState();
|
||||
}
|
||||
|
@ -2463,13 +2458,13 @@ void GSDeviceVK::PSSetShaderResource(int i, GSTexture* sr, bool check_state)
|
|||
m_dirty_flags |= (i < 2) ? DIRTY_FLAG_TFX_SAMPLERS_DS : DIRTY_FLAG_TFX_RT_TEXTURE_DS;
|
||||
}
|
||||
|
||||
void GSDeviceVK::PSSetSampler(u32 index, GSHWDrawConfig::SamplerSelector sel)
|
||||
void GSDeviceVK::PSSetSampler(GSHWDrawConfig::SamplerSelector sel)
|
||||
{
|
||||
if (m_tfx_sampler_sel[index] == sel.key)
|
||||
if (m_tfx_sampler_sel == sel.key)
|
||||
return;
|
||||
|
||||
m_tfx_sampler_sel[index] = sel.key;
|
||||
m_tfx_samplers[index] = GetSampler(sel);
|
||||
m_tfx_sampler_sel = sel.key;
|
||||
m_tfx_sampler = GetSampler(sel);
|
||||
m_dirty_flags |= DIRTY_FLAG_TFX_SAMPLERS_DS;
|
||||
}
|
||||
|
||||
|
@ -2739,8 +2734,8 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed)
|
|||
return ApplyTFXState(true);
|
||||
}
|
||||
|
||||
dsub.AddCombinedImageSamplerDescriptorWrites(
|
||||
ds, 0, m_tfx_textures.data(), m_tfx_samplers.data(), NUM_TFX_SAMPLERS);
|
||||
dsub.AddCombinedImageSamplerDescriptorWrite(ds, 0, m_tfx_textures[0], m_tfx_sampler);
|
||||
dsub.AddImageDescriptorWrite(ds, 1, m_tfx_textures[1]);
|
||||
dsub.Update(dev);
|
||||
|
||||
m_tfx_descriptor_sets[1] = ds;
|
||||
|
@ -2764,10 +2759,10 @@ bool GSDeviceVK::ApplyTFXState(bool already_execed)
|
|||
}
|
||||
|
||||
if (m_features.texture_barrier)
|
||||
dsub.AddInputAttachmentDescriptorWrite(ds, 0, m_tfx_textures[NUM_TFX_SAMPLERS]);
|
||||
dsub.AddInputAttachmentDescriptorWrite(ds, 0, m_tfx_textures[NUM_TFX_DRAW_TEXTURES]);
|
||||
else
|
||||
dsub.AddImageDescriptorWrite(ds, 0, m_tfx_textures[NUM_TFX_SAMPLERS]);
|
||||
dsub.AddImageDescriptorWrite(ds, 1, m_tfx_textures[NUM_TFX_SAMPLERS + 1]);
|
||||
dsub.AddImageDescriptorWrite(ds, 0, m_tfx_textures[NUM_TFX_DRAW_TEXTURES]);
|
||||
dsub.AddImageDescriptorWrite(ds, 1, m_tfx_textures[NUM_TFX_DRAW_TEXTURES + 1]);
|
||||
dsub.Update(dev);
|
||||
|
||||
m_tfx_descriptor_sets[2] = ds;
|
||||
|
@ -3028,7 +3023,7 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
|
|||
if (config.tex)
|
||||
{
|
||||
PSSetShaderResource(0, config.tex, config.tex != config.rt);
|
||||
PSSetSampler(0, config.sampler);
|
||||
PSSetSampler(config.sampler);
|
||||
}
|
||||
if (config.pal)
|
||||
PSSetShaderResource(1, config.pal, true);
|
||||
|
|
|
@ -72,9 +72,9 @@ public:
|
|||
{
|
||||
NUM_TFX_DESCRIPTOR_SETS = 3,
|
||||
NUM_TFX_DYNAMIC_OFFSETS = 2,
|
||||
NUM_TFX_SAMPLERS = 2,
|
||||
NUM_TFX_DRAW_TEXTURES = 2,
|
||||
NUM_TFX_RT_TEXTURES = 2,
|
||||
NUM_TFX_TEXTURES = NUM_TFX_SAMPLERS + NUM_TFX_RT_TEXTURES,
|
||||
NUM_TFX_TEXTURES = NUM_TFX_DRAW_TEXTURES + NUM_TFX_RT_TEXTURES,
|
||||
NUM_CONVERT_TEXTURES = 1,
|
||||
NUM_CONVERT_SAMPLERS = 1,
|
||||
CONVERT_PUSH_CONSTANTS_SIZE = 96,
|
||||
|
@ -260,7 +260,7 @@ public:
|
|||
void IASetIndexBuffer(const void* index, size_t count);
|
||||
|
||||
void PSSetShaderResource(int i, GSTexture* sr, bool check_state);
|
||||
void PSSetSampler(u32 index, GSHWDrawConfig::SamplerSelector sel);
|
||||
void PSSetSampler(GSHWDrawConfig::SamplerSelector sel);
|
||||
|
||||
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, bool feedback_loop);
|
||||
|
||||
|
@ -372,8 +372,8 @@ private:
|
|||
u8 m_blend_constant_color = 0;
|
||||
|
||||
std::array<VkImageView, NUM_TFX_TEXTURES> m_tfx_textures{};
|
||||
std::array<VkSampler, NUM_TFX_SAMPLERS> m_tfx_samplers{};
|
||||
std::array<u32, NUM_TFX_SAMPLERS> m_tfx_sampler_sel{};
|
||||
VkSampler m_tfx_sampler = VK_NULL_HANDLE;
|
||||
u32 m_tfx_sampler_sel = 0;
|
||||
std::array<VkDescriptorSet, NUM_TFX_DESCRIPTOR_SETS> m_tfx_descriptor_sets{};
|
||||
std::array<u32, NUM_TFX_DYNAMIC_OFFSETS> m_tfx_dynamic_offsets{};
|
||||
|
||||
|
|
Loading…
Reference in New Issue