GS: Unify constant buffers between renderers

This commit is contained in:
Connor McLaughlin 2021-12-21 17:41:45 +10:00 committed by lightningterror
parent 2a1c3d1f33
commit 08c00eed1e
9 changed files with 132 additions and 498 deletions

View File

@ -99,37 +99,33 @@ SamplerState PaletteSampler : register(s1);
// Vertex-stage constants. vs_main reads the vertex scale/offset and the
// texture scale/offset from this buffer.
// NOTE(review): both a float4 set (VertexScale, VertexOffset,
// Texture_Scale_Offset, uint3 pad) and a float2 set (VertexScale, VertexOffset,
// TextureScale, TextureOffset, PointSize, uint pad) are listed here, so this
// looks like two revisions of the layout shown together — confirm which
// declarations are live before relying on any offsets.
cbuffer cb0
{
float4 VertexScale;
float4 VertexOffset;
float4 Texture_Scale_Offset;
float2 VertexScale;
float2 VertexOffset;
float2 TextureScale;
float2 TextureOffset;
// Also declared in cb2 further down this listing.
float2 PointSize;
uint MaxDepth;
// Padding; presumably keeps the buffer size matched to the CPU-side struct — confirm.
uint3 pad_cb0;
uint pad_cb0;
};
// Pixel-stage constants.
// NOTE(review): several names appear twice (Af, MskFix, FbMask, HalfTexel,
// MinMax, ChannelShuffle, TC_OffsetHack), so this listing appears to show two
// revisions of the layout together. The run WH, TA/MaxDepthPS/Af, MskFix,
// FbMask, HalfTexel, MinMax, ChannelShuffle, TC_OffsetHack + pad_cb1,
// DitherMatrix follows the same order as the GSVector4 members of
// GSHWDrawConfig::PSConstantBuffer — confirm that is the live layout.
cbuffer cb1
{
// FogColor + AREF correspond to the single FogColor_AREF vector on the C++ side.
float3 FogColor;
float AREF;
float4 HalfTexel;
float4 WH;
float4 MinMax;
float2 MinF;
// TA, MaxDepthPS and Af correspond to TA_MaxDepth_Af (x/y, z, w) on the C++ side.
float2 TA;
uint4 MskFix;
int4 ChannelShuffle;
uint4 FbMask;
float4 TC_OffsetHack;
float Af;
float MaxDepthPS;
float Af;
uint4 MskFix;
uint4 FbMask;
float4 HalfTexel;
float4 MinMax;
int4 ChannelShuffle;
float2 TC_OffsetHack;
// Fills the slot after the float2 TC_OffsetHack; matches `float pad1[2]` in the C++ struct.
float2 pad_cb1;
float4x4 DitherMatrix;
};
// Holds only the point size.
// NOTE(review): cb0 in this listing also declares PointSize, and
// GSDevice11::SetupGS is shown binding m_vs_cb for the geometry shader, so this
// buffer looks superseded — confirm whether it still exists.
cbuffer cb2
{
float2 PointSize;
};
float4 sample_c(float2 uv)
{
if (PS_POINT_SAMPLER)
@ -801,17 +797,18 @@ VS_OUTPUT vs_main(VS_INPUT input)
// input granularity is 1/16 pixel, anything smaller than that won't step drawing up/left by one pixel
// example: 133.0625 (133 + 1/16) should start from line 134, ceil(133.0625 - 0.05) still above 133
float4 p = float4(input.p, input.z, 0) - float4(0.05f, 0.05f, 0, 0);
output.p = float4(input.p, input.z, 1.0f) - float4(0.05f, 0.05f, 0, 0);
output.p = p * VertexScale - VertexOffset;
output.p.xy = output.p.xy * float2(VertexScale.x, -VertexScale.y) - float2(VertexOffset.x, -VertexOffset.y);
output.p.z *= exp2(-32.0f); // integer->float depth
if(VS_TME)
{
float2 uv = input.uv - Texture_Scale_Offset.zw;
float2 st = input.st - Texture_Scale_Offset.zw;
float2 uv = input.uv - TextureOffset;
float2 st = input.st - TextureOffset;
// Integer normalized
output.ti.xy = uv * Texture_Scale_Offset.xy;
output.ti.xy = uv * TextureScale;
if (VS_FST)
{
@ -821,7 +818,7 @@ VS_OUTPUT vs_main(VS_INPUT input)
else
{
// float for post-processing in some games
output.ti.zw = st / Texture_Scale_Offset.xy;
output.ti.zw = st / TextureScale;
}
// Float coords
output.t.xy = st;

View File

@ -48,7 +48,6 @@ out gl_PerVertex {
layout(std140, binding = 15) uniform cb15
{
ivec4 ScalingFactor;
ivec4 ChannelShuffle;
int EMODA;
int EMODC;
@ -91,8 +90,10 @@ layout(std140, binding = 21) uniform cb21
vec4 MinMax;
vec2 pad_cb21;
ivec4 ChannelShuffle;
vec2 TC_OffsetHack;
vec2 pad_cb21;
mat4 DitherMatrix;
};

View File

@ -337,108 +337,81 @@ struct GSHWDrawConfig
GSVector2 texture_offset;
GSVector2 point_size;
GSVector2i max_depth;
VSConstantBuffer()
__fi VSConstantBuffer()
{
memset(this, 0, sizeof(*this));
}
VSConstantBuffer(const VSConstantBuffer& other)
__fi VSConstantBuffer(const VSConstantBuffer& other)
{
memcpy(this, &other, sizeof(*this));
}
VSConstantBuffer& operator=(const VSConstantBuffer& other)
__fi VSConstantBuffer& operator=(const VSConstantBuffer& other)
{
new (this) VSConstantBuffer(other);
return *this;
}
bool operator==(const VSConstantBuffer& other) const
__fi bool operator==(const VSConstantBuffer& other) const
{
return BitEqual(*this, other);
}
bool operator!=(const VSConstantBuffer& other) const
__fi bool operator!=(const VSConstantBuffer& other) const
{
return !(*this == other);
}
__fi bool Update(const VSConstantBuffer& other)
{
if (*this == other)
return false;
memcpy(this, &other, sizeof(*this));
return true;
}
};
// Pixel-shader constants filled in by the renderer and consumed by the device
// back ends.
// NOTE(review): this listing shows two representations side by side — packed
// u8/u16 unions with lower_snake names, and GSVector4/GSVector4i members whose
// order matches the shader-side cb1 layout — and every special member appears
// twice (with and without __fi). It looks like two revisions interleaved;
// confirm which members are live.
struct alignas(16) PSConstantBuffer
{
// Packed form: three fog colour bytes plus the alpha reference, aliased as one u32.
union
{
struct
{
u8 fog_color[3];
u8 aref;
};
u32 fog_color_aref;
};
// Packed form: per-channel framebuffer write mask, aliased as one u32.
union
{
struct
{
u8 r, g, b, a;
} fbmask;
u32 fbmask_int;
};
u32 max_depth;
// Packed form: TEXA TA0/TA1 and the fixed blend alpha, aliased as one u32.
union
{
struct
{
u8 ta0;
u8 ta1;
u8 _pad;
u8 alpha_fix;
};
u32 ta_af;
};
// Packed form: channel-shuffle masks/shifts, aliased as one u32.
union
{
struct
{
u8 blue_mask;
u8 blue_shift;
u8 green_mask;
u8 green_shift;
} channel_shuffle;
u32 channel_shuffle_int;
};
// Packed form: CLAMP MINU/MINV/MAXU/MAXV as 16-bit values, aliased as one u64.
union
{
struct
{
u16 umsk;
u16 vmsk;
u16 ufix;
u16 vfix;
};
u64 uv_msk_fix;
};
GIFRegDIMX dither_matrix;
GSVector2 tc_offset;
GSVector4 texture_size; // xy → PS2 size, wz → emulator size
// Vector form. xyz = fog colour, a = alpha-test reference (the renderer stores
// AREF here, biased by -0.1 for the less/greater style tests).
GSVector4 FogColor_AREF;
// Texture size; the renderer assigns the same WH value it used for texture_size.
GSVector4 WH;
// x/y = TA0/255 and TA1/255, z = max depth scaled by 2^-32, a = ALPHA.FIX / 128.
GSVector4 TA_MaxDepth_Af;
// CLAMP MINU, MINV, MAXU, MAXV.
GSVector4i MskFix;
// Per-channel framebuffer mask expanded to one 32-bit lane per channel.
GSVector4i FbMask;
GSVector4 half_texel;
GSVector4 uv_min_max;
PSConstantBuffer()
// (-0.5, 0.5).xxyy divided by WH.zwzw.
GSVector4 HalfTexel;
// MskFix / WH.xyxy for complex wrap modes; reused for mipmap parameters in manual trilinear mode.
GSVector4 MinMax;
// blue_mask, blue_shift, green_mask, green_shift.
GSVector4i ChannelShuffle;
GSVector2 TCOffsetHack;
// Pads TCOffsetHack out to a full 16-byte slot (matches pad_cb1 in the shader).
float pad1[2];
// One row of the DIMX dither matrix per vector.
GSVector4 DitherMatrix[4];
// Zero-fills the whole object, padding included, so bitwise comparison is meaningful.
__fi PSConstantBuffer()
{
memset(this, 0, sizeof(*this));
}
// Raw byte copy, padding included.
PSConstantBuffer(const PSConstantBuffer& other)
__fi PSConstantBuffer(const PSConstantBuffer& other)
{
memcpy(this, &other, sizeof(*this));
}
// Assignment is implemented by re-running the copy constructor in place.
PSConstantBuffer& operator=(const PSConstantBuffer& other)
__fi PSConstantBuffer& operator=(const PSConstantBuffer& other)
{
new (this) PSConstantBuffer(other);
return *this;
}
// Equality is delegated to BitEqual (presumably a bytewise compare — confirm).
bool operator==(const PSConstantBuffer& other) const
__fi bool operator==(const PSConstantBuffer& other) const
{
return BitEqual(*this, other);
}
bool operator!=(const PSConstantBuffer& other) const
__fi bool operator!=(const PSConstantBuffer& other) const
{
return !(*this == other);
}
// Cache helper: copies `other` into this object and returns true only when
// the two differ; returns false (and leaves this untouched) when they are equal.
__fi bool Update(const PSConstantBuffer& other)
{
if (*this == other)
return false;
memcpy(this, &other, sizeof(*this));
return true;
}
};
struct BlendState
{

View File

@ -1477,53 +1477,6 @@ void GSDevice11::CompileShader(const std::string& source, const char* fn, ID3DIn
throw GSRecoverableError();
}
// Repack the renderer-side vertex constants into the D3D11 vertex cbuffer layout.
static GSDevice11::VSConstantBuffer convertCB(const GSHWDrawConfig::VSConstantBuffer& cb)
{
	// Y is negated for both scale and offset; scale.z carries 2^-32 and offset.w carries -1.
	const GSVector4 scale(cb.vertex_scale.x, -cb.vertex_scale.y, ldexpf(1, -32), 0.0f);
	const GSVector4 offset(cb.vertex_offset.x, -cb.vertex_offset.y, 0.0f, -1.0f);

	// The shader reads .xy as the texture scale and .zw as the texture offset.
	const GSVector4 tex_scale = GSVector4::loadl(&cb.texture_scale);
	const GSVector4 tex_offset = GSVector4::loadl(&cb.texture_offset);

	GSDevice11::VSConstantBuffer d3d_cb;
	d3d_cb.VertexScale = scale;
	d3d_cb.VertexOffset = offset;
	d3d_cb.Texture_Scale_Offset = tex_scale.upld(tex_offset);
	d3d_cb.MaxDepth = cb.max_depth;
	return d3d_cb;
}
// Build the geometry-shader constants; only the point size is carried over
// from the renderer-side vertex constants.
static GSDevice11::GSConstantBuffer convertCBGS(const GSHWDrawConfig::VSConstantBuffer& cb)
{
	GSDevice11::GSConstantBuffer gs_cb;
	gs_cb.PointSize = cb.point_size;
	return gs_cb;
}
// Expand the packed renderer-side pixel constants into the D3D11 pixel cbuffer layout.
//   cb   - packed constants produced by the renderer.
//   atst - pixel-shader alpha-test mode; modes 1 and 2 bias the alpha reference by -0.1.
// Returns the expanded constant buffer by value.
static GSDevice11::PSConstantBuffer convertCB(const GSHWDrawConfig::PSConstantBuffer& cb, int atst)
{
	GSDevice11::PSConstantBuffer out;

	out.FogColor_AREF = GSVector4(GSVector4i::load(cb.fog_color_aref).u8to32());
	if (atst == 1 || atst == 2) // Greater / Less alpha
		out.FogColor_AREF.w -= 0.1f;

	out.HalfTexel = cb.half_texel;
	out.WH = cb.texture_size;
	out.MinMax = cb.uv_min_max;

	// MinF_TA is derived from MskFix, so MskFix has to be unpacked first;
	// the constructor leaves it zeroed, which would otherwise feed zeros into MinF.
	out.MskFix = GSVector4i::loadl(&cb.uv_msk_fix).u16to32();

	const GSVector4 ta_af(GSVector4i::load(cb.ta_af).u8to32());
	out.MinF_TA = (GSVector4(out.MskFix) + 0.5f).xyxy(ta_af) / out.WH.xyxy(GSVector4(255, 255));

	out.ChannelShuffle = GSVector4i::load(cb.channel_shuffle_int).u8to32();
	out.FbMask = GSVector4i::load(cb.fbmask_int).u8to32();
	out.TC_OffsetHack = GSVector4(cb.tc_offset.x, cb.tc_offset.y).xyxy();
	out.Af_MaxDepth = GSVector4(ta_af.a / 128.f, cb.max_depth * ldexpf(1, -32));

	// Sign-extend the packed dither entries: even bytes take the low field,
	// odd bytes (mask 0xFF00FF00) take the high field, then widen each row to 32-bit lanes.
	GSVector4i dither = GSVector4i::loadl(&cb.dither_matrix).u8to16();
	const GSVector4i ditherLow = dither.sll16(13).sra16(13);
	const GSVector4i ditherHi = dither.sll16(9).sra16(5);
	dither = ditherLow.blend8(ditherHi, GSVector4i(0xFF00FF00));

	out.DitherMatrix[0] = GSVector4(dither.xxxx().i8to32());
	out.DitherMatrix[1] = GSVector4(dither.yyyy().i8to32());
	out.DitherMatrix[2] = GSVector4(dither.zzzz().i8to32());
	out.DitherMatrix[3] = GSVector4(dither.wwww().i8to32());

	return out;
}
static GSDevice11::OMBlendSelector convertSel(GSHWDrawConfig::ColorMaskSelector cm, GSHWDrawConfig::BlendState blend)
{
GSDevice11::OMBlendSelector out;
@ -1618,14 +1571,10 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
PSSetShaderResource(3, config.rt);
}
const VSConstantBuffer cb_vs = convertCB(config.cb_vs);
const GSConstantBuffer cb_gs = convertCBGS(config.cb_vs);
PSConstantBuffer cb_ps = convertCB(config.cb_ps, config.ps.atst);
SetupOM(config.depth, convertSel(config.colormask, config.blend), config.blend.factor);
SetupVS(config.vs, &cb_vs);
SetupGS(config.gs, &cb_gs);
SetupPS(config.ps, &cb_ps, config.sampler);
SetupVS(config.vs, &config.cb_vs);
SetupGS(config.gs);
SetupPS(config.ps, &config.cb_ps, config.sampler);
OMSetRenderTargets(hdr_rt ? hdr_rt : config.rt, config.ds, &config.scissor);
@ -1634,11 +1583,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
if (config.alpha_second_pass.enable)
{
preprocessSel(config.alpha_second_pass.ps);
if (config.cb_ps != config.alpha_second_pass.cb_ps)
{
cb_ps = convertCB(config.alpha_second_pass.cb_ps, config.alpha_second_pass.ps.atst);
}
SetupPS(config.alpha_second_pass.ps, &cb_ps, config.sampler);
SetupPS(config.alpha_second_pass.ps, &config.alpha_second_pass.cb_ps, config.sampler);
SetupOM(config.alpha_second_pass.depth, convertSel(config.alpha_second_pass.colormask, config.blend), config.blend.factor);
DrawIndexedPrimitive();

View File

@ -31,129 +31,13 @@ struct GSVertexShader11
class GSDevice11 final : public GSDevice
{
public:
#pragma pack(push, 1)
// D3D11-specific vertex constant buffer with a cached-compare helper.
struct alignas(32) VSConstantBuffer
{
GSVector4 VertexScale;
GSVector4 VertexOffset;
// The vertex shader reads .xy as the texture scale and .zw as the texture offset.
GSVector4 Texture_Scale_Offset;
GSVector2i MaxDepth;
GSVector2i pad_vscb;
// Zero-initialises every member, padding field included.
VSConstantBuffer()
{
VertexScale = GSVector4::zero();
VertexOffset = GSVector4::zero();
Texture_Scale_Offset = GSVector4::zero();
MaxDepth = GSVector2i(0);
pad_vscb = GSVector2i(0);
}
// Compares this object with *cb as four GSVector4i lanes. If any lane differs,
// copies all four lanes from *cb and returns true; otherwise returns false.
// Assumes the struct is exactly four GSVector4i wide — TODO confirm.
__forceinline bool Update(const VSConstantBuffer* cb)
{
GSVector4i* a = (GSVector4i*)this;
GSVector4i* b = (GSVector4i*)cb;
if (!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3])).alltrue())
{
a[0] = b[0];
a[1] = b[1];
a[2] = b[2];
a[3] = b[3];
return true;
}
return false;
}
};
// D3D11-specific pixel constant buffer with a cached-compare helper.
// Update() indexes the object as GSVector4i lanes in declaration order:
// 0 FogColor_AREF, 1 HalfTexel, 2 WH, 3 MinMax, 4 MinF_TA, 5 MskFix,
// 6 ChannelShuffle, 7 FbMask, 8 TC_OffsetHack, 9 Af_MaxDepth, 10-13 DitherMatrix
// (assuming each member occupies one lane — TODO confirm).
struct alignas(32) PSConstantBuffer
{
GSVector4 FogColor_AREF;
GSVector4 HalfTexel;
GSVector4 WH;
GSVector4 MinMax;
GSVector4 MinF_TA;
GSVector4i MskFix;
GSVector4i ChannelShuffle;
GSVector4i FbMask;
GSVector4 TC_OffsetHack;
GSVector4 Af_MaxDepth;
GSVector4 DitherMatrix[4];
// Zero-initialises the members listed below.
// NOTE(review): TC_OffsetHack is not assigned here.
PSConstantBuffer()
{
FogColor_AREF = GSVector4::zero();
HalfTexel = GSVector4::zero();
WH = GSVector4::zero();
MinMax = GSVector4::zero();
MinF_TA = GSVector4::zero();
MskFix = GSVector4i::zero();
ChannelShuffle = GSVector4i::zero();
FbMask = GSVector4i::zero();
Af_MaxDepth = GSVector4::zero();
DitherMatrix[0] = GSVector4::zero();
DitherMatrix[1] = GSVector4::zero();
DitherMatrix[2] = GSVector4::zero();
DitherMatrix[3] = GSVector4::zero();
}
// Returns true (after copying from *cb) when any compared lane differs,
// false otherwise. Lane 1 is deliberately left out of the comparison and
// lane 8 is neither compared nor copied.
// NOTE(review): lane 8 looks like TC_OffsetHack; a change to it alone would
// not be detected here — confirm that is intended.
__forceinline bool Update(const PSConstantBuffer* cb)
{
GSVector4i* a = (GSVector4i*)this;
GSVector4i* b = (GSVector4i*)cb;
if (!((a[0] == b[0]) /*& (a[1] == b1)*/ & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4]) & (a[5] == b[5]) &
(a[6] == b[6]) & (a[7] == b[7]) & (a[9] == b[9]) & // if WH matches HalfTexel does too
(a[10] == b[10]) & (a[11] == b[11]) & (a[12] == b[12]) & (a[13] == b[13])).alltrue())
{
a[0] = b[0];
a[1] = b[1];
a[2] = b[2];
a[3] = b[3];
a[4] = b[4];
a[5] = b[5];
a[6] = b[6];
a[7] = b[7];
a[9] = b[9];
a[10] = b[10];
a[11] = b[11];
a[12] = b[12];
a[13] = b[13];
return true;
}
return false;
}
};
// D3D11-specific geometry-shader constant buffer; carries only the point size.
struct alignas(32) GSConstantBuffer
{
GSVector2 PointSize;
GSConstantBuffer()
{
PointSize = GSVector2(0);
}
// Always reports "changed": it neither reads *cb nor updates this object,
// so a caller that uploads on a true result will upload on every call.
__forceinline bool Update(const GSConstantBuffer* cb)
{
return true;
}
};
using VSSelector = GSHWDrawConfig::VSSelector;
using GSSelector = GSHWDrawConfig::GSSelector;
using PSSelector = GSHWDrawConfig::PSSelector;
using PSSamplerSelector = GSHWDrawConfig::SamplerSelector;
using OMDepthStencilSelector = GSHWDrawConfig::DepthStencilSelector;
#pragma pack(push, 1)
struct OMBlendSelector
{
union
@ -332,7 +216,6 @@ private:
std::unordered_map<u32, GSVertexShader11> m_vs;
wil::com_ptr_nothrow<ID3D11Buffer> m_vs_cb;
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11GeometryShader>> m_gs;
wil::com_ptr_nothrow<ID3D11Buffer> m_gs_cb;
std::unordered_map<u64, wil::com_ptr_nothrow<ID3D11PixelShader>> m_ps;
wil::com_ptr_nothrow<ID3D11Buffer> m_ps_cb;
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11SamplerState>> m_ps_ss;
@ -340,9 +223,8 @@ private:
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11DepthStencilState>> m_om_dss;
std::unordered_map<u32, wil::com_ptr_nothrow<ID3D11BlendState>> m_om_bs;
VSConstantBuffer m_vs_cb_cache;
GSConstantBuffer m_gs_cb_cache;
PSConstantBuffer m_ps_cb_cache;
GSHWDrawConfig::VSConstantBuffer m_vs_cb_cache;
GSHWDrawConfig::PSConstantBuffer m_ps_cb_cache;
std::unique_ptr<GSTexture> m_font;
std::unique_ptr<GSTexture11> m_download_tex;
@ -415,9 +297,9 @@ public:
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
bool CreateTextureFX();
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
void SetupGS(GSSelector sel, const GSConstantBuffer* cb);
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
void SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer* cb);
void SetupGS(GSSelector sel);
void SetupPS(PSSelector sel, const GSHWDrawConfig::PSConstantBuffer* cb, PSSamplerSelector ssel);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 afix);
void RenderHW(GSHWDrawConfig& config) final;

View File

@ -26,7 +26,7 @@ bool GSDevice11::CreateTextureFX()
memset(&bd, 0, sizeof(bd));
bd.ByteWidth = sizeof(VSConstantBuffer);
bd.ByteWidth = sizeof(GSHWDrawConfig::VSConstantBuffer);
bd.Usage = D3D11_USAGE_DEFAULT;
bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
@ -37,18 +37,7 @@ bool GSDevice11::CreateTextureFX()
memset(&bd, 0, sizeof(bd));
bd.ByteWidth = sizeof(GSConstantBuffer);
bd.Usage = D3D11_USAGE_DEFAULT;
bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
hr = m_dev->CreateBuffer(&bd, nullptr, m_gs_cb.put());
if (FAILED(hr))
return false;
memset(&bd, 0, sizeof(bd));
bd.ByteWidth = sizeof(PSConstantBuffer);
bd.ByteWidth = sizeof(GSHWDrawConfig::PSConstantBuffer);
bd.Usage = D3D11_USAGE_DEFAULT;
bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
@ -78,20 +67,18 @@ bool GSDevice11::CreateTextureFX()
// create layout
VSSelector sel;
VSConstantBuffer cb;
GSHWDrawConfig::VSConstantBuffer cb;
SetupVS(sel, &cb);
GSConstantBuffer gcb;
SetupGS(GSSelector(1), &gcb);
SetupGS(GSSelector(1));
//
return true;
}
void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
void GSDevice11::SetupVS(VSSelector sel, const GSHWDrawConfig::VSConstantBuffer* cb)
{
auto i = std::as_const(m_vs).find(sel.key);
@ -119,7 +106,7 @@ void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
i = m_vs.try_emplace(sel.key, std::move(vs)).first;
}
if (m_vs_cb_cache.Update(cb))
if (m_vs_cb_cache.Update(*cb))
{
m_ctx->UpdateSubresource(m_vs_cb.get(), 0, NULL, cb, 0, 0);
}
@ -129,7 +116,7 @@ void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
IASetInputLayout(i->second.il.get());
}
void GSDevice11::SetupGS(GSSelector sel, const GSConstantBuffer* cb)
void GSDevice11::SetupGS(GSSelector sel)
{
wil::com_ptr_nothrow<ID3D11GeometryShader> gs;
@ -157,16 +144,10 @@ void GSDevice11::SetupGS(GSSelector sel, const GSConstantBuffer* cb)
}
}
if (m_gs_cb_cache.Update(cb))
{
m_ctx->UpdateSubresource(m_gs_cb.get(), 0, NULL, cb, 0, 0);
}
GSSetShader(gs.get(), m_gs_cb.get());
GSSetShader(gs.get(), m_vs_cb.get());
}
void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel)
void GSDevice11::SetupPS(PSSelector sel, const GSHWDrawConfig::PSConstantBuffer* cb, PSSamplerSelector ssel)
{
auto i = std::as_const(m_ps).find(sel.key);
@ -215,7 +196,7 @@ void GSDevice11::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSe
i = m_ps.try_emplace(sel.key, std::move(ps)).first;
}
if (m_ps_cb_cache.Update(cb))
if (m_ps_cb_cache.Update(*cb))
{
m_ctx->UpdateSubresource(m_ps_cb.get(), 0, NULL, cb, 0, 0);
}

View File

@ -165,7 +165,7 @@ void GSRendererNew::EmulateZbuffer()
}
else if (!m_context->ZBUF.ZMSK)
{
m_conf.cb_ps.max_depth = max_z;
m_conf.cb_ps.TA_MaxDepth_Af.z = static_cast<float>(max_z) * 0x1p-32f;
m_conf.ps.zclamp = 1;
}
}
@ -282,10 +282,10 @@ void GSRendererNew::EmulateTextureShuffleAndFbmask()
if (m_conf.ps.fbmask && enable_fbmask_emulation)
{
m_conf.cb_ps.fbmask.r = rg_mask;
m_conf.cb_ps.fbmask.g = rg_mask;
m_conf.cb_ps.fbmask.b = ba_mask;
m_conf.cb_ps.fbmask.a = ba_mask;
m_conf.cb_ps.FbMask.r = rg_mask;
m_conf.cb_ps.FbMask.g = rg_mask;
m_conf.cb_ps.FbMask.b = ba_mask;
m_conf.cb_ps.FbMask.a = ba_mask;
// No blending so hit unsafe path.
if (!PRIM->ABE || !m_dev->Features().texture_barrier)
@ -318,7 +318,7 @@ void GSRendererNew::EmulateTextureShuffleAndFbmask()
if (m_conf.ps.fbmask)
{
m_conf.cb_ps.fbmask_int = m_context->FRAME.FBMSK;
m_conf.cb_ps.FbMask = fbmask_v.u8to32();
// Only alpha is special here, I think we can take a very unsafe shortcut
// Alpha isn't blended on the GS but directly copied into the RT.
//
@ -456,10 +456,7 @@ void GSRendererNew::EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::
const int green_shift = 8 - blue_shift;
GL_INS("Green/Blue channel (%d, %d)", blue_shift, green_shift);
m_conf.cb_ps.channel_shuffle.blue_mask = blue_mask;
m_conf.cb_ps.channel_shuffle.blue_shift = blue_shift;
m_conf.cb_ps.channel_shuffle.green_mask = green_mask;
m_conf.cb_ps.channel_shuffle.green_shift = green_shift;
m_conf.cb_ps.ChannelShuffle = GSVector4i(blue_mask, blue_shift, green_mask, green_shift);
m_conf.ps.channel = ChannelFetch_GXBY;
m_context->FRAME.FBMSK = 0x00FFFFFF;
}
@ -741,7 +738,7 @@ void GSRendererNew::EmulateBlending(bool& DATE_GL42, bool& DATE_GL45)
// Require the fixed alpha value
if (ALPHA.C == 2)
m_conf.cb_ps.alpha_fix = ALPHA.FIX;
m_conf.cb_ps.TA_MaxDepth_Af.a = static_cast<float>(ALPHA.FIX) / 128.0f;
}
else
{
@ -822,8 +819,10 @@ void GSRendererNew::EmulateTextureSampler(const GSTextureCache::Source* tex)
}
// Shuffle is a 16 bits format, so aem is always required
m_conf.cb_ps.ta0 = m_env.TEXA.TA0;
m_conf.cb_ps.ta1 = m_env.TEXA.TA1;
GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
ta /= 255.0f;
m_conf.cb_ps.TA_MaxDepth_Af.x = ta.x;
m_conf.cb_ps.TA_MaxDepth_Af.y = ta.y;
// The purpose of texture shuffle is to move color channel. Extra interpolation is likely a bad idea.
bilinear &= m_vt.IsLinear();
@ -843,8 +842,10 @@ void GSRendererNew::EmulateTextureSampler(const GSTextureCache::Source* tex)
// Don't upload AEM if format is 32 bits
if (cpsm.fmt)
{
m_conf.cb_ps.ta0 = m_env.TEXA.TA0;
m_conf.cb_ps.ta1 = m_env.TEXA.TA1;
GSVector4 ta(m_env.TEXA & GSVector4i::x000000ff());
ta /= 255.0f;
m_conf.cb_ps.TA_MaxDepth_Af.x = ta.x;
m_conf.cb_ps.TA_MaxDepth_Af.y = ta.y;
}
// Select the index format
@ -926,23 +927,20 @@ void GSRendererNew::EmulateTextureSampler(const GSTextureCache::Source* tex)
m_conf.ps.fst = !!PRIM->FST;
m_conf.cb_ps.texture_size = WH;
m_conf.cb_ps.half_texel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
m_conf.cb_ps.WH = WH;
m_conf.cb_ps.HalfTexel = GSVector4(-0.5f, 0.5f).xxyy() / WH.zwzw();
if (complex_wms_wmt)
{
m_conf.cb_ps.umsk = m_context->CLAMP.MINU;
m_conf.cb_ps.vmsk = m_context->CLAMP.MINV;
m_conf.cb_ps.ufix = m_context->CLAMP.MAXU;
m_conf.cb_ps.vfix = m_context->CLAMP.MAXV;
m_conf.cb_ps.uv_min_max = GSVector4(GSVector4i::loadl(&m_conf.cb_ps.uv_msk_fix).u16to32()) / WH.xyxy();
m_conf.cb_ps.MskFix = GSVector4i(m_context->CLAMP.MINU, m_context->CLAMP.MINV, m_context->CLAMP.MAXU, m_context->CLAMP.MAXV);;
m_conf.cb_ps.MinMax = GSVector4(m_conf.cb_ps.MskFix) / WH.xyxy();
}
else if (trilinear_manual)
{
// Reuse MinMax for the mipmap parameters to avoid extending the UBO
m_conf.cb_ps.uv_min_max.x = (float)m_context->TEX1.K / 16.0f;
m_conf.cb_ps.uv_min_max.y = float(1 << m_context->TEX1.L);
m_conf.cb_ps.uv_min_max.z = float(m_lod.x); // Offset because first layer is m_lod, dunno if we can do better
m_conf.cb_ps.uv_min_max.w = float(m_lod.y);
m_conf.cb_ps.MinMax.x = (float)m_context->TEX1.K / 16.0f;
m_conf.cb_ps.MinMax.y = float(1 << m_context->TEX1.L);
m_conf.cb_ps.MinMax.z = float(m_lod.x); // Offset because first layer is m_lod, dunno if we can do better
m_conf.cb_ps.MinMax.w = float(m_lod.y);
}
else if (trilinear_auto)
{
@ -952,16 +950,16 @@ void GSRendererNew::EmulateTextureSampler(const GSTextureCache::Source* tex)
// TC Offset Hack
m_conf.ps.tcoffsethack = m_userhacks_tcoffset;
GSVector4 tc_oh_ts = GSVector4(1 / 16.0f, 1 / 16.0f, m_userhacks_tcoffset_x, m_userhacks_tcoffset_y) / WH.xyxy();
m_conf.cb_ps.TCOffsetHack = GSVector2(tc_oh_ts.z, tc_oh_ts.w);
m_conf.cb_vs.texture_scale = GSVector2(tc_oh_ts.x, tc_oh_ts.y);
m_conf.cb_ps.tc_offset = GSVector2(tc_oh_ts.z, tc_oh_ts.w);
// Must be done after all coordinates math
if (m_context->HasFixedTEX0() && !PRIM->FST)
{
m_conf.ps.invalid_tex0 = 1;
// Use invalid size to denormalize ST coordinate
m_conf.cb_ps.texture_size.x = (float)(1 << m_context->stack.TEX0.TW);
m_conf.cb_ps.texture_size.y = (float)(1 << m_context->stack.TEX0.TH);
m_conf.cb_ps.WH.x = (float)(1 << m_context->stack.TEX0.TW);
m_conf.cb_ps.WH.y = (float)(1 << m_context->stack.TEX0.TH);
// We can't handle m_target with invalid_tex0 atm due to upscaling
ASSERT(!tex->m_target);
@ -1115,27 +1113,27 @@ void GSRendererNew::EmulateATST(GSHWDrawConfig::PSConstantBuffer& cb, GSHWDrawCo
switch (atst)
{
case ATST_LESS:
cb.aref = m_context->TEST.AREF;
cb.FogColor_AREF.a = (float)m_context->TEST.AREF - 0.1f;
ps.atst = 1;
break;
case ATST_LEQUAL:
cb.aref = m_context->TEST.AREF + 1;
cb.FogColor_AREF.a = (float)m_context->TEST.AREF - 0.1f + 1.0f;
ps.atst = 1;
break;
case ATST_GEQUAL:
cb.aref = m_context->TEST.AREF;
cb.FogColor_AREF.a = (float)m_context->TEST.AREF - 0.1f;
ps.atst = 2;
break;
case ATST_GREATER:
cb.aref = m_context->TEST.AREF + 1;
cb.FogColor_AREF.a = (float)m_context->TEST.AREF - 0.1f + 1.0f;
ps.atst = 2;
break;
case ATST_EQUAL:
cb.aref = m_context->TEST.AREF;
cb.FogColor_AREF.a = (float)m_context->TEST.AREF;
ps.atst = 3;
break;
case ATST_NOTEQUAL:
cb.aref = m_context->TEST.AREF;
cb.FogColor_AREF.a = (float)m_context->TEST.AREF;
ps.atst = 4;
break;
case ATST_NEVER: // Draw won't be done so no need to implement it in shader
@ -1395,16 +1393,19 @@ void GSRendererNew::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
GL_DBG("DITHERING mode ENABLED (%d)", m_dithering);
m_conf.ps.dither = m_dithering;
m_conf.cb_ps.dither_matrix.U64 = m_env.DIMX.U64 & 0x7777777777777777ull;
m_conf.cb_ps.DitherMatrix[0] = GSVector4(m_env.DIMX.DM00, m_env.DIMX.DM01, m_env.DIMX.DM02, m_env.DIMX.DM03);
m_conf.cb_ps.DitherMatrix[1] = GSVector4(m_env.DIMX.DM10, m_env.DIMX.DM11, m_env.DIMX.DM12, m_env.DIMX.DM13);
m_conf.cb_ps.DitherMatrix[2] = GSVector4(m_env.DIMX.DM20, m_env.DIMX.DM21, m_env.DIMX.DM22, m_env.DIMX.DM23);
m_conf.cb_ps.DitherMatrix[3] = GSVector4(m_env.DIMX.DM30, m_env.DIMX.DM31, m_env.DIMX.DM32, m_env.DIMX.DM33);
}
if (PRIM->FGE)
{
m_conf.ps.fog = 1;
m_conf.cb_ps.fog_color[0] = m_env.FOGCOL.FCR;
m_conf.cb_ps.fog_color[1] = m_env.FOGCOL.FCG;
m_conf.cb_ps.fog_color[2] = m_env.FOGCOL.FCB;
const GSVector4 fc = GSVector4::rgba32(m_env.FOGCOL.U32[0]);
// Blend in the existing AREF so alpha doesn't pick up a random value (dirty cache)
m_conf.cb_ps.FogColor_AREF = fc.blend32<8>(m_conf.cb_ps.FogColor_AREF);
}
// Warning must be done after EmulateZbuffer

View File

@ -1907,20 +1907,20 @@ __fi static void WriteToStreamBuffer(GL::StreamBuffer* sb, u32 index, u32 align,
glBindBufferRange(GL_UNIFORM_BUFFER, index, sb->GetGLBufferId(), res.buffer_offset, size);
}
// Uploads the vertex and fragment uniform blocks for the next draw, skipping
// either upload when the cached copy reports no change.
//   vs_cb - vertex-stage constants; dereferenced or passed to Update, so must not be null.
//   ps_cb - fragment-stage constants; same requirement.
// NOTE(review): two signatures and paired Update/sizeof lines appear below
// (device-local types vs GSHWDrawConfig types); this looks like two revisions
// shown together — confirm which lines are live.
void GSDeviceOGL::SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb)
void GSDeviceOGL::SetupCB(const GSHWDrawConfig::VSConstantBuffer* vs_cb, const GSHWDrawConfig::PSConstantBuffer* ps_cb)
{
GL_PUSH("UBO");
// Update() returns true only when the cache had to be refreshed.
if (m_vs_cb_cache.Update(vs_cb))
if (m_vs_cb_cache.Update(*vs_cb))
{
// Stream the new contents and bind them at the vertex uniform index.
WriteToStreamBuffer(m_vertex_uniform_stream_buffer.get(), g_vs_cb_index,
m_uniform_buffer_alignment, vs_cb, sizeof(VSConstantBuffer));
m_uniform_buffer_alignment, vs_cb, sizeof(GSHWDrawConfig::VSConstantBuffer));
}
if (m_ps_cb_cache.Update(ps_cb))
if (m_ps_cb_cache.Update(*ps_cb))
{
// Stream the new contents and bind them at the fragment uniform index.
WriteToStreamBuffer(m_fragment_uniform_stream_buffer.get(), g_ps_cb_index,
m_uniform_buffer_alignment, ps_cb, sizeof(PSConstantBuffer));
m_uniform_buffer_alignment, ps_cb, sizeof(GSHWDrawConfig::PSConstantBuffer));
}
}
@ -2019,44 +2019,6 @@ void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel)
OMSetDepthStencilState(m_om_dss[dssel.key]);
}
// Repack the renderer-side vertex constants into the OpenGL vertex uniform layout.
static GSDeviceOGL::VSConstantBuffer convertCB(const GSHWDrawConfig::VSConstantBuffer& cb)
{
	GSDeviceOGL::VSConstantBuffer gl_cb;

	// Each pair is packed as (scale, offset) into a single vector.
	const GSVector4 vtx_scale = GSVector4::loadl(&cb.vertex_scale);
	const GSVector4 vtx_offset = GSVector4::loadl(&cb.vertex_offset);
	gl_cb.Vertex_Scale_Offset = vtx_scale.upld(vtx_offset);

	const GSVector4 tex_scale = GSVector4::loadl(&cb.texture_scale);
	const GSVector4 tex_offset = GSVector4::loadl(&cb.texture_offset);
	gl_cb.Texture_Scale_Offset = tex_scale.upld(tex_offset);

	gl_cb.PointSize = cb.point_size;
	gl_cb.MaxDepth = cb.max_depth;
	return gl_cb;
}
// Expand the packed renderer-side pixel constants into the OpenGL fragment uniform layout.
//   cb   - packed constants produced by the renderer.
//   atst - alpha-test mode; modes 1 and 2 bias the alpha reference by -0.1.
static GSDeviceOGL::PSConstantBuffer convertCB(const GSHWDrawConfig::PSConstantBuffer& cb, int atst)
{
	GSDeviceOGL::PSConstantBuffer ubo;

	// Fog colour and alpha reference, one byte per lane widened to float.
	ubo.FogColor_AREF = GSVector4(GSVector4i::load(cb.fog_color_aref).u8to32());
	const bool biased_alpha_test = (atst == 1 || atst == 2); // Greater / Less alpha
	if (biased_alpha_test)
	{
		ubo.FogColor_AREF.w -= 0.1f;
	}

	ubo.WH = cb.texture_size;

	// TA0/TA1 normalised by 255, fixed alpha by 128; z is then replaced by the scaled max depth.
	const GSVector4 ta_af_raw(GSVector4i::load(cb.ta_af).u8to32());
	ubo.TA_MaxDepth_Af = ta_af_raw / GSVector4(255.f, 255.f, 1.f, 128.f);
	ubo.TA_MaxDepth_Af.z = cb.max_depth * ldexpf(1, -32);

	ubo.MskFix = GSVector4i::loadl(&cb.uv_msk_fix).u16to32();
	ubo.FbMask = GSVector4i::load(cb.fbmask_int).u8to32();
	ubo.HalfTexel = cb.half_texel;
	ubo.MinMax = cb.uv_min_max;
	ubo.TC_OH = GSVector4::zero().upld(GSVector4(cb.tc_offset));

	// Sign-extend the packed dither entries, merge low/high fields per byte,
	// then widen each row to 32-bit lanes.
	const GSVector4i packed = GSVector4i::loadl(&cb.dither_matrix).u8to16();
	const GSVector4i low_field = packed.sll16(13).sra16(13);
	const GSVector4i high_field = packed.sll16(9).sra16(5);
	const GSVector4i rows = low_field.blend8(high_field, GSVector4i(0xFF00FF00));

	ubo.DitherMatrix[0] = GSVector4(rows.xxxx().i8to32());
	ubo.DitherMatrix[1] = GSVector4(rows.yyyy().i8to32());
	ubo.DitherMatrix[2] = GSVector4(rows.zzzz().i8to32());
	ubo.DitherMatrix[3] = GSVector4(rows.wwww().i8to32());

	return ubo;
}
static GSDeviceOGL::VSSelector convertSel(const GSHWDrawConfig::VSSelector sel)
{
GSDeviceOGL::VSSelector out;
@ -2137,14 +2099,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
OMSetColorMaskState(config.colormask);
SetupOM(config.depth);
VSConstantBuffer cb_vs = convertCB(config.cb_vs);
PSConstantBuffer cb_ps = convertCB(config.cb_ps, config.ps.atst);
SetupCB(&cb_vs, &cb_ps);
if (config.cb_ps.channel_shuffle_int)
{
SetupCBMisc(GSVector4i::load(config.cb_ps.channel_shuffle_int).u8to32());
}
SetupCB(&config.cb_vs, &config.cb_ps);
GSSelector gssel;
if (config.gs.expand)
@ -2198,11 +2153,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
if (config.alpha_second_pass.enable)
{
if (config.cb_ps != config.alpha_second_pass.cb_ps)
{
cb_ps = convertCB(config.alpha_second_pass.cb_ps, config.alpha_second_pass.ps.atst);
SetupCB(&cb_vs, &cb_ps);
}
SetupCB(&config.cb_vs, &config.alpha_second_pass.cb_ps);
SetupPipeline(vssel, gssel, config.alpha_second_pass.ps);
OMSetColorMaskState(config.alpha_second_pass.colormask);
SetupOM(config.alpha_second_pass.depth);

View File

@ -123,41 +123,6 @@ public:
class GSDeviceOGL final : public GSDevice
{
public:
// OpenGL-specific vertex uniform block with a cached-compare helper.
struct alignas(32) VSConstantBuffer
{
GSVector4 Vertex_Scale_Offset;
GSVector4 Texture_Scale_Offset;
GSVector2 PointSize;
GSVector2i MaxDepth;
// Zero-initialises every member.
VSConstantBuffer()
{
Vertex_Scale_Offset = GSVector4::zero();
Texture_Scale_Offset = GSVector4::zero();
PointSize = GSVector2(0);
MaxDepth = GSVector2i(0);
}
// Compares this object with *cb as three GSVector4i lanes. If any lane differs,
// copies all three lanes from *cb and returns true; otherwise returns false.
// Assumes the struct is exactly three GSVector4i wide — TODO confirm.
__forceinline bool Update(const VSConstantBuffer* cb)
{
GSVector4i* a = (GSVector4i*)this;
GSVector4i* b = (GSVector4i*)cb;
if (!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2])).alltrue())
{
a[0] = b[0];
a[1] = b[1];
a[2] = b[2];
return true;
}
return false;
}
};
struct VSSelector
{
union
@ -211,67 +176,6 @@ public:
}
};
// OpenGL-specific fragment uniform block with a cached-compare helper.
// Update() indexes the object as GSVector4i lanes in declaration order:
// 0 FogColor_AREF, 1 WH, 2 TA_MaxDepth_Af, 3 MskFix, 4 FbMask, 5 HalfTexel,
// 6 MinMax, 7 TC_OH, 8-11 DitherMatrix
// (assuming each member occupies one lane — TODO confirm).
struct alignas(32) PSConstantBuffer
{
GSVector4 FogColor_AREF;
GSVector4 WH;
GSVector4 TA_MaxDepth_Af;
GSVector4i MskFix;
GSVector4i FbMask;
GSVector4 HalfTexel;
GSVector4 MinMax;
GSVector4 TC_OH;
GSVector4 DitherMatrix[4];
// Zero-initialises every member.
PSConstantBuffer()
{
FogColor_AREF = GSVector4::zero();
HalfTexel = GSVector4::zero();
WH = GSVector4::zero();
TA_MaxDepth_Af = GSVector4::zero();
MinMax = GSVector4::zero();
MskFix = GSVector4i::zero();
TC_OH = GSVector4::zero();
FbMask = GSVector4i::zero();
DitherMatrix[0] = GSVector4::zero();
DitherMatrix[1] = GSVector4::zero();
DitherMatrix[2] = GSVector4::zero();
DitherMatrix[3] = GSVector4::zero();
}
// Returns true (after copying from *cb) when any compared lane differs,
// false otherwise. Lanes 5 and 7 are left out of the comparison; lane 5 is
// still copied, lane 7 is not.
// NOTE(review): lane 7 looks like TC_OH, so the cached copy of it is never
// refreshed here — confirm that is intended.
__forceinline bool Update(const PSConstantBuffer* cb)
{
GSVector4i* a = (GSVector4i*)this;
GSVector4i* b = (GSVector4i*)cb;
// if WH matches both HalfTexel and TC_OH_TS do too
if (!((a[0] == b[0]) & (a[1] == b[1]) & (a[2] == b[2]) & (a[3] == b[3]) & (a[4] == b[4]) & (a[6] == b[6])
& (a[8] == b[8]) & (a[9] == b[9]) & (a[10] == b[10]) & (a[11] == b[11])).alltrue())
{
// Note previous check uses SSE already, a plain copy will be faster than any memcpy
a[0] = b[0];
a[1] = b[1];
a[2] = b[2];
a[3] = b[3];
a[4] = b[4];
a[5] = b[5];
a[6] = b[6];
a[8] = b[8];
a[9] = b[9];
a[10] = b[10];
a[11] = b[11];
return true;
}
return false;
}
};
using PSSelector = GSHWDrawConfig::PSSelector;
using PSSamplerSelector = GSHWDrawConfig::SamplerSelector;
using OMDepthStencilSelector = GSHWDrawConfig::DepthStencilSelector;
@ -280,7 +184,6 @@ public:
struct alignas(32) MiscConstantBuffer
{
GSVector4i ScalingFactor;
GSVector4i ChannelShuffle;
GSVector4i EMOD_AC;
MiscConstantBuffer() { memset(this, 0, sizeof(*this)); }
@ -379,8 +282,8 @@ private:
GLuint m_palette_ss;
VSConstantBuffer m_vs_cb_cache;
PSConstantBuffer m_ps_cb_cache;
GSHWDrawConfig::VSConstantBuffer m_vs_cb_cache;
GSHWDrawConfig::PSConstantBuffer m_ps_cb_cache;
MiscConstantBuffer m_misc_cb_cache;
std::unique_ptr<GSTexture> m_font;
@ -476,7 +379,7 @@ public:
void SelfShaderTest();
void SetupPipeline(const VSSelector& vsel, const GSSelector& gsel, const PSSelector& psel);
void SetupCB(const VSConstantBuffer* vs_cb, const PSConstantBuffer* ps_cb);
void SetupCB(const GSHWDrawConfig::VSConstantBuffer* vs_cb, const GSHWDrawConfig::PSConstantBuffer* ps_cb);
void SetupCBMisc(const GSVector4i& channel);
void SetupSampler(PSSamplerSelector ssel);
void SetupOM(OMDepthStencilSelector dssel);