GS:MTL: Expand lines and sprites in vertex shader

This commit is contained in:
TellowKrinkle 2022-07-15 23:50:33 -05:00 committed by tellowkrinkle
parent 98724979cb
commit 693790a19f
8 changed files with 305 additions and 13 deletions

View File

@ -2739,7 +2739,8 @@ void GSState::GrowVertexBuffer()
const size_t maxcount = std::max<size_t>(m_vertex.maxcount * 3 / 2, 10000);
GSVertex* vertex = (GSVertex*)_aligned_malloc(sizeof(GSVertex) * maxcount, 32);
u32* index = (u32*)_aligned_malloc(sizeof(u32) * maxcount * 3, 32); // worst case is slightly less than vertex number * 3
// Worst case index list is a list of points with vs expansion, 6 indices per point
u32* index = (u32*)_aligned_malloc(sizeof(u32) * maxcount * 6, 32);
if (vertex == NULL || index == NULL)
{

View File

@ -246,6 +246,13 @@ struct alignas(16) GSHWDrawConfig
Triangle,
Sprite,
};
/// Selects which primitive type, if any, the vertex shader expands into triangles.
/// The value is stored in a 2-bit selector field (`VSExpand expand : 2`), so the
/// enumerators must stay within the range 0..3.
enum class VSExpand: u8
{
None,   ///< No expansion, vertices are used as submitted
Point,  ///< Each point is expanded to a quad (two triangles, 6 indices)
Line,   ///< Each line is expanded to a quad (two triangles, 6 indices)
Sprite, ///< Each two-vertex sprite is expanded to a quad (two triangles, 6 indices)
};
#pragma pack(push, 1)
struct GSSelector
{
@ -272,7 +279,8 @@ struct alignas(16) GSHWDrawConfig
u8 tme : 1;
u8 iip : 1;
u8 point_size : 1; ///< Set when points need to be expanded without geometry shader.
u8 _free : 1;
VSExpand expand : 2;
u8 _free : 2;
};
u8 key;
};
@ -690,6 +698,7 @@ public:
{
bool broken_point_sampler : 1; ///< Issue with AMD cards, see tfx shader for details
bool geometry_shader : 1; ///< Supports geometry shader
bool vs_expand : 1; ///< Supports expanding points/lines/sprites in the vertex shader
bool image_load_store : 1; ///< Supports atomic min and max on images (for use with prim tracking destination alpha algorithm)
bool texture_barrier : 1; ///< Supports sampling rt and hopefully texture barrier
bool provoking_vertex_last: 1; ///< Supports using the last vertex in a primitive as the value for flat shading.

View File

@ -382,6 +382,68 @@ void GSRendererHW::Lines2Sprites()
}
}
template <GSHWDrawConfig::VSExpand Expand>
void GSRendererHW::ExpandIndices()
{
size_t process_count = (m_index.tail + 3) / 4 * 4;
if (Expand == GSHWDrawConfig::VSExpand::Point)
{
// Make sure we have space for writing off the end slightly
while (process_count > m_vertex.maxcount)
GrowVertexBuffer();
}
u32 expansion_factor = Expand == GSHWDrawConfig::VSExpand::Point ? 6 : 3;
m_index.tail *= expansion_factor;
GSVector4i* end = reinterpret_cast<GSVector4i*>(m_index.buff);
GSVector4i* read = reinterpret_cast<GSVector4i*>(m_index.buff + process_count);
GSVector4i* write = reinterpret_cast<GSVector4i*>(m_index.buff + process_count * expansion_factor);
while (read > end)
{
read -= 1;
write -= expansion_factor;
switch (Expand)
{
case GSHWDrawConfig::VSExpand::None:
break;
case GSHWDrawConfig::VSExpand::Point:
{
constexpr GSVector4i low0 = GSVector4i::cxpr(0, 1, 2, 1);
constexpr GSVector4i low1 = GSVector4i::cxpr(2, 3, 0, 1);
constexpr GSVector4i low2 = GSVector4i::cxpr(2, 1, 2, 3);
GSVector4i in = read->sll32(2);
write[0] = in.xxxx() | low0;
write[1] = in.xxyy() | low1;
write[2] = in.yyyy() | low2;
write[3] = in.zzzz() | low0;
write[4] = in.zzww() | low1;
write[5] = in.wwww() | low2;
break;
}
case GSHWDrawConfig::VSExpand::Line:
{
constexpr GSVector4i low0 = GSVector4i::cxpr(0, 1, 2, 1);
constexpr GSVector4i low1 = GSVector4i::cxpr(2, 3, 0, 1);
constexpr GSVector4i low2 = GSVector4i::cxpr(2, 1, 2, 3);
GSVector4i in = read->sll32(2);
write[0] = in.xxyx() | low0;
write[1] = in.yyzz() | low1;
write[2] = in.wzww() | low2;
break;
}
case GSHWDrawConfig::VSExpand::Sprite:
{
constexpr GSVector4i low = GSVector4i::cxpr(0, 1, 0, 1);
GSVector4i in = read->sll32(1);
write[0] = in.xxyx() | low;
write[1] = in.yyzz() | low;
write[2] = in.wzww() | low;
break;
}
}
}
}
void GSRendererHW::EmulateAtst(GSVector4& FogColor_AREF, u8& ps_atst, const bool pass_2)
{
static const u32 inverted_atst[] = {ATST_ALWAYS, ATST_NEVER, ATST_GEQUAL, ATST_GREATER, ATST_NOTEQUAL, ATST_LESS, ATST_LEQUAL, ATST_EQUAL};
@ -1837,6 +1899,55 @@ void GSRendererHW::Draw()
#endif
}
/// Verifies assumptions we expect to hold about indices
bool GSRendererHW::VerifyIndices()
{
switch (m_vt.m_primclass)
{
case GS_SPRITE_CLASS:
if (m_index.tail % 2 != 0)
return false;
[[fallthrough]];
case GS_POINT_CLASS:
// Expect indices to be flat increasing
for (size_t i = 0; i < m_index.tail; i++)
{
if (m_index.buff[i] != i)
return false;
}
break;
case GS_LINE_CLASS:
if (m_index.tail % 2 != 0)
return false;
// Expect each line to be a pair next to each other
// VS expand relies on this!
if (g_gs_device->Features().provoking_vertex_last)
{
for (size_t i = 0; i < m_index.tail; i += 2)
{
if (m_index.buff[i] + 1 != m_index.buff[i + 1])
return false;
}
}
else
{
for (size_t i = 0; i < m_index.tail; i += 2)
{
if (m_index.buff[i] != m_index.buff[i + 1] + 1)
return false;
}
}
break;
case GS_TRIANGLE_CLASS:
if (m_index.tail % 3 != 0)
return false;
break;
case GS_INVALID_CLASS:
break;
}
return true;
}
void GSRendererHW::SetupIA(const float& sx, const float& sy)
{
GL_PUSH("IA");
@ -1849,9 +1960,14 @@ void GSRendererHW::SetupIA(const float& sx, const float& sy)
const bool unscale_pt_ln = !GSConfig.UserHacks_DisableSafeFeatures && (GetUpscaleMultiplier() != 1);
const GSDevice::FeatureSupport features = g_gs_device->Features();
ASSERT(VerifyIndices());
switch (m_vt.m_primclass)
{
case GS_POINT_CLASS:
m_conf.gs.topology = GSHWDrawConfig::GSTopology::Point;
m_conf.topology = GSHWDrawConfig::Topology::Point;
m_conf.indices_per_prim = 1;
if (unscale_pt_ln)
{
if (features.point_expand)
@ -1863,14 +1979,21 @@ void GSRendererHW::SetupIA(const float& sx, const float& sy)
m_conf.gs.expand = true;
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
}
else if (features.vs_expand)
{
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Point;
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
m_conf.indices_per_prim = 6;
ExpandIndices<GSHWDrawConfig::VSExpand::Point>();
}
}
m_conf.gs.topology = GSHWDrawConfig::GSTopology::Point;
m_conf.topology = GSHWDrawConfig::Topology::Point;
m_conf.indices_per_prim = 1;
break;
case GS_LINE_CLASS:
m_conf.gs.topology = GSHWDrawConfig::GSTopology::Line;
m_conf.topology = GSHWDrawConfig::Topology::Line;
m_conf.indices_per_prim = 2;
if (unscale_pt_ln)
{
if (features.line_expand)
@ -1882,11 +2005,15 @@ void GSRendererHW::SetupIA(const float& sx, const float& sy)
m_conf.gs.expand = true;
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
}
else if (features.vs_expand)
{
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Line;
m_conf.cb_vs.point_size = GSVector2(16.0f * sx, 16.0f * sy);
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
m_conf.indices_per_prim = 6;
ExpandIndices<GSHWDrawConfig::VSExpand::Line>();
}
}
m_conf.gs.topology = GSHWDrawConfig::GSTopology::Line;
m_conf.topology = GSHWDrawConfig::Topology::Line;
m_conf.indices_per_prim = 2;
break;
case GS_SPRITE_CLASS:
@ -1915,6 +2042,13 @@ void GSRendererHW::SetupIA(const float& sx, const float& sy)
m_conf.topology = GSHWDrawConfig::Topology::Line;
m_conf.indices_per_prim = 2;
}
else if (features.vs_expand && !m_vt.m_accurate_stq)
{
m_conf.topology = GSHWDrawConfig::Topology::Triangle;
m_conf.vs.expand = GSHWDrawConfig::VSExpand::Sprite;
m_conf.indices_per_prim = 6;
ExpandIndices<GSHWDrawConfig::VSExpand::Sprite>();
}
else
{
Lines2Sprites();

View File

@ -179,6 +179,8 @@ public:
bool CanUpscale() override;
int GetUpscaleMultiplier() override;
void Lines2Sprites();
bool VerifyIndices();
template <GSHWDrawConfig::VSExpand Expand> void ExpandIndices();
void EmulateAtst(GSVector4& FogColor_AREF, u8& atst, const bool pass_2);
void ConvertSpriteTextureShuffle(bool& write_ba, bool& read_ba);
GSVector4 RealignTargetTextureCoordinate(const GSTextureCache::Source* tex);

View File

@ -201,6 +201,7 @@ public:
bool iip : 1;
bool fst : 1;
bool point_size : 1;
GSMTLExpandType expand : 2;
};
u8 key;
};
@ -243,7 +244,7 @@ public:
MRCOwned<id<MTLRenderPipelineState>> m_imgui_pipeline;
MRCOwned<id<MTLRenderPipelineState>> m_imgui_pipeline_a8;
MRCOwned<id<MTLFunction>> m_hw_vs[1 << 3];
MRCOwned<id<MTLFunction>> m_hw_vs[1 << 5];
std::unordered_map<PSSelector, MRCOwned<id<MTLFunction>>> m_hw_ps;
std::unordered_map<PipelineSelectorMTL, MRCOwned<id<MTLRenderPipelineState>>> m_hw_pipeline;

View File

@ -586,6 +586,7 @@ bool GSDeviceMTL::Create(HostDisplay* display)
m_features.broken_point_sampler = [[m_dev.dev name] containsString:@"AMD"];
m_features.geometry_shader = false;
m_features.vs_expand = true;
m_features.image_load_store = m_dev.features.primid;
m_features.texture_barrier = true;
m_features.provoking_vertex_last = false;
@ -741,10 +742,18 @@ bool GSDeviceMTL::Create(HostDisplay* display)
{
VSSelector sel;
sel.key = i;
if (sel.point_size && sel.expand != GSMTLExpandType::None)
continue;
setFnConstantB(m_fn_constants, sel.fst, GSMTLConstantIndex_FST);
setFnConstantB(m_fn_constants, sel.iip, GSMTLConstantIndex_IIP);
setFnConstantB(m_fn_constants, sel.point_size, GSMTLConstantIndex_VS_POINT_SIZE);
m_hw_vs[i] = LoadShader(@"vs_main");
NSString* shader = @"vs_main";
if (sel.expand != GSMTLExpandType::None)
{
setFnConstantI(m_fn_constants, static_cast<u32>(sel.expand), GSMTLConstantIndex_VS_EXPAND_TYPE);
shader = @"vs_main_expand";
}
m_hw_vs[i] = LoadShader(shader);
}
// Init pipelines
@ -1159,6 +1168,17 @@ static MTLBlendOperation ConvertBlendOp(GSDevice::BlendOp generic)
static constexpr MTLColorWriteMask MTLColorWriteMaskRGB = MTLColorWriteMaskRed | MTLColorWriteMaskGreen | MTLColorWriteMaskBlue;
/// Translates the renderer-independent vertex shader expansion mode into the
/// value used by the Metal vertex shader selector.
/// @param generic Expansion mode requested by the hardware renderer
/// @return The matching GSMTLExpandType, or GSMTLExpandType::None for an unrecognised value
static GSMTLExpandType ConvertVSExpand(GSHWDrawConfig::VSExpand generic)
{
	// No `default:` label on purpose, so the compiler still warns if a new enumerator is left unhandled
	switch (generic)
	{
		case GSHWDrawConfig::VSExpand::None:   return GSMTLExpandType::None;
		case GSHWDrawConfig::VSExpand::Point:  return GSMTLExpandType::Point;
		case GSHWDrawConfig::VSExpand::Line:   return GSMTLExpandType::Line;
		case GSHWDrawConfig::VSExpand::Sprite: return GSMTLExpandType::Sprite;
	}
	// Every enumerator returns above. This keeps the function from flowing off its end
	// (undefined behaviour for a non-void function) if an out-of-range value is ever passed.
	return GSMTLExpandType::None;
}
void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDrawConfig::PSSelector pssel, GSHWDrawConfig::BlendState blend, GSHWDrawConfig::ColorMaskSelector cms)
{
PipelineSelectorExtrasMTL extras(blend, m_current_render.color_target, cms, m_current_render.depth_target, m_current_render.stencil_target);
@ -1180,6 +1200,7 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
vssel_mtl.fst = vssel.fst;
vssel_mtl.iip = vssel.iip;
vssel_mtl.point_size = vssel.point_size;
vssel_mtl.expand = ConvertVSExpand(vssel.expand);
id<MTLFunction> vs = m_hw_vs[vssel_mtl.key];
id<MTLFunction> ps;
@ -1242,7 +1263,12 @@ void GSDeviceMTL::MRESetHWPipelineState(GSHWDrawConfig::VSSelector vssel, GSHWDr
}
MRCOwned<MTLRenderPipelineDescriptor*> pdesc = MRCTransfer([MTLRenderPipelineDescriptor new]);
[pdesc setVertexDescriptor:m_hw_vertex];
if (vssel_mtl.point_size)
[pdesc setInputPrimitiveTopology:MTLPrimitiveTopologyClassPoint];
if (vssel_mtl.expand == GSMTLExpandType::None)
[pdesc setVertexDescriptor:m_hw_vertex];
else
[pdesc setInputPrimitiveTopology:MTLPrimitiveTopologyClassTriangle];
MTLRenderPipelineColorAttachmentDescriptor* color = [[pdesc colorAttachments] objectAtIndexedSubscript:0];
color.pixelFormat = ConvertPixelFormat(extras.rt);
[pdesc setDepthAttachmentPixelFormat:extras.has_depth ? MTLPixelFormatDepth32Float_Stencil8 : MTLPixelFormatInvalid];
@ -1396,6 +1422,15 @@ void GSDeviceMTL::MRESetPipeline(id<MTLRenderPipelineState> pipe)
// MARK: - HW Render
// Metal can't import GSDevice.h, but we should make sure the structs are at least compatible
static_assert(sizeof(GSVertex) == sizeof(GSMTLMainVertex));
static_assert(offsetof(GSVertex, ST) == offsetof(GSMTLMainVertex, st));
static_assert(offsetof(GSVertex, RGBAQ.R) == offsetof(GSMTLMainVertex, rgba));
static_assert(offsetof(GSVertex, RGBAQ.Q) == offsetof(GSMTLMainVertex, q));
static_assert(offsetof(GSVertex, XYZ.X) == offsetof(GSMTLMainVertex, xy));
static_assert(offsetof(GSVertex, XYZ.Z) == offsetof(GSMTLMainVertex, z));
static_assert(offsetof(GSVertex, UV) == offsetof(GSMTLMainVertex, uv));
static_assert(offsetof(GSVertex, FOG) == offsetof(GSMTLMainVertex, fog));
static_assert(sizeof(GSHWDrawConfig::VSConstantBuffer) == sizeof(GSMTLMainVSUniform));
static_assert(sizeof(GSHWDrawConfig::PSConstantBuffer) == sizeof(GSMTLMainPSUniform));
static_assert(offsetof(GSHWDrawConfig::VSConstantBuffer, vertex_scale) == offsetof(GSMTLMainVSUniform, vertex_scale));

View File

@ -57,6 +57,17 @@ struct GSMTLInterlacePSUniform
vector_float2 ZrH;
};
/// Raw in-memory layout of one hardware renderer vertex, as read directly from the
/// vertex buffer by vs_main_expand (which indexes the buffer itself instead of using stage_in).
/// Must stay layout-compatible with GSVertex: GSDeviceMTL static_asserts that the total size
/// and the offset of every field below match, so do not reorder, resize or insert fields.
struct GSMTLMainVertex
{
vector_float2 st;     // matches GSVertex::ST
vector_uchar4 rgba;   // matches GSVertex::RGBAQ starting at R
float q;              // matches GSVertex::RGBAQ.Q
vector_ushort2 xy;    // matches GSVertex::XYZ starting at X
uint z;               // matches GSVertex::XYZ.Z
vector_ushort2 uv;    // matches GSVertex::UV
unsigned char fog;    // matches GSVertex::FOG; load_vertex divides this by 255
};
struct GSMTLMainVSUniform
{
vector_float2 vertex_scale;
@ -111,6 +122,14 @@ enum GSMTLAttributes
GSMTLAttributeIndexF,
};
/// Vertex shader expansion mode, shared between the host code and the Metal shaders.
/// The host stores it in a 2-bit vertex shader selector field and passes it to the shader
/// as the GSMTLConstantIndex_VS_EXPAND_TYPE function constant, so the numeric values are
/// part of the host/shader contract and must stay within 0..3.
enum class GSMTLExpandType : unsigned char
{
None = 0,   // No expansion
Point = 1,  // Expand each point into a quad
Line = 2,   // Expand each line into a quad
Sprite = 3, // Expand each two-vertex sprite into a quad
};
enum GSMTLFnConstants
{
GSMTLConstantIndex_SCALING_FACTOR,
@ -118,6 +137,7 @@ enum GSMTLFnConstants
GSMTLConstantIndex_FST,
GSMTLConstantIndex_IIP,
GSMTLConstantIndex_VS_POINT_SIZE,
GSMTLConstantIndex_VS_EXPAND_TYPE,
GSMTLConstantIndex_PS_AEM_FMT,
GSMTLConstantIndex_PS_PAL_FMT,
GSMTLConstantIndex_PS_DFMT,

View File

@ -23,6 +23,7 @@ constant bool HAS_FBFETCH [[function_constant(GSMTLConstantIndex_FRAME
constant bool FST [[function_constant(GSMTLConstantIndex_FST)]];
constant bool IIP [[function_constant(GSMTLConstantIndex_IIP)]];
constant bool VS_POINT_SIZE [[function_constant(GSMTLConstantIndex_VS_POINT_SIZE)]];
constant uint VS_EXPAND_TYPE_RAW [[function_constant(GSMTLConstantIndex_VS_EXPAND_TYPE)]];
constant uint PS_AEM_FMT [[function_constant(GSMTLConstantIndex_PS_AEM_FMT)]];
constant uint PS_PAL_FMT [[function_constant(GSMTLConstantIndex_PS_PAL_FMT)]];
constant uint PS_DFMT [[function_constant(GSMTLConstantIndex_PS_DFMT)]];
@ -67,6 +68,8 @@ constant bool PS_POINT_SAMPLER [[function_constant(GSMTLConstantIndex_PS_PO
constant bool PS_INVALID_TEX0 [[function_constant(GSMTLConstantIndex_PS_INVALID_TEX0)]];
constant uint PS_SCANMSK [[function_constant(GSMTLConstantIndex_PS_SCANMSK)]];
constant GSMTLExpandType VS_EXPAND_TYPE = static_cast<GSMTLExpandType>(VS_EXPAND_TYPE_RAW);
#if defined(__METAL_MACOS__) && __METAL_VERSION__ >= 220
#define PRIMID_SUPPORT 1
#else
@ -191,6 +194,93 @@ vertex MainVSOut vs_main(MainVSIn v [[stage_in]], constant GSMTLMainVSUniform& c
return vs_main_run(v, cb);
}
/// Converts a raw vertex fetched from the vertex buffer into the MainVSIn form
/// that vs_main_run consumes.
static MainVSIn load_vertex(GSMTLMainVertex base)
{
	// Fog is a single byte in the raw vertex: rescale it to 0..1 and splat it across all lanes
	const float fog_unorm = static_cast<float>(base.fog) / 255.f;

	MainVSIn v;
	v.p = uint2(base.xy);
	v.z = base.z;
	v.st = base.st;
	v.uv = uint2(base.uv);
	v.q = base.q;
	v.c = float4(base.rgba);
	v.f = float4(fog_unorm);
	return v;
}
/// Vertex shader entry point used when points, lines or sprites are expanded into quads.
/// Vertices are fetched manually from the vertex buffer: `vid` carries the source vertex
/// index in its upper bits and the quad corner in its low bits
/// (bit 0 = right side, bit 1 = bottom side).
vertex MainVSOut vs_main_expand(
	uint vid [[vertex_id]],
	device const GSMTLMainVertex* vertices [[buffer(GSMTLBufferIndexHWVertices)]],
	constant GSMTLMainVSUniform& cb [[buffer(GSMTLBufferIndexHWUniforms)]])
{
	const bool right_side = (vid & 1) != 0;
	const bool bottom_side = (vid & 2) != 0;

	switch (VS_EXPAND_TYPE)
	{
		case GSMTLExpandType::None:
			return vs_main_run(load_vertex(vertices[vid]), cb);

		case GSMTLExpandType::Point:
		{
			// Source vertex is vid / 4, the corner pushes the position out by the point size
			MainVSOut corner = vs_main_run(load_vertex(vertices[vid >> 2]), cb);
			if (right_side)
				corner.p.x += cb.point_size.x;
			if (bottom_side)
				corner.p.y += cb.point_size.y;
			return corner;
		}

		case GSMTLExpandType::Line:
		{
			// The two endpoints of a line are neighbours in the vertex buffer.
			// Metal uses provoking vertex first, so the bottom endpoint is the lower index of the pair.
			const uint this_idx = vid >> 2;
			const uint mate_idx = bottom_side ? this_idx + 1 : this_idx - 1;
			MainVSOut vtx = vs_main_run(load_vertex(vertices[this_idx]), cb);
			const MainVSOut mate = vs_main_run(load_vertex(vertices[mate_idx]), cb);

			// Offset perpendicular to the line direction by half the line width
			const float2 direction = normalize(vtx.p.xy - mate.p.xy);
			const float2 half_width = (float2(direction.y, -direction.x) * cb.point_size) / 2;

			// The direction (and so the normal) flips for the bottom endpoint, hence the xor
			vtx.p.xy += (bottom_side != right_side) ? half_width : -half_width;

			// Triangles are issued as (0 1 2) (1 2 3), so both start with a vertex derived from
			// the top endpoint and nothing extra is needed for !IIP
			return vtx;
		}

		case GSMTLExpandType::Sprite:
		{
			// Sprite vertices always come in (left-top, right-bottom) pairs
			const uint pair_idx = vid >> 1;
			const MainVSOut lt = vs_main_run(load_vertex(vertices[pair_idx & ~1u]), cb);
			MainVSOut result = vs_main_run(load_vertex(vertices[pair_idx | 1]), cb);

			// Start from the right-bottom vertex and pull in the left / top values where needed
			if (!right_side)
			{
				result.p.x = lt.p.x;
				result.t.x = lt.t.x;
				result.ti.xz = lt.ti.xz;
			}
			if (!bottom_side)
			{
				result.p.y = lt.p.y;
				result.t.y = lt.t.y;
				result.ti.yw = lt.ti.yw;
			}
			return result;
		}
	}
}
// MARK: - Fragment functions
constexpr sampler palette_sampler(filter::nearest, address::clamp_to_edge);