VideoCommon: Add vertex shader point and line expansion

This commit is contained in:
TellowKrinkle 2022-07-23 00:47:04 -05:00
parent 804e42150e
commit 68f49df0f8
17 changed files with 584 additions and 73 deletions

View File

@ -94,7 +94,7 @@ static size_t s_state_writes_in_queue;
static std::condition_variable s_state_write_queue_is_empty;
// Don't forget to increase this after making changes to the savestate system
constexpr u32 STATE_VERSION = 154; // Last changed in PR 11177
constexpr u32 STATE_VERSION = 155; // Last changed in PR 10890
// Maps savestate versions to Dolphin versions.
// Versions after 42 don't need to be added to this list,

View File

@ -358,7 +358,7 @@ bool DXContext::CreateGXRootSignature()
SetRootParamTable(&params[param_count], &ranges[param_count], D3D12_DESCRIPTOR_RANGE_TYPE_SRV, 3,
1, D3D12_SHADER_VISIBILITY_VERTEX);
param_count++;
SetRootParamConstant(&params[param_count], 2, 1, D3D12_SHADER_VISIBILITY_VERTEX);
SetRootParamConstant(&params[param_count], 3, 1, D3D12_SHADER_VISIBILITY_VERTEX);
param_count++;
// Since these must be contiguous, pixel lighting goes to bbox if not enabled.

View File

@ -102,9 +102,18 @@ struct VertexShaderConstants
std::array<u32, 8> vertex_offset_texcoords;
};
// Selects which kind of primitive expansion, if any, the vertex shader performs.
// The underlying type is u32 so the value can be stored directly in
// GeometryShaderConstants::vs_expand and compared against unsigned literals in
// generated shader code.
enum class VSExpand : u32
{
// No expansion; generated shaders test `vs_expand != 0u` to detect expansion.
None = 0,
// Selected when the draw's primitive type is Points.
Point,
// Selected when the draw's primitive type is Lines.
Line,
};
// CPU-side copy of the geometry shader constants.
// NOTE(review): the member order appears to mirror the GSBlock uniform declaration
// (stereoparams, lineptparams, texoffset, vs_expand) - keep the two in sync.
struct GeometryShaderConstants
{
float4 stereoparams;
float4 lineptparams;
int4 texoffset;
VSExpand vs_expand; // Used by VS point/line expansion in ubershaders
// Fills the remaining three 32-bit words after vs_expand.
u32 pad[3];
};

View File

@ -19,7 +19,7 @@ namespace VideoCommon
// As pipelines encompass both shader UIDs and render states, changes to either of these should
// also increment the pipeline UID version. Incrementing the UID version will cause all UID
// caches to be invalidated.
constexpr u32 GX_PIPELINE_UID_VERSION = 5; // Last changed in PR 10747
constexpr u32 GX_PIPELINE_UID_VERSION = 6; // Last changed in PR 10890
struct GXPipelineUid
{

View File

@ -97,10 +97,8 @@ ShaderCode GenerateGeometryShaderCode(APIType api_type, const ShaderHostConfig&
else
out.Write("cbuffer GSBlock {{\n");
out.Write("\tfloat4 " I_STEREOPARAMS ";\n"
"\tfloat4 " I_LINEPTPARAMS ";\n"
"\tint4 " I_TEXOFFSET ";\n"
"}};\n");
out.Write("{}", s_geometry_shader_uniforms);
out.Write("}};\n");
out.Write("struct VS_OUTPUT {{\n");
GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, "",

View File

@ -8,6 +8,7 @@
#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/RenderState.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"
@ -36,10 +37,22 @@ void GeometryShaderManager::Dirty()
// Any constants that can change based on settings should be re-calculated
s_projection_changed = true;
// Uses EFB scale config
SetLinePtWidthChanged();
dirty = true;
}
void GeometryShaderManager::SetConstants()
// Records the requested vertex shader expansion mode in the geometry shader
// constants. The constants are only flagged dirty when the stored mode actually
// changes, so repeated calls with the same mode do nothing.
static void SetVSExpand(VSExpand expand)
{
  VSExpand& current_mode = GeometryShaderManager::constants.vs_expand;
  if (current_mode == expand)
    return;

  current_mode = expand;
  GeometryShaderManager::dirty = true;
}
void GeometryShaderManager::SetConstants(PrimitiveType prim)
{
if (s_projection_changed && g_ActiveConfig.stereo_mode != StereoMode::Off)
{
@ -63,6 +76,16 @@ void GeometryShaderManager::SetConstants()
dirty = true;
}
if (g_ActiveConfig.UseVSForLinePointExpand())
{
if (prim == PrimitiveType::Points)
SetVSExpand(VSExpand::Point);
else if (prim == PrimitiveType::Lines)
SetVSExpand(VSExpand::Line);
else
SetVSExpand(VSExpand::None);
}
if (s_viewport_changed)
{
s_viewport_changed = false;

View File

@ -7,6 +7,7 @@
#include "VideoCommon/ConstantManager.h"
class PointerWrap;
enum class PrimitiveType : u32;
// The non-API dependent parts.
class GeometryShaderManager
@ -16,7 +17,7 @@ public:
static void Dirty();
static void DoState(PointerWrap& p);
static void SetConstants();
static void SetConstants(PrimitiveType prim);
static void SetViewportChanged();
static void SetProjectionChanged();
static void SetLinePtWidthChanged();

View File

@ -190,6 +190,39 @@ u16* AddLineStrip(u16* index_ptr, u32 num_verts, u32 index)
return index_ptr;
}
// Writes the indices for lines that are expanded into quads by the vertex shader.
//
// Template parameters:
//   pr        - when true, each quad is written as 4 indices followed by
//               s_primitive_restart; when false, as 6 indices (two triangles).
//   linestrip - when true, consecutive vertices are joined (advance by 1);
//               when false, vertices are consumed in pairs (advance by 2).
//
// index_ptr is the write cursor, num_verts the number of input vertices and
// index the index of the first input vertex. Returns the advanced write cursor.
template <bool pr, bool linestrip>
u16* AddLines_VSExpand(u16* index_ptr, u32 num_verts, u32 index)
{
  // VS Expand uses (index >> 2) as the base vertex
  // Bit 0 indicates which side of the line (left/right for a vertical line)
  // Bit 1 indicates which point of the line (top/bottom for a vertical line)
  // VS Expand assumes the two points will be adjacent vertices
  constexpr u32 step = linestrip ? 1 : 2;

  u32 i = 1;
  while (i < num_verts)
  {
    const u32 first = (index + i - 1) << 2;
    const u32 second = (index + i) << 2;

    // The first three indices are the same in both encodings.
    index_ptr[0] = first + 0;
    index_ptr[1] = first + 1;
    index_ptr[2] = second + 2;

    if constexpr (pr)
    {
      // Finish the strip and terminate it.
      index_ptr[3] = second + 3;
      index_ptr[4] = s_primitive_restart;
      index_ptr += 5;
    }
    else
    {
      // Second triangle of the quad.
      index_ptr[3] = first + 1;
      index_ptr[4] = second + 2;
      index_ptr[5] = second + 3;
      index_ptr += 6;
    }

    i += step;
  }

  return index_ptr;
}
u16* AddPoints(u16* index_ptr, u32 num_verts, u32 index)
{
for (u32 i = 0; i != num_verts; ++i)
@ -198,6 +231,35 @@ u16* AddPoints(u16* index_ptr, u32 num_verts, u32 index)
}
return index_ptr;
}
// Writes the indices for points that are expanded into quads by the vertex shader.
//
// When pr is true, each point becomes 4 indices followed by s_primitive_restart;
// otherwise it becomes 6 indices forming two triangles.
//
// index_ptr is the write cursor, num_verts the number of input points and
// index the index of the first input vertex. Returns the advanced write cursor.
template <bool pr>
u16* AddPoints_VSExpand(u16* index_ptr, u32 num_verts, u32 index)
{
  // VS Expand uses (index >> 2) as the base vertex
  // Bottom two bits indicate which of (TL, TR, BL, BR) this is
  for (u32 point = 0; point < num_verts; ++point)
  {
    const u32 quad = (index + point) << 2;

    if constexpr (pr)
    {
      for (u32 corner = 0; corner < 4; ++corner)
        *index_ptr++ = quad + corner;
      *index_ptr++ = s_primitive_restart;
    }
    else
    {
      // Corner order of the two triangles that cover the quad.
      constexpr u32 triangle_corners[6] = {0, 1, 2, 1, 2, 3};
      for (const u32 corner : triangle_corners)
        *index_ptr++ = quad + corner;
    }
  }

  return index_ptr;
}
} // Anonymous namespace
void IndexGenerator::Init()
@ -220,9 +282,27 @@ void IndexGenerator::Init()
m_primitive_table[Primitive::GX_DRAW_TRIANGLE_STRIP] = AddStrip<false>;
m_primitive_table[Primitive::GX_DRAW_TRIANGLE_FAN] = AddFan<false>;
}
m_primitive_table[Primitive::GX_DRAW_LINES] = AddLineList;
m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLineStrip;
m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints;
if (g_Config.UseVSForLinePointExpand())
{
if (g_Config.backend_info.bSupportsPrimitiveRestart)
{
m_primitive_table[Primitive::GX_DRAW_LINES] = AddLines_VSExpand<true, false>;
m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLines_VSExpand<true, true>;
m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints_VSExpand<true>;
}
else
{
m_primitive_table[Primitive::GX_DRAW_LINES] = AddLines_VSExpand<false, false>;
m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLines_VSExpand<false, true>;
m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints_VSExpand<false>;
}
}
else
{
m_primitive_table[Primitive::GX_DRAW_LINES] = AddLineList;
m_primitive_table[Primitive::GX_DRAW_LINE_STRIP] = AddLineStrip;
m_primitive_table[Primitive::GX_DRAW_POINTS] = AddPoints;
}
}
void IndexGenerator::Start(u16* index_ptr)
@ -246,10 +326,14 @@ void IndexGenerator::AddExternalIndices(const u16* indices, u32 num_indices, u32
m_base_index += num_vertices;
}
u32 IndexGenerator::GetRemainingIndices() const
u32 IndexGenerator::GetRemainingIndices(OpcodeDecoder::Primitive primitive) const
{
// -1 is reserved for primitive restart (OGL + DX11)
constexpr u32 max_index = 65534;
u32 max_index = USHRT_MAX;
return max_index - m_base_index;
if (g_Config.UseVSForLinePointExpand() && primitive >= OpcodeDecoder::Primitive::GX_DRAW_LINES)
max_index >>= 2;
// -1 is reserved for primitive restart
return max_index - m_base_index - 1;
}

View File

@ -23,7 +23,7 @@ public:
// returns numprimitives
u32 GetNumVerts() const { return m_base_index; }
u32 GetIndexLen() const { return static_cast<u32>(m_index_buffer_current - m_base_index_ptr); }
u32 GetRemainingIndices() const;
u32 GetRemainingIndices(OpcodeDecoder::Primitive primitive) const;
private:
u16* m_index_buffer_current = nullptr;

View File

@ -10,6 +10,7 @@
#include "Common/MsgHandler.h"
#include "Core/ConfigManager.h"
#include "VideoCommon/ConstantManager.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/FramebufferShaderGen.h"
@ -695,6 +696,35 @@ static GXPipelineUid ApplyDriverBugs(const GXPipelineUid& in)
ps->ztest = EmulatedZ::EarlyWithZComplocHack;
}
if (g_ActiveConfig.UseVSForLinePointExpand() &&
(out.rasterization_state.primitive == PrimitiveType::Points ||
out.rasterization_state.primitive == PrimitiveType::Lines))
{
// All primitives are expanded to triangles in the vertex shader
vertex_shader_uid_data* vs = out.vs_uid.GetUidData();
const PortableVertexDeclaration& decl = out.vertex_format->GetVertexDeclaration();
vs->position_has_3_elems = decl.position.components >= 3;
vs->texcoord_elem_count = 0;
for (int i = 0; i < 8; i++)
{
if (decl.texcoords[i].enable)
{
ASSERT(decl.texcoords[i].components <= 3);
vs->texcoord_elem_count |= decl.texcoords[i].components << (i * 2);
}
}
out.vertex_format = nullptr;
if (out.rasterization_state.primitive == PrimitiveType::Points)
vs->vs_expand = VSExpand::Point;
else
vs->vs_expand = VSExpand::Line;
PrimitiveType prim = g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ?
PrimitiveType::TriangleStrip :
PrimitiveType::Triangles;
out.rasterization_state.primitive = prim;
out.gs_uid.GetUidData()->primitive_type = static_cast<u32>(prim);
}
return out;
}
@ -760,6 +790,17 @@ static GXUberPipelineUid ApplyDriverBugs(const GXUberPipelineUid& in)
out.blending_state.usedualsrc = false;
out.ps_uid.GetUidData()->no_dual_src = true;
}
if (g_ActiveConfig.UseVSForLinePointExpand())
{
// All primitives are expanded to triangles in the vertex shader
PrimitiveType prim = g_ActiveConfig.backend_info.bSupportsPrimitiveRestart ?
PrimitiveType::TriangleStrip :
PrimitiveType::Triangles;
out.rasterization_state.primitive = prim;
out.gs_uid.GetUidData()->primitive_type = static_cast<u32>(prim);
}
return out;
}

View File

@ -5,6 +5,7 @@
#include <fmt/format.h>
#include "Common/Assert.h"
#include "Common/FileUtil.h"
#include "Core/ConfigManager.h"
#include "VideoCommon/VideoCommon.h"
@ -44,6 +45,7 @@ ShaderHostConfig ShaderHostConfig::GetCurrent()
g_ActiveConfig.ManualTextureSamplingWithHiResTextures();
bits.backend_sampler_lod_bias = g_ActiveConfig.backend_info.bSupportsLodBiasInSampler;
bits.backend_dynamic_vertex_loader = g_ActiveConfig.backend_info.bSupportsDynamicVertexLoader;
bits.backend_vs_point_line_expand = g_ActiveConfig.UseVSForLinePointExpand();
return bits;
}

View File

@ -178,6 +178,7 @@ union ShaderHostConfig
BitField<25, 1, bool, u32> manual_texture_sampling_custom_texture_sizes;
BitField<26, 1, bool, u32> backend_sampler_lod_bias;
BitField<27, 1, bool, u32> backend_dynamic_vertex_loader;
BitField<28, 1, bool, u32> backend_vs_point_line_expand;
static ShaderHostConfig GetCurrent();
};
@ -316,3 +317,8 @@ static const char s_shader_uniforms[] = "\tuint components;\n"
"\t#define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)\n"
"\t#define xfmem_color(i) (xfmem_pack1[(i)].z)\n"
"\t#define xfmem_alpha(i) (xfmem_pack1[(i)].w)\n";
// Member declarations of the GSBlock uniform block, kept in one string so every
// shader generator that declares GSBlock writes the same text.
// NOTE(review): the order appears to mirror GeometryShaderConstants - confirm the
// two stay in sync whenever a member is added.
static const char s_geometry_shader_uniforms[] = "\tfloat4 " I_STEREOPARAMS ";\n"
"\tfloat4 " I_LINEPTPARAMS ";\n"
"\tint4 " I_TEXOFFSET ";\n"
"\tuint vs_expand;\n";

View File

@ -3,6 +3,7 @@
#include "VideoCommon/UberShaderVertex.h"
#include "VideoCommon/ConstantManager.h"
#include "VideoCommon/DriverDetails.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/UberShaderCommon.h"
@ -35,6 +36,8 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
const bool ssaa = host_config.ssaa;
const bool per_pixel_lighting = host_config.per_pixel_lighting;
const bool vertex_rounding = host_config.vertex_rounding;
const bool vertex_loader =
host_config.backend_dynamic_vertex_loader || host_config.backend_vs_point_line_expand;
const u32 num_texgen = uid_data->num_texgens;
ShaderCode out;
@ -46,6 +49,13 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
out.Write("{}", s_shader_uniforms);
out.Write("}};\n");
if (vertex_loader)
{
out.Write("UBO_BINDING(std140, 3) uniform GSBlock {{\n");
out.Write("{}", s_geometry_shader_uniforms);
out.Write("}};\n");
}
out.Write("struct VS_OUTPUT {{\n");
GenerateVSOutputMembers(out, api_type, num_texgen, host_config, "", ShaderStage::Vertex);
out.Write("}};\n\n");
@ -54,7 +64,7 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
WriteBitfieldExtractHeader(out, api_type, host_config);
WriteLightingFunction(out);
if (host_config.backend_dynamic_vertex_loader)
if (vertex_loader)
{
out.Write(R"(
SSBO_BINDING(1) readonly restrict buffer Vertices {{
@ -73,17 +83,17 @@ SSBO_BINDING(1) readonly restrict buffer Vertices {{
// D3D12 uses a root constant for this uniform, since it changes with every draw.
// D3D11 doesn't currently support dynamic vertex loader, and we'll have to figure something
// out for it if we want to support it in the future.
out.Write("UBO_BINDING(std140, 3) uniform DX_Constants {{\n"
out.Write("UBO_BINDING(std140, 4) uniform DX_Constants {{\n"
" uint base_vertex;\n"
"}};\n\n"
"uint GetVertexBaseOffset() {{\n"
" return (gl_VertexID + base_vertex) * vertex_stride;\n"
"uint GetVertexBaseOffset(uint vertex_id) {{\n"
" return (vertex_id + base_vertex) * vertex_stride;\n"
"}}\n");
}
else
{
out.Write("uint GetVertexBaseOffset() {{\n"
" return gl_VertexID * vertex_stride;\n"
out.Write("uint GetVertexBaseOffset(uint vertex_id) {{\n"
" return vertex_id * vertex_stride;\n"
"}}\n");
}
@ -187,9 +197,17 @@ float3 load_input_float3_rawtex(uint vtx_offset, uint attr_offset) {{
out.Write("VS_OUTPUT o;\n"
"\n");
if (host_config.backend_dynamic_vertex_loader)
if (host_config.backend_vs_point_line_expand)
{
out.Write("uint vertex_base_offset = GetVertexBaseOffset();\n");
out.Write("uint vertex_id = gl_VertexID;\n"
"if (vs_expand != 0u) {{\n"
" vertex_id = vertex_id >> 2;\n"
"}}\n"
"uint vertex_base_offset = GetVertexBaseOffset(vertex_id);\n");
}
else if (host_config.backend_dynamic_vertex_loader)
{
out.Write("uint vertex_base_offset = GetVertexBaseOffset(gl_VertexID);\n");
}
// rawpos is always needed
LoadVertexAttribute(out, host_config, 0, "rawpos", "float4", "rawpos");
@ -320,6 +338,88 @@ float3 load_input_float3_rawtex(uint vtx_offset, uint attr_offset) {{
if (num_texgen > 0)
GenVertexShaderTexGens(api_type, host_config, num_texgen, out);
if (host_config.backend_vs_point_line_expand)
{
out.Write("if (vs_expand == {}u) {{ // Line\n", static_cast<u32>(VSExpand::Line));
out.Write(" bool is_bottom = (gl_VertexID & 2) != 0;\n"
" bool is_right = (gl_VertexID & 1) != 0;\n"
" uint other_base_offset = vertex_base_offset;\n"
" if (is_bottom) {{\n"
" other_base_offset -= vertex_stride;\n"
" }} else {{\n"
" other_base_offset += vertex_stride;\n"
" }}\n"
" float4 other_rawpos = load_input_float4_rawpos(other_base_offset, "
"vertex_offset_rawpos);\n"
" float4 other_p0 = P0;\n"
" float4 other_p1 = P1;\n"
" float4 other_p2 = P2;\n"
" if ((components & {}u) != 0u) {{ // VB_HAS_POSMTXIDX\n",
VB_HAS_POSMTXIDX);
out.Write(" uint other_posidx = int(load_input_uint4_ubyte4(other_base_offset, "
"vertex_offset_posmtx).r);\n"
" other_p0 = " I_TRANSFORMMATRICES "[other_posidx];\n"
" other_p1 = " I_TRANSFORMMATRICES "[other_posidx+1];\n"
" other_p2 = " I_TRANSFORMMATRICES "[other_posidx+2];\n"
" }}\n"
" float4 other_pos = float4(dot(other_p0, other_rawpos), "
"dot(other_p1, other_rawpos), dot(other_p2, other_rawpos), 1.0);\n"
" other_pos = float4(dot(" I_PROJECTION "[0], other_pos), dot(" I_PROJECTION
"[1], other_pos), dot(" I_PROJECTION "[2], other_pos), dot(" I_PROJECTION
"[3], other_pos));\n"
"\n"
" float sign = is_right ? 1.0f : -1.0f;\n"
// GameCube/Wii's line drawing algorithm is a little quirky. It does not
// use the correct line caps. Instead, the line caps are vertical or
// horizontal depending on the slope of the line.
" float2 offset;\n"
" float2 to = abs(o.pos.xy / o.pos.w - other_pos.xy / other_pos.w);\n"
// FIXME: What does real hardware do when line is at a 45-degree angle?
// FIXME: Lines aren't drawn at the correct width. See Twilight Princess map.
" if (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {{\n"
// Line is more tall. Extend geometry left and right.
// Lerp LineWidth/2 from [0..VpWidth] to [-1..1]
" offset = float2(sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n"
" }} else {{\n"
// Line is more wide. Extend geometry up and down.
// Lerp LineWidth/2 from [0..VpHeight] to [1..-1]
" offset = float2(0, sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n"
" }}\n"
"\n"
" o.pos.xy += offset * o.pos.w;\n");
if (num_texgen > 0)
{
out.Write(" if ((" I_TEXOFFSET "[2] != 0) && is_right) {{\n"
" float texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n");
for (u32 i = 0; i < num_texgen; i++)
{
out.Write(" if (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", i);
out.Write(" o.tex{}.x += texOffset;\n", i);
}
out.Write(" }}\n");
}
out.Write("}} else if (vs_expand == {}u) {{ // Point\n", static_cast<u32>(VSExpand::Point));
out.Write(" bool is_bottom = (gl_VertexID & 2) != 0;\n"
" bool is_right = (gl_VertexID & 1) != 0;\n"
" float2 sign = float2(is_right ? 1.0f : -1.0f, is_bottom ? 1.0f : -1.0f);\n"
" float2 offset = sign * " I_LINEPTPARAMS ".ww / " I_LINEPTPARAMS ".xy;\n"
" o.pos.xy += offset * o.pos.w;\n");
if (num_texgen > 0)
{
out.Write(" if (" I_TEXOFFSET "[3] != 0) {{\n"
" float texOffsetMagnitude = 1.0f / float(" I_TEXOFFSET "[3]);\n"
" float2 texOffset = float2(is_right ? texOffsetMagnitude : 0.0f, "
"is_bottom ? texOffsetMagnitude : 0.0f);");
for (u32 i = 0; i < num_texgen; i++)
{
out.Write(" if (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0)\n", i);
out.Write(" o.tex{}.xy += texOffset;\n", i);
}
out.Write(" }}\n");
}
out.Write("}}\n");
}
if (per_pixel_lighting)
{
out.Write("// When per-pixel lighting is enabled, the vertex colors are passed through\n"
@ -574,7 +674,7 @@ static void GenVertexShaderTexGens(APIType api_type, const ShaderHostConfig& hos
" {{\n");
out.Write(" if ((components & ({}u /* VB_HAS_TEXMTXIDX0 */ << texgen)) != 0u) {{\n",
VB_HAS_TEXMTXIDX0);
if (host_config.backend_dynamic_vertex_loader)
if (host_config.backend_dynamic_vertex_loader || host_config.backend_vs_point_line_expand)
{
out.Write(" int tmp = int(load_input_float3_rawtex(vertex_base_offset, "
"vertex_offset_rawtex[texgen / 4][texgen % 4]).z);\n"
@ -655,7 +755,7 @@ static void LoadVertexAttribute(ShaderCode& code, const ShaderHostConfig& host_c
std::string_view name, std::string_view shader_type,
std::string_view stored_type, std::string_view offset_name)
{
if (host_config.backend_dynamic_vertex_loader)
if (host_config.backend_dynamic_vertex_loader || host_config.backend_vs_point_line_expand)
{
code.Write("{:{}}{} {} = load_input_{}_{}(vertex_base_offset, vertex_offset_{});\n", "", indent,
shader_type, name, shader_type, stored_type,

View File

@ -140,12 +140,12 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
// Check for size in buffer, if the buffer gets full, call Flush()
if (!m_is_flushed &&
(count > m_index_generator.GetRemainingIndices() || count > GetRemainingIndices(primitive) ||
needed_vertex_bytes > GetRemainingSize()))
(count > m_index_generator.GetRemainingIndices(primitive) ||
count > GetRemainingIndices(primitive) || needed_vertex_bytes > GetRemainingSize()))
{
Flush();
if (count > m_index_generator.GetRemainingIndices())
if (count > m_index_generator.GetRemainingIndices(primitive))
{
ERROR_LOG_FMT(VIDEO, "Too little remaining index values. Use 32-bit or reset them on flush.");
}
@ -193,7 +193,55 @@ u32 VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) c
{
const u32 index_len = MAXIBUFFERSIZE - m_index_generator.GetIndexLen();
if (g_Config.backend_info.bSupportsPrimitiveRestart)
if (primitive >= Primitive::GX_DRAW_LINES)
{
if (g_Config.UseVSForLinePointExpand())
{
if (g_Config.backend_info.bSupportsPrimitiveRestart)
{
switch (primitive)
{
case Primitive::GX_DRAW_LINES:
return index_len / 5 * 2;
case Primitive::GX_DRAW_LINE_STRIP:
return index_len / 5 + 1;
case Primitive::GX_DRAW_POINTS:
return index_len / 5;
default:
return 0;
}
}
else
{
switch (primitive)
{
case Primitive::GX_DRAW_LINES:
return index_len / 6 * 2;
case Primitive::GX_DRAW_LINE_STRIP:
return index_len / 6 + 1;
case Primitive::GX_DRAW_POINTS:
return index_len / 6;
default:
return 0;
}
}
}
else
{
switch (primitive)
{
case Primitive::GX_DRAW_LINES:
return index_len;
case Primitive::GX_DRAW_LINE_STRIP:
return index_len / 2 + 1;
case Primitive::GX_DRAW_POINTS:
return index_len;
default:
return 0;
}
}
}
else if (g_Config.backend_info.bSupportsPrimitiveRestart)
{
switch (primitive)
{
@ -206,15 +254,6 @@ u32 VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) c
return index_len / 1 - 1;
case Primitive::GX_DRAW_TRIANGLE_FAN:
return index_len / 6 * 4 + 1;
case Primitive::GX_DRAW_LINES:
return index_len;
case Primitive::GX_DRAW_LINE_STRIP:
return index_len / 2 + 1;
case Primitive::GX_DRAW_POINTS:
return index_len;
default:
return 0;
}
@ -232,15 +271,6 @@ u32 VertexManagerBase::GetRemainingIndices(OpcodeDecoder::Primitive primitive) c
return index_len / 3 + 2;
case Primitive::GX_DRAW_TRIANGLE_FAN:
return index_len / 3 + 2;
case Primitive::GX_DRAW_LINES:
return index_len;
case Primitive::GX_DRAW_LINE_STRIP:
return index_len / 2 + 1;
case Primitive::GX_DRAW_POINTS:
return index_len;
default:
return 0;
}
@ -511,13 +541,24 @@ void VertexManagerBase::Flush()
VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices,
&base_vertex, &base_index);
if (g_ActiveConfig.backend_info.api_type != APIType::D3D &&
g_ActiveConfig.UseVSForLinePointExpand() &&
(m_current_primitive_type == PrimitiveType::Points ||
m_current_primitive_type == PrimitiveType::Lines))
{
// VS point/line expansion puts the vertex id at gl_VertexID << 2
// That means the base vertex has to be adjusted to match
// (The shader adds this after shifting right on D3D, so no need to do this)
base_vertex <<= 2;
}
// Texture loading can cause palettes to be applied (-> uniforms -> draws).
// Palette application does not use vertices, only a full-screen quad, so this is okay.
// Same with GPU texture decoding, which uses compute shaders.
g_texture_cache->BindTextures(used_textures);
// Now we can upload uniforms, as nothing else will override them.
GeometryShaderManager::SetConstants();
GeometryShaderManager::SetConstants(m_current_primitive_type);
PixelShaderManager::SetConstants();
UploadUniforms();

View File

@ -6,6 +6,7 @@
#include "Common/Assert.h"
#include "Common/CommonTypes.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/ConstantManager.h"
#include "VideoCommon/LightingShaderGen.h"
#include "VideoCommon/NativeVertexFormat.h"
#include "VideoCommon/VertexLoaderManager.h"
@ -83,6 +84,8 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
const bool ssaa = host_config.ssaa;
const bool vertex_rounding = host_config.vertex_rounding;
ShaderCode input_extract;
out.Write("{}", s_lighting_struct);
// uniforms
@ -91,6 +94,21 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
out.Write("{}", s_shader_uniforms);
out.Write("}};\n");
if (uid_data->vs_expand != VSExpand::None)
{
out.Write("UBO_BINDING(std140, 3) uniform GSBlock {{\n");
out.Write("{}", s_geometry_shader_uniforms);
out.Write("}};\n");
if (api_type == APIType::D3D)
{
// D3D doesn't include the base vertex in SV_VertexID
out.Write("UBO_BINDING(std140, 4) uniform DX_Constants {{\n"
" uint base_vertex;\n"
"}};\n\n");
}
}
out.Write("struct VS_OUTPUT {{\n");
GenerateVSOutputMembers(out, api_type, uid_data->numTexGens, host_config, "",
ShaderStage::Vertex);
@ -98,31 +116,114 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
WriteIsNanHeader(out, api_type);
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
if ((uid_data->components & VB_HAS_NORMAL) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB);
if ((uid_data->components & VB_HAS_TANGENT) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB);
if ((uid_data->components & VB_HAS_BINORMAL) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB);
if ((uid_data->components & VB_HAS_COL0) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
if ((uid_data->components & VB_HAS_COL1) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
for (u32 i = 0; i < 8; ++i)
if (uid_data->vs_expand == VSExpand::None)
{
const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i));
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawpos;\n", SHADER_POSITION_ATTRIB);
if ((uid_data->components & VB_HAS_POSMTXIDX) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in uint4 posmtx;\n", SHADER_POSMTX_ATTRIB);
if ((uid_data->components & VB_HAS_NORMAL) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawnormal;\n", SHADER_NORMAL_ATTRIB);
if ((uid_data->components & VB_HAS_TANGENT) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawtangent;\n", SHADER_TANGENT_ATTRIB);
if ((uid_data->components & VB_HAS_BINORMAL) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float3 rawbinormal;\n", SHADER_BINORMAL_ATTRIB);
if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0)
if ((uid_data->components & VB_HAS_COL0) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor0;\n", SHADER_COLOR0_ATTRIB);
if ((uid_data->components & VB_HAS_COL1) != 0)
out.Write("ATTRIBUTE_LOCATION({}) in float4 rawcolor1;\n", SHADER_COLOR1_ATTRIB);
for (u32 i = 0; i < 8; ++i)
{
out.Write("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i,
has_texmtx != 0 ? 3 : 2, i);
const u32 has_texmtx = (uid_data->components & (VB_HAS_TEXMTXIDX0 << i));
if ((uid_data->components & (VB_HAS_UV0 << i)) != 0 || has_texmtx != 0)
{
out.Write("ATTRIBUTE_LOCATION({}) in float{} rawtex{};\n", SHADER_TEXTURE0_ATTRIB + i,
has_texmtx != 0 ? 3 : 2, i);
}
}
}
else
{
// Can't use float3, etc because we want 4-byte alignment
out.Write(
"uint4 unpack_ubyte4(uint value) {{\n"
" return uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24);\n"
"}}\n\n"
"struct InputData {{\n");
if (uid_data->components & VB_HAS_POSMTXIDX)
{
out.Write(" uint posmtx;\n");
input_extract.Write("uint4 posmtx = unpack_ubyte4(i.posmtx);\n");
}
if (uid_data->position_has_3_elems)
{
out.Write(" float pos0;\n"
" float pos1;\n"
" float pos2;\n");
input_extract.Write("float4 rawpos = float4(i.pos0, i.pos1, i.pos2, 1.0f);\n");
}
else
{
out.Write(" float pos0;\n"
" float pos1;\n");
input_extract.Write("float4 rawpos = float4(i.pos0, i.pos1, 0.0f, 1.0f);\n");
}
std::array<std::string_view, 3> names = {"normal", "binormal", "tangent"};
for (int i = 0; i < 3; i++)
{
if (uid_data->components & (VB_HAS_NORMAL << i))
{
out.Write(" float {0}0;\n"
" float {0}1;\n"
" float {0}2;\n",
names[i]);
input_extract.Write("float3 raw{0} = float3(i.{0}0, i.{0}1, i.{0}2);\n", names[i]);
}
}
for (int i = 0; i < 2; i++)
{
if (uid_data->components & (VB_HAS_COL0 << i))
{
out.Write(" uint color{};\n", i);
input_extract.Write("float4 rawcolor{0} = float4(unpack_ubyte4(i.color{0})) / 255.0f;\n",
i);
}
}
for (int i = 0; i < 8; i++)
{
if (uid_data->components & (VB_HAS_UV0 << i))
{
u32 ncomponents = (uid_data->texcoord_elem_count >> (2 * i)) & 3;
if (ncomponents < 2)
{
out.Write(" float tex{};\n", i);
input_extract.Write("float3 rawtex{0} = float3(i.tex{0}, 0.0f, 0.0f);\n", i);
}
else if (ncomponents == 2)
{
out.Write(" float tex{0}_0;\n"
" float tex{0}_1;\n",
i);
input_extract.Write("float3 rawtex{0} = float3(i.tex{0}_0, i.tex{0}_1, 0.0f);\n", i);
}
else
{
out.Write(" float tex{0}_0;\n"
" float tex{0}_1;\n"
" float tex{0}_2;\n",
i);
input_extract.Write("float3 rawtex{0} = float3(i.tex{0}_0, i.tex{0}_1, i.tex{0}_2);\n",
i);
}
}
}
out.Write("}};\n\n"
"SSBO_BINDING(1) readonly restrict buffer InputBuffer {{\n"
" InputData input_buffer[];\n"
"}};\n\n");
}
if (host_config.backend_geometry_shaders)
{
@ -161,6 +262,21 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
out.Write("void main()\n{{\n");
if (uid_data->vs_expand != VSExpand::None)
{
out.Write("bool is_bottom = (gl_VertexID & 2) != 0;\n"
"bool is_right = (gl_VertexID & 1) != 0;\n");
// D3D doesn't include the base vertex in SV_VertexID
// See comment in UberShaderVertex for details
if (api_type == APIType::D3D)
out.Write("uint vertex_id = (gl_VertexID >> 2) + base_vertex;\n");
else
out.Write("uint vertex_id = gl_VertexID >> 2;\n");
out.Write("InputData i = input_buffer[vertex_id];\n"
"{}",
input_extract.GetBuffer());
}
out.Write("VS_OUTPUT o;\n");
// xfmem.numColorChans controls the number of color channels available to TEV, but we still need
@ -403,6 +519,86 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
out.Write("}}\n");
}
if (uid_data->vs_expand == VSExpand::Line)
{
out.Write("// Line expansion\n"
"uint other_id = vertex_id;\n"
"if (is_bottom) {{\n"
" other_id -= 1;\n"
"}} else {{\n"
" other_id += 1;\n"
"}}\n"
"InputData other = input_buffer[other_id];\n");
if (uid_data->position_has_3_elems)
out.Write("float4 other_pos = float4(other.pos0, other.pos1, other.pos2, 1.0f);\n");
else
out.Write("float4 other_pos = float4(other.pos0, other.pos1, 0.0f, 1.0f);\n");
if (uid_data->components & VB_HAS_POSMTXIDX)
{
out.Write("uint other_posidx = other.posmtx & 0xff;\n"
"float4 other_p0 = " I_TRANSFORMMATRICES "[other_posidx];\n"
"float4 other_p1 = " I_TRANSFORMMATRICES "[other_posidx + 1];\n"
"float4 other_p2 = " I_TRANSFORMMATRICES "[other_posidx + 2];\n"
"other_pos = float4(dot(other_p0, other_pos), dot(other_p1, other_pos), "
"dot(other_p2, other_pos), 1.0f);\n");
}
else
{
out.Write("other_pos = float4(dot(P0, other_pos), dot(P1, other_pos), dot(P2, other_pos), "
"1.0f);\n");
}
out.Write("other_pos = float4(dot(" I_PROJECTION "[0], other_pos), dot(" I_PROJECTION
"[1], other_pos), dot(" I_PROJECTION "[2], other_pos), dot(" I_PROJECTION
"[3], other_pos));\n"
"float expand_sign = is_right ? 1.0f : -1.0f;\n"
"float2 offset;\n"
"float2 to = abs(o.pos.xy / o.pos.w - other_pos.xy / other_pos.w);\n"
// FIXME: What does real hardware do when line is at a 45-degree angle?
// FIXME: Lines aren't drawn at the correct width. See Twilight Princess map.
"if (" I_LINEPTPARAMS ".y * to.y > " I_LINEPTPARAMS ".x * to.x) {{\n"
// Line is more tall. Extend geometry left and right.
// Lerp LineWidth/2 from [0..VpWidth] to [-1..1]
" offset = float2(expand_sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".x, 0);\n"
"}} else {{\n"
// Line is more wide. Extend geometry up and down.
// Lerp LineWidth/2 from [0..VpHeight] to [1..-1]
" offset = float2(0, expand_sign * " I_LINEPTPARAMS ".z / " I_LINEPTPARAMS ".y);\n"
"}}\n"
"\n"
"o.pos.xy += offset * o.pos.w;\n");
if (uid_data->numTexGens > 0)
{
out.Write("if ((" I_TEXOFFSET "[2] != 0) && is_right) {{\n"
" float texOffset = 1.0 / float(" I_TEXOFFSET "[2]);\n");
for (u32 i = 0; i < uid_data->numTexGens; i++)
{
out.Write(" if (((" I_TEXOFFSET "[0] >> {}) & 0x1) != 0)\n", i);
out.Write(" o.tex{}.x += texOffset;\n", i);
}
out.Write("}}\n");
}
}
else if (uid_data->vs_expand == VSExpand::Point)
{
out.Write("// Point expansion\n"
"float2 expand_sign = float2(is_right ? 1.0f : -1.0f, is_bottom ? 1.0f : -1.0f);\n"
"float2 offset = expand_sign * " I_LINEPTPARAMS ".ww / " I_LINEPTPARAMS ".xy;\n"
"o.pos.xy += offset * o.pos.w;\n");
if (uid_data->numTexGens > 0)
{
out.Write("if (" I_TEXOFFSET "[3] != 0) {{\n"
" float texOffsetMagnitude = 1.0f / float(" I_TEXOFFSET "[3]);\n"
" float2 texOffset = float2(is_right ? texOffsetMagnitude : 0.0f, "
"is_bottom ? texOffsetMagnitude : 0.0f);");
for (u32 i = 0; i < uid_data->numTexGens; i++)
{
out.Write(" if (((" I_TEXOFFSET "[1] >> {}) & 0x1) != 0)\n", i);
out.Write(" o.tex{}.xy += texOffset;\n", i);
}
out.Write("}}\n");
}
}
if (per_pixel_lighting)
{
// When per-pixel lighting is enabled, the vertex colors are passed through

View File

@ -11,6 +11,7 @@ enum class APIType;
enum class TexInputForm : u32;
enum class TexGenType : u32;
enum class SourceRow : u32;
enum class VSExpand : u32;
// TODO should be reordered
enum : int
@ -42,10 +43,12 @@ struct vertex_shader_uid_data
u32 numTexGens : 4;
u32 numColorChans : 2;
u32 dualTexTrans_enabled : 1;
VSExpand vs_expand : 2;
u32 position_has_3_elems : 1;
u32 texMtxInfo_n_projection : 16; // Stored separately to guarantee that the texMtxInfo struct is
// 8 bits wide
u32 pad : 18;
u16 texcoord_elem_count; // 2 bits per texcoord input
u16 texMtxInfo_n_projection; // Stored separately to guarantee that the texMtxInfo struct is
// 8 bits wide
struct
{

View File

@ -222,9 +222,16 @@ struct VideoConfig final
bool bSupportsSettingObjectNames = false;
bool bSupportsPartialMultisampleResolve = false;
bool bSupportsDynamicVertexLoader = false;
bool bSupportsVSLinePointExpand = false;
} backend_info;
// Utility
// Returns true when points and lines should be expanded in the vertex shader:
// the backend must support VS expansion and must not support geometry shaders.
bool UseVSForLinePointExpand() const
{
  return backend_info.bSupportsVSLinePointExpand && !backend_info.bSupportsGeometryShaders;
}
bool MultisamplingEnabled() const { return iMultisamples > 1; }
bool ExclusiveFullscreenEnabled() const
{