Cache normals in addition to binormals and tangents

Fixes LIT (https://bugs.dolphin-emu.org/issues/13635). The game's text is drawn with lighting enabled, but its vertices do not include normals. With the previous default normal of (0, 0, 0), the lighting result was always black (as dot(X, (0, 0, 0)) is always 0). It seems like the normal from the previously drawn background map, (0, 0, 1), is re-used.

LIT also has the vertex color enabled while vertex color is not specified, the same as SMS's debug cubes; the default MissingColorValue GameINI value of solid white seems to work correctly in this case.
This commit is contained in:
Pokechu22 2024-09-24 23:46:45 -07:00
parent 35ec2e97a8
commit 937bb2aa2e
15 changed files with 140 additions and 83 deletions

View File

@ -98,7 +98,7 @@ static size_t s_state_writes_in_queue;
static std::condition_variable s_state_write_queue_is_empty;
// Don't forget to increase this after doing changes on the savestate system
constexpr u32 STATE_VERSION = 168; // Last changed in PR 12639
constexpr u32 STATE_VERSION = 169; // Last changed in PR 13074
// Increase this if the StateExtendedHeader definition changes
constexpr u32 EXTENDED_HEADER_VERSION = 1; // Last changed in PR 12217

View File

@ -81,9 +81,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
// transform this vertex so that it can be used for rasterization (outVertex)
OutputVertexData* outVertex = m_setup_unit.GetVertex();
TransformUnit::TransformPosition(&m_vertex, outVertex);
outVertex->normal = {};
if (VertexLoaderManager::g_current_components & VB_HAS_NORMAL)
TransformUnit::TransformNormal(&m_vertex, outVertex);
TransformUnit::TransformNormal(&m_vertex, outVertex);
TransformUnit::TransformColor(&m_vertex, outVertex);
TransformUnit::TransformTexCoord(&m_vertex, outVertex);
@ -209,6 +207,14 @@ void SWVertexLoader::ParseVertex(const PortableVertexDeclaration& vdec, int inde
{
ReadVertexAttribute<float>(&m_vertex.normal[i][0], src, vdec.normals[i], 0, 3, false);
}
if (!vdec.normals[0].enable)
{
auto& system = Core::System::GetInstance();
auto& vertex_shader_manager = system.GetVertexShaderManager();
m_vertex.normal[0][0] = vertex_shader_manager.constants.cached_normal[0];
m_vertex.normal[0][1] = vertex_shader_manager.constants.cached_normal[1];
m_vertex.normal[0][2] = vertex_shader_manager.constants.cached_normal[2];
}
if (!vdec.normals[1].enable)
{
auto& system = Core::System::GetInstance();

View File

@ -93,6 +93,7 @@ struct alignas(16) VertexShaderConstants
// .x - texMtxInfo, .y - postMtxInfo, [0..1].z = color, [0..1].w = alpha
std::array<uint4, 8> xfmem_pack1;
float4 cached_normal;
float4 cached_tangent;
float4 cached_binormal;
// For UberShader vertex loader

View File

@ -283,6 +283,7 @@ void WriteSwitch(ShaderCode& out, APIType ApiType, std::string_view variable,
#define I_POSTTRANSFORMMATRICES "cpostmtx"
#define I_PIXELCENTERCORRECTION "cpixelcenter"
#define I_VIEWPORT_SIZE "cviewport"
#define I_CACHED_NORMAL "cnormal"
#define I_CACHED_TANGENT "ctangent"
#define I_CACHED_BINORMAL "cbinormal"
@ -306,6 +307,7 @@ static const char s_shader_uniforms[] = "\tuint components;\n"
"\tfloat4 " I_PIXELCENTERCORRECTION ";\n"
"\tfloat2 " I_VIEWPORT_SIZE ";\n"
"\tuint4 xfmem_pack1[8];\n"
"\tfloat4 " I_CACHED_NORMAL ";\n"
"\tfloat4 " I_CACHED_TANGENT ";\n"
"\tfloat4 " I_CACHED_BINORMAL ";\n"
"\tuint vertex_stride;\n"

View File

@ -251,47 +251,53 @@ float3 load_input_float3_rawtex(uint vtx_offset, uint attr_offset) {{
"o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
"[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n"
"\n"
"float3 _rawnormal;\n"
"float3 _rawtangent;\n"
"float3 _rawbinormal;\n"
"if ((components & {}u) != 0u) // VB_HAS_NORMAL\n"
"{{\n",
Common::ToUnderlying(VB_HAS_NORMAL));
LoadVertexAttribute(out, host_config, 2, "rawnormal", "float3", "float3");
out.Write(" _rawnormal = rawnormal;\n"
"}}\n"
"else\n"
"{{\n"
" _rawnormal = " I_CACHED_NORMAL ".xyz;\n"
"}}\n"
"\n"
"if ((components & {}u) != 0u) // VB_HAS_TANGENT\n"
"{{\n",
Common::ToUnderlying(VB_HAS_TANGENT));
LoadVertexAttribute(out, host_config, 2, "rawtangent", "float3", "float3");
out.Write(" _rawtangent = rawtangent;\n"
"}}\n"
"else\n"
"{{\n"
" _rawtangent = " I_CACHED_TANGENT ".xyz;\n"
"}}\n"
"\n"
"if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n"
"{{\n",
Common::ToUnderlying(VB_HAS_BINORMAL));
LoadVertexAttribute(out, host_config, 2, "rawbinormal", "float3", "float3");
out.Write(" _rawbinormal = rawbinormal;\n"
"}}\n"
"else\n"
"{{\n"
" _rawbinormal = " I_CACHED_BINORMAL ".xyz;\n"
"}}\n"
"\n"
"// The scale of the transform matrix is used to control the size of the emboss map\n"
"// effect by changing the scale of the transformed binormals (which only get used by\n"
"// emboss map texgens). By normalising the first transformed normal (which is used\n"
"// by lighting calculations and needs to be unit length), the same transform matrix\n"
"// can do double duty, scaling for emboss mapping, and not scaling for lighting.\n"
"float3 _normal = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_NORMAL\n"
"{{\n",
Common::ToUnderlying(VB_HAS_NORMAL));
LoadVertexAttribute(out, host_config, 2, "rawnormal", "float3", "float3");
out.Write(" _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, "
"rawnormal)));\n"
"}}\n"
"\n"
"float3 _tangent = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_TANGENT\n"
"{{\n",
Common::ToUnderlying(VB_HAS_TANGENT));
LoadVertexAttribute(out, host_config, 2, "rawtangent", "float3", "float3");
out.Write(" _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, rawtangent));\n"
"}}\n"
"else\n"
"{{\n"
" _tangent = float3(dot(N0, " I_CACHED_TANGENT ".xyz), dot(N1, " I_CACHED_TANGENT
".xyz), dot(N2, " I_CACHED_TANGENT ".xyz));\n"
"}}\n"
"\n"
"float3 _binormal = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n"
"{{\n",
Common::ToUnderlying(VB_HAS_BINORMAL));
LoadVertexAttribute(out, host_config, 2, "rawbinormal", "float3", "float3");
out.Write(" _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
"rawbinormal));\n"
"}}\n"
"else\n"
"{{\n"
" _binormal = float3(dot(N0, " I_CACHED_BINORMAL ".xyz), dot(N1, " I_CACHED_BINORMAL
".xyz), dot(N2, " I_CACHED_BINORMAL ".xyz));\n"
"}}\n"
"\n");
"float3 _normal = normalize(float3(dot(N0, _rawnormal), dot(N1, _rawnormal), dot(N2, "
"_rawnormal)));\n"
"float3 _tangent = float3(dot(N0, _rawtangent), dot(N1, _rawtangent), dot(N2, "
"_rawtangent));\n"
"float3 _binormal = float3(dot(N0, _rawbinormal), dot(N1, _rawbinormal), dot(N2, "
"_rawbinormal));\n");
// Hardware Lighting
out.Write("// xfmem.numColorChans controls the number of color channels available to TEV,\n"

View File

@ -164,6 +164,13 @@ void VertexLoaderARM64::ReadVertex(VertexComponentFormat attribute, ComponentFor
m_float_emit.STR(128, coords, EncodeRegTo64(scratch2_reg), ArithOption(remaining_reg, true));
SetJumpTarget(dont_store);
}
else if (native_format == &m_native_vtx_decl.normals[0])
{
FixupBranch dont_store = CBNZ(remaining_reg);
MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::normal_cache.data());
m_float_emit.STR(128, IndexType::Unsigned, coords, EncodeRegTo64(scratch2_reg), 0);
SetJumpTarget(dont_store);
}
else if (native_format == &m_native_vtx_decl.normals[1])
{
FixupBranch dont_store = CBNZ(remaining_reg);

View File

@ -68,6 +68,7 @@ public:
VertexLoaderManager::position_matrix_index_cache;
const std::array<std::array<float, 4>, 3> old_position_cache =
VertexLoaderManager::position_cache;
const std::array<float, 4> old_normal_cache = VertexLoaderManager::normal_cache;
const std::array<float, 4> old_tangent_cache = VertexLoaderManager::tangent_cache;
const std::array<float, 4> old_binormal_cache = VertexLoaderManager::binormal_cache;
@ -77,12 +78,14 @@ public:
VertexLoaderManager::position_matrix_index_cache;
const std::array<std::array<float, 4>, 3> a_position_cache =
VertexLoaderManager::position_cache;
const std::array<float, 4> a_normal_cache = VertexLoaderManager::normal_cache;
const std::array<float, 4> a_tangent_cache = VertexLoaderManager::tangent_cache;
const std::array<float, 4> a_binormal_cache = VertexLoaderManager::binormal_cache;
// Reset state before running b
VertexLoaderManager::position_matrix_index_cache = old_position_matrix_index_cache;
VertexLoaderManager::position_cache = old_position_cache;
VertexLoaderManager::normal_cache = old_normal_cache;
VertexLoaderManager::tangent_cache = old_tangent_cache;
VertexLoaderManager::binormal_cache = old_binormal_cache;
@ -92,6 +95,7 @@ public:
VertexLoaderManager::position_matrix_index_cache;
const std::array<std::array<float, 4>, 3> b_position_cache =
VertexLoaderManager::position_cache;
const std::array<float, 4> b_normal_cache = VertexLoaderManager::normal_cache;
const std::array<float, 4> b_tangent_cache = VertexLoaderManager::tangent_cache;
const std::array<float, 4> b_binormal_cache = VertexLoaderManager::binormal_cache;
@ -140,6 +144,12 @@ public:
fmt::join(b_position_cache[1], ", "), fmt::join(b_position_cache[2], ", "));
// The last element is allowed to be garbage for SIMD overwrites
ASSERT_MSG(VIDEO,
std::equal(a_normal_cache.begin(), a_normal_cache.begin() + 3,
b_normal_cache.begin(), b_normal_cache.begin() + 3, bit_equal),
"Expected matching normal caches after loading (a: {}; b: {})",
fmt::join(a_normal_cache, ", "), fmt::join(b_normal_cache, ", "));
ASSERT_MSG(VIDEO,
std::equal(a_tangent_cache.begin(), a_tangent_cache.begin() + 3,
b_tangent_cache.begin(), b_tangent_cache.begin() + 3, bit_equal),

View File

@ -40,6 +40,7 @@ namespace VertexLoaderManager
std::array<u32, 3> position_matrix_index_cache;
// 3 vertices, 4 floats each to allow SIMD overwrite
alignas(sizeof(std::array<float, 4>)) std::array<std::array<float, 4>, 3> position_cache;
alignas(sizeof(std::array<float, 4>)) std::array<float, 4> normal_cache;
alignas(sizeof(std::array<float, 4>)) std::array<float, 4> tangent_cache;
alignas(sizeof(std::array<float, 4>)) std::array<float, 4> binormal_cache;

View File

@ -62,6 +62,10 @@ void UpdateVertexArrayPointers();
// These arrays are in reverse order.
extern std::array<std::array<float, 4>, 3> position_cache;
extern std::array<u32, 3> position_matrix_index_cache;
// Needed for the game "LIT", which has text that has lighting enabled, but doesn't have normal
// vectors. The normals from the last drawn object are used instead.
// See https://bugs.dolphin-emu.org/issues/13635
extern std::array<float, 4> normal_cache;
// Store the tangent and binormal vectors for games that use emboss texgens when the vertex format
// doesn't include them (e.g. RS2 and RS3). These too are 4 floats each for SIMD overwrites.
extern std::array<float, 4> tangent_cache;

View File

@ -137,6 +137,14 @@ void VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute,
MOVUPS(MPIC(VertexLoaderManager::position_cache.data(), scratch3, SCALE_4), coords);
SetJumpTarget(dont_store);
}
else if (native_format == &m_native_vtx_decl.normals[0])
{
TEST(32, R(remaining_reg), R(remaining_reg));
FixupBranch dont_store = J_CC(CC_NZ);
// For similar reasons, the cached normal is 4 floats (MOVUPS stores all 4 lanes)
MOVUPS(MPIC(VertexLoaderManager::normal_cache.data()), coords);
SetJumpTarget(dont_store);
}
else if (native_format == &m_native_vtx_decl.normals[1])
{
TEST(32, R(remaining_reg), R(remaining_reg));

View File

@ -49,7 +49,9 @@ void ReadIndirect(VertexLoader* loader, const T* data)
const float value = FracAdjust(Common::FromBigEndian(data[i]));
if (loader->m_remaining == 0)
{
if (i >= 3 && i < 6)
if (i < 3)
VertexLoaderManager::normal_cache[i] = value;
else if (i >= 3 && i < 6)
VertexLoaderManager::tangent_cache[i - 3] = value;
else if (i >= 6 && i < 9)
VertexLoaderManager::binormal_cache[i - 6] = value;

View File

@ -558,7 +558,7 @@ void VertexManagerBase::Flush()
pixel_shader_manager.constants.time_ms = seconds_elapsed * 1000;
}
CalculateBinormals(VertexLoaderManager::GetCurrentVertexFormat());
CalculateNormals(VertexLoaderManager::GetCurrentVertexFormat());
// Calculate ZSlope for zfreeze
const auto used_textures = UsedTextures();
std::vector<std::string> texture_names;
@ -699,6 +699,7 @@ void VertexManagerBase::DoState(PointerWrap& p)
}
p.Do(m_zslope);
p.Do(VertexLoaderManager::normal_cache);
p.Do(VertexLoaderManager::tangent_cache);
p.Do(VertexLoaderManager::binormal_cache);
}
@ -769,7 +770,7 @@ void VertexManagerBase::CalculateZSlope(NativeVertexFormat* format)
m_zslope.dirty = true;
}
void VertexManagerBase::CalculateBinormals(NativeVertexFormat* format)
void VertexManagerBase::CalculateNormals(NativeVertexFormat* format)
{
const PortableVertexDeclaration vert_decl = format->GetVertexDeclaration();
@ -794,6 +795,16 @@ void VertexManagerBase::CalculateBinormals(NativeVertexFormat* format)
vertex_shader_manager.constants.cached_binormal = VertexLoaderManager::binormal_cache;
vertex_shader_manager.dirty = true;
}
if (vert_decl.normals[0].enable)
return;
VertexLoaderManager::normal_cache[3] = 0;
if (vertex_shader_manager.constants.cached_normal != VertexLoaderManager::normal_cache)
{
vertex_shader_manager.constants.cached_normal = VertexLoaderManager::normal_cache;
vertex_shader_manager.dirty = true;
}
}
void VertexManagerBase::UpdatePipelineConfig()

View File

@ -192,7 +192,7 @@ protected:
u32 GetRemainingIndices(OpcodeDecoder::Primitive primitive) const;
void CalculateZSlope(NativeVertexFormat* format);
void CalculateBinormals(NativeVertexFormat* format);
void CalculateNormals(NativeVertexFormat* format);
BitSet32 UsedTextures() const;

View File

@ -312,56 +312,43 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
out.Write("int posidx = int(posmtx.r);\n"
"float4 P0 = " I_TRANSFORMMATRICES "[posidx];\n"
"float4 P1 = " I_TRANSFORMMATRICES "[posidx + 1];\n"
"float4 P2 = " I_TRANSFORMMATRICES "[posidx + 2];\n");
if ((uid_data->components & VB_HAS_NORMAL) != 0)
{
out.Write("int normidx = posidx & 31;\n"
"float3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
"float3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n"
"float3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n");
}
"float4 P2 = " I_TRANSFORMMATRICES "[posidx + 2];\n"
"int normidx = posidx & 31;\n"
"float3 N0 = " I_NORMALMATRICES "[normidx].xyz;\n"
"float3 N1 = " I_NORMALMATRICES "[normidx + 1].xyz;\n"
"float3 N2 = " I_NORMALMATRICES "[normidx + 2].xyz;\n");
}
else
{
// One shared matrix
out.Write("float4 P0 = " I_POSNORMALMATRIX "[0];\n"
"float4 P1 = " I_POSNORMALMATRIX "[1];\n"
"float4 P2 = " I_POSNORMALMATRIX "[2];\n");
if ((uid_data->components & VB_HAS_NORMAL) != 0)
{
out.Write("float3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
"float3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
"float3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n");
}
"float4 P2 = " I_POSNORMALMATRIX "[2];\n"
"float3 N0 = " I_POSNORMALMATRIX "[3].xyz;\n"
"float3 N1 = " I_POSNORMALMATRIX "[4].xyz;\n"
"float3 N2 = " I_POSNORMALMATRIX "[5].xyz;\n");
}
out.Write("// Multiply the position vector by the position matrix\n"
"float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n");
if ((uid_data->components & VB_HAS_NORMAL) != 0)
{
if ((uid_data->components & VB_HAS_TANGENT) == 0)
out.Write("float3 rawtangent = " I_CACHED_TANGENT ".xyz;\n");
if ((uid_data->components & VB_HAS_BINORMAL) == 0)
out.Write("float3 rawbinormal = " I_CACHED_BINORMAL ".xyz;\n");
if ((uid_data->components & VB_HAS_NORMAL) == 0)
out.Write("float3 rawnormal = " I_CACHED_NORMAL ".xyz;\n");
if ((uid_data->components & VB_HAS_TANGENT) == 0)
out.Write("float3 rawtangent = " I_CACHED_TANGENT ".xyz;\n");
if ((uid_data->components & VB_HAS_BINORMAL) == 0)
out.Write("float3 rawbinormal = " I_CACHED_BINORMAL ".xyz;\n");
// The scale of the transform matrix is used to control the size of the emboss map effect, by
// changing the scale of the transformed binormals (which only get used by emboss map texgens).
// By normalising the first transformed normal (which is used by lighting calculations and needs
// to be unit length), the same transform matrix can do double duty, scaling for emboss mapping,
// and not scaling for lighting.
out.Write("float3 _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, "
"rawnormal)));\n"
"float3 _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, "
"rawtangent));\n"
"float3 _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
"rawbinormal));\n");
}
else
{
out.Write("float3 _normal = float3(0.0, 0.0, 0.0);\n");
out.Write("float3 _binormal = float3(0.0, 0.0, 0.0);\n");
out.Write("float3 _tangent = float3(0.0, 0.0, 0.0);\n");
}
// The scale of the transform matrix is used to control the size of the emboss map effect, by
// changing the scale of the transformed binormals (which only get used by emboss map texgens).
// By normalising the first transformed normal (which is used by lighting calculations and needs
// to be unit length), the same transform matrix can do double duty, scaling for emboss mapping,
// and not scaling for lighting.
out.Write("float3 _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, "
"rawnormal)));\n"
"float3 _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, "
"rawtangent));\n"
"float3 _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
"rawbinormal));\n");
out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
"[1], pos), dot(" I_PROJECTION "[2], pos), dot(" I_PROJECTION "[3], pos));\n");

View File

@ -702,6 +702,7 @@ TEST_P(VertexLoaderNormalTest, NormalAll)
input_with_expected_type(i / 32.f);
// Pre-fill these values to detect if they're modified
VertexLoaderManager::normal_cache = {-42.f, -43.f, -44.f, -45.f};
VertexLoaderManager::binormal_cache = {42.f, 43.f, 44.f, 45.f};
VertexLoaderManager::tangent_cache = {46.f, 47.f, 48.f, 49.f};
@ -738,6 +739,9 @@ TEST_P(VertexLoaderNormalTest, NormalAll)
ExpectOut(10 / 32.f);
ExpectOut(11 / 32.f);
ExpectOut(12 / 32.f);
EXPECT_EQ(VertexLoaderManager::normal_cache[0], 10 / 32.f);
EXPECT_EQ(VertexLoaderManager::normal_cache[1], 11 / 32.f);
EXPECT_EQ(VertexLoaderManager::normal_cache[2], 12 / 32.f);
if (elements == NormalComponentCount::NTB)
{
// Tangent
@ -759,6 +763,14 @@ TEST_P(VertexLoaderNormalTest, NormalAll)
}
}
if (addr == VertexComponentFormat::NotPresent)
{
// Expect these to not be written
EXPECT_EQ(VertexLoaderManager::normal_cache[0], -42.f);
EXPECT_EQ(VertexLoaderManager::normal_cache[1], -43.f);
EXPECT_EQ(VertexLoaderManager::normal_cache[2], -44.f);
EXPECT_EQ(VertexLoaderManager::normal_cache[3], -45.f);
}
if (addr == VertexComponentFormat::NotPresent || elements == NormalComponentCount::N)
{
// Expect these to not be written