VideoCommon: Handle emboss texgen with only a single normal

Fixes a large number of effects in Rogue Squadron 2 and 3.
This commit is contained in:
Pokechu22 2022-04-13 22:03:34 -07:00
parent 39b2854b98
commit 2a5c77f43f
14 changed files with 138 additions and 62 deletions

View File

@ -25,6 +25,7 @@
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderBase.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoConfig.h"
#include "VideoCommon/XFMemory.h"
@ -90,10 +91,7 @@ void SWVertexLoader::DrawCurrentBatch(u32 base_index, u32 num_indices, u32 base_
TransformUnit::TransformPosition(&m_vertex, outVertex);
outVertex->normal = {};
if (VertexLoaderManager::g_current_components & VB_HAS_NORMAL)
{
TransformUnit::TransformNormal(
&m_vertex, (VertexLoaderManager::g_current_components & VB_HAS_BINORMAL) != 0, outVertex);
}
TransformUnit::TransformNormal(&m_vertex, outVertex);
TransformUnit::TransformColor(&m_vertex, outVertex);
TransformUnit::TransformTexCoord(&m_vertex, outVertex);
@ -230,6 +228,18 @@ void SWVertexLoader::ParseVertex(const PortableVertexDeclaration& vdec, int inde
{
ReadVertexAttribute<float>(&m_vertex.normal[i][0], src, vdec.normals[i], 0, 3, false);
}
// Fall back to the cached vectors held in the vertex shader constants when the vertex
// declaration does not enable a tangent (normals[1]) or a binormal (normals[2]).
// Only the first three components of each cached float4 are copied.
if (!vdec.normals[1].enable)
{
  const auto& fallback_tangent = VertexShaderManager::constants.cached_tangent;
  for (int axis = 0; axis < 3; ++axis)
    m_vertex.normal[1][axis] = fallback_tangent[axis];
}
if (!vdec.normals[2].enable)
{
  const auto& fallback_binormal = VertexShaderManager::constants.cached_binormal;
  for (int axis = 0; axis < 3; ++axis)
    m_vertex.normal[2][axis] = fallback_binormal[axis];
}
ParseColorAttributes(&m_vertex, src, vdec);

View File

@ -90,22 +90,14 @@ void TransformPosition(const InputVertexData* src, OutputVertexData* dst)
}
}
void TransformNormal(const InputVertexData* src, bool nbt, OutputVertexData* dst)
void TransformNormal(const InputVertexData* src, OutputVertexData* dst)
{
const float* mat = &xfmem.normalMatrices[(src->posMtx & 31) * 3];
if (nbt)
{
MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]);
MultiplyVec3Mat33(src->normal[1], mat, dst->normal[1]);
MultiplyVec3Mat33(src->normal[2], mat, dst->normal[2]);
dst->normal[0].Normalize();
}
else
{
MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]);
dst->normal[0].Normalize();
}
MultiplyVec3Mat33(src->normal[0], mat, dst->normal[0]);
MultiplyVec3Mat33(src->normal[1], mat, dst->normal[1]);
MultiplyVec3Mat33(src->normal[2], mat, dst->normal[2]);
dst->normal[0].Normalize();
}
static void TransformTexCoordRegular(const TexMtxInfo& texinfo, int coordNum,

View File

@ -9,7 +9,7 @@ struct OutputVertexData;
namespace TransformUnit
{
void TransformPosition(const InputVertexData* src, OutputVertexData* dst);
void TransformNormal(const InputVertexData* src, bool nbt, OutputVertexData* dst);
void TransformNormal(const InputVertexData* src, OutputVertexData* dst);
void TransformColor(const InputVertexData* src, OutputVertexData* dst);
void TransformTexCoord(const InputVertexData* src, OutputVertexData* dst);
} // namespace TransformUnit

View File

@ -90,6 +90,9 @@ struct VertexShaderConstants
// .x - texMtxInfo, .y - postMtxInfo, [0..1].z = color, [0..1].w = alpha
std::array<uint4, 8> xfmem_pack1;
float4 cached_tangent;
float4 cached_binormal;
};
struct GeometryShaderConstants

View File

@ -296,6 +296,8 @@ void WriteSwitch(ShaderCode& out, APIType ApiType, std::string_view variable,
#define I_POSTTRANSFORMMATRICES "cpostmtx"
#define I_PIXELCENTERCORRECTION "cpixelcenter"
#define I_VIEWPORT_SIZE "cviewport"
#define I_CACHED_TANGENT "ctangent"
#define I_CACHED_BINORMAL "cbinormal"
#define I_STEREOPARAMS "cstereo"
#define I_LINEPTPARAMS "clinept"
@ -317,6 +319,8 @@ static const char s_shader_uniforms[] = "\tuint components;\n"
"\tfloat4 " I_PIXELCENTERCORRECTION ";\n"
"\tfloat2 " I_VIEWPORT_SIZE ";\n"
"\tuint4 xfmem_pack1[8];\n"
"\tfloat4 " I_CACHED_TANGENT ";\n"
"\tfloat4 " I_CACHED_BINORMAL ";\n"
"\t#define xfmem_texMtxInfo(i) (xfmem_pack1[(i)].x)\n"
"\t#define xfmem_postMtxInfo(i) (xfmem_pack1[(i)].y)\n"
"\t#define xfmem_color(i) (xfmem_pack1[(i)].z)\n"

View File

@ -169,12 +169,18 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
"if ((components & {}u) != 0u) // VB_HAS_TANGENT\n",
VB_HAS_TANGENT);
out.Write(" _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, rawtangent));\n"
"else\n"
" _tangent = float3(dot(N0, " I_CACHED_TANGENT ".xyz), dot(N1, " I_CACHED_TANGENT
".xyz), dot(N2, " I_CACHED_TANGENT ".xyz));\n"
"\n"
"float3 _binormal = float3(0.0, 0.0, 0.0);\n"
"if ((components & {}u) != 0u) // VB_HAS_BINORMAL\n",
VB_HAS_BINORMAL);
out.Write(" _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
"rawbinormal));\n"
"else\n"
" _binormal = float3(dot(N0, " I_CACHED_BINORMAL ".xyz), dot(N1, " I_CACHED_BINORMAL
".xyz), dot(N2, " I_CACHED_BINORMAL ".xyz));\n"
"\n");
// Hardware Lighting
@ -449,12 +455,9 @@ static void GenVertexShaderTexGens(APIType api_type, u32 num_texgen, ShaderCode&
for (u32 i = 0; i < num_texgen; i++)
out.Write(" case {}u: output_tex.xyz = o.tex{}; break;\n", i, i);
out.Write(" default: output_tex.xyz = float3(0.0, 0.0, 0.0); break;\n"
" }}\n");
out.Write(" if ((components & {}u) != 0u) {{ // VB_HAS_TANGENT | VB_HAS_BINORMAL\n",
VB_HAS_TANGENT | VB_HAS_BINORMAL);
out.Write(" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n"
" output_tex.xyz += float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);\n"
" }}\n"
" float3 ldir = normalize(" I_LIGHTS "[light].pos.xyz - pos.xyz);\n"
" output_tex.xyz += float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);\n"
" }}\n"
" break;\n\n");
out.Write(" case {:s}:\n", TexGenType::Color0);

View File

@ -215,6 +215,20 @@ int VertexLoaderARM64::ReadVertex(VertexComponentFormat attribute, ComponentForm
m_float_emit.STR(128, coords, EncodeRegTo64(scratch2_reg), ArithOption(remaining_reg, true));
SetJumpTarget(dont_store);
}
else if (native_format == &m_native_vtx_decl.normals[1])
{
// Tangent: store the 128-bit `coords` register to VertexLoaderManager::tangent_cache, but only
// when remaining_reg is zero. CBNZ branches past the store for any non-zero value.
// NOTE(review): `coords` presumably holds the just-loaded tangent components - confirm.
FixupBranch dont_store = CBNZ(remaining_reg);
// scratch2_reg receives the address of the cache; the store uses offset 0 from it.
MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::tangent_cache.data());
m_float_emit.STR(128, IndexType::Unsigned, coords, EncodeRegTo64(scratch2_reg), 0);
SetJumpTarget(dont_store);
}
else if (native_format == &m_native_vtx_decl.normals[2])
{
// Binormal: same sequence as the tangent branch, targeting
// VertexLoaderManager::binormal_cache instead.
FixupBranch dont_store = CBNZ(remaining_reg);
MOVP2R(EncodeRegTo64(scratch2_reg), VertexLoaderManager::binormal_cache.data());
m_float_emit.STR(128, IndexType::Unsigned, coords, EncodeRegTo64(scratch2_reg), 0);
SetJumpTarget(dont_store);
}
native_format->components = count_out;
native_format->enable = true;

View File

@ -35,6 +35,8 @@ namespace VertexLoaderManager
std::array<u32, 3> position_matrix_index_cache;
// 3 vertices, 4 floats each to allow SIMD overwrite
alignas(sizeof(std::array<float, 4>)) std::array<std::array<float, 4>, 3> position_cache;
alignas(sizeof(std::array<float, 4>)) std::array<float, 4> tangent_cache;
alignas(sizeof(std::array<float, 4>)) std::array<float, 4> binormal_cache;
static NativeVertexFormatMap s_native_vertex_map;
static NativeVertexFormat* s_current_vtx_fmt;

View File

@ -55,6 +55,10 @@ void UpdateVertexArrayPointers();
// These arrays are in reverse order.
extern std::array<std::array<float, 4>, 3> position_cache;
extern std::array<u32, 3> position_matrix_index_cache;
// Store the tangent and binormal vectors for games that use emboss texgens when the vertex format
// doesn't include them (e.g. RS2 and RS3). These too are 4 floats each for SIMD overwrites.
extern std::array<float, 4> tangent_cache;
extern std::array<float, 4> binormal_cache;
// VB_HAS_X. Bitmask telling what vertex components are present.
extern u32 g_current_components;

View File

@ -127,6 +127,22 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com
MOVUPS(MPIC(VertexLoaderManager::position_cache.data(), scratch3, SCALE_4), coords);
SetJumpTarget(dont_store);
}
else if (native_format == &m_native_vtx_decl.normals[1])
{
TEST(32, R(remaining_reg), R(remaining_reg));
FixupBranch dont_store = J_CC(CC_NZ);
// For similar reasons, the cached tangent and binormal are 4 floats each
MOVUPS(MPIC(VertexLoaderManager::tangent_cache.data()), coords);
SetJumpTarget(dont_store);
}
else if (native_format == &m_native_vtx_decl.normals[2])
{
  // Cache the binormal only when remaining_reg is zero, exactly like the tangent
  // (normals[1]) branch does. TEST r,r sets ZF if and only if r == 0, so J_CC(CC_NZ)
  // skips the store for every non-zero value. CMP r,r must not be used here: it always
  // sets ZF, which makes the branch dead and emits a store for every vertex.
  TEST(32, R(remaining_reg), R(remaining_reg));
  FixupBranch dont_store = J_CC(CC_NZ);
  // For similar reasons, the cached tangent and binormal are 4 floats each
  MOVUPS(MPIC(VertexLoaderManager::binormal_cache.data()), coords);
  SetJumpTarget(dont_store);
}
};
int elem_size = GetElementSize(format);
@ -217,7 +233,9 @@ int VertexLoaderX64::ReadVertex(OpArg data, VertexComponentFormat attribute, Com
dest.AddMemOffset(sizeof(float));
// zfreeze
if (native_format == &m_native_vtx_decl.position)
if (native_format == &m_native_vtx_decl.position ||
native_format == &m_native_vtx_decl.normals[1] ||
native_format == &m_native_vtx_decl.normals[2])
{
if (cpu_info.bSSE4_1)
{

View File

@ -40,14 +40,22 @@ constexpr float FracAdjust(float val)
}
template <typename T, u32 N>
void ReadIndirect(const T* data)
void ReadIndirect(VertexLoader* loader, const T* data)
{
static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!");
DataReader dst(g_vertex_manager_write_ptr, nullptr);
for (u32 i = 0; i < N; ++i)
{
dst.Write(FracAdjust(Common::FromBigEndian(data[i])));
const float value = FracAdjust(Common::FromBigEndian(data[i]));
if (loader->m_remaining == 0)
{
if (i >= 3 && i < 6)
VertexLoaderManager::tangent_cache[i - 3] = value;
else if (i >= 6 && i < 9)
VertexLoaderManager::binormal_cache[i - 6] = value;
}
dst.Write(value);
}
g_vertex_manager_write_ptr = dst.GetPointer();
@ -57,10 +65,10 @@ void ReadIndirect(const T* data)
template <typename T, u32 N>
struct Normal_Direct
{
static void function([[maybe_unused]] VertexLoader* loader)
static void function(VertexLoader* loader)
{
const auto source = reinterpret_cast<const T*>(DataGetPosition());
ReadIndirect<T, N * 3>(source);
ReadIndirect<T, N * 3>(loader, source);
DataSkip<N * 3 * sizeof(T)>();
}
@ -68,7 +76,7 @@ struct Normal_Direct
};
template <typename I, typename T, u32 N, u32 Offset>
void Normal_Index_Offset()
void Normal_Index_Offset(VertexLoader* loader)
{
static_assert(std::is_unsigned_v<I>, "Only unsigned I is sane!");
@ -76,24 +84,24 @@ void Normal_Index_Offset()
const auto data = reinterpret_cast<const T*>(
VertexLoaderManager::cached_arraybases[CPArray::Normal] +
(index * g_main_cp_state.array_strides[CPArray::Normal]) + sizeof(T) * 3 * Offset);
ReadIndirect<T, N * 3>(data);
ReadIndirect<T, N * 3>(loader, data);
}
template <typename I, typename T, u32 N>
struct Normal_Index
{
static void function([[maybe_unused]] VertexLoader* loader) { Normal_Index_Offset<I, T, N, 0>(); }
static void function(VertexLoader* loader) { Normal_Index_Offset<I, T, N, 0>(loader); }
static constexpr u32 size = sizeof(I);
};
template <typename I, typename T>
struct Normal_Index_Indices3
{
static void function([[maybe_unused]] VertexLoader* loader)
static void function(VertexLoader* loader)
{
Normal_Index_Offset<I, T, 1, 0>();
Normal_Index_Offset<I, T, 1, 1>();
Normal_Index_Offset<I, T, 1, 2>();
Normal_Index_Offset<I, T, 1, 0>(loader);
Normal_Index_Offset<I, T, 1, 1>(loader);
Normal_Index_Offset<I, T, 1, 2>(loader);
}
static constexpr u32 size = sizeof(I) * 3;

View File

@ -453,6 +453,7 @@ void VertexManagerBase::Flush()
}
}
CalculateBinormals(VertexLoaderManager::GetCurrentVertexFormat());
// Calculate ZSlope for zfreeze
VertexShaderManager::SetConstants();
if (!bpmem.genMode.zfreeze)
@ -595,6 +596,31 @@ void VertexManagerBase::CalculateZSlope(NativeVertexFormat* format)
m_zslope.dirty = true;
}
// Copies the cached tangent and binormal from VertexLoaderManager into the vertex shader
// constants, marking the constants dirty when either value actually changes.
void VertexManagerBase::CalculateBinormals(NativeVertexFormat* format)
{
  // Formats that enable normals[1] leave the shader constants untouched here; the vertex
  // loader still refreshes VertexLoaderManager's caches for such formats.
  if (format->GetVertexDeclaration().normals[1].enable)
    return;

  auto& tangent = VertexLoaderManager::tangent_cache;
  auto& binormal = VertexLoaderManager::binormal_cache;

  // Clear the fourth lane of each cache before comparing.
  // NOTE(review): presumably this lane only exists for the SIMD-wide store and may hold
  // stray data - confirm.
  tangent[3] = 0;
  binormal[3] = 0;

  auto& shader_constants = VertexShaderManager::constants;
  const bool tangent_changed = shader_constants.cached_tangent != tangent;
  const bool binormal_changed = shader_constants.cached_binormal != binormal;

  if (tangent_changed)
    shader_constants.cached_tangent = tangent;
  if (binormal_changed)
    shader_constants.cached_binormal = binormal;

  // Only ever raise the dirty flag; never clear it from here.
  if (tangent_changed || binormal_changed)
    VertexShaderManager::dirty = true;
}
void VertexManagerBase::UpdatePipelineConfig()
{
NativeVertexFormat* vertex_format = VertexLoaderManager::GetCurrentVertexFormat();

View File

@ -172,6 +172,7 @@ protected:
u32 GetRemainingIndices(OpcodeDecoder::Primitive primitive) const;
void CalculateZSlope(NativeVertexFormat* format);
void CalculateBinormals(NativeVertexFormat* format);
void LoadTextures();
u8* m_cur_buffer_pointer = nullptr;

View File

@ -253,23 +253,24 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
"float4 pos = float4(dot(P0, rawpos), dot(P1, rawpos), dot(P2, rawpos), 1.0);\n");
if ((uid_data->components & VB_HAS_NORMAL) != 0)
{
if ((uid_data->components & VB_HAS_TANGENT) == 0)
out.Write("float3 rawtangent = " I_CACHED_TANGENT ".xyz;\n");
if ((uid_data->components & VB_HAS_BINORMAL) == 0)
out.Write("float3 rawbinormal = " I_CACHED_BINORMAL ".xyz;\n");
// Only the first normal gets normalized (TODO: why?)
out.Write("float3 _normal = normalize(float3(dot(N0, rawnormal), dot(N1, rawnormal), dot(N2, "
"rawnormal)));\n");
"rawnormal)));\n"
"float3 _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, "
"rawtangent));\n"
"float3 _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
"rawbinormal));\n");
}
else
{
out.Write("float3 _normal = float3(0.0, 0.0, 0.0);\n");
}
if ((uid_data->components & VB_HAS_TANGENT) != 0)
{
out.Write("float3 _tangent = float3(dot(N0, rawtangent), dot(N1, rawtangent), dot(N2, "
"rawtangent));\n");
}
if ((uid_data->components & VB_HAS_BINORMAL) != 0)
{
out.Write("float3 _binormal = float3(dot(N0, rawbinormal), dot(N1, rawbinormal), dot(N2, "
"rawbinormal));\n");
out.Write("float3 _binormal = float3(0.0, 0.0, 0.0);\n");
out.Write("float3 _tangent = float3(0.0, 0.0, 0.0);\n");
}
out.Write("o.pos = float4(dot(" I_PROJECTION "[0], pos), dot(" I_PROJECTION
@ -341,22 +342,12 @@ ShaderCode GenerateVertexShaderCode(APIType api_type, const ShaderHostConfig& ho
{
case TexGenType::EmbossMap: // calculate tex coords into bump map
if ((uid_data->components & (VB_HAS_TANGENT | VB_HAS_BINORMAL)) != 0)
{
// transform the light dir into tangent space
out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
LIGHT_POS_PARAMS(texinfo.embosslightshift));
out.Write(
"o.tex{}.xyz = o.tex{}.xyz + float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);\n",
i, texinfo.embosssourceshift);
}
else
{
// The following assert was triggered in House of the Dead Overkill and Star Wars Rogue
// Squadron 2
// ASSERT(0); // should have normals
out.Write("o.tex{}.xyz = o.tex{}.xyz;\n", i, texinfo.embosssourceshift);
}
// transform the light dir into tangent space
out.Write("ldir = normalize(" LIGHT_POS ".xyz - pos.xyz);\n",
LIGHT_POS_PARAMS(texinfo.embosslightshift));
out.Write(
"o.tex{}.xyz = o.tex{}.xyz + float3(dot(ldir, _tangent), dot(ldir, _binormal), 0.0);\n",
i, texinfo.embosssourceshift);
break;
case TexGenType::Color0: