Optimized vertex blending by only recalculating the required world/view matrices, and a simplified HLSL implementation

This commit is contained in:
PatrickvL 2020-12-10 12:21:03 +01:00 committed by Anthony
parent 8200cd8e43
commit 0245cc6ee8
8 changed files with 85 additions and 69 deletions

View File

@ -6337,13 +6337,38 @@ void UpdateFixedFunctionVertexShaderState()
{
using namespace xbox;
// Preprocessing
// Prepare vertex blending mode variables used in transforms, below
auto VertexBlend = XboxRenderStates.GetXboxRenderState(X_D3DRS_VERTEXBLEND);
// Xbox and host D3DVERTEXBLENDFLAGS :
// D3DVBF_DISABLE = 0 : 1 matrix, 0 weights => final weight 1
// D3DVBF_1WEIGHTS = 1 : 2 matrices, 1 weights => final weight calculated
// D3DVBF_2WEIGHTS = 3 : 3 matrices, 2 weights => final weight calculated
// D3DVBF_3WEIGHTS = 5 : 4 matrices, 3 weights => final weight calculated
// Xbox X_D3DVERTEXBLENDFLAGS :
// X_D3DVBF_2WEIGHTS2MATRICES = 2 : 2 matrices, 2 weights
// X_D3DVBF_3WEIGHTS3MATRICES = 4 : 3 matrices, 3 weights
// X_D3DVBF_4WEIGHTS4MATRICES = 6 : 4 matrices, 4 weights
//
if (VertexBlend > xbox::X_D3DVBF_4WEIGHTS4MATRICES) LOG_TEST_CASE("X_D3DRS_VERTEXBLEND out of range");
// Calculate the number of matrices, by adding the LSB to turn (0,1,3,5) and (0,2,4,6) into (0,2,4,6); Then divide by 2 to get (0,1,2,3), and add 1 to get 1, 2, 3 or 4 matrices :
auto NrBlendMatrices = ((VertexBlend + (VertexBlend & 1)) / 2) + 1;
// Looking at the above values, 0 or the LSB of VertexBlend signals that the final weight needs to be calculated from all previous weights (deducting them all from an initial 1) :
auto CalcLastBlendWeight = (VertexBlend == xbox::X_D3DVBF_DISABLE) || (VertexBlend & 1);
// Copy the resulting values over to shader state :
ffShaderState.Modes.VertexBlend_NrOfMatrices = (float)NrBlendMatrices;
ffShaderState.Modes.VertexBlend_CalcLastWeight = (float)CalcLastBlendWeight;
// Transforms
// Transpose row major to column major for HLSL
D3DXMatrixTranspose((D3DXMATRIX*)&ffShaderState.Transforms.Projection, (D3DXMATRIX*)&d3d8TransformState.Transforms[X_D3DTS_PROJECTION]);
D3DXMatrixTranspose((D3DXMATRIX*)&ffShaderState.Transforms.View, (D3DXMATRIX*)&d3d8TransformState.Transforms[X_D3DTS_VIEW]);
for (unsigned i = 0; i < 4; i++) {
for (unsigned i = 0; i < 4; i++) { // TODO : Would it help to limit this to just the active texture channels?
D3DXMatrixTranspose((D3DXMATRIX*)&ffShaderState.Transforms.Texture[i], (D3DXMATRIX*)&d3d8TransformState.Transforms[X_D3DTS_TEXTURE0 + i]);
}
for (unsigned i = 0; i < ffShaderState.Modes.VertexBlend_NrOfMatrices; i++) {
D3DXMatrixTranspose((D3DXMATRIX*)&ffShaderState.Transforms.WorldView[i], (D3DXMATRIX*)d3d8TransformState.GetWorldView(i));
D3DXMatrixTranspose((D3DXMATRIX*)&ffShaderState.Transforms.WorldViewInverseTranspose[i], (D3DXMATRIX*)d3d8TransformState.GetWorldViewInverseTranspose(i));
}
@ -6372,7 +6397,7 @@ void UpdateFixedFunctionVertexShaderState()
auto pointSize = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSIZE);
ffShaderState.PointSprite.PointSize = *reinterpret_cast<float*>(&pointSize);
ffShaderState.PointSprite.PointScaleEnable = (float)XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALEENABLE);
ffShaderState.PointSprite.RenderTargetHeight = GetPixelContainerHeight(g_pXbox_RenderTarget);
ffShaderState.PointSprite.RenderTargetHeight = (float)GetPixelContainerHeight(g_pXbox_RenderTarget);
auto scaleA = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALE_A);
ffShaderState.PointSprite.ScaleA = *reinterpret_cast<float*>(&scaleA);
auto scaleB = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALE_B);
@ -6428,7 +6453,6 @@ void UpdateFixedFunctionVertexShaderState()
}
// Misc flags
ffShaderState.Modes.VertexBlend = (float)XboxRenderStates.GetXboxRenderState(X_D3DRS_VERTEXBLEND);
ffShaderState.Modes.NormalizeNormals = (float)XboxRenderStates.GetXboxRenderState(X_D3DRS_NORMALIZENORMALS);
// Update lights

View File

@ -290,44 +290,23 @@ TransformInfo DoTransform(const float4 position, const float3 normal, const floa
output.Position = float4(0, 0, 0, 0);
output.Normal = float3(0, 0, 0);
// D3D
const int _BLEND_OFF = 0;
const int _1WEIGHT_2MAT = 1;
const int _2WEIGHT_3MAT = 3;
const int _3WEIGHT_4MAT = 5;
// Xbox
const int _2WEIGHT_2MAT = 2;
const int _3WEIGHT_3MAT = 4;
const int _4WEIGHT_4MAT = 6;
if (state.Modes.VertexBlend == _BLEND_OFF) {
output.Position = mul(position, state.Transforms.WorldView[0]);
output.Normal = mul(normal, (float3x3)state.Transforms.WorldViewInverseTranspose[0]);
return output;
}
// The number of matrices to blend
int mats = floor((state.Modes.VertexBlend - 1) / 2 + 2);
// If we have to calculate the last blend value
bool calcLastBlend = fmod(state.Modes.VertexBlend, 2) == 1;
// The number of matrices to blend (always in the range [1..4])
int matrices = state.Modes.VertexBlend_NrOfMatrices;
// Initialize the final matrix's blend weight to 1, from which all preceding blend weights will be deducted :
float lastBlend = 1;
for (int i = 0; i < mats - 1; i++)
for (int i = 0; i < matrices; i++)
{
output.Position += mul(position, state.Transforms.WorldView[i]) * blendWeights[i];
output.Normal += mul(normal, (float3x3) state.Transforms.WorldViewInverseTranspose[i]) * blendWeights[i];
// Do we have to calculate the last blend value (never happens when there's already 4 matrices) ?
bool bCalcFinalWeight = (state.Modes.VertexBlend_CalcLastWeight > 0) && (i == (matrices - 1));
// Note : In case of X_D3DVBF_DISABLE, no prior weights have been deducted from lastBlend, so it will still be 1.
// The number of matrices will also be 1, which effectively turns this into non-weighted single-matrix multiplications :
float blendWeight = bCalcFinalWeight ? lastBlend : blendWeights[i];
// Reduce the blend weight for the final matrix :
lastBlend -= blendWeights[i];
}
if (calcLastBlend)
{
output.Position += mul(position, state.Transforms.WorldView[mats-1]) * lastBlend;
output.Normal += mul(normal, (float3x3) state.Transforms.WorldViewInverseTranspose[mats-1]) * lastBlend;
}
else
{
output.Position += mul(position, state.Transforms.WorldView[mats-1]) * blendWeights[mats-1];
output.Normal += mul(normal, (float3x3) state.Transforms.WorldViewInverseTranspose[mats-1]) * blendWeights[mats-1];
// Add this matrix (multiplied by its blend weight) to the output :
output.Position += mul(position, state.Transforms.WorldView[i]) * blendWeight;
output.Normal += mul(normal, (float3x3) state.Transforms.WorldViewInverseTranspose[i]) * blendWeight;
}
return output;

View File

@ -98,7 +98,8 @@ struct Modes {
alignas(16) float LocalViewer;
alignas(16) float ColorVertex;
alignas(16) float VertexBlend;
alignas(16) float VertexBlend_NrOfMatrices;
alignas(16) float VertexBlend_CalcLastWeight; // Could be a bool in higher shader models
alignas(16) float NormalizeNormals;
};

View File

@ -396,14 +396,13 @@ void XboxRenderStateConverter::ApplyComplexRenderState(uint32_t State, uint32_t
switch (State) {
case xbox::X_D3DRS_VERTEXBLEND:
// convert from Xbox direct3d to PC direct3d enumeration
if (Value <= 1) {
Value = Value;
} else if (Value == 3) {
Value = 2;
} else if (Value == 5) {
Value = 3;
} else {
// convert from Xbox X_D3DVERTEXBLENDFLAGS to PC D3DVERTEXBLENDFLAGS enumeration
switch (Value) {
case xbox::X_D3DVBF_DISABLE: Value = D3DVBF_DISABLE; break;
case xbox::X_D3DVBF_1WEIGHTS: Value = D3DVBF_1WEIGHTS; break;
case xbox::X_D3DVBF_2WEIGHTS: Value = D3DVBF_2WEIGHTS; break;
case xbox::X_D3DVBF_3WEIGHTS: Value = D3DVBF_3WEIGHTS; break;
default:
LOG_TEST_CASE("Unsupported D3DVERTEXBLENDFLAGS (%d)");
return;
}

View File

@ -94,7 +94,7 @@ D3D8TransformState::D3D8TransformState() {
this->Transforms.fill(identity);
this->WorldView.fill(identity);
this->WorldViewInverseTranspose.fill(identity);
bWorldViewDirty = true;
bWorldViewDirty.fill(true);
}
void D3D8TransformState::SetTransform(xbox::X_D3DTRANSFORMSTATETYPE state, const D3DMATRIX* pMatrix)
@ -111,33 +111,34 @@ void D3D8TransformState::SetTransform(xbox::X_D3DTRANSFORMSTATETYPE state, const
// Update transform state
this->Transforms[state] = *pMatrix;
if ((state == X_D3DTS_VIEW) || ((X_D3DTS_WORLD <= state) && (state <= X_D3DTS_WORLD3))) {
bWorldViewDirty = true;
if (state == X_D3DTS_VIEW) {
bWorldViewDirty.fill(true);
}
if ((X_D3DTS_WORLD <= state) && (state <= X_D3DTS_WORLD3)) {
bWorldViewDirty[state - X_D3DTS_WORLD] = true;
}
}
void D3D8TransformState::RecalculateDependentMatrices()
void D3D8TransformState::RecalculateDependentMatrices(unsigned i)
{
for (unsigned i = 0; i < 4; i++) {
auto worldState = xbox::X_D3DTS_WORLD + i;
D3DXMATRIX worldView;
D3DXMatrixMultiply(&worldView, (D3DXMATRIX*)&Transforms[worldState], (D3DXMATRIX*)&Transforms[xbox::X_D3DTS_VIEW]);
this->WorldView[i] = worldView;
auto worldState = xbox::X_D3DTS_WORLD + i;
D3DXMATRIX worldView;
D3DXMatrixMultiply(&worldView, (D3DXMATRIX*)&Transforms[worldState], (D3DXMATRIX*)&Transforms[xbox::X_D3DTS_VIEW]);
this->WorldView[i] = worldView;
D3DXMATRIX worldViewInverseTranspose;
D3DXMatrixInverse(&worldViewInverseTranspose, nullptr, &worldView);
D3DXMatrixTranspose(&worldViewInverseTranspose, &worldViewInverseTranspose);
this->WorldViewInverseTranspose[i] = worldViewInverseTranspose;
}
D3DXMATRIX worldViewInverseTranspose;
D3DXMatrixInverse(&worldViewInverseTranspose, nullptr, &worldView);
D3DXMatrixTranspose(&worldViewInverseTranspose, &worldViewInverseTranspose);
this->WorldViewInverseTranspose[i] = worldViewInverseTranspose;
}
D3DMATRIX* D3D8TransformState::GetWorldView(unsigned i)
{
assert(i < 4);
if (bWorldViewDirty) {
RecalculateDependentMatrices();
bWorldViewDirty = false;
if (bWorldViewDirty[i]) {
RecalculateDependentMatrices(i);
bWorldViewDirty[i] = false;
}
return &WorldView[i];
@ -147,9 +148,9 @@ D3DMATRIX* D3D8TransformState::GetWorldViewInverseTranspose(unsigned i)
{
assert(i < 4);
if (bWorldViewDirty) {
RecalculateDependentMatrices();
bWorldViewDirty = false;
if (bWorldViewDirty[i]) {
RecalculateDependentMatrices(i);
bWorldViewDirty[i] = false;
}
return &WorldViewInverseTranspose[i];

View File

@ -34,9 +34,9 @@ public:
std::array<D3DMATRIX, xbox::X_D3DTS_MAX> Transforms;
private:
void RecalculateDependentMatrices();
void RecalculateDependentMatrices(unsigned i);
bool bWorldViewDirty;
std::array<bool, 4> bWorldViewDirty;
// Combines world/view matrices
std::array<D3DMATRIX, 4> WorldView;
// World/view inverse transpose for lighting calculations

View File

@ -1796,7 +1796,7 @@ typedef enum _TXBType {
xtD3DTEXTUREOP, // Used for TextureStageState X_D3DTSS_COLOROP and X_D3DTSS_ALPHAOP
xtD3DTEXTURESTAGESTATETYPE,
xtD3DTRANSFORMSTATETYPE,
xtD3DVERTEXBLENDFLAGS,
xtD3DVERTEXBLENDFLAGS, // Used for X_D3DRS_VERTEXBLEND
xtD3DVSDE,
xtD3DWRAP,
xtDWORD,

View File

@ -1007,6 +1007,18 @@ constexpr DWORD X_D3DCOLORWRITEENABLE_ALL = 0x01010101; // Xbox ext.
// deferred texture stage state "unknown" flag
#define X_D3DTSS_UNK 0x7fffffff
// Vertex blending modes carried by the X_D3DRS_VERTEXBLEND render state.
// Zero and the odd values leave the last matrix weight implicit (it is what
// remains of 1 after deducting the supplied weights); the even, Xbox-only
// values supply one explicit weight per matrix.
// Enumerators are listed in ascending numeric order.
typedef enum _D3DVERTEXBLENDFLAGS
{
    X_D3DVBF_DISABLE           = 0, // Vertex blending disabled : 1 matrix, 0 weights (final weight effectively 1)
    X_D3DVBF_1WEIGHTS          = 1, // 2 matrices, 1 weight supplied, final weight calculated
    X_D3DVBF_2WEIGHTS2MATRICES = 2, // Xbox ext. : 2 matrices, 2 weights supplied
    X_D3DVBF_2WEIGHTS          = 3, // 3 matrices, 2 weights supplied, final weight calculated
    X_D3DVBF_3WEIGHTS3MATRICES = 4, // Xbox ext. : 3 matrices, 3 weights supplied
    X_D3DVBF_3WEIGHTS          = 5, // 4 matrices, 3 weights supplied, final weight calculated
    X_D3DVBF_4WEIGHTS4MATRICES = 6, // Xbox ext. : 4 matrices, 4 weights supplied
    X_D3DVBF_FORCE_DWORD       = 0x7fffffff
} X_D3DVERTEXBLENDFLAGS;
typedef DWORD X_VERTEXSHADERCONSTANTMODE;
// Xbox vertex shader constant modes