Optimized vertex blending by only recalculating the required world/view matrices, and a simplified HLSL implementation
This commit is contained in:
parent
8200cd8e43
commit
0245cc6ee8
|
@ -6337,13 +6337,38 @@ void UpdateFixedFunctionVertexShaderState()
|
|||
{
|
||||
using namespace xbox;
|
||||
|
||||
// Preprocessing
|
||||
// Prepare vertex blending mode variables used in transforms, below
|
||||
auto VertexBlend = XboxRenderStates.GetXboxRenderState(X_D3DRS_VERTEXBLEND);
|
||||
// Xbox and host D3DVERTEXBLENDFLAGS :
|
||||
// D3DVBF_DISABLE = 0 : 1 matrix, 0 weights => final weight 1
|
||||
// D3DVBF_1WEIGHTS = 1 : 2 matrices, 1 weights => final weight calculated
|
||||
// D3DVBF_2WEIGHTS = 3 : 3 matrices, 2 weights => final weight calculated
|
||||
// D3DVBF_3WEIGHTS = 5 : 4 matrices, 3 weights => final weight calculated
|
||||
// Xbox X_D3DVERTEXBLENDFLAGS :
|
||||
// X_D3DVBF_2WEIGHTS2MATRICES = 2 : 2 matrices, 2 weights
|
||||
// X_D3DVBF_3WEIGHTS3MATRICES = 4 : 3 matrices, 3 weights
|
||||
// X_D3DVBF_4WEIGHTS4MATRICES = 6 : 4 matrices, 4 weights
|
||||
//
|
||||
if (VertexBlend > xbox::X_D3DVBF_4WEIGHTS4MATRICES) LOG_TEST_CASE("X_D3DRS_VERTEXBLEND out of range");
|
||||
// Calculate the number of matrices, by adding the LSB to turn (0,1,3,5) and (0,2,4,6) into (0,2,4,6); Then divide by 2 to get (0,1,2,3), and add 1 to get 1, 2, 3 or 4 matrices :
|
||||
auto NrBlendMatrices = ((VertexBlend + (VertexBlend & 1)) / 2) + 1;
|
||||
// Looking at the above values, 0 or the LSB of VertexBlend signals that the final weight needs to be calculated from all previous weights (deducting them all from an initial 1) :
|
||||
auto CalcLastBlendWeight = (VertexBlend == xbox::X_D3DVBF_DISABLE) || (VertexBlend & 1);
|
||||
// Copy the resulting values over to shader state :
|
||||
ffShaderState.Modes.VertexBlend_NrOfMatrices = (float)NrBlendMatrices;
|
||||
ffShaderState.Modes.VertexBlend_CalcLastWeight = (float)CalcLastBlendWeight;
|
||||
|
||||
// Transforms
|
||||
// Transpose row major to column major for HLSL
|
||||
D3DXMatrixTranspose((D3DXMATRIX*)&ffShaderState.Transforms.Projection, (D3DXMATRIX*)&d3d8TransformState.Transforms[X_D3DTS_PROJECTION]);
|
||||
D3DXMatrixTranspose((D3DXMATRIX*)&ffShaderState.Transforms.View, (D3DXMATRIX*)&d3d8TransformState.Transforms[X_D3DTS_VIEW]);
|
||||
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
for (unsigned i = 0; i < 4; i++) { // TODO : Would it help to limit this to just the active texture channels?
|
||||
D3DXMatrixTranspose((D3DXMATRIX*)&ffShaderState.Transforms.Texture[i], (D3DXMATRIX*)&d3d8TransformState.Transforms[X_D3DTS_TEXTURE0 + i]);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < ffShaderState.Modes.VertexBlend_NrOfMatrices; i++) {
|
||||
D3DXMatrixTranspose((D3DXMATRIX*)&ffShaderState.Transforms.WorldView[i], (D3DXMATRIX*)d3d8TransformState.GetWorldView(i));
|
||||
D3DXMatrixTranspose((D3DXMATRIX*)&ffShaderState.Transforms.WorldViewInverseTranspose[i], (D3DXMATRIX*)d3d8TransformState.GetWorldViewInverseTranspose(i));
|
||||
}
|
||||
|
@ -6372,7 +6397,7 @@ void UpdateFixedFunctionVertexShaderState()
|
|||
auto pointSize = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSIZE);
|
||||
ffShaderState.PointSprite.PointSize = *reinterpret_cast<float*>(&pointSize);
|
||||
ffShaderState.PointSprite.PointScaleEnable = (float)XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALEENABLE);
|
||||
ffShaderState.PointSprite.RenderTargetHeight = GetPixelContainerHeight(g_pXbox_RenderTarget);
|
||||
ffShaderState.PointSprite.RenderTargetHeight = (float)GetPixelContainerHeight(g_pXbox_RenderTarget);
|
||||
auto scaleA = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALE_A);
|
||||
ffShaderState.PointSprite.ScaleA = *reinterpret_cast<float*>(&scaleA);
|
||||
auto scaleB = XboxRenderStates.GetXboxRenderState(X_D3DRS_POINTSCALE_B);
|
||||
|
@ -6428,7 +6453,6 @@ void UpdateFixedFunctionVertexShaderState()
|
|||
}
|
||||
|
||||
// Misc flags
|
||||
ffShaderState.Modes.VertexBlend = (float)XboxRenderStates.GetXboxRenderState(X_D3DRS_VERTEXBLEND);
|
||||
ffShaderState.Modes.NormalizeNormals = (float)XboxRenderStates.GetXboxRenderState(X_D3DRS_NORMALIZENORMALS);
|
||||
|
||||
// Update lights
|
||||
|
|
|
@ -290,44 +290,23 @@ TransformInfo DoTransform(const float4 position, const float3 normal, const floa
|
|||
output.Position = float4(0, 0, 0, 0);
|
||||
output.Normal = float3(0, 0, 0);
|
||||
|
||||
// D3D
|
||||
const int _BLEND_OFF = 0;
|
||||
const int _1WEIGHT_2MAT = 1;
|
||||
const int _2WEIGHT_3MAT = 3;
|
||||
const int _3WEIGHT_4MAT = 5;
|
||||
// Xbox
|
||||
const int _2WEIGHT_2MAT = 2;
|
||||
const int _3WEIGHT_3MAT = 4;
|
||||
const int _4WEIGHT_4MAT = 6;
|
||||
|
||||
if (state.Modes.VertexBlend == _BLEND_OFF) {
|
||||
output.Position = mul(position, state.Transforms.WorldView[0]);
|
||||
output.Normal = mul(normal, (float3x3)state.Transforms.WorldViewInverseTranspose[0]);
|
||||
return output;
|
||||
}
|
||||
|
||||
// The number of matrices to blend
|
||||
int mats = floor((state.Modes.VertexBlend - 1) / 2 + 2);
|
||||
// If we have to calculate the last blend value
|
||||
bool calcLastBlend = fmod(state.Modes.VertexBlend, 2) == 1;
|
||||
// The number of matrices to blend (always in the range [1..4])
|
||||
int matrices = state.Modes.VertexBlend_NrOfMatrices;
|
||||
|
||||
// Initialize the blend weight of the final matrix to 1, from which all preceding blend weights will be deducted :
|
||||
float lastBlend = 1;
|
||||
for (int i = 0; i < mats - 1; i++)
|
||||
for (int i = 0; i < matrices; i++)
|
||||
{
|
||||
output.Position += mul(position, state.Transforms.WorldView[i]) * blendWeights[i];
|
||||
output.Normal += mul(normal, (float3x3) state.Transforms.WorldViewInverseTranspose[i]) * blendWeights[i];
|
||||
// Do we have to calculate the last blend value (never happens when a weight is supplied for every matrix, as in the Xbox N-weights-N-matrices modes) ?
|
||||
bool bCalcFinalWeight = (state.Modes.VertexBlend_CalcLastWeight > 0) && (i == (matrices - 1));
|
||||
// Note : In case of X_D3DVBF_DISABLE, no prior weights have been deducted from lastBlend, so it will still be 1.
|
||||
// The number of matrices will also be 1, which effectively turns this into non-weighted single-matrix multiplications :
|
||||
float blendWeight = bCalcFinalWeight ? lastBlend : blendWeights[i];
|
||||
// Reduce the blend weight for the final matrix :
|
||||
lastBlend -= blendWeights[i];
|
||||
}
|
||||
|
||||
if (calcLastBlend)
|
||||
{
|
||||
output.Position += mul(position, state.Transforms.WorldView[mats-1]) * lastBlend;
|
||||
output.Normal += mul(normal, (float3x3) state.Transforms.WorldViewInverseTranspose[mats-1]) * lastBlend;
|
||||
}
|
||||
else
|
||||
{
|
||||
output.Position += mul(position, state.Transforms.WorldView[mats-1]) * blendWeights[mats-1];
|
||||
output.Normal += mul(normal, (float3x3) state.Transforms.WorldViewInverseTranspose[mats-1]) * blendWeights[mats-1];
|
||||
// Add this matrix (multiplied by its blend weight) to the output :
|
||||
output.Position += mul(position, state.Transforms.WorldView[i]) * blendWeight;
|
||||
output.Normal += mul(normal, (float3x3) state.Transforms.WorldViewInverseTranspose[i]) * blendWeight;
|
||||
}
|
||||
|
||||
return output;
|
||||
|
|
|
@ -98,7 +98,8 @@ struct Modes {
|
|||
alignas(16) float LocalViewer;
|
||||
|
||||
alignas(16) float ColorVertex;
|
||||
alignas(16) float VertexBlend;
|
||||
alignas(16) float VertexBlend_NrOfMatrices;
|
||||
alignas(16) float VertexBlend_CalcLastWeight; // Could be a bool in higher shader models
|
||||
alignas(16) float NormalizeNormals;
|
||||
};
|
||||
|
||||
|
|
|
@ -396,14 +396,13 @@ void XboxRenderStateConverter::ApplyComplexRenderState(uint32_t State, uint32_t
|
|||
|
||||
switch (State) {
|
||||
case xbox::X_D3DRS_VERTEXBLEND:
|
||||
// convert from Xbox direct3d to PC direct3d enumeration
|
||||
if (Value <= 1) {
|
||||
Value = Value;
|
||||
} else if (Value == 3) {
|
||||
Value = 2;
|
||||
} else if (Value == 5) {
|
||||
Value = 3;
|
||||
} else {
|
||||
// convert from Xbox X_D3DVERTEXBLENDFLAGS to PC D3DVERTEXBLENDFLAGS enumeration
|
||||
switch (Value) {
|
||||
case xbox::X_D3DVBF_DISABLE: Value = D3DVBF_DISABLE; break;
|
||||
case xbox::X_D3DVBF_1WEIGHTS: Value = D3DVBF_1WEIGHTS; break;
|
||||
case xbox::X_D3DVBF_2WEIGHTS: Value = D3DVBF_2WEIGHTS; break;
|
||||
case xbox::X_D3DVBF_3WEIGHTS: Value = D3DVBF_3WEIGHTS; break;
|
||||
default:
|
||||
LOG_TEST_CASE("Unsupported D3DVERTEXBLENDFLAGS (%d)");
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -94,7 +94,7 @@ D3D8TransformState::D3D8TransformState() {
|
|||
this->Transforms.fill(identity);
|
||||
this->WorldView.fill(identity);
|
||||
this->WorldViewInverseTranspose.fill(identity);
|
||||
bWorldViewDirty = true;
|
||||
bWorldViewDirty.fill(true);
|
||||
}
|
||||
|
||||
void D3D8TransformState::SetTransform(xbox::X_D3DTRANSFORMSTATETYPE state, const D3DMATRIX* pMatrix)
|
||||
|
@ -111,33 +111,34 @@ void D3D8TransformState::SetTransform(xbox::X_D3DTRANSFORMSTATETYPE state, const
|
|||
// Update transform state
|
||||
this->Transforms[state] = *pMatrix;
|
||||
|
||||
if ((state == X_D3DTS_VIEW) || ((X_D3DTS_WORLD <= state) && (state <= X_D3DTS_WORLD3))) {
|
||||
bWorldViewDirty = true;
|
||||
if (state == X_D3DTS_VIEW) {
|
||||
bWorldViewDirty.fill(true);
|
||||
}
|
||||
if ((X_D3DTS_WORLD <= state) && (state <= X_D3DTS_WORLD3)) {
|
||||
bWorldViewDirty[state - X_D3DTS_WORLD] = true;
|
||||
}
|
||||
}
|
||||
|
||||
void D3D8TransformState::RecalculateDependentMatrices()
|
||||
void D3D8TransformState::RecalculateDependentMatrices(unsigned i)
|
||||
{
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
auto worldState = xbox::X_D3DTS_WORLD + i;
|
||||
D3DXMATRIX worldView;
|
||||
D3DXMatrixMultiply(&worldView, (D3DXMATRIX*)&Transforms[worldState], (D3DXMATRIX*)&Transforms[xbox::X_D3DTS_VIEW]);
|
||||
this->WorldView[i] = worldView;
|
||||
auto worldState = xbox::X_D3DTS_WORLD + i;
|
||||
D3DXMATRIX worldView;
|
||||
D3DXMatrixMultiply(&worldView, (D3DXMATRIX*)&Transforms[worldState], (D3DXMATRIX*)&Transforms[xbox::X_D3DTS_VIEW]);
|
||||
this->WorldView[i] = worldView;
|
||||
|
||||
D3DXMATRIX worldViewInverseTranspose;
|
||||
D3DXMatrixInverse(&worldViewInverseTranspose, nullptr, &worldView);
|
||||
D3DXMatrixTranspose(&worldViewInverseTranspose, &worldViewInverseTranspose);
|
||||
this->WorldViewInverseTranspose[i] = worldViewInverseTranspose;
|
||||
}
|
||||
D3DXMATRIX worldViewInverseTranspose;
|
||||
D3DXMatrixInverse(&worldViewInverseTranspose, nullptr, &worldView);
|
||||
D3DXMatrixTranspose(&worldViewInverseTranspose, &worldViewInverseTranspose);
|
||||
this->WorldViewInverseTranspose[i] = worldViewInverseTranspose;
|
||||
}
|
||||
|
||||
D3DMATRIX* D3D8TransformState::GetWorldView(unsigned i)
|
||||
{
|
||||
assert(i < 4);
|
||||
|
||||
if (bWorldViewDirty) {
|
||||
RecalculateDependentMatrices();
|
||||
bWorldViewDirty = false;
|
||||
if (bWorldViewDirty[i]) {
|
||||
RecalculateDependentMatrices(i);
|
||||
bWorldViewDirty[i] = false;
|
||||
}
|
||||
|
||||
return &WorldView[i];
|
||||
|
@ -147,9 +148,9 @@ D3DMATRIX* D3D8TransformState::GetWorldViewInverseTranspose(unsigned i)
|
|||
{
|
||||
assert(i < 4);
|
||||
|
||||
if (bWorldViewDirty) {
|
||||
RecalculateDependentMatrices();
|
||||
bWorldViewDirty = false;
|
||||
if (bWorldViewDirty[i]) {
|
||||
RecalculateDependentMatrices(i);
|
||||
bWorldViewDirty[i] = false;
|
||||
}
|
||||
|
||||
return &WorldViewInverseTranspose[i];
|
||||
|
|
|
@ -34,9 +34,9 @@ public:
|
|||
std::array<D3DMATRIX, xbox::X_D3DTS_MAX> Transforms;
|
||||
|
||||
private:
|
||||
void RecalculateDependentMatrices();
|
||||
void RecalculateDependentMatrices(unsigned i);
|
||||
|
||||
bool bWorldViewDirty;
|
||||
std::array<bool, 4> bWorldViewDirty;
|
||||
// Combines world/view matrices
|
||||
std::array<D3DMATRIX, 4> WorldView;
|
||||
// World/view inverse transpose for lighting calculations
|
||||
|
|
|
@ -1796,7 +1796,7 @@ typedef enum _TXBType {
|
|||
xtD3DTEXTUREOP, // Used for TextureStageState X_D3DTSS_COLOROP and X_D3DTSS_ALPHAOP
|
||||
xtD3DTEXTURESTAGESTATETYPE,
|
||||
xtD3DTRANSFORMSTATETYPE,
|
||||
xtD3DVERTEXBLENDFLAGS,
|
||||
xtD3DVERTEXBLENDFLAGS, // Used for X_D3DRS_VERTEXBLEND
|
||||
xtD3DVSDE,
|
||||
xtD3DWRAP,
|
||||
xtDWORD,
|
||||
|
|
|
@ -1007,6 +1007,18 @@ constexpr DWORD X_D3DCOLORWRITEENABLE_ALL = 0x01010101; // Xbox ext.
|
|||
// deferred texture stage state "unknown" flag
|
||||
#define X_D3DTSS_UNK 0x7fffffff
|
||||
|
||||
// Xbox vertex blending modes, as set through the X_D3DRS_VERTEXBLEND render state.
// For 0 and the odd values, the weight of the last matrix is not supplied but derived
// (1 minus the sum of the supplied weights). The even non-zero values are Xbox
// extensions in which every matrix has its own supplied weight.
typedef enum _D3DVERTEXBLENDFLAGS
|
||||
{
|
||||
X_D3DVBF_DISABLE = 0, // 1 matrix, 0 weights => final weight effectively 1 (Disable vertex blending)
|
||||
X_D3DVBF_1WEIGHTS = 1, // 2 matrices, 1 weight => final weight calculated
|
||||
X_D3DVBF_2WEIGHTS = 3, // 3 matrices, 2 weights => final weight calculated
|
||||
X_D3DVBF_3WEIGHTS = 5, // 4 matrices, 3 weights => final weight calculated
|
||||
X_D3DVBF_2WEIGHTS2MATRICES = 2, // 2 matrices, 2 weights (Xbox ext.)
|
||||
X_D3DVBF_3WEIGHTS3MATRICES = 4, // 3 matrices, 3 weights (Xbox ext.)
|
||||
X_D3DVBF_4WEIGHTS4MATRICES = 6, // 4 matrices, 4 weights (Xbox ext.)
|
||||
X_D3DVBF_FORCE_DWORD = 0x7fffffff
|
||||
} X_D3DVERTEXBLENDFLAGS;
|
||||
|
||||
typedef DWORD X_VERTEXSHADERCONSTANTMODE;
|
||||
|
||||
// Xbox vertex shader constant modes
|
||||
|
|
Loading…
Reference in New Issue