[D3D12] DXBC vertex W format

This commit is contained in:
Triang3l 2018-09-12 21:39:13 +03:00
parent 283461be6c
commit 099de4487b
4 changed files with 96 additions and 16 deletions

View File

@ -1347,15 +1347,15 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
// = false: multiply the Z coordinate by 1/W0.
// VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to
// get 1/W0.
float vtx_xy_fmt = (pa_cl_vte_cntl & (1 << 8)) ? 1.0f : 0.0f;
float vtx_z_fmt = (pa_cl_vte_cntl & (1 << 9)) ? 1.0f : 0.0f;
float vtx_w0_fmt = (pa_cl_vte_cntl & (1 << 10)) ? 1.0f : 0.0f;
dirty |= system_constants_.mul_rcp_w[0] != vtx_xy_fmt;
dirty |= system_constants_.mul_rcp_w[1] != vtx_z_fmt;
dirty |= system_constants_.mul_rcp_w[2] != vtx_w0_fmt;
system_constants_.mul_rcp_w[0] = vtx_xy_fmt;
system_constants_.mul_rcp_w[1] = vtx_z_fmt;
system_constants_.mul_rcp_w[2] = vtx_w0_fmt;
uint32_t vtx_xy_fmt = (pa_cl_vte_cntl >> 8) & 1;
uint32_t vtx_z_fmt = (pa_cl_vte_cntl >> 9) & 1;
uint32_t vtx_w0_fmt = (pa_cl_vte_cntl >> 10) & 1;
dirty |= system_constants_.vertex_w_format[0] != vtx_xy_fmt;
dirty |= system_constants_.vertex_w_format[1] != vtx_z_fmt;
dirty |= system_constants_.vertex_w_format[2] != vtx_w0_fmt;
system_constants_.vertex_w_format[0] = vtx_xy_fmt;
system_constants_.vertex_w_format[1] = vtx_z_fmt;
system_constants_.vertex_w_format[2] = vtx_w0_fmt;
// Conversion to Direct3D 12 normalized device coordinates.
// See viewport configuration in UpdateFixedFunctionState for explanations.

View File

@ -3,7 +3,7 @@
cbuffer xe_system_cbuffer : register(b0) {
// vec4 0
float3 xe_mul_rcp_w;
uint3 xe_vertex_w_format;
uint xe_vertex_base_index;
// vec4 1
float3 xe_ndc_scale;

View File

@ -517,7 +517,85 @@ void DxbcShaderTranslator::StartTranslation() {
}
void DxbcShaderTranslator::CompleteVertexShader() {
// TODO(Triang3l): vtx_fmt.
// Revert getting the reciprocal of W and dividing XY by W if needed.
// TODO(Triang3l): Check if having XY or Z pre-divided by W should enable
// affine interpolation.
rdef_constants_used_ |= 1ull
<< uint32_t(RdefConstantIndex::kSysVertexWFormat);
uint32_t w_format_temp = PushSystemTemp();
// If the shader has returned 1/W, restore W. First take the reciprocal, which
// may be either W (what we need) or 1/W, depending on the vertex W format.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_RCP) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(w_format_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
shader_code_.push_back(system_temp_position_);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Then, if the shader returns 1/W (vtx_w0_fmt is 0), write 1/(1/W) to the
// position.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1));
shader_code_.push_back(system_temp_position_);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
kSysConst_VertexWFormat_Comp + 2, 3));
shader_code_.push_back(uint32_t(RdefConstantBufferIndex::kSystemConstants));
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_VertexWFormat_Vec);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
shader_code_.push_back(system_temp_position_);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(w_format_temp);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Multiply XYZ by W in case the shader returns XYZ/W and we'll need to
// restore XYZ.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(w_format_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_position_);
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
shader_code_.push_back(system_temp_position_);
++stat_.instruction_count;
++stat_.float_instruction_count;
// If vtx_xy_fmt and/or vtx_z_fmt are 1, XY and/or Z are pre-divided by W.
// Restore them in this case.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(system_temp_position_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
kSysConst_VertexWFormat_Comp | (kSysConst_VertexWFormat_Comp << 2) |
((kSysConst_VertexWFormat_Comp + 1) << 4),
3));
shader_code_.push_back(uint32_t(RdefConstantBufferIndex::kSystemConstants));
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_VertexWFormat_Vec);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(w_format_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_position_);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Release w_format_temp.
PopSystemTemp();
// Apply scale for drawing without a viewport.
rdef_constants_used_ |= 1ull << uint32_t(RdefConstantIndex::kSysNDCScale);
@ -5976,6 +6054,7 @@ const DxbcShaderTranslator::RdefType DxbcShaderTranslator::rdef_types_[size_t(
{"float4", 1, 3, 1, 4, 0, 0, RdefTypeIndex::kUnknown, nullptr},
{"int", 0, 2, 1, 1, 0, 0, RdefTypeIndex::kUnknown, nullptr},
{"uint", 0, 19, 1, 1, 0, 0, RdefTypeIndex::kUnknown, nullptr},
{"uint3", 1, 19, 1, 3, 0, 0, RdefTypeIndex::kUnknown, nullptr},
{"uint4", 1, 19, 1, 4, 0, 0, RdefTypeIndex::kUnknown, nullptr},
{nullptr, 1, 19, 1, 4, 8, 0, RdefTypeIndex::kUint4, nullptr},
{nullptr, 1, 19, 1, 4, 32, 0, RdefTypeIndex::kUint4, nullptr},
@ -5991,7 +6070,7 @@ const DxbcShaderTranslator::RdefConstant
DxbcShaderTranslator::RdefConstantIndex::kCount)] = {
// SYSTEM CONSTANTS MUST BE UPDATED IF THEIR LAYOUT CHANGES!
// System constants vec4 0.
{"xe_mul_rcp_w", RdefTypeIndex::kFloat3, 0, 12},
{"xe_vertex_w_format", RdefTypeIndex::kUint3, 0, 12},
{"xe_vertex_base_index", RdefTypeIndex::kUint, 12, 4},
// System constants vec4 1.
{"xe_ndc_scale", RdefTypeIndex::kFloat3, 16, 12},

View File

@ -33,7 +33,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
// - d3d12/shaders/xenos_draw.hlsli (for geometry shaders).
struct SystemConstants {
// vec4 0
float mul_rcp_w[3];
uint32_t vertex_w_format[3];
uint32_t vertex_base_index;
// vec4 1
@ -139,8 +139,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
};
enum : uint32_t {
kSysConst_MulRcpW_Vec = 0,
kSysConst_MulRcpW_Comp = 0,
kSysConst_VertexWFormat_Vec = 0,
kSysConst_VertexWFormat_Comp = 0,
kSysConst_VertexBaseIndex_Vec = 0,
kSysConst_VertexBaseIndex_Comp = 3,
@ -397,6 +397,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
kFloat4,
kInt,
kUint,
kUint3,
kUint4,
// Bool constants.
kUint4Array8,
@ -438,7 +439,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
enum class RdefConstantIndex {
kSystemConstantFirst,
kSysMulRcpW = kSystemConstantFirst,
kSysVertexWFormat = kSystemConstantFirst,
kSysVertexBaseIndex,
kSysNDCScale,
kSysVertexIndexEndian,