[DXBC] All prologues and epilogues to new DXBC code
This commit is contained in:
parent
1799585e92
commit
96a61bc623
|
@ -634,6 +634,12 @@ void DxbcShaderTranslator::StartPixelShader() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!edram_rov_used_ && writes_depth()) {
|
||||||
|
// Initialize the depth output if used, which must be written to regardless
|
||||||
|
// of the taken execution path.
|
||||||
|
DxbcOpMov(DxbcDest::ODepth(), DxbcSrc::LF(0.0f));
|
||||||
|
}
|
||||||
|
|
||||||
uint32_t interpolator_count = std::min(kInterpolatorCount, register_count());
|
uint32_t interpolator_count = std::min(kInterpolatorCount, register_count());
|
||||||
if (interpolator_count != 0) {
|
if (interpolator_count != 0) {
|
||||||
// Copy interpolants to GPRs.
|
// Copy interpolants to GPRs.
|
||||||
|
@ -901,333 +907,136 @@ void DxbcShaderTranslator::StartTranslation() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start the main loop (for jumping to labels by setting pc and continuing).
|
// Start the main loop (for jumping to labels by setting pc and continuing).
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_LOOP) |
|
DxbcOpLoop();
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.dynamic_flow_control_count;
|
|
||||||
// Switch and the first label (pc == 0).
|
// Switch and the first label (pc == 0).
|
||||||
if (UseSwitchForControlFlow()) {
|
if (UseSwitchForControlFlow()) {
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SWITCH) |
|
DxbcOpSwitch(DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kYYYY));
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
DxbcOpCase(DxbcSrc::LU(0));
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
|
|
||||||
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.dynamic_flow_control_count;
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_CASE) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
|
||||||
shader_code_.push_back(0);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.static_flow_control_count;
|
|
||||||
} else {
|
} else {
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
DxbcOpIf(false, DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kYYYY));
|
||||||
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
|
||||||
D3D10_SB_INSTRUCTION_TEST_ZERO) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
|
|
||||||
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.dynamic_flow_control_count;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
|
void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
|
||||||
// Get what we need to do with the position.
|
uint32_t temp = PushSystemTemp();
|
||||||
uint32_t ndc_control_temp = PushSystemTemp();
|
DxbcDest temp_x_dest(DxbcDest::R(temp, 0b0001));
|
||||||
|
DxbcSrc temp_x_src(DxbcSrc::R(temp, DxbcSrc::kXXXX));
|
||||||
|
|
||||||
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
|
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
DxbcSrc flags_src(DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
uint32_t(CbufferRegister::kSystemConstants),
|
||||||
shader_code_.push_back(
|
kSysConst_Flags_Vec)
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
.Select(kSysConst_Flags_Comp));
|
||||||
shader_code_.push_back(ndc_control_temp);
|
|
||||||
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3));
|
|
||||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
|
||||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
|
||||||
shader_code_.push_back(kSysConst_Flags_Vec);
|
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
|
||||||
shader_code_.push_back(kSysFlag_XYDividedByW);
|
|
||||||
shader_code_.push_back(kSysFlag_ZDividedByW);
|
|
||||||
shader_code_.push_back(kSysFlag_WNotReciprocal);
|
|
||||||
shader_code_.push_back(kSysFlag_ReverseZ);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.uint_instruction_count;
|
|
||||||
|
|
||||||
// Revert getting the reciprocal of W and dividing XY by W if needed.
|
// Check if the shader already returns W, not 1/W, and if it doesn't, turn 1/W
|
||||||
// TODO(Triang3l): Check if having XY or Z pre-divided by W should enable
|
// into W.
|
||||||
|
DxbcOpAnd(temp_x_dest, flags_src, DxbcSrc::LU(kSysFlag_WNotReciprocal));
|
||||||
|
DxbcOpIf(false, temp_x_src);
|
||||||
|
DxbcOpRcp(DxbcDest::R(system_temp_position_, 0b1000),
|
||||||
|
DxbcSrc::R(system_temp_position_, DxbcSrc::kWWWW));
|
||||||
|
DxbcOpEndIf();
|
||||||
|
|
||||||
|
// Check if the shader returns XY/W rather than XY, and if it does, revert
|
||||||
|
// that.
|
||||||
|
// TODO(Triang3l): Check if having XY or Z pre-divided by W should result in
|
||||||
// affine interpolation.
|
// affine interpolation.
|
||||||
uint32_t w_format_temp = PushSystemTemp();
|
DxbcOpAnd(temp_x_dest, flags_src, DxbcSrc::LU(kSysFlag_XYDividedByW));
|
||||||
// If the shader has returned 1/W, restore W. First take the reciprocal, which
|
DxbcOpIf(true, temp_x_src);
|
||||||
// may be either W (what we need) or 1/W, depending on the vertex W format.
|
DxbcOpMul(DxbcDest::R(system_temp_position_, 0b0011),
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_RCP) |
|
DxbcSrc::R(system_temp_position_),
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
DxbcSrc::R(system_temp_position_, DxbcSrc::kWWWW));
|
||||||
shader_code_.push_back(
|
DxbcOpEndIf();
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
|
||||||
shader_code_.push_back(w_format_temp);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.float_instruction_count;
|
|
||||||
// Then, if the shader returns 1/W (vtx_w0_fmt is 0), write 1/(1/W) to the
|
|
||||||
// position.
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
|
|
||||||
shader_code_.push_back(ndc_control_temp);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
|
||||||
shader_code_.push_back(w_format_temp);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.movc_instruction_count;
|
|
||||||
// Multiply XYZ by W in case the shader returns XYZ/W and we'll need to
|
|
||||||
// restore XYZ.
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
|
||||||
shader_code_.push_back(w_format_temp);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.float_instruction_count;
|
|
||||||
// If vtx_xy_fmt and/or vtx_z_fmt are 1, XY and/or Z are pre-divided by W.
|
|
||||||
// Restore them in this case.
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b00010000, 1));
|
|
||||||
shader_code_.push_back(ndc_control_temp);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
|
||||||
shader_code_.push_back(w_format_temp);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.movc_instruction_count;
|
|
||||||
// Release w_format_temp.
|
|
||||||
PopSystemTemp();
|
|
||||||
|
|
||||||
|
// Check if the shader returns Z/W rather than Z, and if it does, revert that.
|
||||||
|
// TODO(Triang3l): Check if having XY or Z pre-divided by W should result in
|
||||||
|
// affine interpolation.
|
||||||
|
DxbcOpAnd(temp_x_dest, flags_src, DxbcSrc::LU(kSysFlag_ZDividedByW));
|
||||||
|
DxbcOpIf(true, temp_x_src);
|
||||||
|
DxbcOpMul(DxbcDest::R(system_temp_position_, 0b0100),
|
||||||
|
DxbcSrc::R(system_temp_position_, DxbcSrc::kZZZZ),
|
||||||
|
DxbcSrc::R(system_temp_position_, DxbcSrc::kWWWW));
|
||||||
|
DxbcOpEndIf();
|
||||||
|
|
||||||
|
// Zero-initialize SV_ClipDistance# (for user clip planes) and SV_CullDistance
|
||||||
|
// (for vertex kill) in case they're not needed.
|
||||||
|
DxbcOpMov(DxbcDest::O(uint32_t(InOutRegister::kVSDSOutClipDistance0123)),
|
||||||
|
DxbcSrc::LF(0.0f));
|
||||||
|
DxbcOpMov(DxbcDest::O(
|
||||||
|
uint32_t(InOutRegister::kVSDSOutClipDistance45AndCullDistance),
|
||||||
|
0b0111),
|
||||||
|
DxbcSrc::LF(0.0f));
|
||||||
// Clip against user clip planes.
|
// Clip against user clip planes.
|
||||||
// Not possible to handle UCP_CULL_ONLY_ENA with the same shader though, since
|
// Not possible to handle UCP_CULL_ONLY_ENA with the same shader though, since
|
||||||
// there can be only 8 SV_ClipDistance + SV_CullDistance values at most, but
|
// there can be only 8 SV_ClipDistance + SV_CullDistance values at most, but
|
||||||
// 12 would be needed.
|
// 12 would be needed.
|
||||||
uint32_t ucp_dot_temp = PushSystemTemp();
|
system_constants_used_ |= 1ull << kSysConst_UserClipPlanes_Index;
|
||||||
uint32_t ucp_enabled_temp = PushSystemTemp();
|
for (uint32_t i = 0; i < 6; ++i) {
|
||||||
system_constants_used_ |= (1ull << kSysConst_UserClipPlanes_Index) |
|
// Check if the clip plane is enabled - this `if` is needed, as opposed to
|
||||||
(1ull << kSysConst_Flags_Index);
|
// just zeroing the clip planes in the constants, so Infinity and NaN in the
|
||||||
for (uint32_t i = 0; i < 2; ++i) {
|
// position won't have any effect caused by this if clip planes are
|
||||||
uint32_t ucp_count = i ? 2 : 4;
|
// disabled.
|
||||||
uint32_t ucp_mask = (1 << ucp_count) - 1;
|
DxbcOpAnd(temp_x_dest, flags_src,
|
||||||
for (uint32_t j = 0; j < ucp_count; ++j) {
|
DxbcSrc::LU(kSysFlag_UserClipPlane0 << i));
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DP4) |
|
DxbcOpIf(true, temp_x_src);
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
DxbcOpDP4(DxbcDest::O(
|
||||||
shader_code_.push_back(
|
uint32_t(InOutRegister::kVSDSOutClipDistance0123) + (i >> 2),
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1 << j, 1));
|
1 << (i & 3)),
|
||||||
shader_code_.push_back(ucp_dot_temp);
|
DxbcSrc::R(system_temp_position_),
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
uint32_t(CbufferRegister::kSystemConstants),
|
||||||
shader_code_.push_back(system_temp_position_);
|
kSysConst_UserClipPlanes_Vec + i));
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
DxbcOpEndIf();
|
||||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
|
|
||||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
|
||||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
|
||||||
shader_code_.push_back(kSysConst_UserClipPlanes_Vec + i * 4 + j);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.float_instruction_count;
|
|
||||||
}
|
|
||||||
// Using movc rather than zeroing the planes in the constants because dp4
|
|
||||||
// would handle Infinity and NaN in an unexpected way.
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, ucp_mask, 1));
|
|
||||||
shader_code_.push_back(ucp_enabled_temp);
|
|
||||||
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3));
|
|
||||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
|
||||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
|
||||||
shader_code_.push_back(kSysConst_Flags_Vec);
|
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
|
||||||
for (uint32_t j = 0; j < ucp_count; ++j) {
|
|
||||||
shader_code_.push_back(kSysFlag_UserClipPlane0 << (i * 4 + j));
|
|
||||||
}
|
|
||||||
for (uint32_t j = ucp_count; j < 4; ++j) {
|
|
||||||
shader_code_.push_back(0);
|
|
||||||
}
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.uint_instruction_count;
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, ucp_mask, 1));
|
|
||||||
shader_code_.push_back(uint32_t(InOutRegister::kVSDSOutClipDistance0123) +
|
|
||||||
i);
|
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
|
||||||
shader_code_.push_back(ucp_enabled_temp);
|
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
|
||||||
shader_code_.push_back(ucp_dot_temp);
|
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
|
||||||
shader_code_.push_back(0);
|
|
||||||
shader_code_.push_back(0);
|
|
||||||
shader_code_.push_back(0);
|
|
||||||
shader_code_.push_back(0);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.movc_instruction_count;
|
|
||||||
}
|
}
|
||||||
// Release ucp_dot_temp and ucp_enabled_temp.
|
|
||||||
PopSystemTemp(2);
|
|
||||||
|
|
||||||
// Apply scale for drawing without a viewport, and also remap from OpenGL
|
// Apply scale for drawing without a viewport, and also remap from OpenGL
|
||||||
// Z clip space to Direct3D if needed. Also, if the vertex shader is
|
// Z clip space to Direct3D if needed. Also, if the vertex shader is
|
||||||
// multipass, the NDC scale constant can be used to set position to NaN to
|
// multipass, the NDC scale constant can be used to set position to NaN to
|
||||||
// kill all primitives.
|
// kill all primitives.
|
||||||
system_constants_used_ |= 1ull << kSysConst_NDCScale_Index;
|
system_constants_used_ |= 1ull << kSysConst_NDCScale_Index;
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) |
|
DxbcOpMul(DxbcDest::R(system_temp_position_, 0b0111),
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
DxbcSrc::R(system_temp_position_),
|
||||||
shader_code_.push_back(
|
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
uint32_t(CbufferRegister::kSystemConstants),
|
||||||
shader_code_.push_back(system_temp_position_);
|
kSysConst_NDCScale_Vec,
|
||||||
shader_code_.push_back(
|
kSysConst_NDCScale_Comp * 0b010101 + 0b100100));
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
|
|
||||||
kSysConst_NDCScale_Comp | ((kSysConst_NDCScale_Comp + 1) << 2) |
|
|
||||||
((kSysConst_NDCScale_Comp + 2) << 4),
|
|
||||||
3));
|
|
||||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
|
||||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
|
||||||
shader_code_.push_back(kSysConst_NDCScale_Vec);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.float_instruction_count;
|
|
||||||
|
|
||||||
// Reverse Z (Z = W - Z) if the viewport depth is inverted.
|
// Reverse Z (Z = W - Z) if the viewport depth is inverted.
|
||||||
uint32_t reverse_z_temp = PushSystemTemp();
|
DxbcOpAnd(temp_x_dest, flags_src, DxbcSrc::LU(kSysFlag_ReverseZ));
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
|
DxbcOpIf(true, temp_x_src);
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
DxbcOpAdd(DxbcDest::R(system_temp_position_, 0b0100),
|
||||||
shader_code_.push_back(
|
DxbcSrc::R(system_temp_position_, DxbcSrc::kWWWW),
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
-DxbcSrc::R(system_temp_position_, DxbcSrc::kZZZZ));
|
||||||
shader_code_.push_back(reverse_z_temp);
|
DxbcOpEndIf();
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1) |
|
|
||||||
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
|
|
||||||
shader_code_.push_back(
|
|
||||||
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(D3D10_SB_OPERAND_MODIFIER_NEG));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.float_instruction_count;
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
|
|
||||||
shader_code_.push_back(ndc_control_temp);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
|
||||||
shader_code_.push_back(reverse_z_temp);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.movc_instruction_count;
|
|
||||||
// Release reverse_z_temp.
|
|
||||||
PopSystemTemp();
|
|
||||||
|
|
||||||
// Release ndc_control_temp.
|
|
||||||
PopSystemTemp();
|
|
||||||
|
|
||||||
// Apply offset (multiplied by W) for drawing without a viewport and for half
|
// Apply offset (multiplied by W) for drawing without a viewport and for half
|
||||||
// pixel offset.
|
// pixel offset.
|
||||||
system_constants_used_ |= 1ull << kSysConst_NDCOffset_Index;
|
system_constants_used_ |= 1ull << kSysConst_NDCOffset_Index;
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) |
|
DxbcOpMAd(DxbcDest::R(system_temp_position_, 0b0111),
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
|
DxbcSrc::CB(cbuffer_index_system_constants_,
|
||||||
shader_code_.push_back(
|
uint32_t(CbufferRegister::kSystemConstants),
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
kSysConst_NDCOffset_Vec,
|
||||||
shader_code_.push_back(system_temp_position_);
|
kSysConst_NDCOffset_Comp * 0b010101 + 0b100100),
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
DxbcSrc::R(system_temp_position_, DxbcSrc::kWWWW),
|
||||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
|
DxbcSrc::R(system_temp_position_));
|
||||||
kSysConst_NDCOffset_Comp | ((kSysConst_NDCOffset_Comp + 1) << 2) |
|
|
||||||
((kSysConst_NDCOffset_Comp + 2) << 4),
|
|
||||||
3));
|
|
||||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
|
||||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
|
||||||
shader_code_.push_back(kSysConst_NDCOffset_Vec);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.float_instruction_count;
|
|
||||||
|
|
||||||
// Write Z and W of the position to a separate attribute so ROV output can get
|
// Write Z and W of the position to a separate attribute so ROV output can get
|
||||||
// per-sample depth.
|
// per-sample depth.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
DxbcOpMov(DxbcDest::O(uint32_t(InOutRegister::kVSDSOutClipSpaceZW), 0b0011),
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
DxbcSrc::R(system_temp_position_, 0b1110));
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b0011, 1));
|
|
||||||
shader_code_.push_back(uint32_t(InOutRegister::kVSDSOutClipSpaceZW));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b11111110, 1));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.mov_instruction_count;
|
|
||||||
|
|
||||||
// Initialize SV_CullDistance.
|
// Assuming SV_CullDistance was zeroed earlier in this function.
|
||||||
DxbcOpMov(DxbcDest::O(
|
|
||||||
uint32_t(InOutRegister::kVSDSOutClipDistance45AndCullDistance),
|
|
||||||
0b0100),
|
|
||||||
DxbcSrc::LF(0.0f));
|
|
||||||
// Kill the primitive if needed - check if the shader wants to kill.
|
// Kill the primitive if needed - check if the shader wants to kill.
|
||||||
// TODO(Triang3l): Find if the condition is actually the flag being non-zero.
|
// TODO(Triang3l): Find if the condition is actually the flag being non-zero.
|
||||||
uint32_t kill_temp = PushSystemTemp();
|
|
||||||
DxbcOpNE(
|
DxbcOpNE(
|
||||||
DxbcDest::R(kill_temp, 0b0001),
|
temp_x_dest,
|
||||||
DxbcSrc::R(system_temp_point_size_edge_flag_kill_vertex_, DxbcSrc::kZZZZ),
|
DxbcSrc::R(system_temp_point_size_edge_flag_kill_vertex_, DxbcSrc::kZZZZ),
|
||||||
DxbcSrc::LF(0.0f));
|
DxbcSrc::LF(0.0f));
|
||||||
DxbcOpIf(true, DxbcSrc::R(kill_temp, DxbcSrc::kXXXX));
|
DxbcOpIf(true, temp_x_src);
|
||||||
{
|
{
|
||||||
// Extract the killing condition.
|
// Extract the killing condition.
|
||||||
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
|
DxbcOpAnd(temp_x_dest, flags_src,
|
||||||
DxbcOpAnd(DxbcDest::R(kill_temp, 0b0001),
|
|
||||||
DxbcSrc::CB(cbuffer_index_system_constants_,
|
|
||||||
uint32_t(CbufferRegister::kSystemConstants),
|
|
||||||
kSysConst_Flags_Vec)
|
|
||||||
.Select(kSysConst_Flags_Comp),
|
|
||||||
DxbcSrc::LU(kSysFlag_KillIfAnyVertexKilled_Shift));
|
DxbcSrc::LU(kSysFlag_KillIfAnyVertexKilled_Shift));
|
||||||
DxbcOpIf(true, DxbcSrc::R(kill_temp, DxbcSrc::kXXXX));
|
DxbcOpIf(true, temp_x_src);
|
||||||
// Release kill_temp.
|
|
||||||
PopSystemTemp();
|
|
||||||
{
|
{
|
||||||
// Kill the primitive if any vertex is killed - write NaN to position.
|
// Kill the primitive if any vertex is killed - write NaN to position.
|
||||||
DxbcOpMov(DxbcDest::R(system_temp_position_, 0b1000),
|
DxbcOpMov(DxbcDest::R(system_temp_position_, 0b1000),
|
||||||
|
@ -1248,16 +1057,8 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
|
||||||
DxbcOpEndIf();
|
DxbcOpEndIf();
|
||||||
|
|
||||||
// Write the position to the output.
|
// Write the position to the output.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
DxbcOpMov(DxbcDest::O(uint32_t(InOutRegister::kVSDSOutPosition)),
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
DxbcSrc::R(system_temp_position_));
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b1111, 1));
|
|
||||||
shader_code_.push_back(uint32_t(InOutRegister::kVSDSOutPosition));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
|
||||||
shader_code_.push_back(system_temp_position_);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.mov_instruction_count;
|
|
||||||
|
|
||||||
// Zero the point coordinate (will be set in the geometry shader if needed)
|
// Zero the point coordinate (will be set in the geometry shader if needed)
|
||||||
// and write the point size.
|
// and write the point size.
|
||||||
|
@ -1268,6 +1069,9 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
|
||||||
DxbcDest::O(uint32_t(InOutRegister::kVSDSOutPointParameters), 0b0100),
|
DxbcDest::O(uint32_t(InOutRegister::kVSDSOutPointParameters), 0b0100),
|
||||||
DxbcSrc::R(system_temp_point_size_edge_flag_kill_vertex_,
|
DxbcSrc::R(system_temp_point_size_edge_flag_kill_vertex_,
|
||||||
DxbcSrc::kXXXX));
|
DxbcSrc::kXXXX));
|
||||||
|
|
||||||
|
// Release temp.
|
||||||
|
PopSystemTemp();
|
||||||
}
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::CompleteShaderCode() {
|
void DxbcShaderTranslator::CompleteShaderCode() {
|
||||||
|
@ -1277,28 +1081,14 @@ void DxbcShaderTranslator::CompleteShaderCode() {
|
||||||
CloseExecConditionals();
|
CloseExecConditionals();
|
||||||
// Close the last label and the switch.
|
// Close the last label and the switch.
|
||||||
if (UseSwitchForControlFlow()) {
|
if (UseSwitchForControlFlow()) {
|
||||||
shader_code_.push_back(
|
DxbcOpBreak();
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
|
DxbcOpEndSwitch();
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
|
||||||
++stat_.instruction_count;
|
|
||||||
shader_code_.push_back(
|
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDSWITCH) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
|
||||||
++stat_.instruction_count;
|
|
||||||
} else {
|
} else {
|
||||||
shader_code_.push_back(
|
DxbcOpEndIf();
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
|
||||||
++stat_.instruction_count;
|
|
||||||
}
|
}
|
||||||
// End the main loop.
|
// End the main loop.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_BREAK) |
|
DxbcOpBreak();
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
DxbcOpEndLoop();
|
||||||
++stat_.instruction_count;
|
|
||||||
shader_code_.push_back(
|
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDLOOP) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
|
||||||
++stat_.instruction_count;
|
|
||||||
|
|
||||||
// Release the following system temporary values so epilogue can reuse them:
|
// Release the following system temporary values so epilogue can reuse them:
|
||||||
// - system_temp_pv_.
|
// - system_temp_pv_.
|
||||||
|
@ -1339,10 +1129,7 @@ void DxbcShaderTranslator::CompleteShaderCode() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return from `main`.
|
// Return from `main`.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_RET) |
|
DxbcOpRet();
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.static_flow_control_count;
|
|
||||||
|
|
||||||
// Write subroutines - can only do this immediately after `ret`. They still
|
// Write subroutines - can only do this immediately after `ret`. They still
|
||||||
// need the global system temps, and can't allocate their own temps (since
|
// need the global system temps, and can't allocate their own temps (since
|
||||||
|
@ -4261,20 +4048,6 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
||||||
stat_.temp_array_count += register_count();
|
stat_.temp_array_count += register_count();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize the depth output if used, which must be initialized on every
|
|
||||||
// execution path.
|
|
||||||
if (!edram_rov_used_ && IsDxbcPixelShader() && writes_depth()) {
|
|
||||||
shader_object_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
|
|
||||||
shader_object_.push_back(
|
|
||||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH, 0));
|
|
||||||
shader_object_.push_back(
|
|
||||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
|
||||||
shader_object_.push_back(0);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.mov_instruction_count;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write the translated shader code.
|
// Write the translated shader code.
|
||||||
size_t code_size_dwords = shader_code_.size();
|
size_t code_size_dwords = shader_code_.size();
|
||||||
// So [] won't crash in case the size is zero somehow.
|
// So [] won't crash in case the size is zero somehow.
|
||||||
|
|
|
@ -1082,6 +1082,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
kDefault = 10,
|
kDefault = 10,
|
||||||
kDiscard = 13,
|
kDiscard = 13,
|
||||||
kDiv = 14,
|
kDiv = 14,
|
||||||
|
kDP4 = 17,
|
||||||
kElse = 18,
|
kElse = 18,
|
||||||
kEndIf = 21,
|
kEndIf = 21,
|
||||||
kEndLoop = 22,
|
kEndLoop = 22,
|
||||||
|
@ -1101,6 +1102,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
kIShL = 41,
|
kIShL = 41,
|
||||||
kIToF = 43,
|
kIToF = 43,
|
||||||
kLabel = 44,
|
kLabel = 44,
|
||||||
|
kLoop = 48,
|
||||||
kLT = 49,
|
kLT = 49,
|
||||||
kMAd = 50,
|
kMAd = 50,
|
||||||
kMin = 51,
|
kMin = 51,
|
||||||
|
@ -1129,6 +1131,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
kDerivRTXFine = 123,
|
kDerivRTXFine = 123,
|
||||||
kDerivRTYCoarse = 124,
|
kDerivRTYCoarse = 124,
|
||||||
kDerivRTYFine = 125,
|
kDerivRTYFine = 125,
|
||||||
|
kRcp = 129,
|
||||||
kF32ToF16 = 130,
|
kF32ToF16 = 130,
|
||||||
kF16ToF32 = 131,
|
kF16ToF32 = 131,
|
||||||
kFirstBitHi = 135,
|
kFirstBitHi = 135,
|
||||||
|
@ -1286,6 +1289,19 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
DxbcEmitAluOp(DxbcOpcode::kDiv, 0b00, dest, src0, src1, saturate);
|
DxbcEmitAluOp(DxbcOpcode::kDiv, 0b00, dest, src0, src1, saturate);
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
}
|
}
|
||||||
|
void DxbcOpDP4(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1,
|
||||||
|
bool saturate = false) {
|
||||||
|
uint32_t operands_length =
|
||||||
|
dest.GetLength() + src0.GetLength(0b1111) + src1.GetLength(0b1111);
|
||||||
|
shader_code_.reserve(shader_code_.size() + 1 + operands_length);
|
||||||
|
shader_code_.push_back(
|
||||||
|
DxbcOpcodeToken(DxbcOpcode::kDP4, operands_length, saturate));
|
||||||
|
dest.Write(shader_code_);
|
||||||
|
src0.Write(shader_code_, false, 0b1111);
|
||||||
|
src1.Write(shader_code_, false, 0b1111);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
}
|
||||||
void DxbcOpElse() {
|
void DxbcOpElse() {
|
||||||
shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kElse, 0));
|
shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kElse, 0));
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
|
@ -1378,6 +1394,11 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
label.Write(shader_code_, true, 0b0000);
|
label.Write(shader_code_, true, 0b0000);
|
||||||
// Doesn't count towards stat_.instruction_count.
|
// Doesn't count towards stat_.instruction_count.
|
||||||
}
|
}
|
||||||
|
void DxbcOpLoop() {
|
||||||
|
shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kLoop, 0));
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.dynamic_flow_control_count;
|
||||||
|
}
|
||||||
void DxbcOpLT(const DxbcDest& dest, const DxbcSrc& src0,
|
void DxbcOpLT(const DxbcDest& dest, const DxbcSrc& src0,
|
||||||
const DxbcSrc& src1) {
|
const DxbcSrc& src1) {
|
||||||
DxbcEmitAluOp(DxbcOpcode::kLT, 0b00, dest, src0, src1);
|
DxbcEmitAluOp(DxbcOpcode::kLT, 0b00, dest, src0, src1);
|
||||||
|
@ -1521,6 +1542,11 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
DxbcEmitAluOp(DxbcOpcode::kDerivRTYFine, 0b0, dest, src, saturate);
|
DxbcEmitAluOp(DxbcOpcode::kDerivRTYFine, 0b0, dest, src, saturate);
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
}
|
}
|
||||||
|
void DxbcOpRcp(const DxbcDest& dest, const DxbcSrc& src,
|
||||||
|
bool saturate = false) {
|
||||||
|
DxbcEmitAluOp(DxbcOpcode::kRcp, 0b0, dest, src, saturate);
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
}
|
||||||
void DxbcOpF32ToF16(const DxbcDest& dest, const DxbcSrc& src) {
|
void DxbcOpF32ToF16(const DxbcDest& dest, const DxbcSrc& src) {
|
||||||
DxbcEmitAluOp(DxbcOpcode::kF32ToF16, 0b0, dest, src);
|
DxbcEmitAluOp(DxbcOpcode::kF32ToF16, 0b0, dest, src);
|
||||||
++stat_.conversion_instruction_count;
|
++stat_.conversion_instruction_count;
|
||||||
|
|
Loading…
Reference in New Issue