[D3D12] ROV: Alpha to coverage (without dithering)
This commit is contained in:
parent
6f5d616372
commit
0a9feb5eca
|
@ -2028,6 +2028,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
|
||||||
DxbcShaderTranslator::kSysFlag_AlphaPassIfEqual |
|
DxbcShaderTranslator::kSysFlag_AlphaPassIfEqual |
|
||||||
DxbcShaderTranslator::kSysFlag_AlphaPassIfGreater;
|
DxbcShaderTranslator::kSysFlag_AlphaPassIfGreater;
|
||||||
}
|
}
|
||||||
|
// Alpha to coverage.
|
||||||
|
if (rb_colorcontrol & 0x10) {
|
||||||
|
flags |= DxbcShaderTranslator::kSysFlag_AlphaToCoverage;
|
||||||
|
}
|
||||||
// Gamma writing.
|
// Gamma writing.
|
||||||
if (((regs[XE_GPU_REG_RB_COLOR_INFO].u32 >> 16) & 0xF) ==
|
if (((regs[XE_GPU_REG_RB_COLOR_INFO].u32 >> 16) & 0xF) ==
|
||||||
uint32_t(ColorRenderTargetFormat::k_8_8_8_8_GAMMA)) {
|
uint32_t(ColorRenderTargetFormat::k_8_8_8_8_GAMMA)) {
|
||||||
|
|
|
@ -56,6 +56,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
kSysFlag_AlphaPassIfLess_Shift,
|
kSysFlag_AlphaPassIfLess_Shift,
|
||||||
kSysFlag_AlphaPassIfEqual_Shift,
|
kSysFlag_AlphaPassIfEqual_Shift,
|
||||||
kSysFlag_AlphaPassIfGreater_Shift,
|
kSysFlag_AlphaPassIfGreater_Shift,
|
||||||
|
kSysFlag_AlphaToCoverage_Shift,
|
||||||
kSysFlag_DepthStencil_Shift,
|
kSysFlag_DepthStencil_Shift,
|
||||||
kSysFlag_DepthFloat24_Shift,
|
kSysFlag_DepthFloat24_Shift,
|
||||||
// Depth/stencil testing not done if DepthStencilRead is disabled, but
|
// Depth/stencil testing not done if DepthStencilRead is disabled, but
|
||||||
|
@ -83,6 +84,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
kSysFlag_AlphaPassIfLess = 1u << kSysFlag_AlphaPassIfLess_Shift,
|
kSysFlag_AlphaPassIfLess = 1u << kSysFlag_AlphaPassIfLess_Shift,
|
||||||
kSysFlag_AlphaPassIfEqual = 1u << kSysFlag_AlphaPassIfEqual_Shift,
|
kSysFlag_AlphaPassIfEqual = 1u << kSysFlag_AlphaPassIfEqual_Shift,
|
||||||
kSysFlag_AlphaPassIfGreater = 1u << kSysFlag_AlphaPassIfGreater_Shift,
|
kSysFlag_AlphaPassIfGreater = 1u << kSysFlag_AlphaPassIfGreater_Shift,
|
||||||
|
kSysFlag_AlphaToCoverage = 1u << kSysFlag_AlphaToCoverage_Shift,
|
||||||
kSysFlag_DepthStencil = 1u << kSysFlag_DepthStencil_Shift,
|
kSysFlag_DepthStencil = 1u << kSysFlag_DepthStencil_Shift,
|
||||||
kSysFlag_DepthFloat24 = 1u << kSysFlag_DepthFloat24_Shift,
|
kSysFlag_DepthFloat24 = 1u << kSysFlag_DepthFloat24_Shift,
|
||||||
kSysFlag_DepthPassIfLess = 1u << kSysFlag_DepthPassIfLess_Shift,
|
kSysFlag_DepthPassIfLess = 1u << kSysFlag_DepthPassIfLess_Shift,
|
||||||
|
@ -849,6 +851,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
// Converts four depth values to 24-bit unorm or float, depending on the flag
|
// Converts four depth values to 24-bit unorm or float, depending on the flag
|
||||||
// value.
|
// value.
|
||||||
void CompletePixelShader_DepthTo24Bit(uint32_t depths_temp);
|
void CompletePixelShader_DepthTo24Bit(uint32_t depths_temp);
|
||||||
|
// Applies the exponent bias from the constant to colors.
|
||||||
|
void CompletePixelShader_ApplyColorExpBias();
|
||||||
// This just converts the color output value from/to gamma space, not checking
|
// This just converts the color output value from/to gamma space, not checking
|
||||||
// any conditions.
|
// any conditions.
|
||||||
void CompletePixelShader_GammaCorrect(uint32_t color_temp, bool to_gamma);
|
void CompletePixelShader_GammaCorrect(uint32_t color_temp, bool to_gamma);
|
||||||
|
@ -858,15 +862,23 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
// there's no xe_shared_memory_uav, it's U0.
|
// there's no xe_shared_memory_uav, it's U0.
|
||||||
return is_depth_only_pixel_shader_ ? 0 : 1;
|
return is_depth_only_pixel_shader_ ? 0 : 1;
|
||||||
}
|
}
|
||||||
// Performs depth/stencil testing. After the test, coverage_out_temp will
|
// Extracts the coverage from SV_Coverage and performs alpha to coverage if
|
||||||
// contain non-zero values for samples that passed the depth/stencil test and
|
// necessary. Does not perform any depth/stencil testing. For covered samples,
|
||||||
// are included in SV_Coverage, and zeros for those who didn't.
|
// writes a non-zero component, for non-covered, writes 0. Discards the pixel
|
||||||
|
// if no coverage.
|
||||||
|
void CompletePixelShader_WriteToROV_GetCoverage(uint32_t coverage_out_temp);
|
||||||
|
// Performs depth/stencil testing. coverage_in_out_temp should contain the
|
||||||
|
// coverage mask obtained from CompletePixelShader_WriteToROV_GetCoverage to
|
||||||
|
// indicate which samples need to be depth/stencil-tested, and after the
|
||||||
|
// execution contains which covered samples have passed the depth/stencil test
|
||||||
|
// (non-zero components where covered, zero where not covered or failed the
|
||||||
|
// test).
|
||||||
//
|
//
|
||||||
// edram_dword_offset_temp.x must contain the address of the first
|
// edram_dword_offset_temp.x must contain the address of the first
|
||||||
// depth/stencil sample - .yzw will be overwritten by this function with the
|
// depth/stencil sample - .yzw will be overwritten by this function with the
|
||||||
// addresses for the other samples if depth/stencil is enabled.
|
// addresses for the other samples if depth/stencil is enabled.
|
||||||
void CompletePixelShader_WriteToROV_DepthStencil(
|
void CompletePixelShader_WriteToROV_DepthStencil(
|
||||||
uint32_t edram_dword_offset_temp, uint32_t coverage_out_temp);
|
uint32_t edram_dword_offset_temp, uint32_t coverage_in_out_temp);
|
||||||
// Extracts widths and offsets of the components in the lower or the upper
|
// Extracts widths and offsets of the components in the lower or the upper
|
||||||
// dword of a pixel from the format constants, for use as ibfe and bfi
|
// dword of a pixel from the format constants, for use as ibfe and bfi
|
||||||
// operands later.
|
// operands later.
|
||||||
|
|
|
@ -788,6 +788,34 @@ void DxbcShaderTranslator::CompletePixelShader_DepthTo24Bit(
|
||||||
PopSystemTemp(2);
|
PopSystemTemp(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DxbcShaderTranslator::CompletePixelShader_ApplyColorExpBias() {
|
||||||
|
if (is_depth_only_pixel_shader_) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// The constant contains 2.0^bias.
|
||||||
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
|
if (!writes_color_target(i)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
system_constants_used_ |= 1ull << kSysConst_ColorExpBias_Index;
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(system_temps_color_ + i);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(system_temps_color_ + i);
|
||||||
|
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, i, 3));
|
||||||
|
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||||
|
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||||
|
shader_code_.push_back(kSysConst_ColorExpBias_Vec);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::CompletePixelShader_GammaCorrect(uint32_t color_temp,
|
void DxbcShaderTranslator::CompletePixelShader_GammaCorrect(uint32_t color_temp,
|
||||||
bool to_gamma) {
|
bool to_gamma) {
|
||||||
uint32_t pieces_temp = PushSystemTemp();
|
uint32_t pieces_temp = PushSystemTemp();
|
||||||
|
@ -886,6 +914,11 @@ void DxbcShaderTranslator::CompletePixelShader_GammaCorrect(uint32_t color_temp,
|
||||||
}
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
|
void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
|
||||||
|
// TODO(Triang3l): Alpha to coverage.
|
||||||
|
|
||||||
|
// Apply the exponent bias before writing the color.
|
||||||
|
CompletePixelShader_ApplyColorExpBias();
|
||||||
|
|
||||||
// Convert to gamma space - this is incorrect, since it must be done after
|
// Convert to gamma space - this is incorrect, since it must be done after
|
||||||
// blending on the Xbox 360, but this is just one of many blending issues in
|
// blending on the Xbox 360, but this is just one of many blending issues in
|
||||||
// the RTV path.
|
// the RTV path.
|
||||||
|
@ -991,12 +1024,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
|
||||||
PopSystemTemp(2);
|
PopSystemTemp(2);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
|
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_GetCoverage(
|
||||||
uint32_t edram_dword_offset_temp, uint32_t coverage_out_temp) {
|
uint32_t coverage_out_temp) {
|
||||||
// Load the coverage before the depth/stencil test - if depth/stencil is not
|
// Load the coverage from the rasterizer. For 2x AA, use samples 0 and 3
|
||||||
// needed, this is still needed to determine which samples to write color for.
|
// (top-left and bottom-right), for 4x, use all, because ForcedSampleCount
|
||||||
// For 2x AA, use samples 0 and 3 (top-left and bottom-right), for 4x, use
|
// can't be 2.
|
||||||
// all, because ForcedSampleCount can't be 2.
|
|
||||||
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
|
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17));
|
||||||
|
@ -1036,6 +1068,208 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.uint_instruction_count;
|
++stat_.uint_instruction_count;
|
||||||
|
|
||||||
|
// Check if alpha to coverage can be done at all in this shader.
|
||||||
|
if (is_depth_only_pixel_shader_ || !writes_color_target(0)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t atoc_temp = PushSystemTemp();
|
||||||
|
|
||||||
|
// Extract the flag to check if alpha to coverage is enabled.
|
||||||
|
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
shader_code_.push_back(atoc_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3));
|
||||||
|
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||||
|
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||||
|
shader_code_.push_back(kSysConst_Flags_Vec);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
|
shader_code_.push_back(kSysFlag_AlphaToCoverage);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.uint_instruction_count;
|
||||||
|
|
||||||
|
// Check if alpha to coverage is enabled.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||||
|
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||||
|
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||||
|
shader_code_.push_back(atoc_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.dynamic_flow_control_count;
|
||||||
|
|
||||||
|
// According to tests on an Adreno 200 device (LG Optimus L7), without
|
||||||
|
// dithering, done by drawing 0.5x0.5 rectangles in different corners of four
|
||||||
|
// pixels in a quad to a multisampled GLSurfaceView, the coverage is the
|
||||||
|
// following for 4 samples:
|
||||||
|
// 0.25) [0.25, 0.5) [0.5, 0.75) [0.75, 1) [1
|
||||||
|
// -- -- -- -- --
|
||||||
|
// | | | | | #| |##| |##|
|
||||||
|
// | | |# | |# | |# | |##|
|
||||||
|
// -- -- -- -- --
|
||||||
|
// (VPOS near 0 on the top, near 1 on the bottom here.)
|
||||||
|
// For 2 samples, the top sample (closer to VPOS 0) is covered when alpha is
|
||||||
|
// in [0.5, 1).
|
||||||
|
// With these values, however, in Red Dead Redemption, almost all distant
|
||||||
|
// trees are transparent, and it's also weird that the values are so
|
||||||
|
// unbalanced (0.25-wide range with zero coverage, but only one point with
|
||||||
|
// full coverage), so ranges are halfway offset here.
|
||||||
|
// TODO(Triang3l): Find an Adreno device with dithering enabled, and where the
|
||||||
|
// numbers 3, 1, 0, 2 look meaningful for pixels in quads, and implement
|
||||||
|
// offsets.
|
||||||
|
// Choose the thresholds based on the sample count - first between 2 and 1
|
||||||
|
// samples.
|
||||||
|
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(atoc_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
|
||||||
|
kSysConst_SampleCountLog2_Comp + 1, 3));
|
||||||
|
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||||
|
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||||
|
shader_code_.push_back(kSysConst_SampleCountLog2_Vec);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||||
|
// 0.75
|
||||||
|
shader_code_.push_back(0x3F400000);
|
||||||
|
// 0.25
|
||||||
|
shader_code_.push_back(0x3E800000);
|
||||||
|
// NaN
|
||||||
|
shader_code_.push_back(0x7FC00000);
|
||||||
|
shader_code_.push_back(0x7FC00000);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||||
|
// 0.5
|
||||||
|
shader_code_.push_back(0x3F000000);
|
||||||
|
shader_code_.push_back(0x7FC00000);
|
||||||
|
shader_code_.push_back(0x7FC00000);
|
||||||
|
shader_code_.push_back(0x7FC00000);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.movc_instruction_count;
|
||||||
|
// Choose the thresholds based on the sample count - between 4 and 1/2 samples.
|
||||||
|
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(14));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(atoc_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
|
||||||
|
kSysConst_SampleCountLog2_Comp, 3));
|
||||||
|
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||||
|
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||||
|
shader_code_.push_back(kSysConst_SampleCountLog2_Vec);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||||
|
// 0.625
|
||||||
|
shader_code_.push_back(0x3F200000);
|
||||||
|
// 0.375
|
||||||
|
shader_code_.push_back(0x3EC00000);
|
||||||
|
// 0.125
|
||||||
|
shader_code_.push_back(0x3E000000);
|
||||||
|
// 0.875
|
||||||
|
shader_code_.push_back(0x3F600000);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(atoc_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.movc_instruction_count;
|
||||||
|
|
||||||
|
// Check if alpha of oC0 is greater than or equal to the threshold for each
|
||||||
|
// sample.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_GE) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(atoc_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
|
||||||
|
shader_code_.push_back(system_temps_color_);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(atoc_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
|
||||||
|
// Mask the sample coverage.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(coverage_out_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(coverage_out_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(atoc_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.uint_instruction_count;
|
||||||
|
|
||||||
|
// Check if the pixel can be discarded totally - merge masked coverage of
|
||||||
|
// samples 01 and 23.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1));
|
||||||
|
shader_code_.push_back(atoc_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(coverage_out_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01001110, 1));
|
||||||
|
shader_code_.push_back(coverage_out_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.uint_instruction_count;
|
||||||
|
|
||||||
|
// Check if the pixel can be discarded totally - merge masked coverage of
|
||||||
|
// samples 0|2 and 1|3.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
shader_code_.push_back(atoc_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||||
|
shader_code_.push_back(atoc_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
|
||||||
|
shader_code_.push_back(atoc_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.uint_instruction_count;
|
||||||
|
|
||||||
|
// Don't even do depth/stencil for pixels fully discarded by alpha to
|
||||||
|
// coverage.
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_RETC) |
|
||||||
|
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(D3D10_SB_INSTRUCTION_TEST_ZERO) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||||
|
shader_code_.push_back(atoc_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.dynamic_flow_control_count;
|
||||||
|
|
||||||
|
// Close the alpha to coverage check.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||||
|
++stat_.instruction_count;
|
||||||
|
|
||||||
|
// Release atoc_temp.
|
||||||
|
PopSystemTemp();
|
||||||
|
}
|
||||||
|
|
||||||
|
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
|
||||||
|
uint32_t edram_dword_offset_temp, uint32_t coverage_in_out_temp) {
|
||||||
uint32_t flags_temp = PushSystemTemp();
|
uint32_t flags_temp = PushSystemTemp();
|
||||||
|
|
||||||
// Check if anything related to depth/stencil needs to be done at all, and get
|
// Check if anything related to depth/stencil needs to be done at all, and get
|
||||||
|
@ -1503,7 +1737,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
||||||
shader_code_.push_back(coverage_out_temp);
|
shader_code_.push_back(coverage_in_out_temp);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.dynamic_flow_control_count;
|
++stat_.dynamic_flow_control_count;
|
||||||
|
|
||||||
|
@ -2272,10 +2506,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
shader_code_.push_back(coverage_out_temp);
|
shader_code_.push_back(coverage_in_out_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(coverage_out_temp);
|
shader_code_.push_back(coverage_in_out_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(depth_test_results_temp);
|
shader_code_.push_back(depth_test_results_temp);
|
||||||
|
@ -2362,7 +2596,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
||||||
shader_code_.push_back(coverage_out_temp);
|
shader_code_.push_back(coverage_in_out_temp);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.dynamic_flow_control_count;
|
++stat_.dynamic_flow_control_count;
|
||||||
|
|
||||||
|
@ -2399,10 +2633,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
shader_code_.push_back(coverage_out_temp);
|
shader_code_.push_back(coverage_in_out_temp);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(coverage_out_temp);
|
shader_code_.push_back(coverage_in_out_temp);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(depth_test_results_temp);
|
shader_code_.push_back(depth_test_results_temp);
|
||||||
|
@ -4553,6 +4787,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
// Perform all the depth/stencil-related operations, and get the samples that
|
// Perform all the depth/stencil-related operations, and get the samples that
|
||||||
// have passed the depth test.
|
// have passed the depth test.
|
||||||
uint32_t coverage_temp = PushSystemTemp();
|
uint32_t coverage_temp = PushSystemTemp();
|
||||||
|
CompletePixelShader_WriteToROV_GetCoverage(coverage_temp);
|
||||||
CompletePixelShader_WriteToROV_DepthStencil(edram_coord_pixel_depth_temp,
|
CompletePixelShader_WriteToROV_DepthStencil(edram_coord_pixel_depth_temp,
|
||||||
coverage_temp);
|
coverage_temp);
|
||||||
|
|
||||||
|
@ -4561,6 +4796,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
// ***************************************************************************
|
// ***************************************************************************
|
||||||
|
|
||||||
if (color_targets_written) {
|
if (color_targets_written) {
|
||||||
|
// Apply the exponent bias after having done alpha to coverage, which needs
|
||||||
|
// the original alpha from the shader.
|
||||||
|
CompletePixelShader_ApplyColorExpBias();
|
||||||
|
|
||||||
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index;
|
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index;
|
||||||
|
|
||||||
// Get if any sample is covered to exit earlier if all have failed the depth
|
// Get if any sample is covered to exit earlier if all have failed the depth
|
||||||
|
@ -5671,36 +5910,9 @@ void DxbcShaderTranslator::CompletePixelShader() {
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Apply color exponent bias (the constant contains 2.0^bias).
|
// Write the values to the render targets. Not applying the exponent bias yet
|
||||||
// Not sure if this should be done before alpha testing or after, but this is
|
// because the original 0 to 1 alpha value is needed for alpha to coverage,
|
||||||
// render target state, and alpha test works with values obtained mainly from
|
// which is done differently for ROV and RTV/DSV.
|
||||||
// textures (so conceptually closer to the shader rather than the
|
|
||||||
// output-merger in the pipeline).
|
|
||||||
// TODO(Triang3l): Verify whether the order of alpha testing and exponent bias
|
|
||||||
// is correct.
|
|
||||||
for (uint32_t i = 0; i < 4; ++i) {
|
|
||||||
if (!writes_color_target(i)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
system_constants_used_ |= 1ull << kSysConst_ColorExpBias_Index;
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
|
||||||
shader_code_.push_back(system_temps_color_ + i);
|
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
|
||||||
shader_code_.push_back(system_temps_color_ + i);
|
|
||||||
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, i, 3));
|
|
||||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
|
||||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
|
||||||
shader_code_.push_back(kSysConst_ColorExpBias_Vec);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.float_instruction_count;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write the values to the render targets.
|
|
||||||
if (edram_rov_used_) {
|
if (edram_rov_used_) {
|
||||||
CompletePixelShader_WriteToROV();
|
CompletePixelShader_WriteToROV();
|
||||||
} else {
|
} else {
|
||||||
|
|
Loading…
Reference in New Issue