[D3D12] ROV: Alpha to coverage (without dithering)

This commit is contained in:
Triang3l 2019-01-21 21:28:26 +03:00
parent 6f5d616372
commit 0a9feb5eca
3 changed files with 274 additions and 46 deletions

View File

@ -2028,6 +2028,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
DxbcShaderTranslator::kSysFlag_AlphaPassIfEqual | DxbcShaderTranslator::kSysFlag_AlphaPassIfEqual |
DxbcShaderTranslator::kSysFlag_AlphaPassIfGreater; DxbcShaderTranslator::kSysFlag_AlphaPassIfGreater;
} }
// Alpha to coverage.
if (rb_colorcontrol & 0x10) {
flags |= DxbcShaderTranslator::kSysFlag_AlphaToCoverage;
}
// Gamma writing. // Gamma writing.
if (((regs[XE_GPU_REG_RB_COLOR_INFO].u32 >> 16) & 0xF) == if (((regs[XE_GPU_REG_RB_COLOR_INFO].u32 >> 16) & 0xF) ==
uint32_t(ColorRenderTargetFormat::k_8_8_8_8_GAMMA)) { uint32_t(ColorRenderTargetFormat::k_8_8_8_8_GAMMA)) {

View File

@ -56,6 +56,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
kSysFlag_AlphaPassIfLess_Shift, kSysFlag_AlphaPassIfLess_Shift,
kSysFlag_AlphaPassIfEqual_Shift, kSysFlag_AlphaPassIfEqual_Shift,
kSysFlag_AlphaPassIfGreater_Shift, kSysFlag_AlphaPassIfGreater_Shift,
kSysFlag_AlphaToCoverage_Shift,
kSysFlag_DepthStencil_Shift, kSysFlag_DepthStencil_Shift,
kSysFlag_DepthFloat24_Shift, kSysFlag_DepthFloat24_Shift,
// Depth/stencil testing not done if DepthStencilRead is disabled, but // Depth/stencil testing not done if DepthStencilRead is disabled, but
@ -83,6 +84,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
kSysFlag_AlphaPassIfLess = 1u << kSysFlag_AlphaPassIfLess_Shift, kSysFlag_AlphaPassIfLess = 1u << kSysFlag_AlphaPassIfLess_Shift,
kSysFlag_AlphaPassIfEqual = 1u << kSysFlag_AlphaPassIfEqual_Shift, kSysFlag_AlphaPassIfEqual = 1u << kSysFlag_AlphaPassIfEqual_Shift,
kSysFlag_AlphaPassIfGreater = 1u << kSysFlag_AlphaPassIfGreater_Shift, kSysFlag_AlphaPassIfGreater = 1u << kSysFlag_AlphaPassIfGreater_Shift,
kSysFlag_AlphaToCoverage = 1u << kSysFlag_AlphaToCoverage_Shift,
kSysFlag_DepthStencil = 1u << kSysFlag_DepthStencil_Shift, kSysFlag_DepthStencil = 1u << kSysFlag_DepthStencil_Shift,
kSysFlag_DepthFloat24 = 1u << kSysFlag_DepthFloat24_Shift, kSysFlag_DepthFloat24 = 1u << kSysFlag_DepthFloat24_Shift,
kSysFlag_DepthPassIfLess = 1u << kSysFlag_DepthPassIfLess_Shift, kSysFlag_DepthPassIfLess = 1u << kSysFlag_DepthPassIfLess_Shift,
@ -849,6 +851,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
// Converts four depth values to 24-bit unorm or float, depending on the flag // Converts four depth values to 24-bit unorm or float, depending on the flag
// value. // value.
void CompletePixelShader_DepthTo24Bit(uint32_t depths_temp); void CompletePixelShader_DepthTo24Bit(uint32_t depths_temp);
// Applies the exponent bias from the constant to colors.
void CompletePixelShader_ApplyColorExpBias();
// This just converts the color output value from/to gamma space, not checking // This just converts the color output value from/to gamma space, not checking
// any conditions. // any conditions.
void CompletePixelShader_GammaCorrect(uint32_t color_temp, bool to_gamma); void CompletePixelShader_GammaCorrect(uint32_t color_temp, bool to_gamma);
@ -858,15 +862,23 @@ class DxbcShaderTranslator : public ShaderTranslator {
// there's no xe_shared_memory_uav, it's U0. // there's no xe_shared_memory_uav, it's U0.
return is_depth_only_pixel_shader_ ? 0 : 1; return is_depth_only_pixel_shader_ ? 0 : 1;
} }
// Performs depth/stencil testing. After the test, coverage_out_temp will // Extracts the coverage from SV_Coverage and performs alpha to coverage if
// contain non-zero values for samples that passed the depth/stencil test and // necessary. Does not perform any depth/stencil testing. For covered samples,
// are included in SV_Coverage, and zeros for those who didn't. // writes a non-zero component, for non-covered, writes 0. Discards the pixel
// if no coverage.
void CompletePixelShader_WriteToROV_GetCoverage(uint32_t coverage_out_temp);
// Performs depth/stencil testing. coverage_in_out_temp should contain the
// coverage mask obtained from CompletePixelShader_WriteToROV_GetCoverage to
// indicate which samples need to be depth/stencil-tested, and after the
// execution contains which covered samples have passed the depth/stencil test
// (non-zero components where covered, zero where not covered or failed the
// test).
// //
// edram_dword_offset_temp.x must contain the address of the first // edram_dword_offset_temp.x must contain the address of the first
// depth/stencil sample - .yzw will be overwritten by this function with the // depth/stencil sample - .yzw will be overwritten by this function with the
// addresses for the other samples if depth/stencil is enabled. // addresses for the other samples if depth/stencil is enabled.
void CompletePixelShader_WriteToROV_DepthStencil( void CompletePixelShader_WriteToROV_DepthStencil(
uint32_t edram_dword_offset_temp, uint32_t coverage_out_temp); uint32_t edram_dword_offset_temp, uint32_t coverage_in_out_temp);
// Extracts widths and offsets of the components in the lower or the upper // Extracts widths and offsets of the components in the lower or the upper
// dword of a pixel from the format constants, for use as ibfe and bfi // dword of a pixel from the format constants, for use as ibfe and bfi
// operands later. // operands later.

View File

@ -788,6 +788,34 @@ void DxbcShaderTranslator::CompletePixelShader_DepthTo24Bit(
PopSystemTemp(2); PopSystemTemp(2);
} }
// Emits one multiplication per color output written by the shader, scaling the
// color temporary register by the exponent bias factor taken from the system
// constants. Emits nothing for depth-only pixel shaders.
void DxbcShaderTranslator::CompletePixelShader_ApplyColorExpBias() {
  if (is_depth_only_pixel_shader_) {
    return;
  }
  for (uint32_t rt_index = 0; rt_index < 4; ++rt_index) {
    if (writes_color_target(rt_index)) {
      // The constant is marked as used only when a multiplication is actually
      // emitted for at least one render target.
      system_constants_used_ |= 1ull << kSysConst_ColorExpBias_Index;
      // mul color.xyzw, color.xyzw, bias - the bias operand is component
      // rt_index of the color exponent bias constant (which holds 2.0^bias),
      // replicated.
      // 9 tokens: opcode, 2 for the destination, 2 for the color source, 4 for
      // the constant buffer source.
      shader_code_.push_back(
          ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) |
          ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
      // Destination - all four components of the color temporary.
      shader_code_.push_back(
          EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
      shader_code_.push_back(system_temps_color_ + rt_index);
      // First source - the same color temporary, unswizzled.
      shader_code_.push_back(EncodeVectorSwizzledOperand(
          D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
      shader_code_.push_back(system_temps_color_ + rt_index);
      // Second source - the bias factor from the system constant buffer.
      shader_code_.push_back(EncodeVectorReplicatedOperand(
          D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_index, 3));
      shader_code_.push_back(cbuffer_index_system_constants_);
      shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
      shader_code_.push_back(kSysConst_ColorExpBias_Vec);
      ++stat_.instruction_count;
      ++stat_.float_instruction_count;
    }
  }
}
void DxbcShaderTranslator::CompletePixelShader_GammaCorrect(uint32_t color_temp, void DxbcShaderTranslator::CompletePixelShader_GammaCorrect(uint32_t color_temp,
bool to_gamma) { bool to_gamma) {
uint32_t pieces_temp = PushSystemTemp(); uint32_t pieces_temp = PushSystemTemp();
@ -886,6 +914,11 @@ void DxbcShaderTranslator::CompletePixelShader_GammaCorrect(uint32_t color_temp,
} }
void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() { void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
// TODO(Triang3l): Alpha to coverage.
// Apply the exponent bias before writing the color.
CompletePixelShader_ApplyColorExpBias();
// Convert to gamma space - this is incorrect, since it must be done after // Convert to gamma space - this is incorrect, since it must be done after
// blending on the Xbox 360, but this is just one of many blending issues in // blending on the Xbox 360, but this is just one of many blending issues in
// the RTV path. // the RTV path.
@ -991,12 +1024,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
PopSystemTemp(2); PopSystemTemp(2);
} }
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil( void DxbcShaderTranslator::CompletePixelShader_WriteToROV_GetCoverage(
uint32_t edram_dword_offset_temp, uint32_t coverage_out_temp) { uint32_t coverage_out_temp) {
// Load the coverage before the depth/stencil test - if depth/stencil is not // Load the coverage from the rasterizer. For 2x AA, use samples 0 and 3
// needed, this is still needed to determine which samples to write color for. // (top-left and bottom-right), for 4x, use all, because ForcedSampleCount
// For 2x AA, use samples 0 and 3 (top-left and bottom-right), for 4x, use // can't be 2.
// all, because ForcedSampleCount can't be 2.
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17));
@ -1036,6 +1068,208 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.uint_instruction_count; ++stat_.uint_instruction_count;
// Check if alpha to coverage can be done at all in this shader.
if (is_depth_only_pixel_shader_ || !writes_color_target(0)) {
return;
}
uint32_t atoc_temp = PushSystemTemp();
// Extract the flag to check if alpha to coverage is enabled.
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(atoc_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_Flags_Vec);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(kSysFlag_AlphaToCoverage);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Check if alpha to coverage is enabled.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(atoc_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
// According to tests on an Adreno 200 device (LG Optimus L7), without
// dithering, done by drawing 0.5x0.5 rectangles in different corners of four
// pixels in a quad to a multisampled GLSurfaceView, the coverage is the
// following for 4 samples:
//  0.25)    [0.25, 0.5)    [0.5, 0.75)    [0.75, 1)     [1
//   --          --             --            --         --
//  |  |        |  |           | #|          |##|       |##|
//  |  |        |# |           |# |          |# |       |##|
//   --          --             --            --         --
// (VPOS near 0 on the top, near 1 on the bottom here.)
// For 2 samples, the top sample (closer to VPOS 0) is covered when alpha is
// in [0.5, 1).
// With these values, however, in Red Dead Redemption, almost all distant
// trees are transparent, and it's also weird that the values are so
// unbalanced (0.25-wide range with zero coverage, but only one point with
// full coverage), so ranges are halfway offset here.
// TODO(Triang3l): Find an Adreno device with dithering enabled, and where the
// numbers 3, 1, 0, 2 look meaningful for pixels in quads, and implement
// offsets.
// Choose the thresholds based on the sample count - first between 2 and 1
// samples.
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(atoc_temp);
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
kSysConst_SampleCountLog2_Comp + 1, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_SampleCountLog2_Vec);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
// 0.75
shader_code_.push_back(0x3F400000);
// 0.25
shader_code_.push_back(0x3E800000);
// NaN
shader_code_.push_back(0x7FC00000);
shader_code_.push_back(0x7FC00000);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
// 0.5
shader_code_.push_back(0x3F000000);
shader_code_.push_back(0x7FC00000);
shader_code_.push_back(0x7FC00000);
shader_code_.push_back(0x7FC00000);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Choose the thresholds based on the sample count - 4 samples versus 1 or 2
// samples (for 1 or 2, keep the thresholds selected above).
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(14));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(atoc_temp);
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
kSysConst_SampleCountLog2_Comp, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_SampleCountLog2_Vec);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
// 0.625
shader_code_.push_back(0x3F200000);
// 0.375
shader_code_.push_back(0x3EC00000);
// 0.125
shader_code_.push_back(0x3E000000);
// 0.875
shader_code_.push_back(0x3F600000);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(atoc_temp);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Check if alpha of oC0 is greater than the threshold for each sample or
// equal to it.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_GE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(atoc_temp);
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
shader_code_.push_back(system_temps_color_);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(atoc_temp);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Mask the sample coverage.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(coverage_out_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(coverage_out_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(atoc_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Check if the pixel can be discarded totally - first, merge the masked
// coverage of samples 0 and 1 with that of samples 2 and 3 (x | z into x,
// y | w into y).
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1));
shader_code_.push_back(atoc_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(coverage_out_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01001110, 1));
shader_code_.push_back(coverage_out_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Check if the pixel can be discarded totally - merge masked coverage of
// samples 0|2 and 1|3.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(atoc_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(atoc_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(atoc_temp);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Don't even do depth/stencil for pixels fully discarded by alpha to
// coverage.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_RETC) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(D3D10_SB_INSTRUCTION_TEST_ZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(atoc_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
// Close the alpha to coverage check.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
// Release atoc_temp.
PopSystemTemp();
}
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
uint32_t edram_dword_offset_temp, uint32_t coverage_in_out_temp) {
uint32_t flags_temp = PushSystemTemp(); uint32_t flags_temp = PushSystemTemp();
// Check if anything related to depth/stencil needs to be done at all, and get // Check if anything related to depth/stencil needs to be done at all, and get
@ -1503,7 +1737,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
shader_code_.push_back(coverage_out_temp); shader_code_.push_back(coverage_in_out_temp);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.dynamic_flow_control_count; ++stat_.dynamic_flow_control_count;
@ -2272,10 +2506,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(coverage_out_temp); shader_code_.push_back(coverage_in_out_temp);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(coverage_out_temp); shader_code_.push_back(coverage_in_out_temp);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(depth_test_results_temp); shader_code_.push_back(depth_test_results_temp);
@ -2362,7 +2596,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
shader_code_.push_back(coverage_out_temp); shader_code_.push_back(coverage_in_out_temp);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.dynamic_flow_control_count; ++stat_.dynamic_flow_control_count;
@ -2399,10 +2633,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(coverage_out_temp); shader_code_.push_back(coverage_in_out_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(coverage_out_temp); shader_code_.push_back(coverage_in_out_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(depth_test_results_temp); shader_code_.push_back(depth_test_results_temp);
@ -4553,6 +4787,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
// Perform all the depth/stencil-related operations, and get the samples that // Perform all the depth/stencil-related operations, and get the samples that
// have passed the depth test. // have passed the depth test.
uint32_t coverage_temp = PushSystemTemp(); uint32_t coverage_temp = PushSystemTemp();
CompletePixelShader_WriteToROV_GetCoverage(coverage_temp);
CompletePixelShader_WriteToROV_DepthStencil(edram_coord_pixel_depth_temp, CompletePixelShader_WriteToROV_DepthStencil(edram_coord_pixel_depth_temp,
coverage_temp); coverage_temp);
@ -4561,6 +4796,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
// *************************************************************************** // ***************************************************************************
if (color_targets_written) { if (color_targets_written) {
// Apply the exponent bias after having done alpha to coverage, which needs
// the original alpha from the shader.
CompletePixelShader_ApplyColorExpBias();
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index; system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index;
// Get if any sample is covered to exit earlier if all have failed the depth // Get if any sample is covered to exit earlier if all have failed the depth
@ -5671,36 +5910,9 @@ void DxbcShaderTranslator::CompletePixelShader() {
PopSystemTemp(); PopSystemTemp();
} }
// Apply color exponent bias (the constant contains 2.0^bias). // Write the values to the render targets. Not applying the exponent bias yet
// Not sure if this should be done before alpha testing or after, but this is // because the original 0 to 1 alpha value is needed for alpha to coverage,
// render target state, and alpha test works with values obtained mainly from // which is done differently for ROV and RTV/DSV.
// textures (so conceptually closer to the shader rather than the
// output-merger in the pipeline).
// TODO(Triang3l): Verify whether the order of alpha testing and exponent bias
// is correct.
for (uint32_t i = 0; i < 4; ++i) {
if (!writes_color_target(i)) {
continue;
}
system_constants_used_ |= 1ull << kSysConst_ColorExpBias_Index;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(system_temps_color_ + i);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temps_color_ + i);
shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, i, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
shader_code_.push_back(kSysConst_ColorExpBias_Vec);
++stat_.instruction_count;
++stat_.float_instruction_count;
}
// Write the values to the render targets.
if (edram_rov_used_) { if (edram_rov_used_) {
CompletePixelShader_WriteToROV(); CompletePixelShader_WriteToROV();
} else { } else {