diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 01c94faf5..456f93a1e 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -2028,6 +2028,10 @@ void D3D12CommandProcessor::UpdateSystemConstantValues( DxbcShaderTranslator::kSysFlag_AlphaPassIfEqual | DxbcShaderTranslator::kSysFlag_AlphaPassIfGreater; } + // Alpha to coverage. + if (rb_colorcontrol & 0x10) { + flags |= DxbcShaderTranslator::kSysFlag_AlphaToCoverage; + } // Gamma writing. if (((regs[XE_GPU_REG_RB_COLOR_INFO].u32 >> 16) & 0xF) == uint32_t(ColorRenderTargetFormat::k_8_8_8_8_GAMMA)) { diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 50452243e..3c887e065 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -56,6 +56,7 @@ class DxbcShaderTranslator : public ShaderTranslator { kSysFlag_AlphaPassIfLess_Shift, kSysFlag_AlphaPassIfEqual_Shift, kSysFlag_AlphaPassIfGreater_Shift, + kSysFlag_AlphaToCoverage_Shift, kSysFlag_DepthStencil_Shift, kSysFlag_DepthFloat24_Shift, // Depth/stencil testing not done if DepthStencilRead is disabled, but @@ -83,6 +84,7 @@ class DxbcShaderTranslator : public ShaderTranslator { kSysFlag_AlphaPassIfLess = 1u << kSysFlag_AlphaPassIfLess_Shift, kSysFlag_AlphaPassIfEqual = 1u << kSysFlag_AlphaPassIfEqual_Shift, kSysFlag_AlphaPassIfGreater = 1u << kSysFlag_AlphaPassIfGreater_Shift, + kSysFlag_AlphaToCoverage = 1u << kSysFlag_AlphaToCoverage_Shift, kSysFlag_DepthStencil = 1u << kSysFlag_DepthStencil_Shift, kSysFlag_DepthFloat24 = 1u << kSysFlag_DepthFloat24_Shift, kSysFlag_DepthPassIfLess = 1u << kSysFlag_DepthPassIfLess_Shift, @@ -849,6 +851,8 @@ class DxbcShaderTranslator : public ShaderTranslator { // Converts four depth values to 24-bit unorm or float, depending on the flag // value. void CompletePixelShader_DepthTo24Bit(uint32_t depths_temp); + // Applies the exponent bias from the constant to colors. + void CompletePixelShader_ApplyColorExpBias(); // This just converts the color output value from/to gamma space, not checking // any conditions. void CompletePixelShader_GammaCorrect(uint32_t color_temp, bool to_gamma); @@ -858,15 +862,23 @@ class DxbcShaderTranslator : public ShaderTranslator { // there's no xe_shared_memory_uav, it's U0. return is_depth_only_pixel_shader_ ? 0 : 1; } - // Performs depth/stencil testing. After the test, coverage_out_temp will - // contain non-zero values for samples that passed the depth/stencil test and - // are included in SV_Coverage, and zeros for those who didn't. + // Extracts the coverage from SV_Coverage and performs alpha to coverage if + // necessary. Does not perform any depth/stencil testing. For covered samples, + // writes a non-zero component, for non-covered, writes 0. Discards the pixel + // if no coverage. + void CompletePixelShader_WriteToROV_GetCoverage(uint32_t coverage_out_temp); + // Performs depth/stencil testing. coverage_in_out_temp should contain the + // coverage mask obtained from CompletePixelShader_WriteToROV_GetCoverage to + // indicate which samples need to be depth/stencil-tested, and after the + // execution contains which covered samples have passed the depth/stencil test + // (non-zero components where covered, zero where not covered or failed the + // test). // // edram_dword_offset_temp.x must contain the address of the first // depth/stencil sample - .yzw will be overwritten by this function with the // addresses for the other samples if depth/stencil is enabled. void CompletePixelShader_WriteToROV_DepthStencil( - uint32_t edram_dword_offset_temp, uint32_t coverage_out_temp); + uint32_t edram_dword_offset_temp, uint32_t coverage_in_out_temp); // Extracts widths and offsets of the components in the lower or the upper // dword of a pixel from the format constants, for use as ibfe and bfi // operands later. diff --git a/src/xenia/gpu/dxbc_shader_translator_om.cc b/src/xenia/gpu/dxbc_shader_translator_om.cc index 2fb5a399a..5eed7d52c 100644 --- a/src/xenia/gpu/dxbc_shader_translator_om.cc +++ b/src/xenia/gpu/dxbc_shader_translator_om.cc @@ -788,6 +788,34 @@ void DxbcShaderTranslator::CompletePixelShader_DepthTo24Bit( PopSystemTemp(2); } +void DxbcShaderTranslator::CompletePixelShader_ApplyColorExpBias() { + if (is_depth_only_pixel_shader_) { + return; + } + // The constant contains 2.0^bias. + for (uint32_t i = 0; i < 4; ++i) { + if (!writes_color_target(i)) { + continue; + } + system_constants_used_ |= 1ull << kSysConst_ColorExpBias_Index; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(system_temps_color_ + i); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(system_temps_color_ + i); + shader_code_.push_back(EncodeVectorReplicatedOperand( + D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, i, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + shader_code_.push_back(kSysConst_ColorExpBias_Vec); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } +} + void DxbcShaderTranslator::CompletePixelShader_GammaCorrect(uint32_t color_temp, bool to_gamma) { uint32_t pieces_temp = PushSystemTemp(); @@ -886,6 +914,11 @@ void DxbcShaderTranslator::CompletePixelShader_GammaCorrect(uint32_t color_temp, } void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() { + // TODO(Triang3l): Alpha to coverage. + + // Apply the exponent bias before writing the color. + CompletePixelShader_ApplyColorExpBias(); + // Convert to gamma space - this is incorrect, since it must be done after // blending on the Xbox 360, but this is just one of many blending issues in // the RTV path. @@ -991,12 +1024,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() { PopSystemTemp(2); } -void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil( - uint32_t edram_dword_offset_temp, uint32_t coverage_out_temp) { - // Load the coverage before the depth/stencil test - if depth/stencil is not - // needed, this is still needed to determine which samples to write color for. - // For 2x AA, use samples 0 and 3 (top-left and bottom-right), for 4x, use - // all, because ForcedSampleCount can't be 2. +void DxbcShaderTranslator::CompletePixelShader_WriteToROV_GetCoverage( + uint32_t coverage_out_temp) { + // Load the coverage from the rasterizer. For 2x AA, use samples 0 and 3 + // (top-left and bottom-right), for 4x, use all, because ForcedSampleCount + // can't be 2. system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17)); @@ -1036,6 +1068,208 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil( ++stat_.instruction_count; ++stat_.uint_instruction_count; + // Check if alpha to coverage can be done at all in this shader. + if (is_depth_only_pixel_shader_ || !writes_color_target(0)) { + return; + } + + uint32_t atoc_temp = PushSystemTemp(); + + // Extract the flag to check if alpha to coverage is enabled. + system_constants_used_ |= 1ull << kSysConst_Flags_Index; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(atoc_temp); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + shader_code_.push_back(kSysConst_Flags_Vec); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(kSysFlag_AlphaToCoverage); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Check if alpha to coverage is enabled. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( + D3D10_SB_INSTRUCTION_TEST_NONZERO) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(atoc_temp); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + + // According to tests on an Adreno 200 device (LG Optimus L7), without + // dithering, done by drawing 0.5x0.5 rectangles in different corners of four + // pixels in a quad to a multisampled GLSurfaceView, the coverage is the + // following for 4 samples: + // 0.25) [0.25, 0.5) [0.5, 0.75) [0.75, 1) [1 + // -- -- -- -- -- + // | | | | | #| |##| |##| + // | | |# | |# | |# | |##| + // -- -- -- -- -- + // (VPOS near 0 on the top, near 1 on the bottom here.) + // For 2 samples, the top sample (closer to VPOS 0) is covered when alpha is + // in [0.5, 1). + // With these values, however, in Red Dead Redemption, almost all distant + // trees are transparent, and it's also weird that the values are so + // unbalanced (0.25-wide range with zero coverage, but only one point with + // full coverage), so ranges are halfway offset here. + // TODO(Triang3l): Find an Adreno device with dithering enabled, and where the + // numbers 3, 1, 0, 2 look meaningful for pixels in quads, and implement + // offsets. + // Choose the thresholds based on the sample count - first between 2 and 1 + // samples. + system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(atoc_temp); + shader_code_.push_back( + EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, + kSysConst_SampleCountLog2_Comp + 1, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + shader_code_.push_back(kSysConst_SampleCountLog2_Vec); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + // 0.75 + shader_code_.push_back(0x3F400000); + // 0.25 + shader_code_.push_back(0x3E800000); + // NaN + shader_code_.push_back(0x7FC00000); + shader_code_.push_back(0x7FC00000); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + // 0.5 + shader_code_.push_back(0x3F000000); + shader_code_.push_back(0x7FC00000); + shader_code_.push_back(0x7FC00000); + shader_code_.push_back(0x7FC00000); + ++stat_.instruction_count; + ++stat_.movc_instruction_count; + // Choose the thresholds based on the sample count - between 4 or 1/2 samples. + system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(14)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(atoc_temp); + shader_code_.push_back( + EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, + kSysConst_SampleCountLog2_Comp, 3)); + shader_code_.push_back(cbuffer_index_system_constants_); + shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); + shader_code_.push_back(kSysConst_SampleCountLog2_Vec); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + // 0.625 + shader_code_.push_back(0x3F200000); + // 0.375 + shader_code_.push_back(0x3EC00000); + // 0.125 + shader_code_.push_back(0x3E000000); + // 0.875 + shader_code_.push_back(0x3F600000); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(atoc_temp); + ++stat_.instruction_count; + ++stat_.movc_instruction_count; + + // Check if alpha of oC0 is greater than the threshold for each sample or + // equal to it. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_GE) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(atoc_temp); + shader_code_.push_back( + EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); + shader_code_.push_back(system_temps_color_); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(atoc_temp); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + + // Mask the sample coverage. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(coverage_out_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(coverage_out_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(atoc_temp); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Check if the pixel can be discarded totally - merge masked coverage of + // samples 01 and 23. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1)); + shader_code_.push_back(atoc_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(coverage_out_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01001110, 1)); + shader_code_.push_back(coverage_out_temp); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Check if the pixel can be discarded totally - merge masked coverage of + // samples 0|2 and 1|3. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(atoc_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(atoc_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + shader_code_.push_back(atoc_temp); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Don't even do depth/stencil for pixels fully discarded by alpha to + // coverage. + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_RETC) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(D3D10_SB_INSTRUCTION_TEST_ZERO) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(atoc_temp); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + + // Close the alpha to coverage check. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + + // Release atoc_temp. + PopSystemTemp(); +} + +void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil( + uint32_t edram_dword_offset_temp, uint32_t coverage_in_out_temp) { uint32_t flags_temp = PushSystemTemp(); // Check if anything related to depth/stencil needs to be done at all, and get @@ -1503,7 +1737,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1)); - shader_code_.push_back(coverage_out_temp); + shader_code_.push_back(coverage_in_out_temp); ++stat_.instruction_count; ++stat_.dynamic_flow_control_count; @@ -2272,10 +2506,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); - shader_code_.push_back(coverage_out_temp); + shader_code_.push_back(coverage_in_out_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(coverage_out_temp); + shader_code_.push_back(coverage_in_out_temp); shader_code_.push_back( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); shader_code_.push_back(depth_test_results_temp); @@ -2362,7 +2596,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1)); - shader_code_.push_back(coverage_out_temp); + shader_code_.push_back(coverage_in_out_temp); ++stat_.instruction_count; ++stat_.dynamic_flow_control_count; @@ -2399,10 +2633,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_DepthStencil( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); - shader_code_.push_back(coverage_out_temp); + shader_code_.push_back(coverage_in_out_temp); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(coverage_out_temp); + shader_code_.push_back(coverage_in_out_temp); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); shader_code_.push_back(depth_test_results_temp); @@ -4553,6 +4787,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // Perform all the depth/stencil-related operations, and get the samples that // have passed the depth test. uint32_t coverage_temp = PushSystemTemp(); + CompletePixelShader_WriteToROV_GetCoverage(coverage_temp); CompletePixelShader_WriteToROV_DepthStencil(edram_coord_pixel_depth_temp, coverage_temp); @@ -4561,6 +4796,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // *************************************************************************** if (color_targets_written) { + // Apply the exponent bias after having done alpha to coverage, which needs + // the original alpha from the shader. + CompletePixelShader_ApplyColorExpBias(); + system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index; // Get if any sample is covered to exit earlier if all have failed the depth @@ -5671,36 +5910,9 @@ void DxbcShaderTranslator::CompletePixelShader() { PopSystemTemp(); } - // Apply color exponent bias (the constant contains 2.0^bias). - // Not sure if this should be done before alpha testing or after, but this is - // render target state, and alpha test works with values obtained mainly from - // textures (so conceptually closer to the shader rather than the - // output-merger in the pipeline). - // TODO(Triang3l): Verify whether the order of alpha testing and exponent bias - // is correct. - for (uint32_t i = 0; i < 4; ++i) { - if (!writes_color_target(i)) { - continue; - } - system_constants_used_ |= 1ull << kSysConst_ColorExpBias_Index; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); - shader_code_.push_back(system_temps_color_ + i); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(system_temps_color_ + i); - shader_code_.push_back(EncodeVectorReplicatedOperand( - D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, i, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_ColorExpBias_Vec); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - } - - // Write the values to the render targets. + // Write the values to the render targets. Not applying the exponent bias yet + // because the original 0 to 1 alpha value is needed for alpha to coverage, + // which is done differently for ROV and RTV/DSV. if (edram_rov_used_) { CompletePixelShader_WriteToROV(); } else {