[D3D12] Alpha to coverage (w/o dithering) with RTV, tiny AtoC fixes
This commit is contained in:
parent
0a9feb5eca
commit
9dd62c1e15
|
@ -625,13 +625,15 @@ bool PipelineCache::GetCurrentStateDescription(
|
|||
description_out.depth_func = 0b111;
|
||||
}
|
||||
|
||||
// Forced early Z if the shader allows that and alpha testing is disabled.
|
||||
// Forced early Z if the shader allows that and alpha testing and alpha to
|
||||
// coverage are disabled.
|
||||
// TODO(Triang3l): For memexporting shaders, possibly choose this according
|
||||
// to the early Z toggle in RB_DEPTHCONTROL (the correct behavior is still
|
||||
// unknown).
|
||||
if (pixel_shader != nullptr &&
|
||||
pixel_shader->GetForcedEarlyZShaderObject().size() != 0 &&
|
||||
(!(rb_colorcontrol & 0x8) || (rb_colorcontrol & 0x7) == 0x7)) {
|
||||
(!(rb_colorcontrol & 0x8) || (rb_colorcontrol & 0x7) == 0x7) &&
|
||||
!(rb_colorcontrol & 0x10)) {
|
||||
description_out.force_early_z = 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -856,6 +856,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// This just converts the color output value from/to gamma space, not checking
|
||||
// any conditions.
|
||||
void CompletePixelShader_GammaCorrect(uint32_t color_temp, bool to_gamma);
|
||||
// Discards the SSAA sample if it fails alpha to coverage.
|
||||
void CompletePixelShader_WriteToRTVs_AlphaToCoverage();
|
||||
void CompletePixelShader_WriteToRTVs();
|
||||
inline uint32_t GetEDRAMUAVIndex() const {
|
||||
// xe_edram is U1 when there's xe_shared_memory_uav which is U0, but when
|
||||
|
|
|
@ -913,10 +913,269 @@ void DxbcShaderTranslator::CompletePixelShader_GammaCorrect(uint32_t color_temp,
|
|||
PopSystemTemp();
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
|
||||
// TODO(Triang3l): Alpha to coverage.
|
||||
void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs_AlphaToCoverage() {
|
||||
if (is_depth_only_pixel_shader_ || !writes_color_target(0)) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Apply the exponent bias before writing the color.
|
||||
// Refer to CompletePixelShader_WriteToROV_GetCoverage for the description of
|
||||
// the alpha to coverage pattern used.
|
||||
|
||||
uint32_t atoc_temp = PushSystemTemp();
|
||||
|
||||
// Extract the flag to check if alpha to coverage is enabled.
|
||||
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3));
|
||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||
shader_code_.push_back(kSysConst_Flags_Vec);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(kSysFlag_AlphaToCoverage);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Check if alpha to coverage is enabled.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.dynamic_flow_control_count;
|
||||
|
||||
// Convert SSAA sample position to integer.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_FTOU) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_INPUT, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(uint32_t(InOutRegister::kPSInPosition));
|
||||
++stat_.instruction_count;
|
||||
++stat_.conversion_instruction_count;
|
||||
|
||||
// Get SSAA sample coordinates in the pixel.
|
||||
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
|
||||
kSysConst_SampleCountLog2_Comp |
|
||||
((kSysConst_SampleCountLog2_Comp + 1) << 2),
|
||||
3));
|
||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||
shader_code_.push_back(kSysConst_SampleCountLog2_Vec);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Get the sample index - 0 and 2 being the top ones, 1 and 3 being the bottom
|
||||
// ones (because at 2x SSAA, 1 is the bottom).
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(2);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Create a mask to choose the specific threshold to compare to.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IEQ) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(1);
|
||||
shader_code_.push_back(2);
|
||||
shader_code_.push_back(3);
|
||||
++stat_.instruction_count;
|
||||
++stat_.int_instruction_count;
|
||||
|
||||
uint32_t atoc_thresholds_temp = PushSystemTemp();
|
||||
|
||||
// Choose the thresholds based on the sample count - first between 2 and 1
|
||||
// samples.
|
||||
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(17));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(atoc_thresholds_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
|
||||
kSysConst_SampleCountLog2_Comp + 1, 3));
|
||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||
shader_code_.push_back(kSysConst_SampleCountLog2_Vec);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
// 0.25
|
||||
shader_code_.push_back(0x3E800000);
|
||||
// 0.75
|
||||
shader_code_.push_back(0x3F400000);
|
||||
// NaN - comparison always fails
|
||||
shader_code_.push_back(0x7FC00000);
|
||||
shader_code_.push_back(0x7FC00000);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
// 0.5
|
||||
shader_code_.push_back(0x3F000000);
|
||||
shader_code_.push_back(0x7FC00000);
|
||||
shader_code_.push_back(0x7FC00000);
|
||||
shader_code_.push_back(0x7FC00000);
|
||||
++stat_.instruction_count;
|
||||
++stat_.movc_instruction_count;
|
||||
// Choose the thresholds based on the sample count - between 4 or 1/2 samples.
|
||||
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(14));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(atoc_thresholds_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
|
||||
kSysConst_SampleCountLog2_Comp, 3));
|
||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||
shader_code_.push_back(kSysConst_SampleCountLog2_Vec);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
// 0.625
|
||||
shader_code_.push_back(0x3F200000);
|
||||
// 0.125
|
||||
shader_code_.push_back(0x3E000000);
|
||||
// 0.375
|
||||
shader_code_.push_back(0x3EC00000);
|
||||
// 0.875
|
||||
shader_code_.push_back(0x3F600000);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(atoc_thresholds_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.movc_instruction_count;
|
||||
|
||||
// Choose the threshold to compare the alpha to according to the current
|
||||
// sample index - mask.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(atoc_thresholds_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Release atoc_thresholds_temp.
|
||||
PopSystemTemp();
|
||||
|
||||
// Choose the threshold to compare the alpha to according to the current
|
||||
// sample index - select within pairs.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01001110, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Choose the threshold to compare the alpha to according to the current
|
||||
// sample index - combine pairs.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Compare the alpha to the threshold.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_GE) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
|
||||
shader_code_.push_back(system_temps_color_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
++stat_.instruction_count;
|
||||
++stat_.float_instruction_count;
|
||||
|
||||
// Discard the SSAA sample if it's not covered.
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DISCARD) |
|
||||
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(D3D10_SB_INSTRUCTION_TEST_ZERO) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(atoc_temp);
|
||||
++stat_.instruction_count;
|
||||
|
||||
// Close the alpha to coverage check.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||
++stat_.instruction_count;
|
||||
|
||||
// Release atoc_temp.
|
||||
PopSystemTemp();
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() {
|
||||
// Check if this sample needs to be discarded by alpha to coverage.
|
||||
CompletePixelShader_WriteToRTVs_AlphaToCoverage();
|
||||
|
||||
// Apply the exponent bias after alpha to coverage because it needs the
|
||||
// unbiased alpha from the shader.
|
||||
CompletePixelShader_ApplyColorExpBias();
|
||||
|
||||
// Convert to gamma space - this is incorrect, since it must be done after
|
||||
|
@ -1139,11 +1398,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_GetCoverage(
|
|||
shader_code_.push_back(kSysConst_SampleCountLog2_Vec);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
// 0.75
|
||||
shader_code_.push_back(0x3F400000);
|
||||
// 0.25
|
||||
shader_code_.push_back(0x3E800000);
|
||||
// NaN
|
||||
// 0.75
|
||||
shader_code_.push_back(0x3F400000);
|
||||
// NaN - comparison always fails
|
||||
shader_code_.push_back(0x7FC00000);
|
||||
shader_code_.push_back(0x7FC00000);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
|
|
Loading…
Reference in New Issue