[D3D12] ROV: Clamp values participating in blending
This commit is contained in:
parent
2d56c9ae30
commit
5f2c4500d6
|
@ -2385,6 +2385,48 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_ExtractBlendScales(
|
|||
++stat_.conversion_instruction_count;
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_ClampColor(
|
||||
uint32_t rt_index, uint32_t color_temp) {
|
||||
uint32_t rt_pair_index = rt_index >> 1;
|
||||
uint32_t rt_pair_swizzle = rt_index & 1 ? 0b11101010 : 0b01000000;
|
||||
|
||||
system_constants_used_ |= (1ull << kSysConst_EDRAMStoreMinRT01_Index)
|
||||
<< rt_pair_index;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(color_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(color_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_pair_swizzle, 3));
|
||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||
shader_code_.push_back(kSysConst_EDRAMStoreMinRT01_Vec + rt_pair_index);
|
||||
++stat_.instruction_count;
|
||||
++stat_.float_instruction_count;
|
||||
|
||||
system_constants_used_ |= (1ull << kSysConst_EDRAMStoreMaxRT01_Index)
|
||||
<< rt_pair_index;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(color_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(color_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_pair_swizzle, 3));
|
||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||
shader_code_.push_back(kSysConst_EDRAMStoreMaxRT01_Vec + rt_pair_index);
|
||||
++stat_.instruction_count;
|
||||
++stat_.float_instruction_count;
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||
uint32_t rt_index, uint32_t src_color_and_output_temp,
|
||||
uint32_t dest_color_temp) {
|
||||
|
@ -2554,6 +2596,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
|||
++stat_.float_instruction_count;
|
||||
}
|
||||
|
||||
// Clamp the factors.
|
||||
// https://stackoverflow.com/questions/30153911/untangling-when-and-what-values-are-clamped-in-opengl-blending-on-different-rend
|
||||
CompletePixelShader_WriteToROV_ClampColor(rt_index, src_factor_temp);
|
||||
CompletePixelShader_WriteToROV_ClampColor(rt_index, dest_factor_temp);
|
||||
|
||||
// Apply the signs to the factors for addition/subtraction/inverse subtraction
|
||||
// (for min/max, they are set to positive in the constant, so will be a nop).
|
||||
CompletePixelShader_WriteToROV_ExtractBlendScales(
|
||||
|
@ -2595,6 +2642,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
|||
|
||||
// Release scale_temp, src_factor_temp and dest_factor_temp.
|
||||
PopSystemTemp();
|
||||
|
||||
// Clamp the resulting color.
|
||||
CompletePixelShader_WriteToROV_ClampColor(rt_index,
|
||||
src_color_and_output_temp);
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
|
||||
|
@ -2626,43 +2677,6 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
|
|||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
// Clamp to min/max - this will also remove NaN since min and max return the
|
||||
// non-NaN value.
|
||||
system_constants_used_ |= (1ull << kSysConst_EDRAMStoreMinRT01_Index)
|
||||
<< rt_pair_index;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_pair_swizzle, 3));
|
||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||
shader_code_.push_back(kSysConst_EDRAMStoreMinRT01_Vec + rt_pair_index);
|
||||
++stat_.instruction_count;
|
||||
++stat_.float_instruction_count;
|
||||
system_constants_used_ |= (1ull << kSysConst_EDRAMStoreMaxRT01_Index)
|
||||
<< rt_pair_index;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(source_and_scratch_temp);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_pair_swizzle, 3));
|
||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||
shader_code_.push_back(kSysConst_EDRAMStoreMaxRT01_Vec + rt_pair_index);
|
||||
++stat_.instruction_count;
|
||||
++stat_.float_instruction_count;
|
||||
|
||||
// Scale by the fixed-point conversion factor.
|
||||
system_constants_used_ |= (1ull << kSysConst_EDRAMStoreScaleRT01_Index)
|
||||
<< rt_pair_index;
|
||||
|
@ -3860,6 +3874,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
|||
++stat_.instruction_count;
|
||||
++stat_.dynamic_flow_control_count;
|
||||
|
||||
// Clamp the color (the source value) before blending.
|
||||
// https://stackoverflow.com/questions/30153911/untangling-when-and-what-values-are-clamped-in-opengl-blending-on-different-rend
|
||||
CompletePixelShader_WriteToROV_ClampColor(rt_index,
|
||||
system_temp_color_[rt_index]);
|
||||
|
||||
// Load the previous value in the render target to blend and to apply the
|
||||
// write mask.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||
|
|
|
@ -648,6 +648,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
uint32_t edram_dword_offset_low_temp,
|
||||
uint32_t edram_dword_offset_high_temp, uint32_t rt_index,
|
||||
uint32_t target_temp);
|
||||
// Clamps the color to the range representable by the render target's format.
|
||||
// Will also remove NaN since min and max return the non-NaN value.
|
||||
void CompletePixelShader_WriteToROV_ClampColor(uint32_t rt_index,
|
||||
uint32_t color_temp);
|
||||
void CompletePixelShader_WriteToROV_Blend(uint32_t rt_index,
|
||||
uint32_t src_color_and_output_temp,
|
||||
uint32_t dest_color_temp);
|
||||
|
@ -662,6 +666,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
uint32_t rt_index, uint32_t constant_swizzle, bool is_signed,
|
||||
uint32_t shift_x, uint32_t shift_y, uint32_t shift_z, uint32_t shift_w,
|
||||
uint32_t target_temp, uint32_t write_mask = 0b1111);
|
||||
// Assumes the incoming color is already clamped to the range representable by
|
||||
// the RT format.
|
||||
void CompletePixelShader_WriteToROV_StoreColor(
|
||||
uint32_t edram_dword_offset_low_temp,
|
||||
uint32_t edram_dword_offset_high_temp, uint32_t rt_index,
|
||||
|
|
Loading…
Reference in New Issue