[D3D12] ROV: Don't clamp during blending for floats
This commit is contained in:
parent
eb185ab64c
commit
24bf39e942
|
@ -1746,33 +1746,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_ExtractPackLayout(
|
||||||
|
|
||||||
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
|
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
|
||||||
uint32_t edram_dword_offset_low_temp, uint32_t edram_dword_offset_high_temp,
|
uint32_t edram_dword_offset_low_temp, uint32_t edram_dword_offset_high_temp,
|
||||||
uint32_t rt_index, uint32_t target_temp) {
|
uint32_t rt_index, uint32_t rt_format_flags_temp, uint32_t target_temp) {
|
||||||
// For indexing of the format constants.
|
// For indexing of the format constants.
|
||||||
uint32_t rt_pair_index = rt_index >> 1;
|
uint32_t rt_pair_index = rt_index >> 1;
|
||||||
uint32_t rt_pair_swizzle = rt_index & 1 ? 0b11101010 : 0b01000000;
|
uint32_t rt_pair_swizzle = rt_index & 1 ? 0b11101010 : 0b01000000;
|
||||||
|
|
||||||
// Extract the needed flags.
|
|
||||||
uint32_t flags_temp = PushSystemTemp();
|
|
||||||
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index;
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
|
||||||
shader_code_.push_back(flags_temp);
|
|
||||||
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_index, 3));
|
|
||||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
|
||||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
|
||||||
shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec);
|
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
|
||||||
shader_code_.push_back(kRTFlag_FormatFixed);
|
|
||||||
shader_code_.push_back(kRTFlag_FormatFloat10);
|
|
||||||
shader_code_.push_back(kRTFlag_FormatFloat16);
|
|
||||||
shader_code_.push_back(0);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.uint_instruction_count;
|
|
||||||
|
|
||||||
// Allocate temporary registers for unpacking pixels.
|
// Allocate temporary registers for unpacking pixels.
|
||||||
uint32_t pack_width_temp = PushSystemTemp();
|
uint32_t pack_width_temp = PushSystemTemp();
|
||||||
uint32_t pack_offset_temp = PushSystemTemp();
|
uint32_t pack_offset_temp = PushSystemTemp();
|
||||||
|
@ -1934,9 +1912,9 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
shader_code_.push_back(target_temp);
|
shader_code_.push_back(target_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kROVRTFormatFlagTemp_Fixed_Swizzle, 1));
|
||||||
shader_code_.push_back(flags_temp);
|
shader_code_.push_back(rt_format_flags_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(fixed_temp);
|
shader_code_.push_back(fixed_temp);
|
||||||
|
@ -1959,9 +1937,9 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
|
||||||
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||||
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
|
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kROVRTFormatFlagTemp_Float10, 1));
|
||||||
shader_code_.push_back(flags_temp);
|
shader_code_.push_back(rt_format_flags_temp);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.dynamic_flow_control_count;
|
++stat_.dynamic_flow_control_count;
|
||||||
|
|
||||||
|
@ -2300,9 +2278,9 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
shader_code_.push_back(target_temp);
|
shader_code_.push_back(target_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
||||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kROVRTFormatFlagTemp_Float16, 1));
|
||||||
shader_code_.push_back(flags_temp);
|
shader_code_.push_back(rt_format_flags_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(f16_temp);
|
shader_code_.push_back(f16_temp);
|
||||||
|
@ -2314,9 +2292,6 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
|
||||||
// Release f16_temp.
|
// Release f16_temp.
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
|
|
||||||
// Release flags_temp.
|
|
||||||
PopSystemTemp();
|
|
||||||
|
|
||||||
// Scale by the fixed-point conversion factor.
|
// Scale by the fixed-point conversion factor.
|
||||||
system_constants_used_ |= (1ull << kSysConst_EDRAMLoadScaleRT01_Index)
|
system_constants_used_ |= (1ull << kSysConst_EDRAMLoadScaleRT01_Index)
|
||||||
<< rt_pair_index;
|
<< rt_pair_index;
|
||||||
|
@ -2397,7 +2372,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_ExtractBlendScales(
|
||||||
}
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_ClampColor(
|
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_ClampColor(
|
||||||
uint32_t rt_index, uint32_t color_temp) {
|
uint32_t rt_index, uint32_t color_in_temp, uint32_t color_out_temp) {
|
||||||
uint32_t rt_pair_index = rt_index >> 1;
|
uint32_t rt_pair_index = rt_index >> 1;
|
||||||
uint32_t rt_pair_swizzle = rt_index & 1 ? 0b11101010 : 0b01000000;
|
uint32_t rt_pair_swizzle = rt_index & 1 ? 0b11101010 : 0b01000000;
|
||||||
|
|
||||||
|
@ -2407,10 +2382,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_ClampColor(
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
shader_code_.push_back(color_temp);
|
shader_code_.push_back(color_out_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(color_temp);
|
shader_code_.push_back(color_in_temp);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_pair_swizzle, 3));
|
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_pair_swizzle, 3));
|
||||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||||
|
@ -2425,10 +2400,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_ClampColor(
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
shader_code_.push_back(color_temp);
|
shader_code_.push_back(color_out_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(color_temp);
|
shader_code_.push_back(color_out_temp);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_pair_swizzle, 3));
|
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_pair_swizzle, 3));
|
||||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||||
|
@ -2463,16 +2438,40 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_ApplyZeroBlendScale(
|
||||||
}
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
uint32_t rt_index, uint32_t src_color_and_output_temp,
|
uint32_t rt_index, uint32_t rt_format_flags_temp,
|
||||||
uint32_t dest_color_temp) {
|
uint32_t src_color_and_output_temp, uint32_t dest_color_temp) {
|
||||||
// Temporary register for scales of things that contribute to the blending,
|
// Temporary register for scales of things that contribute to the blending,
|
||||||
// usually -1.0, 0.0 or 1.0.
|
// usually -1.0, 0.0 or 1.0.
|
||||||
uint32_t scale_temp = PushSystemTemp();
|
uint32_t scale_temp = PushSystemTemp();
|
||||||
// Temporary register for making 0 * Infinity result in 0 rather than NaN.
|
// Temporary register for making 0 * Infinity result in 0 rather than NaN,
|
||||||
uint32_t mul_non_ieee_temp = PushSystemTemp();
|
// and for clamping of the source color and the factors.
|
||||||
|
uint32_t mul_non_ieee_and_clamp_temp = PushSystemTemp();
|
||||||
uint32_t src_factor_and_result_temp = PushSystemTemp();
|
uint32_t src_factor_and_result_temp = PushSystemTemp();
|
||||||
uint32_t dest_factor_and_minmax_temp = PushSystemTemp();
|
uint32_t dest_factor_and_minmax_temp = PushSystemTemp();
|
||||||
|
|
||||||
|
// Clamp the source color if needed. For fixed-point formats, clamping must
|
||||||
|
// always be done; for floating-point it must not be. However,
|
||||||
|
// k_2_10_10_10_FLOAT has fixed-point alpha.
|
||||||
|
// https://docs.microsoft.com/en-us/windows/desktop/direct3d11/d3d10-graphics-programming-guide-output-merger-stage
|
||||||
|
CompletePixelShader_WriteToROV_ClampColor(rt_index, src_color_and_output_temp,
|
||||||
|
mul_non_ieee_and_clamp_temp);
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(src_color_and_output_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kROVRTFormatFlagTemp_Fixed_Swizzle, 1));
|
||||||
|
shader_code_.push_back(rt_format_flags_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(mul_non_ieee_and_clamp_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(src_color_and_output_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.movc_instruction_count;
|
||||||
|
|
||||||
// Interleaving source and destination writes when possible to reduce
|
// Interleaving source and destination writes when possible to reduce
|
||||||
// write-read dependencies.
|
// write-read dependencies.
|
||||||
|
|
||||||
|
@ -2494,7 +2493,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
uint32_t swizzle = i ? 0b11101010 : 0b01000000;
|
uint32_t swizzle = i ? 0b11101010 : 0b01000000;
|
||||||
CompletePixelShader_WriteToROV_ApplyZeroBlendScale(
|
CompletePixelShader_WriteToROV_ApplyZeroBlendScale(
|
||||||
scale_temp, swizzle, src_color_and_output_temp, kSwizzleXYZW,
|
scale_temp, swizzle, src_color_and_output_temp, kSwizzleXYZW,
|
||||||
mul_non_ieee_temp);
|
mul_non_ieee_and_clamp_temp);
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
|
@ -2503,7 +2502,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
: src_factor_and_result_temp);
|
: src_factor_and_result_temp);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(mul_non_ieee_temp);
|
shader_code_.push_back(mul_non_ieee_and_clamp_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, swizzle, 1));
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, swizzle, 1));
|
||||||
shader_code_.push_back(scale_temp);
|
shader_code_.push_back(scale_temp);
|
||||||
|
@ -2524,7 +2523,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
for (uint32_t i = 0; i < 2; ++i) {
|
for (uint32_t i = 0; i < 2; ++i) {
|
||||||
uint32_t swizzle = i ? 0b11101010 : 0b01000000;
|
uint32_t swizzle = i ? 0b11101010 : 0b01000000;
|
||||||
CompletePixelShader_WriteToROV_ApplyZeroBlendScale(
|
CompletePixelShader_WriteToROV_ApplyZeroBlendScale(
|
||||||
scale_temp, swizzle, dest_color_temp, kSwizzleXYZW, mul_non_ieee_temp);
|
scale_temp, swizzle, dest_color_temp, kSwizzleXYZW,
|
||||||
|
mul_non_ieee_and_clamp_temp);
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
|
@ -2533,7 +2533,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
: src_factor_and_result_temp);
|
: src_factor_and_result_temp);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(mul_non_ieee_temp);
|
shader_code_.push_back(mul_non_ieee_and_clamp_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, swizzle, 1));
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, swizzle, 1));
|
||||||
shader_code_.push_back(scale_temp);
|
shader_code_.push_back(scale_temp);
|
||||||
|
@ -2552,10 +2552,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
kBlendX_Dest_DestAlpha_Shift, scale_temp);
|
kBlendX_Dest_DestAlpha_Shift, scale_temp);
|
||||||
CompletePixelShader_WriteToROV_ApplyZeroBlendScale(
|
CompletePixelShader_WriteToROV_ApplyZeroBlendScale(
|
||||||
scale_temp, kSwizzleXYZW, src_color_and_output_temp, kSwizzleWWWW,
|
scale_temp, kSwizzleXYZW, src_color_and_output_temp, kSwizzleWWWW,
|
||||||
mul_non_ieee_temp, 0b0011);
|
mul_non_ieee_and_clamp_temp, 0b0011);
|
||||||
CompletePixelShader_WriteToROV_ApplyZeroBlendScale(
|
CompletePixelShader_WriteToROV_ApplyZeroBlendScale(
|
||||||
scale_temp, kSwizzleXYZW, dest_color_temp, kSwizzleWWWW,
|
scale_temp, kSwizzleXYZW, dest_color_temp, kSwizzleWWWW,
|
||||||
mul_non_ieee_temp, 0b1100);
|
mul_non_ieee_and_clamp_temp, 0b1100);
|
||||||
for (uint32_t i = 0; i < 4; ++i) {
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
|
@ -2565,7 +2565,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
: src_factor_and_result_temp);
|
: src_factor_and_result_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
||||||
shader_code_.push_back(mul_non_ieee_temp);
|
shader_code_.push_back(mul_non_ieee_and_clamp_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
||||||
shader_code_.push_back(scale_temp);
|
shader_code_.push_back(scale_temp);
|
||||||
|
@ -2589,7 +2589,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(14));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(14));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
shader_code_.push_back(mul_non_ieee_temp);
|
shader_code_.push_back(mul_non_ieee_and_clamp_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, swizzle, 1));
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, swizzle, 1));
|
||||||
shader_code_.push_back(scale_temp);
|
shader_code_.push_back(scale_temp);
|
||||||
|
@ -2614,7 +2614,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
: src_factor_and_result_temp);
|
: src_factor_and_result_temp);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(mul_non_ieee_temp);
|
shader_code_.push_back(mul_non_ieee_and_clamp_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, swizzle, 1));
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, swizzle, 1));
|
||||||
shader_code_.push_back(scale_temp);
|
shader_code_.push_back(scale_temp);
|
||||||
|
@ -2634,7 +2634,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(14));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(14));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1));
|
||||||
shader_code_.push_back(mul_non_ieee_temp);
|
shader_code_.push_back(mul_non_ieee_and_clamp_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(scale_temp);
|
shader_code_.push_back(scale_temp);
|
||||||
|
@ -2660,7 +2660,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
: src_factor_and_result_temp);
|
: src_factor_and_result_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
||||||
shader_code_.push_back(mul_non_ieee_temp);
|
shader_code_.push_back(mul_non_ieee_and_clamp_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
||||||
shader_code_.push_back(scale_temp);
|
shader_code_.push_back(scale_temp);
|
||||||
|
@ -2685,7 +2685,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
shader_code_.push_back(mul_non_ieee_temp);
|
shader_code_.push_back(mul_non_ieee_and_clamp_temp);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(factor_temp);
|
shader_code_.push_back(factor_temp);
|
||||||
|
@ -2721,7 +2721,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
shader_code_.push_back(factor_temp);
|
shader_code_.push_back(factor_temp);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(mul_non_ieee_temp);
|
shader_code_.push_back(mul_non_ieee_and_clamp_temp);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||||
shader_code_.push_back(0);
|
shader_code_.push_back(0);
|
||||||
|
@ -2739,7 +2739,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
shader_code_.push_back(mul_non_ieee_temp);
|
shader_code_.push_back(mul_non_ieee_and_clamp_temp);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(color_temp);
|
shader_code_.push_back(color_temp);
|
||||||
|
@ -2760,7 +2760,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
shader_code_.push_back(factor_temp);
|
shader_code_.push_back(factor_temp);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(mul_non_ieee_temp);
|
shader_code_.push_back(mul_non_ieee_and_clamp_temp);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||||
shader_code_.push_back(0);
|
shader_code_.push_back(0);
|
||||||
|
@ -2772,14 +2772,28 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
shader_code_.push_back(factor_temp);
|
shader_code_.push_back(factor_temp);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.movc_instruction_count;
|
++stat_.movc_instruction_count;
|
||||||
}
|
|
||||||
|
|
||||||
// Clamp the factors.
|
// Clamp the factor if the components aren't floating-point.
|
||||||
// https://stackoverflow.com/questions/30153911/untangling-when-and-what-values-are-clamped-in-opengl-blending-on-different-rend
|
// https://docs.microsoft.com/en-us/windows/desktop/direct3d11/d3d10-graphics-programming-guide-output-merger-stage
|
||||||
CompletePixelShader_WriteToROV_ClampColor(rt_index,
|
CompletePixelShader_WriteToROV_ClampColor(rt_index, factor_temp,
|
||||||
src_factor_and_result_temp);
|
mul_non_ieee_and_clamp_temp);
|
||||||
CompletePixelShader_WriteToROV_ClampColor(rt_index,
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||||
dest_factor_and_minmax_temp);
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(factor_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kROVRTFormatFlagTemp_Fixed_Swizzle, 1));
|
||||||
|
shader_code_.push_back(rt_format_flags_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(mul_non_ieee_and_clamp_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(factor_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.movc_instruction_count;
|
||||||
|
}
|
||||||
|
|
||||||
// Apply the signs to the factors for addition/subtraction/inverse subtraction
|
// Apply the signs to the factors for addition/subtraction/inverse subtraction
|
||||||
// and add/subtract/inverse subtract (for min/max, this will be overwritten
|
// and add/subtract/inverse subtract (for min/max, this will be overwritten
|
||||||
|
@ -2888,44 +2902,19 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_Blend(
|
||||||
++stat_.movc_instruction_count;
|
++stat_.movc_instruction_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Release scale_temp, mul_non_ieee_temp, src_factor_and_result_temp and
|
// Release scale_temp, mul_non_ieee_and_clamp_temp, src_factor_and_result_temp
|
||||||
// dest_factor_and_minmax_temp.
|
// and dest_factor_and_minmax_temp.
|
||||||
PopSystemTemp(4);
|
PopSystemTemp(4);
|
||||||
|
|
||||||
// Clamp the resulting color.
|
|
||||||
CompletePixelShader_WriteToROV_ClampColor(rt_index,
|
|
||||||
src_color_and_output_temp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
|
void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
|
||||||
uint32_t edram_dword_offset_low_temp, uint32_t edram_dword_offset_high_temp,
|
uint32_t edram_dword_offset_low_temp, uint32_t edram_dword_offset_high_temp,
|
||||||
uint32_t rt_index, uint32_t source_and_scratch_temp) {
|
uint32_t rt_index, uint32_t rt_format_flags_temp,
|
||||||
|
uint32_t source_and_scratch_temp) {
|
||||||
// For indexing of the format constants.
|
// For indexing of the format constants.
|
||||||
uint32_t rt_pair_index = rt_index >> 1;
|
uint32_t rt_pair_index = rt_index >> 1;
|
||||||
uint32_t rt_pair_swizzle = rt_index & 1 ? 0b11101010 : 0b01000000;
|
uint32_t rt_pair_swizzle = rt_index & 1 ? 0b11101010 : 0b01000000;
|
||||||
|
|
||||||
// Extract the needed flags.
|
|
||||||
uint32_t flags_temp = PushSystemTemp();
|
|
||||||
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index;
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
|
||||||
shader_code_.push_back(
|
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
|
|
||||||
shader_code_.push_back(flags_temp);
|
|
||||||
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, rt_index, 3));
|
|
||||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
|
||||||
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
|
||||||
shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec);
|
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
|
||||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
|
||||||
shader_code_.push_back(kRTFlag_FormatFixed);
|
|
||||||
shader_code_.push_back(kRTFlag_FormatFloat10);
|
|
||||||
shader_code_.push_back(kRTFlag_FormatFloat16);
|
|
||||||
shader_code_.push_back(0);
|
|
||||||
++stat_.instruction_count;
|
|
||||||
++stat_.uint_instruction_count;
|
|
||||||
|
|
||||||
// Scale by the fixed-point conversion factor.
|
// Scale by the fixed-point conversion factor.
|
||||||
system_constants_used_ |= (1ull << kSysConst_EDRAMStoreScaleRT01_Index)
|
system_constants_used_ |= (1ull << kSysConst_EDRAMStoreScaleRT01_Index)
|
||||||
<< rt_pair_index;
|
<< rt_pair_index;
|
||||||
|
@ -2962,9 +2951,9 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
shader_code_.push_back(source_and_scratch_temp);
|
shader_code_.push_back(source_and_scratch_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kROVRTFormatFlagTemp_Fixed_Swizzle, 1));
|
||||||
shader_code_.push_back(flags_temp);
|
shader_code_.push_back(rt_format_flags_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(fixed_temp);
|
shader_code_.push_back(fixed_temp);
|
||||||
|
@ -2987,9 +2976,9 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
|
||||||
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||||
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
|
D3D10_SB_INSTRUCTION_TEST_NONZERO) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kROVRTFormatFlagTemp_Float10, 1));
|
||||||
shader_code_.push_back(flags_temp);
|
shader_code_.push_back(rt_format_flags_temp);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.dynamic_flow_control_count;
|
++stat_.dynamic_flow_control_count;
|
||||||
|
|
||||||
|
@ -3294,9 +3283,9 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
shader_code_.push_back(source_and_scratch_temp);
|
shader_code_.push_back(source_and_scratch_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
||||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kROVRTFormatFlagTemp_Float16, 1));
|
||||||
shader_code_.push_back(flags_temp);
|
shader_code_.push_back(rt_format_flags_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(f16_temp);
|
shader_code_.push_back(f16_temp);
|
||||||
|
@ -3413,8 +3402,8 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Release pack_temp, pack_width_temp, pack_offset_temp and flags_temp.
|
// Release pack_temp, pack_width_temp, pack_offset_temp.
|
||||||
PopSystemTemp(4);
|
PopSystemTemp(3);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
|
@ -4772,12 +4761,41 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.dynamic_flow_control_count;
|
++stat_.dynamic_flow_control_count;
|
||||||
|
|
||||||
// Clamp the color (the source value) before blending.
|
// Load the format flags:
|
||||||
// https://stackoverflow.com/questions/30153911/untangling-when-and-what-values-are-clamped-in-opengl-blending-on-different-rend
|
// X - color is fixed-point (kROVRTFormatFlagTemp_ColorFixed).
|
||||||
CompletePixelShader_WriteToROV_ClampColor(i, system_temp_color_[i]);
|
// Y - alpha is fixed-point (kROVRTFormatFlagTemp_AlphaFixed).
|
||||||
|
// Z - format is 2:10:10:10 floating-point (kROVRTFormatFlagTemp_Float10).
|
||||||
|
// W - format is 16-bit floating-point (kROVRTFormatFlagTemp_Float16).
|
||||||
|
uint32_t format_flags_temp = PushSystemTemp();
|
||||||
|
system_constants_used_ |= 1ull << kSysConst_EDRAMRTFlags_Index;
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(format_flags_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, i, 3));
|
||||||
|
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||||
|
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants));
|
||||||
|
shader_code_.push_back(kSysConst_EDRAMRTFlags_Vec);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||||
|
shader_code_.push_back(kRTFlag_FormatFixed);
|
||||||
|
shader_code_.push_back(kRTFlag_FormatFixed | kRTFlag_FormatFloat10);
|
||||||
|
shader_code_.push_back(kRTFlag_FormatFloat10);
|
||||||
|
shader_code_.push_back(kRTFlag_FormatFloat16);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.uint_instruction_count;
|
||||||
|
|
||||||
// Load the previous value in the render target to blend and to apply the
|
// Allocate temporary registers for loading the previous color and for the
|
||||||
// write mask.
|
// write mask. This is done because some operations - clamping, gamma
|
||||||
|
// correction - should be done only for the source color. If there is no
|
||||||
|
// need to get the previous color, the 1111 write mask will be used for
|
||||||
|
// the movc.
|
||||||
|
uint32_t dest_color_temp = PushSystemTemp();
|
||||||
|
uint32_t write_mask_temp = PushSystemTemp();
|
||||||
|
|
||||||
|
// Check if need to load the previous value in the render target.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||||
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||||
D3D10_SB_INSTRUCTION_TEST_ZERO) |
|
D3D10_SB_INSTRUCTION_TEST_ZERO) |
|
||||||
|
@ -4787,9 +4805,12 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
shader_code_.push_back(rt_overwritten_temp);
|
shader_code_.push_back(rt_overwritten_temp);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.dynamic_flow_control_count;
|
++stat_.dynamic_flow_control_count;
|
||||||
uint32_t dest_color_temp = PushSystemTemp();
|
|
||||||
|
// Load the previous value in the render target to blend and to apply the
|
||||||
|
// write mask.
|
||||||
CompletePixelShader_WriteToROV_LoadColor(
|
CompletePixelShader_WriteToROV_LoadColor(
|
||||||
edram_coord_low_temp, edram_coord_high_temp, i, dest_color_temp);
|
edram_coord_low_temp, edram_coord_high_temp, i, format_flags_temp,
|
||||||
|
dest_color_temp);
|
||||||
|
|
||||||
// Blend if needed.
|
// Blend if needed.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||||
|
@ -4801,15 +4822,14 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
shader_code_.push_back(rt_blend_temp);
|
shader_code_.push_back(rt_blend_temp);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.dynamic_flow_control_count;
|
++stat_.dynamic_flow_control_count;
|
||||||
CompletePixelShader_WriteToROV_Blend(i, system_temp_color_[i],
|
CompletePixelShader_WriteToROV_Blend(
|
||||||
dest_color_temp);
|
i, format_flags_temp, system_temp_color_[i], dest_color_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
|
|
||||||
// Mask the components to overwrite.
|
// Mask the components to overwrite.
|
||||||
uint32_t write_mask_temp = PushSystemTemp();
|
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
|
@ -4826,6 +4846,40 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
shader_code_.push_back(1 << 3);
|
shader_code_.push_back(1 << 3);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.uint_instruction_count;
|
++stat_.uint_instruction_count;
|
||||||
|
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ELSE) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||||
|
++stat_.instruction_count;
|
||||||
|
|
||||||
|
// If not using the previous color, set the write mask to 1111 to ignore
|
||||||
|
// the uninitialized register with the previous color.
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(write_mask_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||||
|
shader_code_.push_back(1);
|
||||||
|
shader_code_.push_back(1);
|
||||||
|
shader_code_.push_back(1);
|
||||||
|
shader_code_.push_back(1);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.mov_instruction_count;
|
||||||
|
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||||
|
++stat_.instruction_count;
|
||||||
|
|
||||||
|
// Clamp to the representable range after blending (for float10 and
|
||||||
|
// float16, clamping is not done during blending) and before storing.
|
||||||
|
CompletePixelShader_WriteToROV_ClampColor(i, system_temp_color_[i],
|
||||||
|
system_temp_color_[i]);
|
||||||
|
|
||||||
|
// TODO(Triang3l): Convert to sRGB for k_8_8_8_8_GAMMA.
|
||||||
|
|
||||||
|
// Keep previous values of the components where needed.
|
||||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
|
@ -4842,22 +4896,14 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
|
||||||
shader_code_.push_back(dest_color_temp);
|
shader_code_.push_back(dest_color_temp);
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.movc_instruction_count;
|
++stat_.movc_instruction_count;
|
||||||
// Release write_mask_temp.
|
|
||||||
PopSystemTemp();
|
|
||||||
|
|
||||||
// Release dest_color_temp.
|
|
||||||
PopSystemTemp();
|
|
||||||
shader_code_.push_back(
|
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
|
||||||
++stat_.instruction_count;
|
|
||||||
|
|
||||||
// TODO(Triang3l): Convert to sRGB for k_8_8_8_8_GAMMA.
|
|
||||||
|
|
||||||
// Write the new color, which may have been modified by blending.
|
// Write the new color, which may have been modified by blending.
|
||||||
CompletePixelShader_WriteToROV_StoreColor(edram_coord_low_temp,
|
CompletePixelShader_WriteToROV_StoreColor(
|
||||||
edram_coord_high_temp, i,
|
edram_coord_low_temp, edram_coord_high_temp, i, format_flags_temp,
|
||||||
system_temp_color_[i]);
|
system_temp_color_[i]);
|
||||||
|
|
||||||
|
// Release format_flags_temp, dest_color_temp and write_mask_temp.
|
||||||
|
PopSystemTemp(3);
|
||||||
|
|
||||||
// Close the check whether the RT is used.
|
// Close the check whether the RT is used.
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
|
|
|
@ -745,14 +745,27 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
bool high,
|
bool high,
|
||||||
uint32_t width_temp,
|
uint32_t width_temp,
|
||||||
uint32_t offset_temp);
|
uint32_t offset_temp);
|
||||||
|
// Components of rt_format_flags_temp.
|
||||||
|
enum : uint32_t {
|
||||||
|
kROVRTFormatFlagTemp_ColorFixed,
|
||||||
|
kROVRTFormatFlagTemp_AlphaFixed,
|
||||||
|
kROVRTFormatFlagTemp_Float10,
|
||||||
|
kROVRTFormatFlagTemp_Float16,
|
||||||
|
|
||||||
|
kROVRTFormatFlagTemp_Fixed_Swizzle =
|
||||||
|
kROVRTFormatFlagTemp_ColorFixed * 0b00010101 +
|
||||||
|
kROVRTFormatFlagTemp_AlphaFixed * 0b01000000,
|
||||||
|
};
|
||||||
void CompletePixelShader_WriteToROV_LoadColor(
|
void CompletePixelShader_WriteToROV_LoadColor(
|
||||||
uint32_t edram_dword_offset_low_temp,
|
uint32_t edram_dword_offset_low_temp,
|
||||||
uint32_t edram_dword_offset_high_temp, uint32_t rt_index,
|
uint32_t edram_dword_offset_high_temp, uint32_t rt_index,
|
||||||
uint32_t target_temp);
|
uint32_t rt_format_flags_temp, uint32_t target_temp);
|
||||||
// Clamps the color to the range representable by the render target's format.
|
// Clamps the color to the range representable by the render target's format.
|
||||||
// Will also remove NaN since min and max return the non-NaN value.
|
// Will also remove NaN since min and max return the non-NaN value.
|
||||||
|
// color_in_temp and color_out_temp may be the same.
|
||||||
void CompletePixelShader_WriteToROV_ClampColor(uint32_t rt_index,
|
void CompletePixelShader_WriteToROV_ClampColor(uint32_t rt_index,
|
||||||
uint32_t color_temp);
|
uint32_t color_in_temp,
|
||||||
|
uint32_t color_out_temp);
|
||||||
// Extracts 0.0 or plus/minus 1.0 from a blend constant. For example, it can
|
// Extracts 0.0 or plus/minus 1.0 from a blend constant. For example, it can
|
||||||
// be used to extract one scale for color and alpha into XY, and another scale
|
// be used to extract one scale for color and alpha into XY, and another scale
|
||||||
// for color and alpha into ZW. constant_swizzle is a bit mask indicating
|
// for color and alpha into ZW. constant_swizzle is a bit mask indicating
|
||||||
|
@ -769,6 +782,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
uint32_t factor_swizzle, uint32_t factor_out_temp,
|
uint32_t factor_swizzle, uint32_t factor_out_temp,
|
||||||
uint32_t write_mask = 0b1111);
|
uint32_t write_mask = 0b1111);
|
||||||
void CompletePixelShader_WriteToROV_Blend(uint32_t rt_index,
|
void CompletePixelShader_WriteToROV_Blend(uint32_t rt_index,
|
||||||
|
uint32_t rt_format_flags_temp,
|
||||||
uint32_t src_color_and_output_temp,
|
uint32_t src_color_and_output_temp,
|
||||||
uint32_t dest_color_temp);
|
uint32_t dest_color_temp);
|
||||||
// Assumes the incoming color is already clamped to the range representable by
|
// Assumes the incoming color is already clamped to the range representable by
|
||||||
|
@ -776,7 +790,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
||||||
void CompletePixelShader_WriteToROV_StoreColor(
|
void CompletePixelShader_WriteToROV_StoreColor(
|
||||||
uint32_t edram_dword_offset_low_temp,
|
uint32_t edram_dword_offset_low_temp,
|
||||||
uint32_t edram_dword_offset_high_temp, uint32_t rt_index,
|
uint32_t edram_dword_offset_high_temp, uint32_t rt_index,
|
||||||
uint32_t source_and_scratch_temp);
|
uint32_t rt_format_flags_temp, uint32_t source_and_scratch_temp);
|
||||||
void CompletePixelShader_WriteToROV();
|
void CompletePixelShader_WriteToROV();
|
||||||
void CompletePixelShader();
|
void CompletePixelShader();
|
||||||
void CompleteShaderCode();
|
void CompleteShaderCode();
|
||||||
|
|
Loading…
Reference in New Issue