From 1d45a4f67e017c2e1cbaee50cdb7ccc314912787 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Fri, 26 Oct 2018 22:04:11 +0300 Subject: [PATCH] [D3D12] Round to nearest when converting to fixed-point --- .../shaders/dxbc/edram_store_color_7e3_cs.cso | Bin 3244 -> 3284 bytes .../shaders/dxbc/edram_store_color_7e3_cs.h | 245 +++++++++--------- .../shaders/dxbc/edram_store_color_7e3_cs.txt | 4 +- .../gpu/d3d12/shaders/pixel_formats.hlsli | 4 +- src/xenia/gpu/dxbc_shader_translator.cc | 30 ++- 5 files changed, 158 insertions(+), 125 deletions(-) diff --git a/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_7e3_cs.cso b/src/xenia/gpu/d3d12/shaders/dxbc/edram_store_color_7e3_cs.cso index 537db6b13705a477aba3dc854c9f4de8882b9c77..911198073f5311f5c8b8cfae5b3b737a44f74f58 100644 GIT binary patch delta 158 zcmZ1@c}3FDCBn&hIp^UVw=CB?4=1#@dT1RsU}RumxWdE0zzU>IfH;PUfguNoYk=5d zqoF?&=N3)|hCrZv?&eM=Z59y+pjssc0iaR_1}P}Lc?+8fGgzhxDq;nbdBJJGBpF delta 115 zcmca2xkl2^CBn)1@FWSLM*D_O@sEn0EJ> 16u) & 1u)) >> 16u) & 0x3FFu; + // Rounding alpha to the nearest integer. + // https://docs.microsoft.com/en-us/windows/desktop/direct3d10/d3d10-graphics-programming-guide-resources-data-conversion return rgb_f10u32.r | (rgb_f10u32.g << 10u) | (rgb_f10u32.b << 20u) | - (uint(saturate(rgba_f32.a) * 3.0) << 30u); + (uint(round(saturate(rgba_f32.a) * 3.0)) << 30u); } uint4 XeFloat7e3To16(uint rgba_packed) { diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 37953a5cb..f62e8ad59 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -1657,6 +1657,21 @@ void DxbcShaderTranslator::CompletePixelShader_DepthTo24Bit() { ++stat_.instruction_count; ++stat_.float_instruction_count; + // Round to the nearest integer. This is the correct way of rounding, rounding + // towards zero gives 0xFF instead of 0x100 in clear shaders in, for instance, + // Halo 3. + // https://docs.microsoft.com/en-us/windows/desktop/direct3d10/d3d10-graphics-programming-guide-resources-data-conversion + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ROUND_NE) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(system_temp_depth_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(system_temp_depth_); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + // Convert to fixed-point. shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_FTOU) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); @@ -3199,9 +3214,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor( ++stat_.instruction_count; ++stat_.float_instruction_count; - // Convert to fixed-point. + // Convert to fixed-point, rounding to the nearest integer. + // https://docs.microsoft.com/en-us/windows/desktop/direct3d10/d3d10-graphics-programming-guide-resources-data-conversion uint32_t fixed_temp = PushSystemTemp(); - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_FTOI) | + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ROUND_NE) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); @@ -3210,6 +3226,16 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_StoreColor( EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); shader_code_.push_back(source_and_scratch_temp); ++stat_.instruction_count; + ++stat_.float_instruction_count; + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_FTOI) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(fixed_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(fixed_temp); + ++stat_.instruction_count; ++stat_.conversion_instruction_count; shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));