From fa968a9ecdcf3dc435dfb8b71a15bddfb0adba84 Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sun, 14 Oct 2018 15:43:41 +0300 Subject: [PATCH] [D3D12] ROV: Fix 7e3 denormal read --- src/xenia/gpu/dxbc_shader_translator.cc | 56 +++++++++++++++++-------- 1 file changed, 39 insertions(+), 17 deletions(-) diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 0da896619..106877cac 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -1645,8 +1645,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor( // Normalize the mantissa for denormalized numbers (with zero exponent - // exponent can be used for selection in movc). + // Note that HLSL firstbithigh(x) is compiled to DXBC like: + // `x ? 31 - firstbit_hi(x) : -1` + // (it returns the index from the LSB, not the MSB, but -1 for zero as well). - // denormalized_temp = firstbithigh(mantissa) + // denormalized_temp = firstbit_hi(mantissa) shader_code_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_FIRSTBIT_HI) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); @@ -1659,27 +1662,50 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor( ++stat_.instruction_count; ++stat_.uint_instruction_count; - // denormalized_temp = 7 - firstbithigh(mantissa) + // denormalized_temp = 7 - (31 - firstbit_hi(mantissa)) + // Or, if expanded: + // denormalized_temp = firstbit_hi(mantissa) - 24 shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); shader_code_.push_back( EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); shader_code_.push_back(f10_denormalized_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(f10_denormalized_temp); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); - shader_code_.push_back(7); - shader_code_.push_back(7); - shader_code_.push_back(7); - shader_code_.push_back(7); - shader_code_.push_back( - EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1) | - ENCODE_D3D10_SB_OPERAND_EXTENDED(1)); - shader_code_.push_back( - ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(D3D10_SB_OPERAND_MODIFIER_NEG)); - shader_code_.push_back(f10_denormalized_temp); + shader_code_.push_back(uint32_t(-24)); + shader_code_.push_back(uint32_t(-24)); + shader_code_.push_back(uint32_t(-24)); + shader_code_.push_back(uint32_t(-24)); ++stat_.instruction_count; ++stat_.int_instruction_count; + // If mantissa is zero, then: + // denormalized_temp = 7 - (-1) = 8 + // After this, it works like the following HLSL: + // denormalized_temp = 7 - firstbithigh(mantissa) + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1)); + shader_code_.push_back(f10_denormalized_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(f10_mantissa_temp); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(f10_denormalized_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(8); + shader_code_.push_back(8); + shader_code_.push_back(8); + shader_code_.push_back(8); + ++stat_.instruction_count; + ++stat_.movc_instruction_count; + // If the number is not denormalized, make // `(mantissa << (7 - firstbithigh(mantissa))) & 0x7F` // a no-op - zero 7 - firstbithigh(mantissa). @@ -2858,10 +2884,6 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ++stat_.instruction_count; ++stat_.int_instruction_count; - // TODO(Triang3l): Handle 64bpp - the pitch in tiles and the X tile index are - // multiplied by 2, the tile index now contains the index of a pair of tiles, - // not one tile. - // Calculate the address in the EDRAM buffer. // 1) Multiply tile Y index by the pitch and add X tile index to it to