From 2a98d29edac92df73ed5b0e10d82bb63290a5cea Mon Sep 17 00:00:00 2001 From: Triang3l Date: Tue, 11 Sep 2018 22:49:46 +0300 Subject: [PATCH] [D3D12] DXBC: Most of tfetch --- src/xenia/gpu/d3d12/d3d12_shader.cc | 2 +- src/xenia/gpu/d3d12/texture_cache.cc | 2 +- src/xenia/gpu/dxbc_shader_translator.cc | 565 ++++++++++++++++++---- src/xenia/gpu/dxbc_shader_translator.h | 3 +- src/xenia/gpu/shader.h | 1 + src/xenia/gpu/shader_translator.cc | 1 + src/xenia/gpu/shader_translator_disasm.cc | 3 + src/xenia/gpu/ucode.h | 4 + 8 files changed, 473 insertions(+), 108 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_shader.cc b/src/xenia/gpu/d3d12/d3d12_shader.cc index d99b664b6..295b75407 100644 --- a/src/xenia/gpu/d3d12/d3d12_shader.cc +++ b/src/xenia/gpu/d3d12/d3d12_shader.cc @@ -45,7 +45,7 @@ void D3D12Shader::SetTexturesAndSamplers( } sampler_bindings_.clear(); sampler_bindings_.reserve(sampler_binding_count); - for (uint32_t i = 0; i < texture_srv_count; ++i) { + for (uint32_t i = 0; i < sampler_binding_count; ++i) { SamplerBinding sampler; const DxbcShaderTranslator::SamplerBinding& translator_sampler = sampler_bindings[i]; diff --git a/src/xenia/gpu/d3d12/texture_cache.cc b/src/xenia/gpu/d3d12/texture_cache.cc index 74bff2937..8a32127a0 100644 --- a/src/xenia/gpu/d3d12/texture_cache.cc +++ b/src/xenia/gpu/d3d12/texture_cache.cc @@ -468,7 +468,7 @@ void TextureCache::WriteSampler(uint32_t fetch_constant, aniso_filter = AnisoFilter(fetch.aniso_filter); } D3D12_SAMPLER_DESC desc; - if (fetch.aniso_filter) { + if (aniso_filter != AnisoFilter::kDisabled) { desc.Filter = D3D12_FILTER_ANISOTROPIC; desc.MaxAnisotropy = std::min(1u << (uint32_t(aniso_filter) - 1), 16u); } else { diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 3c237bf21..4a7b88100 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -2955,6 +2955,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( 
CheckPredicate(instr.is_predicated, instr.predicate_condition); bool store_result = false; + // Whether the result is only in X and all components should be remapped to X + // while storing. + bool replicate_result = false; DxbcSourceOperand operand; uint32_t operand_length = 0; @@ -2970,33 +2973,58 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // tf1[2] tf1[3] tf1[4] tf1[5] uint32_t tfetch_pair_offset = (tfetch_index >> 1) * 3; - // TODO(Triang3l): kTextureFetch, kGetTextureBorderColorFrac, - // kGetTextureComputedLod, kGetTextureGradients, kGetTextureWeights, - if (instr.opcode == FetchOpcode::kTextureFetch || - instr.opcode == FetchOpcode::kGetTextureComputedLod) { + // TODO(Triang3l): kGetTextureBorderColorFrac, kGetTextureGradients. + if (!is_pixel_shader() && + (instr.opcode == FetchOpcode::kGetTextureComputedLod || + instr.opcode == FetchOpcode::kGetTextureGradients)) { + // Quickly skip everything if tried to get anything involving derivatives + // not in a pixel shader because only the pixel shader has derivatives. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back(0); + shader_code_.push_back(0); + shader_code_.push_back(0); + shader_code_.push_back(0); + ++stat_.instruction_count; + ++stat_.mov_instruction_count; + } else if (instr.opcode == FetchOpcode::kTextureFetch || + instr.opcode == FetchOpcode::kGetTextureComputedLod || + instr.opcode == FetchOpcode::kGetTextureWeights) { store_result = true; - uint32_t srv_register = FindOrAddTextureSRV(tfetch_index, instr.dimension); - // 3D or 2D stacked is selected dynamically. 
+ uint32_t srv_register; uint32_t srv_register_stacked; - if (instr.dimension == TextureDimension::k3D) { - srv_register_stacked = - FindOrAddTextureSRV(tfetch_index, TextureDimension::k2D); - } else { + uint32_t sampler_register; + if (instr.opcode == FetchOpcode::kGetTextureWeights) { + // Only the fetch constant needed. + srv_register = UINT32_MAX; srv_register_stacked = UINT32_MAX; + sampler_register = UINT32_MAX; + } else { + srv_register = FindOrAddTextureSRV(tfetch_index, instr.dimension); + // 3D or 2D stacked is selected dynamically. + if (instr.dimension == TextureDimension::k3D) { + srv_register_stacked = + FindOrAddTextureSRV(tfetch_index, TextureDimension::k2D); + } else { + srv_register_stacked = UINT32_MAX; + } + sampler_register = FindOrAddSamplerBinding( + tfetch_index, instr.attributes.mag_filter, + instr.attributes.min_filter, instr.attributes.mip_filter, + instr.attributes.aniso_filter); } - uint32_t sampler_register = FindOrAddSamplerBinding( - tfetch_index, instr.attributes.mag_filter, instr.attributes.min_filter, - instr.attributes.mip_filter, instr.attributes.aniso_filter); - - // Will use the fetch constants unconditionally, for exponent bias. - rdef_constants_used_ |= 1ull - << uint32_t(RdefConstantIndex::kFetchConstants); - // Move coordinates to pv temporarily so zeros can be added to expand them - // to Texture2DArray coordinates and to apply offset. - uint32_t coord_mask; + // to Texture2DArray coordinates and to apply offset. Or, if the instruction + // is getWeights, move them to pv because their fractional part will be + // returned. 
+ uint32_t coord_mask = 0b0111; switch (instr.dimension) { case TextureDimension::k1D: coord_mask = 0b0001; @@ -3004,8 +3032,15 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( case TextureDimension::k2D: coord_mask = 0b0011; break; - default: + case TextureDimension::k3D: coord_mask = 0b0111; + break; + case TextureDimension::kCube: + // Don't need the 3rd component for getWeights because it's the face + // index, so it doesn't participate in bilinear filtering. + coord_mask = + instr.opcode == FetchOpcode::kGetTextureWeights ? 0b0011 : 0b0111; + break; } shader_code_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | @@ -3018,12 +3053,16 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( ++stat_.mov_instruction_count; // If 1D or 2D, fill the unused coordinates with zeros (sampling the only - // row of the only slice). - if (0b0111 & ~coord_mask) { + // row of the only slice). For getWeights, also clear the 4th component + // because the coordinates will be returned. + uint32_t coord_all_components_mask = + instr.opcode == FetchOpcode::kGetTextureWeights ? 
0b1111 : 0b0111; + uint32_t coord_zero_mask = coord_all_components_mask & ~coord_mask; + if (coord_zero_mask) { shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_TEMP, 0b0111 & ~coord_mask, 1)); + D3D10_SB_OPERAND_TYPE_TEMP, coord_zero_mask, 1)); shader_code_.push_back(system_temp_pv_); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); @@ -3045,8 +3084,12 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( instr.dimension == TextureDimension::k3D || instr.dimension == TextureDimension::kCube) { offset_y = instr.attributes.offset_y; + // Don't care about the Z offset for cubemaps when getting weights because + // zero Z will be returned anyway (the face index doesn't participate in + // bilinear filtering). if (instr.dimension == TextureDimension::k3D || - instr.dimension == TextureDimension::kCube) { + (instr.dimension == TextureDimension::kCube && + instr.opcode != FetchOpcode::kGetTextureWeights)) { offset_z = instr.attributes.offset_z; } } @@ -3054,17 +3097,24 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // Get the texture size if needed, apply offset and switch between // normalized and unnormalized coordinates if needed. The offset is // fractional on the Xbox 360 (has 0.5 granularity), unlike in Direct3D 12, - // and cubemaps + // and cubemaps possibly can have offset and their coordinates are different + // than in Direct3D 12 (like an array texture rather than a direction). + // getWeights instructions also need the texture size because they work like + // frac(coord * texture_size). // TODO(Triang3l): Unnormalized coordinates should be disabled when the // wrap mode is not a clamped one, though it's probably a very rare case, // unlikely to be used on purpose. 
// http://web.archive.org/web/20090514012026/http://msdn.microsoft.com:80/en-us/library/bb313957.aspx uint32_t size_and_is_3d_temp = UINT32_MAX; bool has_offset = offset_x != 0.0f || offset_y != 0.0f || offset_z != 0.0f; - if (has_offset || instr.attributes.unnormalized_coordinates || + if (instr.opcode == FetchOpcode::kGetTextureWeights || has_offset || + instr.attributes.unnormalized_coordinates || instr.dimension == TextureDimension::k3D) { size_and_is_3d_temp = PushSystemTemp(); + rdef_constants_used_ |= 1ull + << uint32_t(RdefConstantIndex::kFetchConstants); + // Get 2D texture size and array layer count, in bits 0:12, 13:25, 26:31 // of dword 2 ([0].z or [2].x). shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) | @@ -3095,8 +3145,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( ++stat_.uint_instruction_count; if (instr.dimension == TextureDimension::k3D) { - // Write whether the texture is 3D to W if it's 3D/stacked. The - // dimension is in dword 5 in bith 9:10. + // Write whether the texture is 3D to W if it's 3D/stacked, as + // 0xFFFFFFFF for 3D or 0 for stacked. The dimension is in dword 5 in + // bits 9:10. shader_code_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); @@ -3216,105 +3267,409 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( ++stat_.instruction_count; ++stat_.float_instruction_count; - if (instr.dimension == TextureDimension::k3D) { - // Both 3D textures and 2D arrays have their Z coordinate normalized, - // however, on PC, array elements have unnormalized indices. - // https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360 - // Put the array layer in W - Z * depth if the fetch uses normalized - // coordinates, and Z if it uses unnormalized. 
- if (instr.attributes.unnormalized_coordinates) { - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1)); - shader_code_.push_back(system_temp_pv_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(system_temp_pv_); - ++stat_.instruction_count; - ++stat_.mov_instruction_count; - } else { - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1)); - shader_code_.push_back(system_temp_pv_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(system_temp_pv_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(size_and_is_3d_temp); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - } - } + if (instr.opcode == FetchOpcode::kGetTextureWeights) { + // Weights for bilinear filtering - need to get the fractional part of + // unnormalized coordinates. - if (has_offset || instr.attributes.unnormalized_coordinates) { - // Take the reciprocal of the size to normalize the coordinates and the - // offset (this is not necessary to just sample 3D/array with normalized - // coordinates and no offset). For cubemaps, there will be 1 in Z, so - // this will work. 
- shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_RCP) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); - shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1)); - shader_code_.push_back(size_and_is_3d_temp); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(size_and_is_3d_temp); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - - // Normalize the coordinates. if (instr.attributes.unnormalized_coordinates) { + // Apply the offset. shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); shader_code_.push_back(EncodeVectorMaskedOperand( D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1)); shader_code_.push_back(system_temp_pv_); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); shader_code_.push_back(system_temp_pv_); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(size_and_is_3d_temp); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - } - - // Apply the offset. 
- if (has_offset) { - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12)); - shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1)); - shader_code_.push_back(system_temp_pv_); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); shader_code_.push_back(*reinterpret_cast(&offset_x)); shader_code_.push_back(*reinterpret_cast(&offset_y)); shader_code_.push_back(*reinterpret_cast(&offset_z)); shader_code_.push_back(0); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(size_and_is_3d_temp); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } else { + // Unnormalize the coordinates and apply the offset. + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(has_offset ? D3D10_SB_OPCODE_MAD + : D3D10_SB_OPCODE_MUL) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(has_offset ? 
12 + : 7)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1)); + shader_code_.push_back(system_temp_pv_); shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(size_and_is_3d_temp); + if (has_offset) { + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back( + *reinterpret_cast(&offset_x)); + shader_code_.push_back( + *reinterpret_cast(&offset_y)); + shader_code_.push_back( + *reinterpret_cast(&offset_z)); + shader_code_.push_back(0); + } ++stat_.instruction_count; ++stat_.float_instruction_count; } + + if (instr.dimension == TextureDimension::k3D) { + // Ignore Z if the texture is stacked - it's the array layer, so + // there's no filtering across Z. Keep it only for 3D textures. This + // assumes that the 3D/stacked flag is 0xFFFFFFFF or 0. + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); + shader_code_.push_back(size_and_is_3d_temp); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + } + } else { + // Texture fetch - need to get normalized coordinates (with unnormalized + // Z for stacked textures). 
+ + if (instr.dimension == TextureDimension::k3D) { + // Both 3D textures and 2D arrays have their Z coordinate normalized, + // however, on PC, array elements have unnormalized indices. + // https://www.slideshare.net/blackdevilvikas/next-generation-graphics-programming-on-xbox-360 + // Put the array layer in W - Z * depth if the fetch uses normalized + // coordinates, and Z if it uses unnormalized. + if (instr.attributes.unnormalized_coordinates) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(system_temp_pv_); + ++stat_.instruction_count; + ++stat_.mov_instruction_count; + } else { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(size_and_is_3d_temp); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } + } + + if (has_offset || instr.attributes.unnormalized_coordinates) { + // Take the reciprocal of the size to normalize the coordinates and + // the offset (this is not necessary to just sample 3D/array with + // normalized coordinates and no offset). For cubemaps, there will be + // 1 in Z, so this will work. 
+ shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_RCP) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1)); + shader_code_.push_back(size_and_is_3d_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(size_and_is_3d_temp); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + + // Normalize the coordinates. + if (instr.attributes.unnormalized_coordinates) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MUL) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(size_and_is_3d_temp); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } + + // Apply the offset (coord = offset * 1/size + coord). 
+ if (has_offset) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); + shader_code_.push_back( + *reinterpret_cast(&offset_x)); + shader_code_.push_back( + *reinterpret_cast(&offset_y)); + shader_code_.push_back( + *reinterpret_cast(&offset_z)); + shader_code_.push_back(0); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(size_and_is_3d_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(system_temp_pv_); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } + } } } - // TODO(Triang3l): Revert the `cube` instruction. - // TODO(Triang3l): Actually fetch the texture. + if (instr.opcode == FetchOpcode::kGetTextureWeights) { + // Return the fractional part of unnormalized coordinates (already in pv) + // as bilinear filtering weights. + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_FRC) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(system_temp_pv_); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } else { + // TODO(Triang3l): Revert the `cube` instruction. + + // tfetch1D/2D/Cube just fetch directly. tfetch3D needs to fetch either + // the 3D texture or the 2D stacked texture, so two sample instructions + // selected conditionally are used in this case. 
+ if (instr.dimension == TextureDimension::k3D) { + assert_true(size_and_is_3d_temp != UINT32_MAX); + shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( + D3D10_SB_INSTRUCTION_TEST_NONZERO) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); + shader_code_.push_back(size_and_is_3d_temp); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + } + for (uint32_t i = 0; + i < (instr.dimension == TextureDimension::k3D ? 2u : 1u); ++i) { + if (i != 0) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ELSE) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + } + uint32_t srv_register_current = + i != 0 ? srv_register_stacked : srv_register; + if (instr.opcode == FetchOpcode::kGetTextureComputedLod) { + // The non-pixel-shader case should be handled before because it just + // returns a constant in this case. 
+ assert_true(is_pixel_shader()); + replicate_result = true; + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_1_SB_OPCODE_LOD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); + shader_code_.push_back(srv_register_current); + shader_code_.push_back(srv_register_current); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 2)); + shader_code_.push_back(sampler_register); + shader_code_.push_back(sampler_register); + ++stat_.instruction_count; + ++stat_.lod_instructions; + // Apply the LOD bias if used. + if (instr.attributes.lod_bias != 0.0f) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back( + *reinterpret_cast(&instr.attributes.lod_bias)); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } + } else if (instr.attributes.use_register_lod) { + uint32_t lod_register, lod_component; + if (instr.attributes.lod_bias != 0.0f) { + // Bias the LOD in the register. 
+ lod_register = PushSystemTemp(); + lod_component = 0; + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(lod_register); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); + shader_code_.push_back(system_temp_grad_h_lod_); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back( + *reinterpret_cast(&instr.attributes.lod_bias)); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } else { + lod_register = system_temp_grad_h_lod_; + lod_component = 3; + } + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); + shader_code_.push_back(srv_register_current); + shader_code_.push_back(srv_register_current); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 2)); + shader_code_.push_back(sampler_register); + shader_code_.push_back(sampler_register); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, lod_component, 1)); + shader_code_.push_back(lod_register); + ++stat_.instruction_count; + ++stat_.texture_normal_instructions; + if (instr.attributes.lod_bias != 0.0f) { + // Release the allocated lod_register. 
+ PopSystemTemp(); + } + } else if (instr.attributes.use_register_gradients) { + // TODO(Triang3l): Apply the LOD bias somehow for register gradients + // (possibly will require moving the bias to the sampler, which may be + // not very good considering the sampler count is very limited). + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_D) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); + shader_code_.push_back(srv_register_current); + shader_code_.push_back(srv_register_current); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 2)); + shader_code_.push_back(sampler_register); + shader_code_.push_back(sampler_register); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(system_temp_grad_h_lod_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(system_temp_grad_v_); + ++stat_.instruction_count; + ++stat_.texture_gradient_instructions; + } else { + // 3 different DXBC opcodes handled here: + // - sample_l, when not using a computed LOD or not in a pixel shader, + // in this case, LOD (0 + bias) is sampled. + // - sample, when sampling in a pixel shader (thus with derivatives) + // with a computed LOD. + // - sample_b, when sampling in a pixel shader with a biased computed + // LOD. + // Both sample_l and sample_b should add the LOD bias as the last + // operand in our case. 
+ bool explicit_lod = + !instr.attributes.use_computed_lod || !is_pixel_shader(); + if (explicit_lod) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); + } else if (instr.attributes.lod_bias != 0.0f) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_B) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); + } else { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); + } + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); + shader_code_.push_back(srv_register_current); + shader_code_.push_back(srv_register_current); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 2)); + shader_code_.push_back(sampler_register); + shader_code_.push_back(sampler_register); + if (explicit_lod || instr.attributes.lod_bias != 0.0f) { + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back( + *reinterpret_cast(&instr.attributes.lod_bias)); + } + ++stat_.instruction_count; + if (!explicit_lod && instr.attributes.lod_bias != 0.0f) { + ++stat_.texture_bias_instructions; + } else { + ++stat_.texture_normal_instructions; + } + } + } + if (instr.dimension == TextureDimension::k3D) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + } + + // TODO(Triang3l): Apply exponent bias, sign bias and gamma. 
+ } if (size_and_is_3d_temp != UINT32_MAX) { PopSystemTemp(); @@ -3350,7 +3705,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( } if (store_result) { - StoreResult(instr.result, system_temp_pv_, false); + StoreResult(instr.result, system_temp_pv_, replicate_result); } } diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 62ce07fb2..45a5c42b5 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -583,7 +583,8 @@ class DxbcShaderTranslator : public ShaderTranslator { uint32_t gs_output_topology; uint32_t gs_max_output_vertex_count; uint32_t unknown_26; - uint32_t unknown_27; + // Unknown in Wine, but confirmed by testing. + uint32_t lod_instructions; uint32_t unknown_28; uint32_t unknown_29; uint32_t c_control_points; diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index ebea8b9f3..0a1196e9e 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -421,6 +421,7 @@ struct ParsedTextureFetchInstruction { bool use_computed_lod = true; bool use_register_lod = false; bool use_register_gradients = false; + float lod_bias = 0.0f; float offset_x = 0.0f; float offset_y = 0.0f; float offset_z = 0.0f; diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index c15cb6ec0..83d8bae5b 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -951,6 +951,7 @@ void ShaderTranslator::ParseTextureFetchInstruction( i.attributes.use_computed_lod = op.use_computed_lod(); i.attributes.use_register_lod = op.use_register_lod(); i.attributes.use_register_gradients = op.use_register_gradients(); + i.attributes.lod_bias = op.lod_bias(); i.attributes.offset_x = op.offset_x(); i.attributes.offset_y = op.offset_y(); i.attributes.offset_z = op.offset_z(); diff --git a/src/xenia/gpu/shader_translator_disasm.cc b/src/xenia/gpu/shader_translator_disasm.cc index 6a2007eaa..ab180ccf4 100644 --- 
a/src/xenia/gpu/shader_translator_disasm.cc +++ b/src/xenia/gpu/shader_translator_disasm.cc @@ -432,6 +432,9 @@ void ParsedTextureFetchInstruction::Disassemble(StringBuffer* out) const { if (attributes.use_register_gradients) { out->Append(", UseRegisterGradients=true"); } + if (attributes.lod_bias != 0.0f) { + out->AppendFormat(", LODBias=%g", attributes.lod_bias); + } int component_count = GetTextureDimensionComponentCount(dimension); if (attributes.offset_x != 0.0f) { out->AppendFormat(", OffsetX=%g", attributes.offset_x); diff --git a/src/xenia/gpu/ucode.h b/src/xenia/gpu/ucode.h index 93a05f6cf..e4487c405 100644 --- a/src/xenia/gpu/ucode.h +++ b/src/xenia/gpu/ucode.h @@ -640,6 +640,10 @@ struct TextureFetchInstruction { SampleLocation sample_location() const { return static_cast(data_.sample_location); } + float lod_bias() const { + // http://web.archive.org/web/20090514012026/http://msdn.microsoft.com:80/en-us/library/bb313957.aspx + return ((static_cast(data_.lod_bias) << 25) >> 25) / 16.0f; + } float offset_x() const { return ((static_cast(data_.offset_x) << 27) >> 27) / 2.0f; }