diff --git a/src/xenia/gpu/dxbc_shader_translator_fetch.cc b/src/xenia/gpu/dxbc_shader_translator_fetch.cc index 57cb40e26..78d9eb762 100644 --- a/src/xenia/gpu/dxbc_shader_translator_fetch.cc +++ b/src/xenia/gpu/dxbc_shader_translator_fetch.cc @@ -1384,8 +1384,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // thick outlines with SSAA there. float offset_x = instr.attributes.offset_x + (1.0f / 1024.0f); if (instr.opcode == FetchOpcode::kGetTextureWeights) { - // Needed for correct shadow filtering (at least in Halo 3). - offset_x += 0.5f; + // Bilinear filtering (for shadows, for instance, in Halo 3), 0.5 - + // exactly the pixel. + offset_x -= 0.5f; } float offset_y = 0.0f, offset_z = 0.0f; if (instr.dimension == TextureDimension::k2D || @@ -1393,7 +1394,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( instr.dimension == TextureDimension::kCube) { offset_y = instr.attributes.offset_y + (1.0f / 1024.0f); if (instr.opcode == FetchOpcode::kGetTextureWeights) { - offset_y += 0.5f; + offset_y -= 0.5f; } // Don't care about the Z offset for cubemaps when getting weights because // zero Z will be returned anyway (the face index doesn't participate in @@ -1406,12 +1407,34 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // Z is the face index for cubemaps, so don't apply the epsilon to it. offset_z += 1.0f / 1024.0f; if (instr.opcode == FetchOpcode::kGetTextureWeights) { - offset_z += 0.5f; + offset_z -= 0.5f; } } } } + // Gather info about filtering across array layers. 
+ // Use the magnification filter when no derivatives: + // https://stackoverflow.com/questions/40328956/difference-between-sample-and-samplelevel-wrt-texture-filtering + bool vol_min_filter_applicable = + instr.opcode == FetchOpcode::kTextureFetch && + (instr.attributes.use_register_gradients || + (instr.attributes.use_computed_lod && IsDxbcPixelShader())); + bool has_vol_mag_filter = + instr.attributes.vol_mag_filter != TextureFilter::kUseFetchConst; + bool has_vol_min_filter = + vol_min_filter_applicable + ? instr.attributes.vol_min_filter != TextureFilter::kUseFetchConst + : has_vol_mag_filter; + bool vol_mag_filter_linear = + instr.attributes.vol_mag_filter == TextureFilter::kLinear; + bool vol_min_filter_linear = + vol_min_filter_applicable + ? instr.attributes.vol_min_filter == TextureFilter::kLinear + : vol_mag_filter_linear; + bool vol_mag_filter_point = has_vol_mag_filter && !vol_mag_filter_linear; + bool vol_min_filter_point = has_vol_min_filter && !vol_min_filter_linear; + // Get the texture size if needed, apply offset and switch between // normalized and unnormalized coordinates if needed. The offset is // fractional on the Xbox 360 (has 0.5 granularity), unlike in Direct3D 12, @@ -1424,6 +1447,14 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // unlikely to be used on purpose. // http://web.archive.org/web/20090514012026/http://msdn.microsoft.com:80/en-us/library/bb313957.aspx uint32_t size_and_is_3d_temp = UINT32_MAX; + // For stacked textures, if point sampling is not forced in the instruction: + // X - whether linear filtering should be done across layers (for color + // grading LUTs in Unreal Engine 3 games and Burnout Revenge), unless + // the filter is known from the instruction for all cases. + // Y - lerp factor between the two layers, unless only point sampling can be + // used. 
+ uint32_t vol_filter_temp = UINT32_MAX; + bool vol_filter_temp_linear_test = D3D10_SB_INSTRUCTION_TEST_NONZERO; // With 1/1024 this will always be true anyway, but let's keep the shorter // path without the offset in case some day this hack won't be used anymore // somehow. @@ -1432,8 +1463,22 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( instr.attributes.unnormalized_coordinates || instr.dimension == TextureDimension::k3D) { size_and_is_3d_temp = PushSystemTemp(); + if (instr.opcode == FetchOpcode::kTextureFetch && + instr.dimension == TextureDimension::k3D) { + uint32_t vol_filter_temp_components = 0b0000; + if (!has_vol_mag_filter || !has_vol_min_filter || + vol_mag_filter_linear != vol_min_filter_linear) { + vol_filter_temp_components |= 0b0011; + } else if (vol_mag_filter_linear || vol_min_filter_linear) { + vol_filter_temp_components |= 0b0010; + } + // Initialize to 0 to break register dependency. + if (vol_filter_temp_components != 0) { + vol_filter_temp = PushSystemTemp(vol_filter_temp_components); + } + } - // Will use fetch constants for the size. + // Will use fetch constants for the size and for stacked texture filter. if (cbuffer_index_fetch_constants_ == kCbufferIndexUnallocated) { cbuffer_index_fetch_constants_ = cbuffer_count_++; } @@ -1720,8 +1765,13 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( ++stat_.instruction_count; ++stat_.dynamic_flow_control_count; + // Layers on the Xenos are indexed like texels, with 0.5 being exactly + // layer 0, but in D3D10+ 0.0 is exactly layer 0. Halo 3 uses i + 0.5 + // offset for lightmap index, for instance. + float offset_layer = offset_z - 0.5f; + if (instr.attributes.unnormalized_coordinates) { - if (offset_z != 0.f) { + if (offset_layer != 0.0f) { // Add the offset to the array layer. 
shader_code_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | @@ -1735,13 +1785,13 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( shader_code_.push_back( EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); shader_code_.push_back( - *reinterpret_cast(&offset_z)); + *reinterpret_cast(&offset_layer)); ++stat_.instruction_count; ++stat_.float_instruction_count; } } else { // Unnormalize the array layer and apply the offset. - if (offset_z != 0.0f) { + if (offset_layer != 0.0f) { shader_code_.push_back( ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) | ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); @@ -1759,34 +1809,337 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( shader_code_.push_back( EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); shader_code_.push_back(size_and_is_3d_temp); - if (offset_z != 0.0f) { + if (offset_layer != 0.0f) { shader_code_.push_back( EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); shader_code_.push_back( - *reinterpret_cast(&offset_z)); + *reinterpret_cast(&offset_layer)); } ++stat_.instruction_count; ++stat_.float_instruction_count; } - // Truncate the array layer index. Halo 3 uses integer.5 coordinates, - // with Direct3D 10+ round-to-nearest-even rule + epsilon wrong layers - // are fetched. - // TODO(Triang3l): Investigate the correct rounding. - // TODO(Triang3l): Support vol_mag_filter and vol_min_filter for 2D - // arrays and maybe even 3D textures (color gradint LUT in Burnout - // Revenge). 
- shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ROUND_Z) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); - shader_code_.push_back(coord_temp); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(coord_temp); - ++stat_.instruction_count; - ++stat_.float_instruction_count; + if (vol_filter_temp != UINT32_MAX) { + if (vol_min_filter_applicable) { + if (!has_vol_mag_filter || !has_vol_min_filter || + vol_mag_filter_linear != vol_min_filter_linear) { + // Check if magnifying (derivative <= 1, according to OpenGL + // rules) or minifying (> 1) the texture across Z. Get the + // maximum of absolutes of the two derivatives of the array + // layer, either explicit or implicit. + if (instr.attributes.use_register_gradients) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(vol_filter_temp); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 2, 1) | + ENCODE_D3D10_SB_OPERAND_EXTENDED(1)); + shader_code_.push_back( + ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER( + D3D10_SB_OPERAND_MODIFIER_ABS)); + shader_code_.push_back(system_temp_grad_h_lod_); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 2, 1) | + ENCODE_D3D10_SB_OPERAND_EXTENDED(1)); + shader_code_.push_back( + ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER( + D3D10_SB_OPERAND_MODIFIER_ABS)); + shader_code_.push_back(system_temp_grad_v_); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } else { + for (uint32_t i = 0; i < 2; ++i) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE( + i ? 
D3D11_SB_OPCODE_DERIV_RTY_COARSE + : D3D11_SB_OPCODE_DERIV_RTX_COARSE) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 1 << i, 1)); + shader_code_.push_back(vol_filter_temp); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(coord_temp); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } + + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(vol_filter_temp); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0, 1) | + ENCODE_D3D10_SB_OPERAND_EXTENDED(1)); + shader_code_.push_back( + ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER( + D3D10_SB_OPERAND_MODIFIER_ABS)); + shader_code_.push_back(vol_filter_temp); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 1, 1) | + ENCODE_D3D10_SB_OPERAND_EXTENDED(1)); + shader_code_.push_back( + ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER( + D3D10_SB_OPERAND_MODIFIER_ABS)); + shader_code_.push_back(vol_filter_temp); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } + + // Check if minifying. 
+ shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_LT) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(vol_filter_temp); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(0x3F800000); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(vol_filter_temp); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + + if (has_vol_mag_filter || has_vol_min_filter) { + if (has_vol_mag_filter && has_vol_min_filter) { + // Both from the instruction. + assert_true(vol_mag_filter_linear != vol_min_filter_linear); + if (vol_mag_filter_linear) { + // Either linear when minifying (non-zero) or linear when + // magnifying (zero). + vol_filter_temp_linear_test = + D3D10_SB_INSTRUCTION_TEST_ZERO; + } + } else { + // Check if need to use the filter from the fetch constant. + // Has mag filter - need the fetch constant filter when + // minifying (non-zero minification test result). + // Has min filter - need it when magnifying (zero). + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( + has_vol_mag_filter + ? D3D10_SB_INSTRUCTION_TEST_NONZERO + : D3D10_SB_INSTRUCTION_TEST_ZERO) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(vol_filter_temp); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + + // Take the filter from the dword 4 of the fetch constant + // ([1].x or [2].z) if it's not in the instruction. + // Has mag filter - this will be executed for minification + // (bit 1). + // Has min filter - for magnification (bit 0). 
+ shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(vol_filter_temp); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, + 2 * (tfetch_index & 1), 3)); + shader_code_.push_back(cbuffer_index_fetch_constants_); + shader_code_.push_back( + uint32_t(CbufferRegister::kFetchConstants)); + shader_code_.push_back(tfetch_pair_offset + 1 + + (tfetch_index & 1)); + shader_code_.push_back(EncodeScalarOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(has_vol_mag_filter ? (1 << 1) + : (1 << 0)); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // If not using the filter from the fetch constant, set the + // value from the instruction. + // Need to change this for: + // - Magnifying (zero set) and linear (non-zero needed) vol + // mag filter. + // - Minifying (non-zero set) and point (zero needed) vol + // min filter. + // Already the expected zero or non-zero value for: + // - Magnifying (zero set) and point (zero needed) vol mag + // filter. + // - Minifying (non-zero set) and linear (non-zero needed) + // vol min filter. 
+ if (vol_mag_filter_linear || vol_min_filter_point) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ELSE) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(vol_filter_temp); + shader_code_.push_back(EncodeScalarOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(uint32_t(has_vol_mag_filter)); + ++stat_.instruction_count; + ++stat_.mov_instruction_count; + } + + // Close the fetch constant filter check. + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + } + } else { + // Mask the bit offset (1 for vol_min_filter, 0 for + // vol_mag_filter) in the fetch constant. + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(vol_filter_temp); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(vol_filter_temp); + shader_code_.push_back(EncodeScalarOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(1); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + + // Extract the filter from dword 4 of the fetch constant + // ([1].x or [2].z). 
+ shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(vol_filter_temp); + shader_code_.push_back(EncodeScalarOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(1); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(vol_filter_temp); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, + 2 * (tfetch_index & 1), 3)); + shader_code_.push_back(cbuffer_index_fetch_constants_); + shader_code_.push_back( + uint32_t(CbufferRegister::kFetchConstants)); + shader_code_.push_back(tfetch_pair_offset + 1 + + (tfetch_index & 1)); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + } + } + } else { + if (!has_vol_mag_filter) { + // Extract the magnification filter when there are no + // derivatives from bit 0 of dword 4 of the fetch constant + // ([1].x or [2].z). 
+ shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(vol_filter_temp); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, + 2 * (tfetch_index & 1), 3)); + shader_code_.push_back(cbuffer_index_fetch_constants_); + shader_code_.push_back( + uint32_t(CbufferRegister::kFetchConstants)); + shader_code_.push_back(tfetch_pair_offset + 1 + + (tfetch_index & 1)); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(1); + ++stat_.instruction_count; + ++stat_.uint_instruction_count; + } + } + } + + if (!vol_mag_filter_point || !vol_min_filter_point) { + if (!vol_mag_filter_linear || !vol_min_filter_linear) { + // Check if using linear filtering between array layers. + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( + vol_filter_temp_linear_test) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(vol_filter_temp); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + } + + // Floor the layer index to get the linear interpolation factor. 
+ shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ROUND_NI) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); + shader_code_.push_back(vol_filter_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(coord_temp); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + + // Get the fraction of the layer index, with i + 0.5 right between + // layers, as the linear interpolation factor between layers Z and + // Z + 1. + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); + shader_code_.push_back(vol_filter_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(coord_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1) | + ENCODE_D3D10_SB_OPERAND_EXTENDED(1)); + shader_code_.push_back(ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER( + D3D10_SB_OPERAND_MODIFIER_NEG)); + shader_code_.push_back(vol_filter_temp); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + + // Floor the layer index again for sampling. + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ROUND_NI) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); + shader_code_.push_back(coord_temp); + shader_code_.push_back( + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(coord_temp); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + + if (!vol_mag_filter_linear || !vol_min_filter_linear) { + // Close the linear filtering check. 
+ shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + } + } if (instr.attributes.unnormalized_coordinates || offset_z != 0.0f) { // Handle 3D texture coordinates - may need to normalize and/or add @@ -1907,10 +2260,16 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( ++stat_.float_instruction_count; } - // Allocate the register for the value from the signed texture. + // Allocate the register for the value from the signed texture, and also + // for the second array layer and lerping the layers. uint32_t signed_value_temp = instr.opcode == FetchOpcode::kTextureFetch ? PushSystemTemp() : UINT32_MAX; + uint32_t vol_filter_lerp_temp = UINT32_MAX; + if (vol_filter_temp != UINT32_MAX && + (!vol_mag_filter_point || !vol_min_filter_point)) { + vol_filter_lerp_temp = PushSystemTemp(); + } // tfetch1D/2D/Cube just fetch directly. tfetch3D needs to fetch either // the 3D texture or the 2D stacked texture, so two sample instructions @@ -1936,159 +2295,265 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); ++stat_.instruction_count; } - // Sample both unsigned and signed. - for (uint32_t j = 0; j < 2; ++j) { + if (instr.opcode == FetchOpcode::kGetTextureComputedLod) { + // The non-pixel-shader case should be handled before because it + // just returns a constant in this case. + assert_true(IsDxbcPixelShader()); uint32_t srv_register_current = - i ? srv_registers_stacked[j] : srv_registers[j]; - uint32_t target_temp_current = - j ? signed_value_temp : system_temp_pv_; - if (instr.opcode == FetchOpcode::kGetTextureComputedLod) { - // The non-pixel-shader case should be handled before because it - // just returns a constant in this case. - assert_true(IsDxbcPixelShader()); - replicate_result = true; + i ? 
srv_registers_stacked[0] : srv_registers[0]; + replicate_result = true; + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_1_SB_OPCODE_LOD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); + shader_code_.push_back( + EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(coord_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); + shader_code_.push_back(srv_register_current); + shader_code_.push_back(srv_register_current); + shader_code_.push_back( + EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2)); + shader_code_.push_back(sampler_register); + shader_code_.push_back(sampler_register); + ++stat_.instruction_count; + ++stat_.lod_instructions; + // Apply the LOD bias if used. + if (instr.attributes.lod_bias != 0.0f) { shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_1_SB_OPCODE_LOD) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); shader_code_.push_back(EncodeVectorMaskedOperand( D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(target_temp_current); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(coord_temp); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); - shader_code_.push_back(srv_register_current); - shader_code_.push_back(srv_register_current); + shader_code_.push_back(system_temp_pv_); shader_code_.push_back( - EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2)); - shader_code_.push_back(sampler_register); - shader_code_.push_back(sampler_register); + EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 
1)); + shader_code_.push_back(system_temp_pv_); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back( + *reinterpret_cast(&instr.attributes.lod_bias)); ++stat_.instruction_count; - ++stat_.lod_instructions; - // Apply the LOD bias if used. - if (instr.attributes.lod_bias != 0.0f) { - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(target_temp_current); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(target_temp_current); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(*reinterpret_cast( - &instr.attributes.lod_bias)); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - } - // In this case, only the unsigned variant is accessed because data - // doesn't matter. 
- break; - } else if (instr.attributes.use_register_lod) { - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); - shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); - shader_code_.push_back(target_temp_current); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(coord_temp); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); - shader_code_.push_back(srv_register_current); - shader_code_.push_back(srv_register_current); - shader_code_.push_back( - EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2)); - shader_code_.push_back(sampler_register); - shader_code_.push_back(sampler_register); - shader_code_.push_back(EncodeVectorSelectOperand( - D3D10_SB_OPERAND_TYPE_TEMP, lod_temp_component, 1)); - shader_code_.push_back(lod_temp); - ++stat_.instruction_count; - ++stat_.texture_normal_instructions; - } else if (instr.attributes.use_register_gradients) { - // TODO(Triang3l): Apply the LOD bias somehow for register gradients - // (possibly will require moving the bias to the sampler, which may - // be not very good considering the sampler count is very limited). 
- shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_D) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15)); - shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); - shader_code_.push_back(target_temp_current); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(coord_temp); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); - shader_code_.push_back(srv_register_current); - shader_code_.push_back(srv_register_current); - shader_code_.push_back( - EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2)); - shader_code_.push_back(sampler_register); - shader_code_.push_back(sampler_register); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(system_temp_grad_h_lod_); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(system_temp_grad_v_); - ++stat_.instruction_count; - ++stat_.texture_gradient_instructions; - } else { - // 3 different DXBC opcodes handled here: - // - sample_l, when not using a computed LOD or not in a pixel - // shader, in this case, LOD (0 + bias) is sampled. - // - sample, when sampling in a pixel shader (thus with derivatives) - // with a computed LOD. - // - sample_b, when sampling in a pixel shader with a biased - // computed LOD. - // Both sample_l and sample_b should add the LOD bias as the last - // operand in our case. 
- bool explicit_lod = - !instr.attributes.use_computed_lod || !IsDxbcPixelShader(); - if (explicit_lod) { - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); - } else if (instr.attributes.lod_bias != 0.0f) { - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_B) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); - } else { - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); - } - shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); - shader_code_.push_back(target_temp_current); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(coord_temp); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); - shader_code_.push_back(srv_register_current); - shader_code_.push_back(srv_register_current); - shader_code_.push_back( - EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2)); - shader_code_.push_back(sampler_register); - shader_code_.push_back(sampler_register); - if (explicit_lod || instr.attributes.lod_bias != 0.0f) { - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(*reinterpret_cast( - &instr.attributes.lod_bias)); - } - ++stat_.instruction_count; - if (!explicit_lod && instr.attributes.lod_bias != 0.0f) { - ++stat_.texture_bias_instructions; - } else { - ++stat_.texture_normal_instructions; + ++stat_.float_instruction_count; + } + } else { + // Sample both unsigned and signed, and for stacked textures, two + // samples if filtering is needed. + for (uint32_t j = 0; j < 2; ++j) { + uint32_t srv_register_current = + i ? srv_registers_stacked[j] : srv_registers[j]; + uint32_t target_temp_sign = j ? 
signed_value_temp : system_temp_pv_; + for (uint32_t k = 0; + k < (vol_filter_lerp_temp != UINT32_MAX ? 2u : 1u); ++k) { + uint32_t target_temp_current = + k ? vol_filter_lerp_temp : target_temp_sign; + if (k) { + if (!vol_mag_filter_linear || !vol_min_filter_linear) { + // Check if array layer filtering is enabled and need one more + // sample. + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | + ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( + vol_filter_temp_linear_test) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); + shader_code_.push_back(vol_filter_temp); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + } + + // Go to the next array texture sample. + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); + shader_code_.push_back(coord_temp); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(coord_temp); + shader_code_.push_back( + EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(0x3F800000); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } + if (instr.attributes.use_register_lod) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(target_temp_current); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(coord_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); + 
shader_code_.push_back(srv_register_current); + shader_code_.push_back(srv_register_current); + shader_code_.push_back(EncodeZeroComponentOperand( + D3D10_SB_OPERAND_TYPE_SAMPLER, 2)); + shader_code_.push_back(sampler_register); + shader_code_.push_back(sampler_register); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, lod_temp_component, 1)); + shader_code_.push_back(lod_temp); + ++stat_.instruction_count; + ++stat_.texture_normal_instructions; + } else if (instr.attributes.use_register_gradients) { + // TODO(Triang3l): Apply the LOD bias somehow for register + // gradients (possibly will require moving the bias to the + // sampler, which may be not very good considering the sampler + // count is very limited). + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_D) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(target_temp_current); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(coord_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); + shader_code_.push_back(srv_register_current); + shader_code_.push_back(srv_register_current); + shader_code_.push_back(EncodeZeroComponentOperand( + D3D10_SB_OPERAND_TYPE_SAMPLER, 2)); + shader_code_.push_back(sampler_register); + shader_code_.push_back(sampler_register); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(system_temp_grad_h_lod_); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(system_temp_grad_v_); + ++stat_.instruction_count; + ++stat_.texture_gradient_instructions; + } else { + // 3 different DXBC opcodes handled here: + // - 
sample_l, when not using a computed LOD or not in a pixel + // shader, in this case, LOD (0 + bias) is sampled. + // - sample, when sampling in a pixel shader (thus with + // derivatives) with a computed LOD. + // - sample_b, when sampling in a pixel shader with a biased + // computed LOD. + // Both sample_l and sample_b should add the LOD bias as the + // last operand in our case. + bool explicit_lod = + !instr.attributes.use_computed_lod || !IsDxbcPixelShader(); + if (explicit_lod) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); + } else if (instr.attributes.lod_bias != 0.0f) { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_B) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); + } else { + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); + } + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(target_temp_current); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(coord_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); + shader_code_.push_back(srv_register_current); + shader_code_.push_back(srv_register_current); + shader_code_.push_back(EncodeZeroComponentOperand( + D3D10_SB_OPERAND_TYPE_SAMPLER, 2)); + shader_code_.push_back(sampler_register); + shader_code_.push_back(sampler_register); + if (explicit_lod || instr.attributes.lod_bias != 0.0f) { + shader_code_.push_back(EncodeScalarOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(*reinterpret_cast<const uint32_t*>( + &instr.attributes.lod_bias)); + } + ++stat_.instruction_count; + if (!explicit_lod && instr.attributes.lod_bias != 0.0f) { + ++stat_.texture_bias_instructions; + } else { +
++stat_.texture_normal_instructions; + } + } + if (k) { + // b - a + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(target_temp_current); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(target_temp_current); + shader_code_.push_back( + EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, + kSwizzleXYZW, 1) | + ENCODE_D3D10_SB_OPERAND_EXTENDED(1)); + shader_code_.push_back( + ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER( + D3D10_SB_OPERAND_MODIFIER_NEG)); + shader_code_.push_back(target_temp_sign); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + + // a + (b - a) * factor + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); + shader_code_.push_back(target_temp_sign); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(target_temp_current); + shader_code_.push_back(EncodeVectorReplicatedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); + shader_code_.push_back(vol_filter_temp); + shader_code_.push_back(EncodeVectorSwizzledOperand( + D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); + shader_code_.push_back(target_temp_sign); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + + if (!j) { + // Go back to the first layer to sample the signed texture. 
+ shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); + shader_code_.push_back(EncodeVectorMaskedOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); + shader_code_.push_back(coord_temp); + shader_code_.push_back(EncodeVectorSelectOperand( + D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); + shader_code_.push_back(coord_temp); + shader_code_.push_back(EncodeScalarOperand( + D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); + shader_code_.push_back(0xBF800000u); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } + + if (!vol_mag_filter_linear || !vol_min_filter_linear) { + // Close the array layer filtering check. + shader_code_.push_back( + ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | + ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); + ++stat_.instruction_count; + } + } } } } @@ -2343,6 +2808,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( PopSystemTemp(); } + if (vol_filter_lerp_temp != UINT32_MAX) { + PopSystemTemp(); + } if (signed_value_temp != UINT32_MAX) { PopSystemTemp(); } @@ -2351,6 +2819,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( } } + if (vol_filter_temp != UINT32_MAX) { + PopSystemTemp(); + } if (size_and_is_3d_temp != UINT32_MAX) { PopSystemTemp(); } diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 92e8f6bfd..3bffa5d0e 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -440,6 +440,8 @@ struct ParsedTextureFetchInstruction { TextureFilter min_filter = TextureFilter::kUseFetchConst; TextureFilter mip_filter = TextureFilter::kUseFetchConst; AnisoFilter aniso_filter = AnisoFilter::kUseFetchConst; + TextureFilter vol_mag_filter = TextureFilter::kUseFetchConst; + TextureFilter vol_min_filter = TextureFilter::kUseFetchConst; bool use_computed_lod = true; bool use_register_lod = false; bool use_register_gradients = false; diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 
ba3a3cbcf..8680577ae 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -1035,6 +1035,8 @@ void ShaderTranslator::ParseTextureFetchInstruction( i.attributes.min_filter = op.min_filter(); i.attributes.mip_filter = op.mip_filter(); i.attributes.aniso_filter = op.aniso_filter(); + i.attributes.vol_mag_filter = op.vol_mag_filter(); + i.attributes.vol_min_filter = op.vol_min_filter(); i.attributes.use_computed_lod = op.use_computed_lod(); i.attributes.use_register_lod = op.use_register_lod(); i.attributes.use_register_gradients = op.use_register_gradients(); diff --git a/src/xenia/gpu/shader_translator_disasm.cc b/src/xenia/gpu/shader_translator_disasm.cc index 1de31efb1..65d87be06 100644 --- a/src/xenia/gpu/shader_translator_disasm.cc +++ b/src/xenia/gpu/shader_translator_disasm.cc @@ -423,6 +423,16 @@ void ParsedTextureFetchInstruction::Disassemble(StringBuffer* out) const { ", AnisoFilter=%s", kAnisoFilterNames[static_cast<int>(attributes.aniso_filter)]); } + if (attributes.vol_mag_filter != TextureFilter::kUseFetchConst) { + out->AppendFormat( + ", VolMagFilter=%s", + kTextureFilterNames[static_cast<int>(attributes.vol_mag_filter)]); + } + if (attributes.vol_min_filter != TextureFilter::kUseFetchConst) { + out->AppendFormat( + ", VolMinFilter=%s", + kTextureFilterNames[static_cast<int>(attributes.vol_min_filter)]); + } if (!attributes.use_computed_lod) { out->Append(", UseComputedLOD=false"); } diff --git a/src/xenia/gpu/ucode.h b/src/xenia/gpu/ucode.h index bfcece663..79cb07fab 100644 --- a/src/xenia/gpu/ucode.h +++ b/src/xenia/gpu/ucode.h @@ -634,6 +634,14 @@ struct TextureFetchInstruction { AnisoFilter aniso_filter() const { return static_cast<AnisoFilter>(data_.aniso_filter); } + bool has_vol_mag_filter() const { return data_.vol_mag_filter != 0x3; } + TextureFilter vol_mag_filter() const { + return static_cast<TextureFilter>(data_.vol_mag_filter); + } + bool has_vol_min_filter() const { return data_.vol_min_filter != 0x3; } + TextureFilter vol_min_filter() const
{ + return static_cast<TextureFilter>(data_.vol_min_filter); + } bool use_computed_lod() const { return data_.use_comp_lod == 1; } bool use_register_lod() const { return data_.use_reg_lod == 1; } bool use_register_gradients() const { return data_.use_reg_gradients == 1; } diff --git a/src/xenia/kernel/util/shim_utils.h b/src/xenia/kernel/util/shim_utils.h index f4bf06a5f..60e7525c5 100644 --- a/src/xenia/kernel/util/shim_utils.h +++ b/src/xenia/kernel/util/shim_utils.h @@ -103,12 +103,22 @@ inline std::string TranslateAnsiStringAddress(const Memory* memory, inline std::wstring TranslateUnicodeString( const Memory* memory, const X_UNICODE_STRING* unicode_string) { - if (!unicode_string || !unicode_string->length) { + if (!unicode_string) { return L""; } - return std::wstring( - memory->TranslateVirtual<const wchar_t*>(unicode_string->pointer), - unicode_string->length); + uint16_t length = unicode_string->length; + if (!length) { + return L""; + } + const xe::be<uint16_t>* guest_string = + memory->TranslateVirtual<const xe::be<uint16_t>*>( + unicode_string->pointer); + std::wstring translated_string; + translated_string.reserve(length); + for (uint16_t i = 0; i < length; ++i) { + translated_string += wchar_t(uint16_t(guest_string[i])); + } + return translated_string; } } // namespace util diff --git a/src/xenia/kernel/xboxkrnl/xboxkrnl_audio_xma.cc b/src/xenia/kernel/xboxkrnl/xboxkrnl_audio_xma.cc index 84b8502d5..00493cc06 100644 --- a/src/xenia/kernel/xboxkrnl/xboxkrnl_audio_xma.cc +++ b/src/xenia/kernel/xboxkrnl/xboxkrnl_audio_xma.cc @@ -119,18 +119,58 @@ static_assert_size(XMA_CONTEXT_INIT, 56); dword_result_t XMAInitializeContext(lpvoid_t context_ptr, pointer_t<XMA_CONTEXT_INIT> context_init) { + // Input buffers may be null (buffer 1 in Tony Hawk's American Wasteland). + // Convert to host endianness.
+ uint32_t input_buffer_0_guest_ptr = context_init->input_buffer_0_ptr; + uint32_t input_buffer_0_physical_address = 0; + if (input_buffer_0_guest_ptr) { + input_buffer_0_physical_address = + kernel_memory()->GetPhysicalAddress(input_buffer_0_guest_ptr); + // Xenia-specific safety check. + assert_true(input_buffer_0_physical_address != UINT32_MAX); + if (input_buffer_0_physical_address == UINT32_MAX) { + XELOGE( + "XMAInitializeContext: Invalid input buffer 0 virtual address %.8X", + input_buffer_0_guest_ptr); + return X_E_FALSE; + } + } + uint32_t input_buffer_1_guest_ptr = context_init->input_buffer_1_ptr; + uint32_t input_buffer_1_physical_address = 0; + if (input_buffer_1_guest_ptr) { + input_buffer_1_physical_address = + kernel_memory()->GetPhysicalAddress(input_buffer_1_guest_ptr); + assert_true(input_buffer_1_physical_address != UINT32_MAX); + if (input_buffer_1_physical_address == UINT32_MAX) { + XELOGE( + "XMAInitializeContext: Invalid input buffer 1 virtual address %.8X", + input_buffer_1_guest_ptr); + return X_E_FALSE; + } + } + uint32_t output_buffer_guest_ptr = context_init->output_buffer_ptr; + assert_not_zero(output_buffer_guest_ptr); + uint32_t output_buffer_physical_address = + kernel_memory()->GetPhysicalAddress(output_buffer_guest_ptr); + assert_true(output_buffer_physical_address != UINT32_MAX); + if (output_buffer_physical_address == UINT32_MAX) { + XELOGE("XMAInitializeContext: Invalid output buffer virtual address %.8X", + output_buffer_guest_ptr); + return X_E_FALSE; + } + std::memset(context_ptr, 0, sizeof(XMA_CONTEXT_DATA)); XMA_CONTEXT_DATA context(context_ptr); - context.input_buffer_0_ptr = context_init->input_buffer_0_ptr; + context.input_buffer_0_ptr = input_buffer_0_physical_address; context.input_buffer_0_packet_count = context_init->input_buffer_0_packet_count; - context.input_buffer_1_ptr = context_init->input_buffer_1_ptr; + context.input_buffer_1_ptr = input_buffer_1_physical_address; context.input_buffer_1_packet_count = 
context_init->input_buffer_1_packet_count; context.input_buffer_read_offset = context_init->input_buffer_read_offset; - context.output_buffer_ptr = context_init->output_buffer_ptr; + context.output_buffer_ptr = output_buffer_physical_address; context.output_buffer_block_count = context_init->output_buffer_block_count; // context.work_buffer = context_init->work_buffer; // ?