Merge remote-tracking branch 'upstream/master' into canary

This commit is contained in:
illusion98 2019-09-04 05:50:18 -05:00
commit bcc571e574
7 changed files with 726 additions and 183 deletions

View File

@ -1384,8 +1384,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// thick outlines with SSAA there. // thick outlines with SSAA there.
float offset_x = instr.attributes.offset_x + (1.0f / 1024.0f); float offset_x = instr.attributes.offset_x + (1.0f / 1024.0f);
if (instr.opcode == FetchOpcode::kGetTextureWeights) { if (instr.opcode == FetchOpcode::kGetTextureWeights) {
// Needed for correct shadow filtering (at least in Halo 3). // Bilinear filtering (for shadows, for instance, in Halo 3), 0.5 -
offset_x += 0.5f; // exactly the pixel.
offset_x -= 0.5f;
} }
float offset_y = 0.0f, offset_z = 0.0f; float offset_y = 0.0f, offset_z = 0.0f;
if (instr.dimension == TextureDimension::k2D || if (instr.dimension == TextureDimension::k2D ||
@ -1393,7 +1394,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
instr.dimension == TextureDimension::kCube) { instr.dimension == TextureDimension::kCube) {
offset_y = instr.attributes.offset_y + (1.0f / 1024.0f); offset_y = instr.attributes.offset_y + (1.0f / 1024.0f);
if (instr.opcode == FetchOpcode::kGetTextureWeights) { if (instr.opcode == FetchOpcode::kGetTextureWeights) {
offset_y += 0.5f; offset_y -= 0.5f;
} }
// Don't care about the Z offset for cubemaps when getting weights because // Don't care about the Z offset for cubemaps when getting weights because
// zero Z will be returned anyway (the face index doesn't participate in // zero Z will be returned anyway (the face index doesn't participate in
@ -1406,12 +1407,34 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// Z is the face index for cubemaps, so don't apply the epsilon to it. // Z is the face index for cubemaps, so don't apply the epsilon to it.
offset_z += 1.0f / 1024.0f; offset_z += 1.0f / 1024.0f;
if (instr.opcode == FetchOpcode::kGetTextureWeights) { if (instr.opcode == FetchOpcode::kGetTextureWeights) {
offset_z += 0.5f; offset_z -= 0.5f;
} }
} }
} }
} }
// Gather info about filtering across array layers.
// Use the magnification filter when no derivatives:
// https://stackoverflow.com/questions/40328956/difference-between-sample-and-samplelevel-wrt-texture-filtering
bool vol_min_filter_applicable =
instr.opcode == FetchOpcode::kTextureFetch &&
(instr.attributes.use_register_gradients ||
(instr.attributes.use_computed_lod && IsDxbcPixelShader()));
bool has_vol_mag_filter =
instr.attributes.vol_mag_filter != TextureFilter::kUseFetchConst;
bool has_vol_min_filter =
vol_min_filter_applicable
? instr.attributes.vol_min_filter != TextureFilter::kUseFetchConst
: has_vol_mag_filter;
bool vol_mag_filter_linear =
instr.attributes.vol_mag_filter == TextureFilter::kLinear;
bool vol_min_filter_linear =
vol_min_filter_applicable
? instr.attributes.vol_min_filter == TextureFilter::kLinear
: vol_mag_filter_linear;
bool vol_mag_filter_point = has_vol_mag_filter && !vol_mag_filter_linear;
bool vol_min_filter_point = has_vol_min_filter && !vol_min_filter_linear;
// Get the texture size if needed, apply offset and switch between // Get the texture size if needed, apply offset and switch between
// normalized and unnormalized coordinates if needed. The offset is // normalized and unnormalized coordinates if needed. The offset is
// fractional on the Xbox 360 (has 0.5 granularity), unlike in Direct3D 12, // fractional on the Xbox 360 (has 0.5 granularity), unlike in Direct3D 12,
@ -1424,6 +1447,14 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// unlikely to be used on purpose. // unlikely to be used on purpose.
// http://web.archive.org/web/20090514012026/http://msdn.microsoft.com:80/en-us/library/bb313957.aspx // http://web.archive.org/web/20090514012026/http://msdn.microsoft.com:80/en-us/library/bb313957.aspx
uint32_t size_and_is_3d_temp = UINT32_MAX; uint32_t size_and_is_3d_temp = UINT32_MAX;
// For stacked textures, if point sampling is not forced in the instruction:
// X - whether linear filtering should be done across layers (for color
// grading LUTs in Unreal Engine 3 games and Burnout Revenge), unless
// the filter is known from the instruction for all cases.
// Y - lerp factor between the two layers, unless only point sampling can be
// used.
uint32_t vol_filter_temp = UINT32_MAX;
bool vol_filter_temp_linear_test = D3D10_SB_INSTRUCTION_TEST_NONZERO;
// With 1/1024 this will always be true anyway, but let's keep the shorter // With 1/1024 this will always be true anyway, but let's keep the shorter
// path without the offset in case some day this hack won't be used anymore // path without the offset in case some day this hack won't be used anymore
// somehow. // somehow.
@ -1432,8 +1463,22 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
instr.attributes.unnormalized_coordinates || instr.attributes.unnormalized_coordinates ||
instr.dimension == TextureDimension::k3D) { instr.dimension == TextureDimension::k3D) {
size_and_is_3d_temp = PushSystemTemp(); size_and_is_3d_temp = PushSystemTemp();
if (instr.opcode == FetchOpcode::kTextureFetch &&
instr.dimension == TextureDimension::k3D) {
uint32_t vol_filter_temp_components = 0b0000;
if (!has_vol_mag_filter || !has_vol_min_filter ||
vol_mag_filter_linear != vol_min_filter_linear) {
vol_filter_temp_components |= 0b0011;
} else if (vol_mag_filter_linear || vol_min_filter_linear) {
vol_filter_temp_components |= 0b0010;
}
// Initialize to 0 to break register dependency.
if (vol_filter_temp_components != 0) {
vol_filter_temp = PushSystemTemp(vol_filter_temp_components);
}
}
// Will use fetch constants for the size. // Will use fetch constants for the size and for stacked texture filter.
if (cbuffer_index_fetch_constants_ == kCbufferIndexUnallocated) { if (cbuffer_index_fetch_constants_ == kCbufferIndexUnallocated) {
cbuffer_index_fetch_constants_ = cbuffer_count_++; cbuffer_index_fetch_constants_ = cbuffer_count_++;
} }
@ -1720,8 +1765,13 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.dynamic_flow_control_count; ++stat_.dynamic_flow_control_count;
// Layers on the Xenos are indexed like texels, with 0.5 being exactly
// layer 0, but in D3D10+ 0.0 is exactly layer 0. Halo 3 uses i + 0.5
// offset for lightmap index, for instance.
float offset_layer = offset_z - 0.5f;
if (instr.attributes.unnormalized_coordinates) { if (instr.attributes.unnormalized_coordinates) {
if (offset_z != 0.f) { if (offset_layer != 0.0f) {
// Add the offset to the array layer. // Add the offset to the array layer.
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
@ -1735,13 +1785,13 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
shader_code_.push_back( shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back( shader_code_.push_back(
*reinterpret_cast<const uint32_t*>(&offset_z)); *reinterpret_cast<const uint32_t*>(&offset_layer));
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
} }
} else { } else {
// Unnormalize the array layer and apply the offset. // Unnormalize the array layer and apply the offset.
if (offset_z != 0.0f) { if (offset_layer != 0.0f) {
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
@ -1759,28 +1809,322 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(size_and_is_3d_temp); shader_code_.push_back(size_and_is_3d_temp);
if (offset_z != 0.0f) { if (offset_layer != 0.0f) {
shader_code_.push_back( shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back( shader_code_.push_back(
*reinterpret_cast<const uint32_t*>(&offset_z)); *reinterpret_cast<const uint32_t*>(&offset_layer));
} }
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
} }
// Truncate the array layer index. Halo 3 uses integer.5 coordinates, if (vol_filter_temp != UINT32_MAX) {
// with Direct3D 10+ round-to-nearest-even rule + epsilon wrong layers if (vol_min_filter_applicable) {
// are fetched. if (!has_vol_mag_filter || !has_vol_min_filter ||
// TODO(Triang3l): Investigate the correct rounding. vol_mag_filter_linear != vol_min_filter_linear) {
// TODO(Triang3l): Support vol_mag_filter and vol_min_filter for 2D // Check if magnifying (derivative <= 1, according to OpenGL
// arrays and maybe even 3D textures (color gradint LUT in Burnout // rules) or minifying (> 1) the texture across Z. Get the
// Revenge). // maximum of absolutes of the two derivatives of the array
// layer, either explicit or implicit.
if (instr.attributes.use_register_gradients) {
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ROUND_Z) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 2, 1) |
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
shader_code_.push_back(
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
D3D10_SB_OPERAND_MODIFIER_ABS));
shader_code_.push_back(system_temp_grad_h_lod_);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 2, 1) |
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
shader_code_.push_back(
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
D3D10_SB_OPERAND_MODIFIER_ABS));
shader_code_.push_back(system_temp_grad_v_);
++stat_.instruction_count;
++stat_.float_instruction_count;
} else {
for (uint32_t i = 0; i < 2; ++i) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(
i ? D3D11_SB_OPCODE_DERIV_RTY_COARSE
: D3D11_SB_OPCODE_DERIV_RTX_COARSE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 1 << i, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(coord_temp);
++stat_.instruction_count;
++stat_.float_instruction_count;
}
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1) |
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
shader_code_.push_back(
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
D3D10_SB_OPERAND_MODIFIER_ABS));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 1, 1) |
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
shader_code_.push_back(
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
D3D10_SB_OPERAND_MODIFIER_ABS));
shader_code_.push_back(vol_filter_temp);
++stat_.instruction_count;
++stat_.float_instruction_count;
}
// Check if minifying.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_LT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0x3F800000);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(vol_filter_temp);
++stat_.instruction_count;
++stat_.float_instruction_count;
if (has_vol_mag_filter || has_vol_min_filter) {
if (has_vol_mag_filter && has_vol_min_filter) {
// Both from the instruction.
assert_true(vol_mag_filter_linear != vol_min_filter_linear);
if (vol_mag_filter_linear) {
// Either linear when minifying (non-zero) or linear when
// magnifying (zero).
vol_filter_temp_linear_test =
D3D10_SB_INSTRUCTION_TEST_ZERO;
}
} else {
// Check if need to use the filter from the fetch constant.
// Has mag filter - need the fetch constant filter when
// minifying (non-zero minification test result).
// Has min filter - need it when magnifying (zero).
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
has_vol_mag_filter
? D3D10_SB_INSTRUCTION_TEST_NONZERO
: D3D10_SB_INSTRUCTION_TEST_ZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(vol_filter_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
// Take the filter from the dword 4 of the fetch constant
// ([1].x or [2].z) if it's not in the instruction.
// Has mag filter - this will be executed for minification
// (bit 1).
// Has min filter - for magnification (bit 0).
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
2 * (tfetch_index & 1), 3));
shader_code_.push_back(cbuffer_index_fetch_constants_);
shader_code_.push_back(
uint32_t(CbufferRegister::kFetchConstants));
shader_code_.push_back(tfetch_pair_offset + 1 +
(tfetch_index & 1));
shader_code_.push_back(EncodeScalarOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(has_vol_mag_filter ? (1 << 1)
: (1 << 0));
++stat_.instruction_count;
++stat_.uint_instruction_count;
// If not using the filter from the fetch constant, set the
// value from the instruction.
// Need to change this for:
// - Magnifying (zero set) and linear (non-zero needed) vol
// mag filter.
// - Minifying (non-zero set) and point (zero needed) vol
// min filter.
// Already the expected zero or non-zero value for:
// - Magnifying (zero set) and point (zero needed) vol mag
// filter.
// - Minifying (non-zero set) and linear (non-zero needed)
// vol min filter.
if (vol_mag_filter_linear || vol_min_filter_point) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ELSE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeScalarOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(uint32_t(has_vol_mag_filter));
++stat_.instruction_count;
++stat_.mov_instruction_count;
}
// Close the fetch constant filter check.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
} else {
// Mask the bit offset (1 for vol_min_filter, 0 for
// vol_mag_filter) in the fetch constant.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeScalarOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Extract the filter from dword 4 of the fetch constant
// ([1].x or [2].z).
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeScalarOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
2 * (tfetch_index & 1), 3));
shader_code_.push_back(cbuffer_index_fetch_constants_);
shader_code_.push_back(
uint32_t(CbufferRegister::kFetchConstants));
shader_code_.push_back(tfetch_pair_offset + 1 +
(tfetch_index & 1));
++stat_.instruction_count;
++stat_.uint_instruction_count;
}
}
} else {
if (!has_vol_mag_filter) {
// Extract the magnification filter when there are no
// derivatives from bit 0 of dword 4 of the fetch constant
// ([1].x or [2].z).
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
2 * (tfetch_index & 1), 3));
shader_code_.push_back(cbuffer_index_fetch_constants_);
shader_code_.push_back(
uint32_t(CbufferRegister::kFetchConstants));
shader_code_.push_back(tfetch_pair_offset + 1 +
(tfetch_index & 1));
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1);
++stat_.instruction_count;
++stat_.uint_instruction_count;
}
}
}
if (!vol_mag_filter_point || !vol_min_filter_point) {
if (!vol_mag_filter_linear || !vol_min_filter_linear) {
// Check if using linear filtering between array layers.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
vol_filter_temp_linear_test) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(vol_filter_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
}
// Floor the layer index to get the linear interpolation factor.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ROUND_NI) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(coord_temp);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Get the fraction of the layer index, with i + 0.5 right between
// layers, as the linear interpolation factor between layers Z and
// Z + 1.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1) |
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
shader_code_.push_back(ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
D3D10_SB_OPERAND_MODIFIER_NEG));
shader_code_.push_back(vol_filter_temp);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Floor the layer index again for sampling.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ROUND_NI) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
shader_code_.push_back(coord_temp); shader_code_.push_back(coord_temp);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
@ -1788,6 +2132,15 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
if (!vol_mag_filter_linear || !vol_min_filter_linear) {
// Close the linear filtering check.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
}
if (instr.attributes.unnormalized_coordinates || offset_z != 0.0f) { if (instr.attributes.unnormalized_coordinates || offset_z != 0.0f) {
// Handle 3D texture coordinates - may need to normalize and/or add // Handle 3D texture coordinates - may need to normalize and/or add
// the offset. Check if 3D. // the offset. Check if 3D.
@ -1907,10 +2260,16 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
++stat_.float_instruction_count; ++stat_.float_instruction_count;
} }
// Allocate the register for the value from the signed texture. // Allocate the register for the value from the signed texture, and also
// for the second array layer and lerping the layers.
uint32_t signed_value_temp = instr.opcode == FetchOpcode::kTextureFetch uint32_t signed_value_temp = instr.opcode == FetchOpcode::kTextureFetch
? PushSystemTemp() ? PushSystemTemp()
: UINT32_MAX; : UINT32_MAX;
uint32_t vol_filter_lerp_temp = UINT32_MAX;
if (vol_filter_temp != UINT32_MAX &&
(!vol_mag_filter_point || !vol_min_filter_point)) {
vol_filter_lerp_temp = PushSystemTemp();
}
// tfetch1D/2D/Cube just fetch directly. tfetch3D needs to fetch either // tfetch1D/2D/Cube just fetch directly. tfetch3D needs to fetch either
// the 3D texture or the 2D stacked texture, so two sample instructions // the 3D texture or the 2D stacked texture, so two sample instructions
@ -1936,23 +2295,19 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count; ++stat_.instruction_count;
} }
// Sample both unsigned and signed.
for (uint32_t j = 0; j < 2; ++j) {
uint32_t srv_register_current =
i ? srv_registers_stacked[j] : srv_registers[j];
uint32_t target_temp_current =
j ? signed_value_temp : system_temp_pv_;
if (instr.opcode == FetchOpcode::kGetTextureComputedLod) { if (instr.opcode == FetchOpcode::kGetTextureComputedLod) {
// The non-pixel-shader case should be handled before because it // The non-pixel-shader case should be handled before because it
// just returns a constant in this case. // just returns a constant in this case.
assert_true(IsDxbcPixelShader()); assert_true(IsDxbcPixelShader());
uint32_t srv_register_current =
i ? srv_registers_stacked[0] : srv_registers[0];
replicate_result = true; replicate_result = true;
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_1_SB_OPCODE_LOD) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_1_SB_OPCODE_LOD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
shader_code_.push_back(EncodeVectorMaskedOperand( shader_code_.push_back(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(target_temp_current); shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(coord_temp); shader_code_.push_back(coord_temp);
@ -1973,21 +2328,61 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(EncodeVectorMaskedOperand( shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(target_temp_current); shader_code_.push_back(system_temp_pv_);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(target_temp_current); shader_code_.push_back(system_temp_pv_);
shader_code_.push_back( shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(*reinterpret_cast<const uint32_t*>( shader_code_.push_back(
&instr.attributes.lod_bias)); *reinterpret_cast<const uint32_t*>(&instr.attributes.lod_bias));
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
} }
// In this case, only the unsigned variant is accessed because data } else {
// doesn't matter. // Sample both unsigned and signed, and for stacked textures, two
break; // samples if filtering is needed.
} else if (instr.attributes.use_register_lod) { for (uint32_t j = 0; j < 2; ++j) {
uint32_t srv_register_current =
i ? srv_registers_stacked[j] : srv_registers[j];
uint32_t target_temp_sign = j ? signed_value_temp : system_temp_pv_;
for (uint32_t k = 0;
k < (vol_filter_lerp_temp != UINT32_MAX ? 2u : 1u); ++k) {
uint32_t target_temp_current =
k ? vol_filter_lerp_temp : target_temp_sign;
if (k) {
if (!vol_mag_filter_linear || !vol_min_filter_linear) {
// Check if array layer filtering is enabled and need one more
// sample.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
vol_filter_temp_linear_test) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(vol_filter_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
}
// Go to the next array texture sample.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0x3F800000);
++stat_.instruction_count;
++stat_.float_instruction_count;
}
if (instr.attributes.use_register_lod) {
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13));
@ -2001,8 +2396,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current); shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current); shader_code_.push_back(srv_register_current);
shader_code_.push_back( shader_code_.push_back(EncodeZeroComponentOperand(
EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2)); D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
shader_code_.push_back(sampler_register); shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register); shader_code_.push_back(sampler_register);
shader_code_.push_back(EncodeVectorSelectOperand( shader_code_.push_back(EncodeVectorSelectOperand(
@ -2011,9 +2406,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.texture_normal_instructions; ++stat_.texture_normal_instructions;
} else if (instr.attributes.use_register_gradients) { } else if (instr.attributes.use_register_gradients) {
// TODO(Triang3l): Apply the LOD bias somehow for register gradients // TODO(Triang3l): Apply the LOD bias somehow for register
// (possibly will require moving the bias to the sampler, which may // gradients (possibly will require moving the bias to the
// be not very good considering the sampler count is very limited). // sampler, which may be not very good considering the sampler
// count is very limited).
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_D) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_D) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15));
@ -2027,8 +2423,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current); shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current); shader_code_.push_back(srv_register_current);
shader_code_.push_back( shader_code_.push_back(EncodeZeroComponentOperand(
EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2)); D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
shader_code_.push_back(sampler_register); shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register); shader_code_.push_back(sampler_register);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
@ -2043,12 +2439,12 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// 3 different DXBC opcodes handled here: // 3 different DXBC opcodes handled here:
// - sample_l, when not using a computed LOD or not in a pixel // - sample_l, when not using a computed LOD or not in a pixel
// shader, in this case, LOD (0 + bias) is sampled. // shader, in this case, LOD (0 + bias) is sampled.
// - sample, when sampling in a pixel shader (thus with derivatives) // - sample, when sampling in a pixel shader (thus with
// with a computed LOD. // derivatives) with a computed LOD.
// - sample_b, when sampling in a pixel shader with a biased // - sample_b, when sampling in a pixel shader with a biased
// computed LOD. // computed LOD.
// Both sample_l and sample_b should add the LOD bias as the last // Both sample_l and sample_b should add the LOD bias as the
// operand in our case. // last operand in our case.
bool explicit_lod = bool explicit_lod =
!instr.attributes.use_computed_lod || !IsDxbcPixelShader(); !instr.attributes.use_computed_lod || !IsDxbcPixelShader();
if (explicit_lod) { if (explicit_lod) {
@ -2074,13 +2470,13 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current); shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current); shader_code_.push_back(srv_register_current);
shader_code_.push_back( shader_code_.push_back(EncodeZeroComponentOperand(
EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2)); D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
shader_code_.push_back(sampler_register); shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register); shader_code_.push_back(sampler_register);
if (explicit_lod || instr.attributes.lod_bias != 0.0f) { if (explicit_lod || instr.attributes.lod_bias != 0.0f) {
shader_code_.push_back( shader_code_.push_back(EncodeScalarOperand(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(*reinterpret_cast<const uint32_t*>( shader_code_.push_back(*reinterpret_cast<const uint32_t*>(
&instr.attributes.lod_bias)); &instr.attributes.lod_bias));
} }
@ -2091,6 +2487,75 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
++stat_.texture_normal_instructions; ++stat_.texture_normal_instructions;
} }
} }
if (k) {
// b - a
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP,
kSwizzleXYZW, 1) |
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
shader_code_.push_back(
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
D3D10_SB_OPERAND_MODIFIER_NEG));
shader_code_.push_back(target_temp_sign);
++stat_.instruction_count;
++stat_.float_instruction_count;
// a + (b - a) * factor
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(target_temp_sign);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(target_temp_sign);
++stat_.instruction_count;
++stat_.float_instruction_count;
if (!j) {
// Go back to the first layer to sample the signed texture.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeScalarOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0xBF800000u);
++stat_.instruction_count;
++stat_.float_instruction_count;
}
if (!vol_mag_filter_linear || !vol_min_filter_linear) {
// Close the array layer filtering check.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
}
}
}
} }
} }
if (instr.dimension == TextureDimension::k3D) { if (instr.dimension == TextureDimension::k3D) {
@ -2343,6 +2808,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
PopSystemTemp(); PopSystemTemp();
} }
if (vol_filter_lerp_temp != UINT32_MAX) {
PopSystemTemp();
}
if (signed_value_temp != UINT32_MAX) { if (signed_value_temp != UINT32_MAX) {
PopSystemTemp(); PopSystemTemp();
} }
@ -2351,6 +2819,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
} }
} }
if (vol_filter_temp != UINT32_MAX) {
PopSystemTemp();
}
if (size_and_is_3d_temp != UINT32_MAX) { if (size_and_is_3d_temp != UINT32_MAX) {
PopSystemTemp(); PopSystemTemp();
} }

View File

@ -440,6 +440,8 @@ struct ParsedTextureFetchInstruction {
TextureFilter min_filter = TextureFilter::kUseFetchConst; TextureFilter min_filter = TextureFilter::kUseFetchConst;
TextureFilter mip_filter = TextureFilter::kUseFetchConst; TextureFilter mip_filter = TextureFilter::kUseFetchConst;
AnisoFilter aniso_filter = AnisoFilter::kUseFetchConst; AnisoFilter aniso_filter = AnisoFilter::kUseFetchConst;
TextureFilter vol_mag_filter = TextureFilter::kUseFetchConst;
TextureFilter vol_min_filter = TextureFilter::kUseFetchConst;
bool use_computed_lod = true; bool use_computed_lod = true;
bool use_register_lod = false; bool use_register_lod = false;
bool use_register_gradients = false; bool use_register_gradients = false;

View File

@ -1035,6 +1035,8 @@ void ShaderTranslator::ParseTextureFetchInstruction(
i.attributes.min_filter = op.min_filter(); i.attributes.min_filter = op.min_filter();
i.attributes.mip_filter = op.mip_filter(); i.attributes.mip_filter = op.mip_filter();
i.attributes.aniso_filter = op.aniso_filter(); i.attributes.aniso_filter = op.aniso_filter();
i.attributes.vol_mag_filter = op.vol_mag_filter();
i.attributes.vol_min_filter = op.vol_min_filter();
i.attributes.use_computed_lod = op.use_computed_lod(); i.attributes.use_computed_lod = op.use_computed_lod();
i.attributes.use_register_lod = op.use_register_lod(); i.attributes.use_register_lod = op.use_register_lod();
i.attributes.use_register_gradients = op.use_register_gradients(); i.attributes.use_register_gradients = op.use_register_gradients();

View File

@ -423,6 +423,16 @@ void ParsedTextureFetchInstruction::Disassemble(StringBuffer* out) const {
", AnisoFilter=%s", ", AnisoFilter=%s",
kAnisoFilterNames[static_cast<int>(attributes.aniso_filter)]); kAnisoFilterNames[static_cast<int>(attributes.aniso_filter)]);
} }
if (attributes.vol_mag_filter != TextureFilter::kUseFetchConst) {
out->AppendFormat(
", VolMagFilter=%s",
kTextureFilterNames[static_cast<int>(attributes.vol_mag_filter)]);
}
if (attributes.vol_min_filter != TextureFilter::kUseFetchConst) {
out->AppendFormat(
", VolMinFilter=%s",
kTextureFilterNames[static_cast<int>(attributes.vol_min_filter)]);
}
if (!attributes.use_computed_lod) { if (!attributes.use_computed_lod) {
out->Append(", UseComputedLOD=false"); out->Append(", UseComputedLOD=false");
} }

View File

@ -634,6 +634,14 @@ struct TextureFetchInstruction {
// Returns the data_.aniso_filter field of this instruction converted to the
// AnisoFilter enum type.
AnisoFilter aniso_filter() const {
  return static_cast<AnisoFilter>(data_.aniso_filter);
}
// True when the data_.vol_mag_filter field holds anything other than 0x3.
// NOTE(review): 0x3 presumably corresponds to TextureFilter::kUseFetchConst
// (i.e. "no per-instruction override") - confirm against the enum.
bool has_vol_mag_filter() const {
  return !(data_.vol_mag_filter == 0x3);
}
// Returns the data_.vol_mag_filter field of this instruction converted to the
// TextureFilter enum type.
TextureFilter vol_mag_filter() const {
  const auto raw_filter = data_.vol_mag_filter;
  return static_cast<TextureFilter>(raw_filter);
}
// True when the data_.vol_min_filter field holds anything other than 0x3.
// NOTE(review): 0x3 presumably corresponds to TextureFilter::kUseFetchConst
// (i.e. "no per-instruction override") - confirm against the enum.
bool has_vol_min_filter() const {
  return !(data_.vol_min_filter == 0x3);
}
// Returns the data_.vol_min_filter field of this instruction converted to the
// TextureFilter enum type.
TextureFilter vol_min_filter() const {
  const auto raw_filter = data_.vol_min_filter;
  return static_cast<TextureFilter>(raw_filter);
}
// True when the data_.use_comp_lod field equals 1.
bool use_computed_lod() const { return data_.use_comp_lod == 1; }
// True when the data_.use_reg_lod field equals 1.
bool use_register_lod() const { return data_.use_reg_lod == 1; }
// True when the data_.use_reg_gradients field equals 1.
bool use_register_gradients() const { return data_.use_reg_gradients == 1; }

View File

@ -103,12 +103,22 @@ inline std::string TranslateAnsiStringAddress(const Memory* memory,
inline std::wstring TranslateUnicodeString( inline std::wstring TranslateUnicodeString(
const Memory* memory, const X_UNICODE_STRING* unicode_string) { const Memory* memory, const X_UNICODE_STRING* unicode_string) {
if (!unicode_string || !unicode_string->length) { if (!unicode_string) {
return L""; return L"";
} }
return std::wstring( uint16_t length = unicode_string->length;
memory->TranslateVirtual<const wchar_t*>(unicode_string->pointer), if (!length) {
unicode_string->length); return L"";
}
const xe::be<uint16_t>* guest_string =
memory->TranslateVirtual<const xe::be<uint16_t>*>(
unicode_string->pointer);
std::wstring translated_string;
translated_string.reserve(length);
for (uint16_t i = 0; i < length; ++i) {
translated_string += wchar_t(uint16_t(guest_string[i]));
}
return translated_string;
} }
} // namespace util } // namespace util

View File

@ -119,18 +119,58 @@ static_assert_size(XMA_CONTEXT_INIT, 56);
dword_result_t XMAInitializeContext(lpvoid_t context_ptr, dword_result_t XMAInitializeContext(lpvoid_t context_ptr,
pointer_t<XMA_CONTEXT_INIT> context_init) { pointer_t<XMA_CONTEXT_INIT> context_init) {
// Input buffers may be null (buffer 1 in Tony Hawk's American Wasteland).
// Convert to host endianness.
uint32_t input_buffer_0_guest_ptr = context_init->input_buffer_0_ptr;
uint32_t input_buffer_0_physical_address = 0;
if (input_buffer_0_guest_ptr) {
input_buffer_0_physical_address =
kernel_memory()->GetPhysicalAddress(input_buffer_0_guest_ptr);
// Xenia-specific safety check.
assert_true(input_buffer_0_physical_address != UINT32_MAX);
if (input_buffer_0_physical_address == UINT32_MAX) {
XELOGE(
"XMAInitializeContext: Invalid input buffer 0 virtual address %.8X",
input_buffer_0_guest_ptr);
return X_E_FALSE;
}
}
uint32_t input_buffer_1_guest_ptr = context_init->input_buffer_1_ptr;
uint32_t input_buffer_1_physical_address = 0;
if (input_buffer_1_guest_ptr) {
input_buffer_1_physical_address =
kernel_memory()->GetPhysicalAddress(input_buffer_1_guest_ptr);
assert_true(input_buffer_1_physical_address != UINT32_MAX);
if (input_buffer_1_physical_address == UINT32_MAX) {
XELOGE(
"XMAInitializeContext: Invalid input buffer 1 virtual address %.8X",
input_buffer_1_guest_ptr);
return X_E_FALSE;
}
}
uint32_t output_buffer_guest_ptr = context_init->output_buffer_ptr;
assert_not_zero(output_buffer_guest_ptr);
uint32_t output_buffer_physical_address =
kernel_memory()->GetPhysicalAddress(output_buffer_guest_ptr);
assert_true(output_buffer_physical_address != UINT32_MAX);
if (output_buffer_physical_address == UINT32_MAX) {
XELOGE("XMAInitializeContext: Invalid output buffer virtual address %.8X",
output_buffer_guest_ptr);
return X_E_FALSE;
}
std::memset(context_ptr, 0, sizeof(XMA_CONTEXT_DATA)); std::memset(context_ptr, 0, sizeof(XMA_CONTEXT_DATA));
XMA_CONTEXT_DATA context(context_ptr); XMA_CONTEXT_DATA context(context_ptr);
context.input_buffer_0_ptr = context_init->input_buffer_0_ptr; context.input_buffer_0_ptr = input_buffer_0_physical_address;
context.input_buffer_0_packet_count = context.input_buffer_0_packet_count =
context_init->input_buffer_0_packet_count; context_init->input_buffer_0_packet_count;
context.input_buffer_1_ptr = context_init->input_buffer_1_ptr; context.input_buffer_1_ptr = input_buffer_1_physical_address;
context.input_buffer_1_packet_count = context.input_buffer_1_packet_count =
context_init->input_buffer_1_packet_count; context_init->input_buffer_1_packet_count;
context.input_buffer_read_offset = context_init->input_buffer_read_offset; context.input_buffer_read_offset = context_init->input_buffer_read_offset;
context.output_buffer_ptr = context_init->output_buffer_ptr; context.output_buffer_ptr = output_buffer_physical_address;
context.output_buffer_block_count = context_init->output_buffer_block_count; context.output_buffer_block_count = context_init->output_buffer_block_count;
// context.work_buffer = context_init->work_buffer; // ? // context.work_buffer = context_init->work_buffer; // ?