Merge remote-tracking branch 'upstream/master' into canary

This commit is contained in:
illusion98 2019-09-04 05:50:18 -05:00
commit bcc571e574
7 changed files with 726 additions and 183 deletions

View File

@ -1384,8 +1384,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// thick outlines with SSAA there.
float offset_x = instr.attributes.offset_x + (1.0f / 1024.0f);
if (instr.opcode == FetchOpcode::kGetTextureWeights) {
// Needed for correct shadow filtering (at least in Halo 3).
offset_x += 0.5f;
// Bilinear filtering (for shadows, for instance, in Halo 3), 0.5 -
// exactly the pixel.
offset_x -= 0.5f;
}
float offset_y = 0.0f, offset_z = 0.0f;
if (instr.dimension == TextureDimension::k2D ||
@ -1393,7 +1394,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
instr.dimension == TextureDimension::kCube) {
offset_y = instr.attributes.offset_y + (1.0f / 1024.0f);
if (instr.opcode == FetchOpcode::kGetTextureWeights) {
offset_y += 0.5f;
offset_y -= 0.5f;
}
// Don't care about the Z offset for cubemaps when getting weights because
// zero Z will be returned anyway (the face index doesn't participate in
@ -1406,12 +1407,34 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// Z is the face index for cubemaps, so don't apply the epsilon to it.
offset_z += 1.0f / 1024.0f;
if (instr.opcode == FetchOpcode::kGetTextureWeights) {
offset_z += 0.5f;
offset_z -= 0.5f;
}
}
}
}
// Gather info about filtering across array layers.
// Use the magnification filter when no derivatives:
// https://stackoverflow.com/questions/40328956/difference-between-sample-and-samplelevel-wrt-texture-filtering
bool vol_min_filter_applicable =
instr.opcode == FetchOpcode::kTextureFetch &&
(instr.attributes.use_register_gradients ||
(instr.attributes.use_computed_lod && IsDxbcPixelShader()));
bool has_vol_mag_filter =
instr.attributes.vol_mag_filter != TextureFilter::kUseFetchConst;
bool has_vol_min_filter =
vol_min_filter_applicable
? instr.attributes.vol_min_filter != TextureFilter::kUseFetchConst
: has_vol_mag_filter;
bool vol_mag_filter_linear =
instr.attributes.vol_mag_filter == TextureFilter::kLinear;
bool vol_min_filter_linear =
vol_min_filter_applicable
? instr.attributes.vol_min_filter == TextureFilter::kLinear
: vol_mag_filter_linear;
bool vol_mag_filter_point = has_vol_mag_filter && !vol_mag_filter_linear;
bool vol_min_filter_point = has_vol_min_filter && !vol_min_filter_linear;
// Get the texture size if needed, apply offset and switch between
// normalized and unnormalized coordinates if needed. The offset is
// fractional on the Xbox 360 (has 0.5 granularity), unlike in Direct3D 12,
@ -1424,6 +1447,14 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// unlikely to be used on purpose.
// http://web.archive.org/web/20090514012026/http://msdn.microsoft.com:80/en-us/library/bb313957.aspx
uint32_t size_and_is_3d_temp = UINT32_MAX;
// For stacked textures, if point sampling is not forced in the instruction:
// X - whether linear filtering should be done across layers (for color
// grading LUTs in Unreal Engine 3 games and Burnout Revenge), unless
// the filter is known from the instruction for all cases.
// Y - lerp factor between the two layers, unless only point sampling can be
// used.
uint32_t vol_filter_temp = UINT32_MAX;
bool vol_filter_temp_linear_test = D3D10_SB_INSTRUCTION_TEST_NONZERO;
// With 1/1024 this will always be true anyway, but let's keep the shorter
// path without the offset in case some day this hack won't be used anymore
// somehow.
@ -1432,8 +1463,22 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
instr.attributes.unnormalized_coordinates ||
instr.dimension == TextureDimension::k3D) {
size_and_is_3d_temp = PushSystemTemp();
if (instr.opcode == FetchOpcode::kTextureFetch &&
instr.dimension == TextureDimension::k3D) {
uint32_t vol_filter_temp_components = 0b0000;
if (!has_vol_mag_filter || !has_vol_min_filter ||
vol_mag_filter_linear != vol_min_filter_linear) {
vol_filter_temp_components |= 0b0011;
} else if (vol_mag_filter_linear || vol_min_filter_linear) {
vol_filter_temp_components |= 0b0010;
}
// Initialize to 0 to break register dependency.
if (vol_filter_temp_components != 0) {
vol_filter_temp = PushSystemTemp(vol_filter_temp_components);
}
}
// Will use fetch constants for the size.
// Will use fetch constants for the size and for stacked texture filter.
if (cbuffer_index_fetch_constants_ == kCbufferIndexUnallocated) {
cbuffer_index_fetch_constants_ = cbuffer_count_++;
}
@ -1720,8 +1765,13 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
// Layers on the Xenos are indexed like texels, with 0.5 being exactly
// layer 0, but in D3D10+ 0.0 is exactly layer 0. Halo 3 uses i + 0.5
// offset for lightmap index, for instance.
float offset_layer = offset_z - 0.5f;
if (instr.attributes.unnormalized_coordinates) {
if (offset_z != 0.f) {
if (offset_layer != 0.0f) {
// Add the offset to the array layer.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
@ -1735,13 +1785,13 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(
*reinterpret_cast<const uint32_t*>(&offset_z));
*reinterpret_cast<const uint32_t*>(&offset_layer));
++stat_.instruction_count;
++stat_.float_instruction_count;
}
} else {
// Unnormalize the array layer and apply the offset.
if (offset_z != 0.0f) {
if (offset_layer != 0.0f) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
@ -1759,34 +1809,337 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(size_and_is_3d_temp);
if (offset_z != 0.0f) {
if (offset_layer != 0.0f) {
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(
*reinterpret_cast<const uint32_t*>(&offset_z));
*reinterpret_cast<const uint32_t*>(&offset_layer));
}
++stat_.instruction_count;
++stat_.float_instruction_count;
}
// Truncate the array layer index. Halo 3 uses integer.5 coordinates; with
// the Direct3D 10+ round-to-nearest-even rule plus the epsilon, wrong layers
// are fetched.
// TODO(Triang3l): Investigate the correct rounding.
// TODO(Triang3l): Support vol_mag_filter and vol_min_filter for 2D
// arrays and maybe even 3D textures (color grading LUT in Burnout
// Revenge).
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ROUND_Z) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(coord_temp);
++stat_.instruction_count;
++stat_.float_instruction_count;
if (vol_filter_temp != UINT32_MAX) {
if (vol_min_filter_applicable) {
if (!has_vol_mag_filter || !has_vol_min_filter ||
vol_mag_filter_linear != vol_min_filter_linear) {
// Check if magnifying (derivative <= 1, according to OpenGL
// rules) or minifying (> 1) the texture across Z. Get the
// maximum of absolutes of the two derivatives of the array
// layer, either explicit or implicit.
if (instr.attributes.use_register_gradients) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 2, 1) |
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
shader_code_.push_back(
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
D3D10_SB_OPERAND_MODIFIER_ABS));
shader_code_.push_back(system_temp_grad_h_lod_);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 2, 1) |
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
shader_code_.push_back(
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
D3D10_SB_OPERAND_MODIFIER_ABS));
shader_code_.push_back(system_temp_grad_v_);
++stat_.instruction_count;
++stat_.float_instruction_count;
} else {
for (uint32_t i = 0; i < 2; ++i) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(
i ? D3D11_SB_OPCODE_DERIV_RTY_COARSE
: D3D11_SB_OPCODE_DERIV_RTX_COARSE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 1 << i, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(coord_temp);
++stat_.instruction_count;
++stat_.float_instruction_count;
}
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1) |
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
shader_code_.push_back(
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
D3D10_SB_OPERAND_MODIFIER_ABS));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 1, 1) |
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
shader_code_.push_back(
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
D3D10_SB_OPERAND_MODIFIER_ABS));
shader_code_.push_back(vol_filter_temp);
++stat_.instruction_count;
++stat_.float_instruction_count;
}
// Check if minifying.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_LT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0x3F800000);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(vol_filter_temp);
++stat_.instruction_count;
++stat_.float_instruction_count;
if (has_vol_mag_filter || has_vol_min_filter) {
if (has_vol_mag_filter && has_vol_min_filter) {
// Both from the instruction.
assert_true(vol_mag_filter_linear != vol_min_filter_linear);
if (vol_mag_filter_linear) {
// Either linear when minifying (non-zero) or linear when
// magnifying (zero).
vol_filter_temp_linear_test =
D3D10_SB_INSTRUCTION_TEST_ZERO;
}
} else {
// Check if need to use the filter from the fetch constant.
// Has mag filter - need the fetch constant filter when
// minifying (non-zero minification test result).
// Has min filter - need it when magnifying (zero).
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
has_vol_mag_filter
? D3D10_SB_INSTRUCTION_TEST_NONZERO
: D3D10_SB_INSTRUCTION_TEST_ZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(vol_filter_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
// Take the filter from the dword 4 of the fetch constant
// ([1].x or [2].z) if it's not in the instruction.
// Has mag filter - this will be executed for minification
// (bit 1).
// Has min filter - for magnification (bit 0).
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
2 * (tfetch_index & 1), 3));
shader_code_.push_back(cbuffer_index_fetch_constants_);
shader_code_.push_back(
uint32_t(CbufferRegister::kFetchConstants));
shader_code_.push_back(tfetch_pair_offset + 1 +
(tfetch_index & 1));
shader_code_.push_back(EncodeScalarOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(has_vol_mag_filter ? (1 << 1)
: (1 << 0));
++stat_.instruction_count;
++stat_.uint_instruction_count;
// If not using the filter from the fetch constant, set the
// value from the instruction.
// Need to change this for:
// - Magnifying (zero set) and linear (non-zero needed) vol
// mag filter.
// - Minifying (non-zero set) and point (zero needed) vol
// min filter.
// Already the expected zero or non-zero value for:
// - Magnifying (zero set) and point (zero needed) vol mag
// filter.
// - Minifying (non-zero set) and linear (non-zero needed)
// vol min filter.
if (vol_mag_filter_linear || vol_min_filter_point) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ELSE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeScalarOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(uint32_t(has_vol_mag_filter));
++stat_.instruction_count;
++stat_.mov_instruction_count;
}
// Close the fetch constant filter check.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
} else {
// Mask the bit offset (1 for vol_min_filter, 0 for
// vol_mag_filter) in the fetch constant.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeScalarOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Extract the filter from dword 4 of the fetch constant
// ([1].x or [2].z).
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeScalarOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
2 * (tfetch_index & 1), 3));
shader_code_.push_back(cbuffer_index_fetch_constants_);
shader_code_.push_back(
uint32_t(CbufferRegister::kFetchConstants));
shader_code_.push_back(tfetch_pair_offset + 1 +
(tfetch_index & 1));
++stat_.instruction_count;
++stat_.uint_instruction_count;
}
}
} else {
if (!has_vol_mag_filter) {
// Extract the magnification filter when there are no
// derivatives from bit 0 of dword 4 of the fetch constant
// ([1].x or [2].z).
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
2 * (tfetch_index & 1), 3));
shader_code_.push_back(cbuffer_index_fetch_constants_);
shader_code_.push_back(
uint32_t(CbufferRegister::kFetchConstants));
shader_code_.push_back(tfetch_pair_offset + 1 +
(tfetch_index & 1));
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1);
++stat_.instruction_count;
++stat_.uint_instruction_count;
}
}
}
if (!vol_mag_filter_point || !vol_min_filter_point) {
if (!vol_mag_filter_linear || !vol_min_filter_linear) {
// Check if using linear filtering between array layers.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
vol_filter_temp_linear_test) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(vol_filter_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
}
// Floor the layer index to get the linear interpolation factor.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ROUND_NI) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(coord_temp);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Get the fraction of the layer index, with i + 0.5 right between
// layers, as the linear interpolation factor between layers Z and
// Z + 1.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1) |
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
shader_code_.push_back(ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
D3D10_SB_OPERAND_MODIFIER_NEG));
shader_code_.push_back(vol_filter_temp);
++stat_.instruction_count;
++stat_.float_instruction_count;
// Floor the layer index again for sampling.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ROUND_NI) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(coord_temp);
++stat_.instruction_count;
++stat_.float_instruction_count;
if (!vol_mag_filter_linear || !vol_min_filter_linear) {
// Close the linear filtering check.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
}
if (instr.attributes.unnormalized_coordinates || offset_z != 0.0f) {
// Handle 3D texture coordinates - may need to normalize and/or add
@ -1907,10 +2260,16 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
++stat_.float_instruction_count;
}
// Allocate the register for the value from the signed texture.
// Allocate the register for the value from the signed texture, and also
// for the second array layer and lerping the layers.
uint32_t signed_value_temp = instr.opcode == FetchOpcode::kTextureFetch
? PushSystemTemp()
: UINT32_MAX;
uint32_t vol_filter_lerp_temp = UINT32_MAX;
if (vol_filter_temp != UINT32_MAX &&
(!vol_mag_filter_point || !vol_min_filter_point)) {
vol_filter_lerp_temp = PushSystemTemp();
}
// tfetch1D/2D/Cube just fetch directly. tfetch3D needs to fetch either
// the 3D texture or the 2D stacked texture, so two sample instructions
@ -1936,159 +2295,265 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
// Sample both unsigned and signed.
for (uint32_t j = 0; j < 2; ++j) {
if (instr.opcode == FetchOpcode::kGetTextureComputedLod) {
// The non-pixel-shader case should be handled before because it
// just returns a constant in this case.
assert_true(IsDxbcPixelShader());
uint32_t srv_register_current =
i ? srv_registers_stacked[j] : srv_registers[j];
uint32_t target_temp_current =
j ? signed_value_temp : system_temp_pv_;
if (instr.opcode == FetchOpcode::kGetTextureComputedLod) {
// The non-pixel-shader case should be handled before because it
// just returns a constant in this case.
assert_true(IsDxbcPixelShader());
replicate_result = true;
i ? srv_registers_stacked[0] : srv_registers[0];
replicate_result = true;
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_1_SB_OPCODE_LOD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current);
shader_code_.push_back(
EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register);
++stat_.instruction_count;
++stat_.lod_instructions;
// Apply the LOD bias if used.
if (instr.attributes.lod_bias != 0.0f) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_1_SB_OPCODE_LOD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current);
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(
EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register);
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(
*reinterpret_cast<const uint32_t*>(&instr.attributes.lod_bias));
++stat_.instruction_count;
++stat_.lod_instructions;
// Apply the LOD bias if used.
if (instr.attributes.lod_bias != 0.0f) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(*reinterpret_cast<const uint32_t*>(
&instr.attributes.lod_bias));
++stat_.instruction_count;
++stat_.float_instruction_count;
}
// In this case, only the unsigned variant is accessed because data
// doesn't matter.
break;
} else if (instr.attributes.use_register_lod) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current);
shader_code_.push_back(
EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, lod_temp_component, 1));
shader_code_.push_back(lod_temp);
++stat_.instruction_count;
++stat_.texture_normal_instructions;
} else if (instr.attributes.use_register_gradients) {
// TODO(Triang3l): Apply the LOD bias somehow for register gradients
// (possibly will require moving the bias to the sampler, which may
// be not very good considering the sampler count is very limited).
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_D) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current);
shader_code_.push_back(
EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_grad_h_lod_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_grad_v_);
++stat_.instruction_count;
++stat_.texture_gradient_instructions;
} else {
// 3 different DXBC opcodes handled here:
// - sample_l, when not using a computed LOD or not in a pixel
// shader, in this case, LOD (0 + bias) is sampled.
// - sample, when sampling in a pixel shader (thus with derivatives)
// with a computed LOD.
// - sample_b, when sampling in a pixel shader with a biased
// computed LOD.
// Both sample_l and sample_b should add the LOD bias as the last
// operand in our case.
bool explicit_lod =
!instr.attributes.use_computed_lod || !IsDxbcPixelShader();
if (explicit_lod) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13));
} else if (instr.attributes.lod_bias != 0.0f) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_B) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13));
} else {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
}
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current);
shader_code_.push_back(
EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register);
if (explicit_lod || instr.attributes.lod_bias != 0.0f) {
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(*reinterpret_cast<const uint32_t*>(
&instr.attributes.lod_bias));
}
++stat_.instruction_count;
if (!explicit_lod && instr.attributes.lod_bias != 0.0f) {
++stat_.texture_bias_instructions;
} else {
++stat_.texture_normal_instructions;
++stat_.float_instruction_count;
}
} else {
// Sample both unsigned and signed, and for stacked textures, two
// samples if filtering is needed.
for (uint32_t j = 0; j < 2; ++j) {
uint32_t srv_register_current =
i ? srv_registers_stacked[j] : srv_registers[j];
uint32_t target_temp_sign = j ? signed_value_temp : system_temp_pv_;
for (uint32_t k = 0;
k < (vol_filter_lerp_temp != UINT32_MAX ? 2u : 1u); ++k) {
uint32_t target_temp_current =
k ? vol_filter_lerp_temp : target_temp_sign;
if (k) {
if (!vol_mag_filter_linear || !vol_min_filter_linear) {
// Check if array layer filtering is enabled and need one more
// sample.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
vol_filter_temp_linear_test) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(vol_filter_temp);
++stat_.instruction_count;
++stat_.dynamic_flow_control_count;
}
// Go to the next array texture sample.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0x3F800000);
++stat_.instruction_count;
++stat_.float_instruction_count;
}
if (instr.attributes.use_register_lod) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current);
shader_code_.push_back(EncodeZeroComponentOperand(
D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, lod_temp_component, 1));
shader_code_.push_back(lod_temp);
++stat_.instruction_count;
++stat_.texture_normal_instructions;
} else if (instr.attributes.use_register_gradients) {
// TODO(Triang3l): Apply the LOD bias somehow for register
// gradients (possibly will require moving the bias to the
// sampler, which may be not very good considering the sampler
// count is very limited).
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_D) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current);
shader_code_.push_back(EncodeZeroComponentOperand(
D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_grad_h_lod_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_grad_v_);
++stat_.instruction_count;
++stat_.texture_gradient_instructions;
} else {
// 3 different DXBC opcodes handled here:
// - sample_l, when not using a computed LOD or not in a pixel
// shader, in this case, LOD (0 + bias) is sampled.
// - sample, when sampling in a pixel shader (thus with
// derivatives) with a computed LOD.
// - sample_b, when sampling in a pixel shader with a biased
// computed LOD.
// Both sample_l and sample_b should add the LOD bias as the
// last operand in our case.
bool explicit_lod =
!instr.attributes.use_computed_lod || !IsDxbcPixelShader();
if (explicit_lod) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13));
} else if (instr.attributes.lod_bias != 0.0f) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_B) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13));
} else {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
}
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current);
shader_code_.push_back(EncodeZeroComponentOperand(
D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register);
if (explicit_lod || instr.attributes.lod_bias != 0.0f) {
shader_code_.push_back(EncodeScalarOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(*reinterpret_cast<const uint32_t*>(
&instr.attributes.lod_bias));
}
++stat_.instruction_count;
if (!explicit_lod && instr.attributes.lod_bias != 0.0f) {
++stat_.texture_bias_instructions;
} else {
++stat_.texture_normal_instructions;
}
}
if (k) {
// b - a
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP,
kSwizzleXYZW, 1) |
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
shader_code_.push_back(
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
D3D10_SB_OPERAND_MODIFIER_NEG));
shader_code_.push_back(target_temp_sign);
++stat_.instruction_count;
++stat_.float_instruction_count;
// a + (b - a) * factor
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(target_temp_sign);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(vol_filter_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(target_temp_sign);
++stat_.instruction_count;
++stat_.float_instruction_count;
if (!j) {
// Go back to the first layer to sample the signed texture.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeScalarOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(0xBF800000u);
++stat_.instruction_count;
++stat_.float_instruction_count;
}
if (!vol_mag_filter_linear || !vol_min_filter_linear) {
// Close the array layer filtering check.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count;
}
}
}
}
}
@ -2343,6 +2808,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
PopSystemTemp();
}
if (vol_filter_lerp_temp != UINT32_MAX) {
PopSystemTemp();
}
if (signed_value_temp != UINT32_MAX) {
PopSystemTemp();
}
@ -2351,6 +2819,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
}
}
if (vol_filter_temp != UINT32_MAX) {
PopSystemTemp();
}
if (size_and_is_3d_temp != UINT32_MAX) {
PopSystemTemp();
}

View File

@ -440,6 +440,8 @@ struct ParsedTextureFetchInstruction {
TextureFilter min_filter = TextureFilter::kUseFetchConst;
TextureFilter mip_filter = TextureFilter::kUseFetchConst;
AnisoFilter aniso_filter = AnisoFilter::kUseFetchConst;
TextureFilter vol_mag_filter = TextureFilter::kUseFetchConst;
TextureFilter vol_min_filter = TextureFilter::kUseFetchConst;
bool use_computed_lod = true;
bool use_register_lod = false;
bool use_register_gradients = false;

View File

@ -1035,6 +1035,8 @@ void ShaderTranslator::ParseTextureFetchInstruction(
i.attributes.min_filter = op.min_filter();
i.attributes.mip_filter = op.mip_filter();
i.attributes.aniso_filter = op.aniso_filter();
i.attributes.vol_mag_filter = op.vol_mag_filter();
i.attributes.vol_min_filter = op.vol_min_filter();
i.attributes.use_computed_lod = op.use_computed_lod();
i.attributes.use_register_lod = op.use_register_lod();
i.attributes.use_register_gradients = op.use_register_gradients();

View File

@ -423,6 +423,16 @@ void ParsedTextureFetchInstruction::Disassemble(StringBuffer* out) const {
", AnisoFilter=%s",
kAnisoFilterNames[static_cast<int>(attributes.aniso_filter)]);
}
if (attributes.vol_mag_filter != TextureFilter::kUseFetchConst) {
out->AppendFormat(
", VolMagFilter=%s",
kTextureFilterNames[static_cast<int>(attributes.vol_mag_filter)]);
}
if (attributes.vol_min_filter != TextureFilter::kUseFetchConst) {
out->AppendFormat(
", VolMinFilter=%s",
kTextureFilterNames[static_cast<int>(attributes.vol_min_filter)]);
}
if (!attributes.use_computed_lod) {
out->Append(", UseComputedLOD=false");
}

View File

@ -634,6 +634,14 @@ struct TextureFetchInstruction {
// Returns the raw `aniso_filter` bitfield of the instruction reinterpreted as
// an AnisoFilter enumerator.
AnisoFilter aniso_filter() const {
return static_cast<AnisoFilter>(data_.aniso_filter);
}
// True when the `vol_mag_filter` bitfield holds anything other than 0x3.
// NOTE(review): 0x3 presumably corresponds to TextureFilter::kUseFetchConst
// ("take the filter from the fetch constant") - confirm against the enum.
bool has_vol_mag_filter() const { return data_.vol_mag_filter != 0x3; }
// Returns the raw `vol_mag_filter` bitfield of the instruction reinterpreted
// as a TextureFilter enumerator (no validation of the value is performed).
TextureFilter vol_mag_filter() const {
return static_cast<TextureFilter>(data_.vol_mag_filter);
}
// True when the `vol_min_filter` bitfield holds anything other than 0x3.
// NOTE(review): 0x3 presumably corresponds to TextureFilter::kUseFetchConst
// ("take the filter from the fetch constant") - confirm against the enum.
bool has_vol_min_filter() const { return data_.vol_min_filter != 0x3; }
// Returns the raw `vol_min_filter` bitfield of the instruction reinterpreted
// as a TextureFilter enumerator (no validation of the value is performed).
TextureFilter vol_min_filter() const {
return static_cast<TextureFilter>(data_.vol_min_filter);
}
// True when the `use_comp_lod` bit of the instruction is set to 1.
bool use_computed_lod() const { return data_.use_comp_lod == 1; }
// True when the `use_reg_lod` bit of the instruction is set to 1.
bool use_register_lod() const { return data_.use_reg_lod == 1; }
// True when the `use_reg_gradients` bit of the instruction is set to 1.
bool use_register_gradients() const { return data_.use_reg_gradients == 1; }

View File

@ -103,12 +103,22 @@ inline std::string TranslateAnsiStringAddress(const Memory* memory,
// Converts a guest X_UNICODE_STRING into a host std::wstring.
// Returns an empty string when `unicode_string` is null or when its `length`
// field is zero.
// NOTE(review): this span appears to be a rendered diff hunk without +/-
// markers, so the superseded lines (the combined null/length check and the
// direct std::wstring construction from a `const wchar_t*`) are shown next to
// the lines that replace them. Read it as a change record, not as compilable
// code.
inline std::wstring TranslateUnicodeString(
const Memory* memory, const X_UNICODE_STRING* unicode_string) {
if (!unicode_string || !unicode_string->length) {
if (!unicode_string) {
return L"";
}
return std::wstring(
memory->TranslateVirtual<const wchar_t*>(unicode_string->pointer),
unicode_string->length);
// NOTE(review): `length` is used below as the number of 16-bit elements to
// read. In the NT UNICODE_STRING layout, Length is a byte count; confirm which
// convention X_UNICODE_STRING follows, otherwise this may read twice as far
// as intended.
uint16_t length = unicode_string->length;
if (!length) {
return L"";
}
// View the guest buffer as 16-bit units wrapped in xe::be<>, rather than as
// host wchar_t. Presumably xe::be<uint16_t> converts from big-endian guest
// byte order when read as uint16_t - confirm against its definition.
const xe::be<uint16_t>* guest_string =
memory->TranslateVirtual<const xe::be<uint16_t>*>(
unicode_string->pointer);
std::wstring translated_string;
translated_string.reserve(length);
// Append element by element: each guest unit is converted to uint16_t and
// then widened to the host's wchar_t.
for (uint16_t i = 0; i < length; ++i) {
translated_string += wchar_t(uint16_t(guest_string[i]));
}
return translated_string;
}
} // namespace util

View File

@ -119,18 +119,58 @@ static_assert_size(XMA_CONTEXT_INIT, 56);
dword_result_t XMAInitializeContext(lpvoid_t context_ptr,
pointer_t<XMA_CONTEXT_INIT> context_init) {
// Input buffers may be null (buffer 1 in Tony Hawk's American Wasteland).
// Convert to host endianness.
uint32_t input_buffer_0_guest_ptr = context_init->input_buffer_0_ptr;
uint32_t input_buffer_0_physical_address = 0;
if (input_buffer_0_guest_ptr) {
input_buffer_0_physical_address =
kernel_memory()->GetPhysicalAddress(input_buffer_0_guest_ptr);
// Xenia-specific safety check.
assert_true(input_buffer_0_physical_address != UINT32_MAX);
if (input_buffer_0_physical_address == UINT32_MAX) {
XELOGE(
"XMAInitializeContext: Invalid input buffer 0 virtual address %.8X",
input_buffer_0_guest_ptr);
return X_E_FALSE;
}
}
uint32_t input_buffer_1_guest_ptr = context_init->input_buffer_1_ptr;
uint32_t input_buffer_1_physical_address = 0;
if (input_buffer_1_guest_ptr) {
input_buffer_1_physical_address =
kernel_memory()->GetPhysicalAddress(input_buffer_1_guest_ptr);
assert_true(input_buffer_1_physical_address != UINT32_MAX);
if (input_buffer_1_physical_address == UINT32_MAX) {
XELOGE(
"XMAInitializeContext: Invalid input buffer 1 virtual address %.8X",
input_buffer_1_guest_ptr);
return X_E_FALSE;
}
}
uint32_t output_buffer_guest_ptr = context_init->output_buffer_ptr;
assert_not_zero(output_buffer_guest_ptr);
uint32_t output_buffer_physical_address =
kernel_memory()->GetPhysicalAddress(output_buffer_guest_ptr);
assert_true(output_buffer_physical_address != UINT32_MAX);
if (output_buffer_physical_address == UINT32_MAX) {
XELOGE("XMAInitializeContext: Invalid output buffer virtual address %.8X",
output_buffer_guest_ptr);
return X_E_FALSE;
}
std::memset(context_ptr, 0, sizeof(XMA_CONTEXT_DATA));
XMA_CONTEXT_DATA context(context_ptr);
context.input_buffer_0_ptr = context_init->input_buffer_0_ptr;
context.input_buffer_0_ptr = input_buffer_0_physical_address;
context.input_buffer_0_packet_count =
context_init->input_buffer_0_packet_count;
context.input_buffer_1_ptr = context_init->input_buffer_1_ptr;
context.input_buffer_1_ptr = input_buffer_1_physical_address;
context.input_buffer_1_packet_count =
context_init->input_buffer_1_packet_count;
context.input_buffer_read_offset = context_init->input_buffer_read_offset;
context.output_buffer_ptr = context_init->output_buffer_ptr;
context.output_buffer_ptr = output_buffer_physical_address;
context.output_buffer_block_count = context_init->output_buffer_block_count;
// context.work_buffer = context_init->work_buffer; // ?