Merge remote-tracking branch 'upstream/master' into canary
This commit is contained in:
commit
bcc571e574
|
@ -1384,8 +1384,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
// thick outlines with SSAA there.
|
// thick outlines with SSAA there.
|
||||||
float offset_x = instr.attributes.offset_x + (1.0f / 1024.0f);
|
float offset_x = instr.attributes.offset_x + (1.0f / 1024.0f);
|
||||||
if (instr.opcode == FetchOpcode::kGetTextureWeights) {
|
if (instr.opcode == FetchOpcode::kGetTextureWeights) {
|
||||||
// Needed for correct shadow filtering (at least in Halo 3).
|
// Bilinear filtering (for shadows, for instance, in Halo 3), 0.5 -
|
||||||
offset_x += 0.5f;
|
// exactly the pixel.
|
||||||
|
offset_x -= 0.5f;
|
||||||
}
|
}
|
||||||
float offset_y = 0.0f, offset_z = 0.0f;
|
float offset_y = 0.0f, offset_z = 0.0f;
|
||||||
if (instr.dimension == TextureDimension::k2D ||
|
if (instr.dimension == TextureDimension::k2D ||
|
||||||
|
@ -1393,7 +1394,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
instr.dimension == TextureDimension::kCube) {
|
instr.dimension == TextureDimension::kCube) {
|
||||||
offset_y = instr.attributes.offset_y + (1.0f / 1024.0f);
|
offset_y = instr.attributes.offset_y + (1.0f / 1024.0f);
|
||||||
if (instr.opcode == FetchOpcode::kGetTextureWeights) {
|
if (instr.opcode == FetchOpcode::kGetTextureWeights) {
|
||||||
offset_y += 0.5f;
|
offset_y -= 0.5f;
|
||||||
}
|
}
|
||||||
// Don't care about the Z offset for cubemaps when getting weights because
|
// Don't care about the Z offset for cubemaps when getting weights because
|
||||||
// zero Z will be returned anyway (the face index doesn't participate in
|
// zero Z will be returned anyway (the face index doesn't participate in
|
||||||
|
@ -1406,12 +1407,34 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
// Z is the face index for cubemaps, so don't apply the epsilon to it.
|
// Z is the face index for cubemaps, so don't apply the epsilon to it.
|
||||||
offset_z += 1.0f / 1024.0f;
|
offset_z += 1.0f / 1024.0f;
|
||||||
if (instr.opcode == FetchOpcode::kGetTextureWeights) {
|
if (instr.opcode == FetchOpcode::kGetTextureWeights) {
|
||||||
offset_z += 0.5f;
|
offset_z -= 0.5f;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Gather info about filtering across array layers.
|
||||||
|
// Use the magnification filter when no derivatives:
|
||||||
|
// https://stackoverflow.com/questions/40328956/difference-between-sample-and-samplelevel-wrt-texture-filtering
|
||||||
|
bool vol_min_filter_applicable =
|
||||||
|
instr.opcode == FetchOpcode::kTextureFetch &&
|
||||||
|
(instr.attributes.use_register_gradients ||
|
||||||
|
(instr.attributes.use_computed_lod && IsDxbcPixelShader()));
|
||||||
|
bool has_vol_mag_filter =
|
||||||
|
instr.attributes.vol_mag_filter != TextureFilter::kUseFetchConst;
|
||||||
|
bool has_vol_min_filter =
|
||||||
|
vol_min_filter_applicable
|
||||||
|
? instr.attributes.vol_min_filter != TextureFilter::kUseFetchConst
|
||||||
|
: has_vol_mag_filter;
|
||||||
|
bool vol_mag_filter_linear =
|
||||||
|
instr.attributes.vol_mag_filter == TextureFilter::kLinear;
|
||||||
|
bool vol_min_filter_linear =
|
||||||
|
vol_min_filter_applicable
|
||||||
|
? instr.attributes.vol_min_filter == TextureFilter::kLinear
|
||||||
|
: vol_mag_filter_linear;
|
||||||
|
bool vol_mag_filter_point = has_vol_mag_filter && !vol_mag_filter_linear;
|
||||||
|
bool vol_min_filter_point = has_vol_min_filter && !vol_min_filter_linear;
|
||||||
|
|
||||||
// Get the texture size if needed, apply offset and switch between
|
// Get the texture size if needed, apply offset and switch between
|
||||||
// normalized and unnormalized coordinates if needed. The offset is
|
// normalized and unnormalized coordinates if needed. The offset is
|
||||||
// fractional on the Xbox 360 (has 0.5 granularity), unlike in Direct3D 12,
|
// fractional on the Xbox 360 (has 0.5 granularity), unlike in Direct3D 12,
|
||||||
|
@ -1424,6 +1447,14 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
// unlikely to be used on purpose.
|
// unlikely to be used on purpose.
|
||||||
// http://web.archive.org/web/20090514012026/http://msdn.microsoft.com:80/en-us/library/bb313957.aspx
|
// http://web.archive.org/web/20090514012026/http://msdn.microsoft.com:80/en-us/library/bb313957.aspx
|
||||||
uint32_t size_and_is_3d_temp = UINT32_MAX;
|
uint32_t size_and_is_3d_temp = UINT32_MAX;
|
||||||
|
// For stacked textures, if point sampling is not forced in the instruction:
|
||||||
|
// X - whether linear filtering should be done across layers (for color
|
||||||
|
// grading LUTs in Unreal Engine 3 games and Burnout Revenge), unless
|
||||||
|
// the filter is known from the instruction for all cases.
|
||||||
|
// Y - lerp factor between the two layers, unless only point sampling can be
|
||||||
|
// used.
|
||||||
|
uint32_t vol_filter_temp = UINT32_MAX;
|
||||||
|
bool vol_filter_temp_linear_test = D3D10_SB_INSTRUCTION_TEST_NONZERO;
|
||||||
// With 1/1024 this will always be true anyway, but let's keep the shorter
|
// With 1/1024 this will always be true anyway, but let's keep the shorter
|
||||||
// path without the offset in case some day this hack won't be used anymore
|
// path without the offset in case some day this hack won't be used anymore
|
||||||
// somehow.
|
// somehow.
|
||||||
|
@ -1432,8 +1463,22 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
instr.attributes.unnormalized_coordinates ||
|
instr.attributes.unnormalized_coordinates ||
|
||||||
instr.dimension == TextureDimension::k3D) {
|
instr.dimension == TextureDimension::k3D) {
|
||||||
size_and_is_3d_temp = PushSystemTemp();
|
size_and_is_3d_temp = PushSystemTemp();
|
||||||
|
if (instr.opcode == FetchOpcode::kTextureFetch &&
|
||||||
|
instr.dimension == TextureDimension::k3D) {
|
||||||
|
uint32_t vol_filter_temp_components = 0b0000;
|
||||||
|
if (!has_vol_mag_filter || !has_vol_min_filter ||
|
||||||
|
vol_mag_filter_linear != vol_min_filter_linear) {
|
||||||
|
vol_filter_temp_components |= 0b0011;
|
||||||
|
} else if (vol_mag_filter_linear || vol_min_filter_linear) {
|
||||||
|
vol_filter_temp_components |= 0b0010;
|
||||||
|
}
|
||||||
|
// Initialize to 0 to break register dependency.
|
||||||
|
if (vol_filter_temp_components != 0) {
|
||||||
|
vol_filter_temp = PushSystemTemp(vol_filter_temp_components);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Will use fetch constants for the size.
|
// Will use fetch constants for the size and for stacked texture filter.
|
||||||
if (cbuffer_index_fetch_constants_ == kCbufferIndexUnallocated) {
|
if (cbuffer_index_fetch_constants_ == kCbufferIndexUnallocated) {
|
||||||
cbuffer_index_fetch_constants_ = cbuffer_count_++;
|
cbuffer_index_fetch_constants_ = cbuffer_count_++;
|
||||||
}
|
}
|
||||||
|
@ -1720,8 +1765,13 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.dynamic_flow_control_count;
|
++stat_.dynamic_flow_control_count;
|
||||||
|
|
||||||
|
// Layers on the Xenos are indexed like texels, with 0.5 being exactly
|
||||||
|
// layer 0, but in D3D10+ 0.0 is exactly layer 0. Halo 3 uses i + 0.5
|
||||||
|
// offset for lightmap index, for instance.
|
||||||
|
float offset_layer = offset_z - 0.5f;
|
||||||
|
|
||||||
if (instr.attributes.unnormalized_coordinates) {
|
if (instr.attributes.unnormalized_coordinates) {
|
||||||
if (offset_z != 0.f) {
|
if (offset_layer != 0.0f) {
|
||||||
// Add the offset to the array layer.
|
// Add the offset to the array layer.
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
|
||||||
|
@ -1735,13 +1785,13 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
*reinterpret_cast<const uint32_t*>(&offset_z));
|
*reinterpret_cast<const uint32_t*>(&offset_layer));
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Unnormalize the array layer and apply the offset.
|
// Unnormalize the array layer and apply the offset.
|
||||||
if (offset_z != 0.0f) {
|
if (offset_layer != 0.0f) {
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) |
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
|
@ -1759,28 +1809,322 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
|
||||||
shader_code_.push_back(size_and_is_3d_temp);
|
shader_code_.push_back(size_and_is_3d_temp);
|
||||||
if (offset_z != 0.0f) {
|
if (offset_layer != 0.0f) {
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
*reinterpret_cast<const uint32_t*>(&offset_z));
|
*reinterpret_cast<const uint32_t*>(&offset_layer));
|
||||||
}
|
}
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Truncate the array layer index. Halo 3 uses integer.5 coordinates,
|
if (vol_filter_temp != UINT32_MAX) {
|
||||||
// with Direct3D 10+ round-to-nearest-even rule + epsilon wrong layers
|
if (vol_min_filter_applicable) {
|
||||||
// are fetched.
|
if (!has_vol_mag_filter || !has_vol_min_filter ||
|
||||||
// TODO(Triang3l): Investigate the correct rounding.
|
vol_mag_filter_linear != vol_min_filter_linear) {
|
||||||
// TODO(Triang3l): Support vol_mag_filter and vol_min_filter for 2D
|
// Check if magnifying (derivative <= 1, according to OpenGL
|
||||||
// arrays and maybe even 3D textures (color grading LUT in Burnout
|
// rules) or minifying (> 1) the texture across Z. Get the
|
||||||
// Revenge).
|
// maximum of absolutes of the two derivatives of the array
|
||||||
|
// layer, either explicit or implicit.
|
||||||
|
if (instr.attributes.use_register_gradients) {
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ROUND_Z) |
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 2, 1) |
|
||||||
|
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
|
||||||
|
D3D10_SB_OPERAND_MODIFIER_ABS));
|
||||||
|
shader_code_.push_back(system_temp_grad_h_lod_);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 2, 1) |
|
||||||
|
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
|
||||||
|
D3D10_SB_OPERAND_MODIFIER_ABS));
|
||||||
|
shader_code_.push_back(system_temp_grad_v_);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
} else {
|
||||||
|
for (uint32_t i = 0; i < 2; ++i) {
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(
|
||||||
|
i ? D3D11_SB_OPCODE_DERIV_RTY_COARSE
|
||||||
|
: D3D11_SB_OPCODE_DERIV_RTX_COARSE) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 1 << i, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
|
||||||
|
shader_code_.push_back(coord_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
}
|
||||||
|
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1) |
|
||||||
|
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
|
||||||
|
D3D10_SB_OPERAND_MODIFIER_ABS));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 1, 1) |
|
||||||
|
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
|
||||||
|
D3D10_SB_OPERAND_MODIFIER_ABS));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if minifying.
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_LT) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
|
shader_code_.push_back(0x3F800000);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
|
||||||
|
if (has_vol_mag_filter || has_vol_min_filter) {
|
||||||
|
if (has_vol_mag_filter && has_vol_min_filter) {
|
||||||
|
// Both from the instruction.
|
||||||
|
assert_true(vol_mag_filter_linear != vol_min_filter_linear);
|
||||||
|
if (vol_mag_filter_linear) {
|
||||||
|
// Either linear when minifying (non-zero) or linear when
|
||||||
|
// magnifying (zero).
|
||||||
|
vol_filter_temp_linear_test =
|
||||||
|
D3D10_SB_INSTRUCTION_TEST_ZERO;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Check if need to use the filter from the fetch constant.
|
||||||
|
// Has mag filter - need the fetch constant filter when
|
||||||
|
// minifying (non-zero minification test result).
|
||||||
|
// Has min filter - need it when magnifying (zero).
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||||
|
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||||
|
has_vol_mag_filter
|
||||||
|
? D3D10_SB_INSTRUCTION_TEST_NONZERO
|
||||||
|
: D3D10_SB_INSTRUCTION_TEST_ZERO) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.dynamic_flow_control_count;
|
||||||
|
|
||||||
|
// Take the filter from the dword 4 of the fetch constant
|
||||||
|
// ([1].x or [2].z) if it's not in the instruction.
|
||||||
|
// Has mag filter - this will be executed for minification
|
||||||
|
// (bit 1).
|
||||||
|
// Has min filter - for magnification (bit 0).
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
|
||||||
|
2 * (tfetch_index & 1), 3));
|
||||||
|
shader_code_.push_back(cbuffer_index_fetch_constants_);
|
||||||
|
shader_code_.push_back(
|
||||||
|
uint32_t(CbufferRegister::kFetchConstants));
|
||||||
|
shader_code_.push_back(tfetch_pair_offset + 1 +
|
||||||
|
(tfetch_index & 1));
|
||||||
|
shader_code_.push_back(EncodeScalarOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
|
shader_code_.push_back(has_vol_mag_filter ? (1 << 1)
|
||||||
|
: (1 << 0));
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.uint_instruction_count;
|
||||||
|
|
||||||
|
// If not using the filter from the fetch constant, set the
|
||||||
|
// value from the instruction.
|
||||||
|
// Need to change this for:
|
||||||
|
// - Magnifying (zero set) and linear (non-zero needed) vol
|
||||||
|
// mag filter.
|
||||||
|
// - Minifying (non-zero set) and point (zero needed) vol
|
||||||
|
// min filter.
|
||||||
|
// Already the expected zero or non-zero value for:
|
||||||
|
// - Magnifying (zero set) and point (zero needed) vol mag
|
||||||
|
// filter.
|
||||||
|
// - Minifying (non-zero set) and linear (non-zero needed)
|
||||||
|
// vol min filter.
|
||||||
|
if (vol_mag_filter_linear || vol_min_filter_point) {
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ELSE) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||||
|
++stat_.instruction_count;
|
||||||
|
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
shader_code_.push_back(EncodeScalarOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
|
shader_code_.push_back(uint32_t(has_vol_mag_filter));
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.mov_instruction_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close the fetch constant filter check.
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||||
|
++stat_.instruction_count;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Mask the bit offset (1 for vol_min_filter, 0 for
|
||||||
|
// vol_mag_filter) in the fetch constant.
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
shader_code_.push_back(EncodeScalarOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
|
shader_code_.push_back(1);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.uint_instruction_count;
|
||||||
|
|
||||||
|
// Extract the filter from dword 4 of the fetch constant
|
||||||
|
// ([1].x or [2].z).
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
shader_code_.push_back(EncodeScalarOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
|
shader_code_.push_back(1);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
|
||||||
|
2 * (tfetch_index & 1), 3));
|
||||||
|
shader_code_.push_back(cbuffer_index_fetch_constants_);
|
||||||
|
shader_code_.push_back(
|
||||||
|
uint32_t(CbufferRegister::kFetchConstants));
|
||||||
|
shader_code_.push_back(tfetch_pair_offset + 1 +
|
||||||
|
(tfetch_index & 1));
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.uint_instruction_count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!has_vol_mag_filter) {
|
||||||
|
// Extract the magnification filter when there are no
|
||||||
|
// derivatives from bit 0 of dword 4 of the fetch constant
|
||||||
|
// ([1].x or [2].z).
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
|
||||||
|
2 * (tfetch_index & 1), 3));
|
||||||
|
shader_code_.push_back(cbuffer_index_fetch_constants_);
|
||||||
|
shader_code_.push_back(
|
||||||
|
uint32_t(CbufferRegister::kFetchConstants));
|
||||||
|
shader_code_.push_back(tfetch_pair_offset + 1 +
|
||||||
|
(tfetch_index & 1));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
|
shader_code_.push_back(1);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.uint_instruction_count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!vol_mag_filter_point || !vol_min_filter_point) {
|
||||||
|
if (!vol_mag_filter_linear || !vol_min_filter_linear) {
|
||||||
|
// Check if using linear filtering between array layers.
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||||
|
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||||
|
vol_filter_temp_linear_test) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.dynamic_flow_control_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Floor the layer index to get the linear interpolation factor.
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ROUND_NI) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
|
||||||
|
shader_code_.push_back(coord_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
|
||||||
|
// Get the fraction of the layer index, with i + 0.5 right between
|
||||||
|
// layers, as the linear interpolation factor between layers Z and
|
||||||
|
// Z + 1.
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
|
||||||
|
shader_code_.push_back(coord_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1) |
|
||||||
|
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
|
||||||
|
shader_code_.push_back(ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
|
||||||
|
D3D10_SB_OPERAND_MODIFIER_NEG));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
|
||||||
|
// Floor the layer index again for sampling.
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ROUND_NI) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
|
||||||
shader_code_.push_back(coord_temp);
|
shader_code_.push_back(coord_temp);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
|
||||||
|
@ -1788,6 +2132,15 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
|
|
||||||
|
if (!vol_mag_filter_linear || !vol_min_filter_linear) {
|
||||||
|
// Close the linear filtering check.
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||||
|
++stat_.instruction_count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (instr.attributes.unnormalized_coordinates || offset_z != 0.0f) {
|
if (instr.attributes.unnormalized_coordinates || offset_z != 0.0f) {
|
||||||
// Handle 3D texture coordinates - may need to normalize and/or add
|
// Handle 3D texture coordinates - may need to normalize and/or add
|
||||||
// the offset. Check if 3D.
|
// the offset. Check if 3D.
|
||||||
|
@ -1907,10 +2260,16 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Allocate the register for the value from the signed texture.
|
// Allocate the register for the value from the signed texture, and also
|
||||||
|
// for the second array layer and lerping the layers.
|
||||||
uint32_t signed_value_temp = instr.opcode == FetchOpcode::kTextureFetch
|
uint32_t signed_value_temp = instr.opcode == FetchOpcode::kTextureFetch
|
||||||
? PushSystemTemp()
|
? PushSystemTemp()
|
||||||
: UINT32_MAX;
|
: UINT32_MAX;
|
||||||
|
uint32_t vol_filter_lerp_temp = UINT32_MAX;
|
||||||
|
if (vol_filter_temp != UINT32_MAX &&
|
||||||
|
(!vol_mag_filter_point || !vol_min_filter_point)) {
|
||||||
|
vol_filter_lerp_temp = PushSystemTemp();
|
||||||
|
}
|
||||||
|
|
||||||
// tfetch1D/2D/Cube just fetch directly. tfetch3D needs to fetch either
|
// tfetch1D/2D/Cube just fetch directly. tfetch3D needs to fetch either
|
||||||
// the 3D texture or the 2D stacked texture, so two sample instructions
|
// the 3D texture or the 2D stacked texture, so two sample instructions
|
||||||
|
@ -1936,23 +2295,19 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
}
|
}
|
||||||
// Sample both unsigned and signed.
|
|
||||||
for (uint32_t j = 0; j < 2; ++j) {
|
|
||||||
uint32_t srv_register_current =
|
|
||||||
i ? srv_registers_stacked[j] : srv_registers[j];
|
|
||||||
uint32_t target_temp_current =
|
|
||||||
j ? signed_value_temp : system_temp_pv_;
|
|
||||||
if (instr.opcode == FetchOpcode::kGetTextureComputedLod) {
|
if (instr.opcode == FetchOpcode::kGetTextureComputedLod) {
|
||||||
// The non-pixel-shader case should be handled before because it
|
// The non-pixel-shader case should be handled before because it
|
||||||
// just returns a constant in this case.
|
// just returns a constant in this case.
|
||||||
assert_true(IsDxbcPixelShader());
|
assert_true(IsDxbcPixelShader());
|
||||||
|
uint32_t srv_register_current =
|
||||||
|
i ? srv_registers_stacked[0] : srv_registers[0];
|
||||||
replicate_result = true;
|
replicate_result = true;
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_1_SB_OPCODE_LOD) |
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_1_SB_OPCODE_LOD) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
|
||||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
shader_code_.push_back(
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
shader_code_.push_back(target_temp_current);
|
shader_code_.push_back(system_temp_pv_);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
shader_code_.push_back(coord_temp);
|
shader_code_.push_back(coord_temp);
|
||||||
|
@ -1973,21 +2328,61 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||||
shader_code_.push_back(target_temp_current);
|
shader_code_.push_back(system_temp_pv_);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||||
shader_code_.push_back(target_temp_current);
|
shader_code_.push_back(system_temp_pv_);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
shader_code_.push_back(*reinterpret_cast<const uint32_t*>(
|
shader_code_.push_back(
|
||||||
&instr.attributes.lod_bias));
|
*reinterpret_cast<const uint32_t*>(&instr.attributes.lod_bias));
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.float_instruction_count;
|
++stat_.float_instruction_count;
|
||||||
}
|
}
|
||||||
// In this case, only the unsigned variant is accessed because data
|
} else {
|
||||||
// doesn't matter.
|
// Sample both unsigned and signed, and for stacked textures, two
|
||||||
break;
|
// samples if filtering is needed.
|
||||||
} else if (instr.attributes.use_register_lod) {
|
for (uint32_t j = 0; j < 2; ++j) {
|
||||||
|
uint32_t srv_register_current =
|
||||||
|
i ? srv_registers_stacked[j] : srv_registers[j];
|
||||||
|
uint32_t target_temp_sign = j ? signed_value_temp : system_temp_pv_;
|
||||||
|
for (uint32_t k = 0;
|
||||||
|
k < (vol_filter_lerp_temp != UINT32_MAX ? 2u : 1u); ++k) {
|
||||||
|
uint32_t target_temp_current =
|
||||||
|
k ? vol_filter_lerp_temp : target_temp_sign;
|
||||||
|
if (k) {
|
||||||
|
if (!vol_mag_filter_linear || !vol_min_filter_linear) {
|
||||||
|
// Check if array layer filtering is enabled and need one more
|
||||||
|
// sample.
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
|
||||||
|
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
|
||||||
|
vol_filter_temp_linear_test) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.dynamic_flow_control_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Go to the next array texture sample.
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
|
||||||
|
shader_code_.push_back(coord_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
|
||||||
|
shader_code_.push_back(coord_temp);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
|
shader_code_.push_back(0x3F800000);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
}
|
||||||
|
if (instr.attributes.use_register_lod) {
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) |
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13));
|
||||||
|
@ -2001,8 +2396,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
|
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
|
||||||
shader_code_.push_back(srv_register_current);
|
shader_code_.push_back(srv_register_current);
|
||||||
shader_code_.push_back(srv_register_current);
|
shader_code_.push_back(srv_register_current);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(EncodeZeroComponentOperand(
|
||||||
EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
|
D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
|
||||||
shader_code_.push_back(sampler_register);
|
shader_code_.push_back(sampler_register);
|
||||||
shader_code_.push_back(sampler_register);
|
shader_code_.push_back(sampler_register);
|
||||||
shader_code_.push_back(EncodeVectorSelectOperand(
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
@ -2011,9 +2406,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
++stat_.instruction_count;
|
++stat_.instruction_count;
|
||||||
++stat_.texture_normal_instructions;
|
++stat_.texture_normal_instructions;
|
||||||
} else if (instr.attributes.use_register_gradients) {
|
} else if (instr.attributes.use_register_gradients) {
|
||||||
// TODO(Triang3l): Apply the LOD bias somehow for register gradients
|
// TODO(Triang3l): Apply the LOD bias somehow for register
|
||||||
// (possibly will require moving the bias to the sampler, which may
|
// gradients (possibly will require moving the bias to the
|
||||||
// be not very good considering the sampler count is very limited).
|
// sampler, which may be not very good considering the sampler
|
||||||
|
// count is very limited).
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(
|
||||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_D) |
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_D) |
|
||||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15));
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15));
|
||||||
|
@ -2027,8 +2423,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
|
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
|
||||||
shader_code_.push_back(srv_register_current);
|
shader_code_.push_back(srv_register_current);
|
||||||
shader_code_.push_back(srv_register_current);
|
shader_code_.push_back(srv_register_current);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(EncodeZeroComponentOperand(
|
||||||
EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
|
D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
|
||||||
shader_code_.push_back(sampler_register);
|
shader_code_.push_back(sampler_register);
|
||||||
shader_code_.push_back(sampler_register);
|
shader_code_.push_back(sampler_register);
|
||||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
@ -2043,12 +2439,12 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
// 3 different DXBC opcodes handled here:
|
// 3 different DXBC opcodes handled here:
|
||||||
// - sample_l, when not using a computed LOD or not in a pixel
|
// - sample_l, when not using a computed LOD or not in a pixel
|
||||||
// shader, in this case, LOD (0 + bias) is sampled.
|
// shader, in this case, LOD (0 + bias) is sampled.
|
||||||
// - sample, when sampling in a pixel shader (thus with derivatives)
|
// - sample, when sampling in a pixel shader (thus with
|
||||||
// with a computed LOD.
|
// derivatives) with a computed LOD.
|
||||||
// - sample_b, when sampling in a pixel shader with a biased
|
// - sample_b, when sampling in a pixel shader with a biased
|
||||||
// computed LOD.
|
// computed LOD.
|
||||||
// Both sample_l and sample_b should add the LOD bias as the last
|
// Both sample_l and sample_b should add the LOD bias as the
|
||||||
// operand in our case.
|
// last operand in our case.
|
||||||
bool explicit_lod =
|
bool explicit_lod =
|
||||||
!instr.attributes.use_computed_lod || !IsDxbcPixelShader();
|
!instr.attributes.use_computed_lod || !IsDxbcPixelShader();
|
||||||
if (explicit_lod) {
|
if (explicit_lod) {
|
||||||
|
@ -2074,13 +2470,13 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
|
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
|
||||||
shader_code_.push_back(srv_register_current);
|
shader_code_.push_back(srv_register_current);
|
||||||
shader_code_.push_back(srv_register_current);
|
shader_code_.push_back(srv_register_current);
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(EncodeZeroComponentOperand(
|
||||||
EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
|
D3D10_SB_OPERAND_TYPE_SAMPLER, 2));
|
||||||
shader_code_.push_back(sampler_register);
|
shader_code_.push_back(sampler_register);
|
||||||
shader_code_.push_back(sampler_register);
|
shader_code_.push_back(sampler_register);
|
||||||
if (explicit_lod || instr.attributes.lod_bias != 0.0f) {
|
if (explicit_lod || instr.attributes.lod_bias != 0.0f) {
|
||||||
shader_code_.push_back(
|
shader_code_.push_back(EncodeScalarOperand(
|
||||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
shader_code_.push_back(*reinterpret_cast<const uint32_t*>(
|
shader_code_.push_back(*reinterpret_cast<const uint32_t*>(
|
||||||
&instr.attributes.lod_bias));
|
&instr.attributes.lod_bias));
|
||||||
}
|
}
|
||||||
|
@ -2091,6 +2487,75 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
++stat_.texture_normal_instructions;
|
++stat_.texture_normal_instructions;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (k) {
|
||||||
|
// b - a
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(target_temp_current);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(target_temp_current);
|
||||||
|
shader_code_.push_back(
|
||||||
|
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP,
|
||||||
|
kSwizzleXYZW, 1) |
|
||||||
|
ENCODE_D3D10_SB_OPERAND_EXTENDED(1));
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER(
|
||||||
|
D3D10_SB_OPERAND_MODIFIER_NEG));
|
||||||
|
shader_code_.push_back(target_temp_sign);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
|
||||||
|
// a + (b - a) * factor
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||||
|
shader_code_.push_back(target_temp_sign);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(target_temp_current);
|
||||||
|
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
|
||||||
|
shader_code_.push_back(vol_filter_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||||
|
shader_code_.push_back(target_temp_sign);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
|
||||||
|
if (!j) {
|
||||||
|
// Go back to the first layer to sample the signed texture.
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||||
|
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
|
||||||
|
shader_code_.push_back(coord_temp);
|
||||||
|
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
|
||||||
|
shader_code_.push_back(coord_temp);
|
||||||
|
shader_code_.push_back(EncodeScalarOperand(
|
||||||
|
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||||
|
shader_code_.push_back(0xBF800000u);
|
||||||
|
++stat_.instruction_count;
|
||||||
|
++stat_.float_instruction_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!vol_mag_filter_linear || !vol_min_filter_linear) {
|
||||||
|
// Close the array layer filtering check.
|
||||||
|
shader_code_.push_back(
|
||||||
|
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) |
|
||||||
|
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
|
||||||
|
++stat_.instruction_count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (instr.dimension == TextureDimension::k3D) {
|
if (instr.dimension == TextureDimension::k3D) {
|
||||||
|
@ -2343,6 +2808,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (vol_filter_lerp_temp != UINT32_MAX) {
|
||||||
|
PopSystemTemp();
|
||||||
|
}
|
||||||
if (signed_value_temp != UINT32_MAX) {
|
if (signed_value_temp != UINT32_MAX) {
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
}
|
}
|
||||||
|
@ -2351,6 +2819,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (vol_filter_temp != UINT32_MAX) {
|
||||||
|
PopSystemTemp();
|
||||||
|
}
|
||||||
if (size_and_is_3d_temp != UINT32_MAX) {
|
if (size_and_is_3d_temp != UINT32_MAX) {
|
||||||
PopSystemTemp();
|
PopSystemTemp();
|
||||||
}
|
}
|
||||||
|
|
|
@ -440,6 +440,8 @@ struct ParsedTextureFetchInstruction {
|
||||||
TextureFilter min_filter = TextureFilter::kUseFetchConst;
|
TextureFilter min_filter = TextureFilter::kUseFetchConst;
|
||||||
TextureFilter mip_filter = TextureFilter::kUseFetchConst;
|
TextureFilter mip_filter = TextureFilter::kUseFetchConst;
|
||||||
AnisoFilter aniso_filter = AnisoFilter::kUseFetchConst;
|
AnisoFilter aniso_filter = AnisoFilter::kUseFetchConst;
|
||||||
|
TextureFilter vol_mag_filter = TextureFilter::kUseFetchConst;
|
||||||
|
TextureFilter vol_min_filter = TextureFilter::kUseFetchConst;
|
||||||
bool use_computed_lod = true;
|
bool use_computed_lod = true;
|
||||||
bool use_register_lod = false;
|
bool use_register_lod = false;
|
||||||
bool use_register_gradients = false;
|
bool use_register_gradients = false;
|
||||||
|
|
|
@ -1035,6 +1035,8 @@ void ShaderTranslator::ParseTextureFetchInstruction(
|
||||||
i.attributes.min_filter = op.min_filter();
|
i.attributes.min_filter = op.min_filter();
|
||||||
i.attributes.mip_filter = op.mip_filter();
|
i.attributes.mip_filter = op.mip_filter();
|
||||||
i.attributes.aniso_filter = op.aniso_filter();
|
i.attributes.aniso_filter = op.aniso_filter();
|
||||||
|
i.attributes.vol_mag_filter = op.vol_mag_filter();
|
||||||
|
i.attributes.vol_min_filter = op.vol_min_filter();
|
||||||
i.attributes.use_computed_lod = op.use_computed_lod();
|
i.attributes.use_computed_lod = op.use_computed_lod();
|
||||||
i.attributes.use_register_lod = op.use_register_lod();
|
i.attributes.use_register_lod = op.use_register_lod();
|
||||||
i.attributes.use_register_gradients = op.use_register_gradients();
|
i.attributes.use_register_gradients = op.use_register_gradients();
|
||||||
|
|
|
@ -423,6 +423,16 @@ void ParsedTextureFetchInstruction::Disassemble(StringBuffer* out) const {
|
||||||
", AnisoFilter=%s",
|
", AnisoFilter=%s",
|
||||||
kAnisoFilterNames[static_cast<int>(attributes.aniso_filter)]);
|
kAnisoFilterNames[static_cast<int>(attributes.aniso_filter)]);
|
||||||
}
|
}
|
||||||
|
if (attributes.vol_mag_filter != TextureFilter::kUseFetchConst) {
|
||||||
|
out->AppendFormat(
|
||||||
|
", VolMagFilter=%s",
|
||||||
|
kTextureFilterNames[static_cast<int>(attributes.vol_mag_filter)]);
|
||||||
|
}
|
||||||
|
if (attributes.vol_min_filter != TextureFilter::kUseFetchConst) {
|
||||||
|
out->AppendFormat(
|
||||||
|
", VolMinFilter=%s",
|
||||||
|
kTextureFilterNames[static_cast<int>(attributes.vol_min_filter)]);
|
||||||
|
}
|
||||||
if (!attributes.use_computed_lod) {
|
if (!attributes.use_computed_lod) {
|
||||||
out->Append(", UseComputedLOD=false");
|
out->Append(", UseComputedLOD=false");
|
||||||
}
|
}
|
||||||
|
|
|
@ -634,6 +634,14 @@ struct TextureFetchInstruction {
|
||||||
AnisoFilter aniso_filter() const {
|
AnisoFilter aniso_filter() const {
|
||||||
return static_cast<AnisoFilter>(data_.aniso_filter);
|
return static_cast<AnisoFilter>(data_.aniso_filter);
|
||||||
}
|
}
|
||||||
|
bool has_vol_mag_filter() const { return data_.vol_mag_filter != 0x3; }
|
||||||
|
TextureFilter vol_mag_filter() const {
|
||||||
|
return static_cast<TextureFilter>(data_.vol_mag_filter);
|
||||||
|
}
|
||||||
|
bool has_vol_min_filter() const { return data_.vol_min_filter != 0x3; }
|
||||||
|
TextureFilter vol_min_filter() const {
|
||||||
|
return static_cast<TextureFilter>(data_.vol_min_filter);
|
||||||
|
}
|
||||||
bool use_computed_lod() const { return data_.use_comp_lod == 1; }
|
bool use_computed_lod() const { return data_.use_comp_lod == 1; }
|
||||||
bool use_register_lod() const { return data_.use_reg_lod == 1; }
|
bool use_register_lod() const { return data_.use_reg_lod == 1; }
|
||||||
bool use_register_gradients() const { return data_.use_reg_gradients == 1; }
|
bool use_register_gradients() const { return data_.use_reg_gradients == 1; }
|
||||||
|
|
|
@ -103,12 +103,22 @@ inline std::string TranslateAnsiStringAddress(const Memory* memory,
|
||||||
|
|
||||||
inline std::wstring TranslateUnicodeString(
|
inline std::wstring TranslateUnicodeString(
|
||||||
const Memory* memory, const X_UNICODE_STRING* unicode_string) {
|
const Memory* memory, const X_UNICODE_STRING* unicode_string) {
|
||||||
if (!unicode_string || !unicode_string->length) {
|
if (!unicode_string) {
|
||||||
return L"";
|
return L"";
|
||||||
}
|
}
|
||||||
return std::wstring(
|
uint16_t length = unicode_string->length;
|
||||||
memory->TranslateVirtual<const wchar_t*>(unicode_string->pointer),
|
if (!length) {
|
||||||
unicode_string->length);
|
return L"";
|
||||||
|
}
|
||||||
|
const xe::be<uint16_t>* guest_string =
|
||||||
|
memory->TranslateVirtual<const xe::be<uint16_t>*>(
|
||||||
|
unicode_string->pointer);
|
||||||
|
std::wstring translated_string;
|
||||||
|
translated_string.reserve(length);
|
||||||
|
for (uint16_t i = 0; i < length; ++i) {
|
||||||
|
translated_string += wchar_t(uint16_t(guest_string[i]));
|
||||||
|
}
|
||||||
|
return translated_string;
|
||||||
}
|
}
|
||||||
} // namespace util
|
} // namespace util
|
||||||
|
|
||||||
|
|
|
@ -119,18 +119,58 @@ static_assert_size(XMA_CONTEXT_INIT, 56);
|
||||||
|
|
||||||
dword_result_t XMAInitializeContext(lpvoid_t context_ptr,
|
dword_result_t XMAInitializeContext(lpvoid_t context_ptr,
|
||||||
pointer_t<XMA_CONTEXT_INIT> context_init) {
|
pointer_t<XMA_CONTEXT_INIT> context_init) {
|
||||||
|
// Input buffers may be null (buffer 1 in Tony Hawk's American Wasteland).
|
||||||
|
// Convert to host endianness.
|
||||||
|
uint32_t input_buffer_0_guest_ptr = context_init->input_buffer_0_ptr;
|
||||||
|
uint32_t input_buffer_0_physical_address = 0;
|
||||||
|
if (input_buffer_0_guest_ptr) {
|
||||||
|
input_buffer_0_physical_address =
|
||||||
|
kernel_memory()->GetPhysicalAddress(input_buffer_0_guest_ptr);
|
||||||
|
// Xenia-specific safety check.
|
||||||
|
assert_true(input_buffer_0_physical_address != UINT32_MAX);
|
||||||
|
if (input_buffer_0_physical_address == UINT32_MAX) {
|
||||||
|
XELOGE(
|
||||||
|
"XMAInitializeContext: Invalid input buffer 0 virtual address %.8X",
|
||||||
|
input_buffer_0_guest_ptr);
|
||||||
|
return X_E_FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uint32_t input_buffer_1_guest_ptr = context_init->input_buffer_1_ptr;
|
||||||
|
uint32_t input_buffer_1_physical_address = 0;
|
||||||
|
if (input_buffer_1_guest_ptr) {
|
||||||
|
input_buffer_1_physical_address =
|
||||||
|
kernel_memory()->GetPhysicalAddress(input_buffer_1_guest_ptr);
|
||||||
|
assert_true(input_buffer_1_physical_address != UINT32_MAX);
|
||||||
|
if (input_buffer_1_physical_address == UINT32_MAX) {
|
||||||
|
XELOGE(
|
||||||
|
"XMAInitializeContext: Invalid input buffer 1 virtual address %.8X",
|
||||||
|
input_buffer_1_guest_ptr);
|
||||||
|
return X_E_FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uint32_t output_buffer_guest_ptr = context_init->output_buffer_ptr;
|
||||||
|
assert_not_zero(output_buffer_guest_ptr);
|
||||||
|
uint32_t output_buffer_physical_address =
|
||||||
|
kernel_memory()->GetPhysicalAddress(output_buffer_guest_ptr);
|
||||||
|
assert_true(output_buffer_physical_address != UINT32_MAX);
|
||||||
|
if (output_buffer_physical_address == UINT32_MAX) {
|
||||||
|
XELOGE("XMAInitializeContext: Invalid output buffer virtual address %.8X",
|
||||||
|
output_buffer_guest_ptr);
|
||||||
|
return X_E_FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
std::memset(context_ptr, 0, sizeof(XMA_CONTEXT_DATA));
|
std::memset(context_ptr, 0, sizeof(XMA_CONTEXT_DATA));
|
||||||
|
|
||||||
XMA_CONTEXT_DATA context(context_ptr);
|
XMA_CONTEXT_DATA context(context_ptr);
|
||||||
|
|
||||||
context.input_buffer_0_ptr = context_init->input_buffer_0_ptr;
|
context.input_buffer_0_ptr = input_buffer_0_physical_address;
|
||||||
context.input_buffer_0_packet_count =
|
context.input_buffer_0_packet_count =
|
||||||
context_init->input_buffer_0_packet_count;
|
context_init->input_buffer_0_packet_count;
|
||||||
context.input_buffer_1_ptr = context_init->input_buffer_1_ptr;
|
context.input_buffer_1_ptr = input_buffer_1_physical_address;
|
||||||
context.input_buffer_1_packet_count =
|
context.input_buffer_1_packet_count =
|
||||||
context_init->input_buffer_1_packet_count;
|
context_init->input_buffer_1_packet_count;
|
||||||
context.input_buffer_read_offset = context_init->input_buffer_read_offset;
|
context.input_buffer_read_offset = context_init->input_buffer_read_offset;
|
||||||
context.output_buffer_ptr = context_init->output_buffer_ptr;
|
context.output_buffer_ptr = output_buffer_physical_address;
|
||||||
context.output_buffer_block_count = context_init->output_buffer_block_count;
|
context.output_buffer_block_count = context_init->output_buffer_block_count;
|
||||||
|
|
||||||
// context.work_buffer = context_init->work_buffer; // ?
|
// context.work_buffer = context_init->work_buffer; // ?
|
||||||
|
|
Loading…
Reference in New Issue