[D3D12] DXBC part of signed textures

This commit is contained in:
Triang3l 2018-10-09 08:31:09 +03:00
parent 6a6e63060b
commit 7603de218b
4 changed files with 313 additions and 218 deletions

View File

@ -35,6 +35,8 @@ void D3D12Shader::SetTexturesAndSamplers(
const DxbcShaderTranslator::TextureSRV& translator_srv = texture_srvs[i]; const DxbcShaderTranslator::TextureSRV& translator_srv = texture_srvs[i];
srv.fetch_constant = translator_srv.fetch_constant; srv.fetch_constant = translator_srv.fetch_constant;
srv.dimension = translator_srv.dimension; srv.dimension = translator_srv.dimension;
srv.is_signed = translator_srv.is_signed;
srv.is_sign_required = translator_srv.is_sign_required;
texture_srvs_.push_back(srv); texture_srvs_.push_back(srv);
used_texture_mask_ |= 1u << translator_srv.fetch_constant; used_texture_mask_ |= 1u << translator_srv.fetch_constant;
} }

View File

@ -39,6 +39,10 @@ class D3D12Shader : public Shader {
struct TextureSRV { struct TextureSRV {
uint32_t fetch_constant; uint32_t fetch_constant;
TextureDimension dimension; TextureDimension dimension;
bool is_signed;
// Whether this SRV must be bound even if it's signed and all components are
// unsigned and vice versa.
bool is_sign_required;
}; };
const TextureSRV* GetTextureSRVs(uint32_t& count_out) const { const TextureSRV* GetTextureSRVs(uint32_t& count_out) const {
count_out = uint32_t(texture_srvs_.size()); count_out = uint32_t(texture_srvs_.size());

View File

@ -3416,7 +3416,9 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
} }
uint32_t DxbcShaderTranslator::FindOrAddTextureSRV(uint32_t fetch_constant, uint32_t DxbcShaderTranslator::FindOrAddTextureSRV(uint32_t fetch_constant,
TextureDimension dimension) { TextureDimension dimension,
bool is_signed,
bool is_sign_required) {
// 1D and 2D textures (including stacked ones) are treated as 2D arrays for // 1D and 2D textures (including stacked ones) are treated as 2D arrays for
// binding and coordinate simplicity. // binding and coordinate simplicity.
if (dimension == TextureDimension::k1D) { if (dimension == TextureDimension::k1D) {
@ -3424,9 +3426,15 @@ uint32_t DxbcShaderTranslator::FindOrAddTextureSRV(uint32_t fetch_constant,
} }
// 1 is added to the return value because T0/t0 is shared memory. // 1 is added to the return value because T0/t0 is shared memory.
for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) { for (uint32_t i = 0; i < uint32_t(texture_srvs_.size()); ++i) {
const TextureSRV& texture_srv = texture_srvs_[i]; TextureSRV& texture_srv = texture_srvs_[i];
if (texture_srv.fetch_constant == fetch_constant && if (texture_srv.fetch_constant == fetch_constant &&
texture_srv.dimension == dimension) { texture_srv.dimension == dimension &&
texture_srv.is_signed == is_signed) {
if (is_sign_required && !texture_srv.is_sign_required) {
// kGetTextureComputedLod uses only the unsigned SRV, which means it
// must be bound even when all components are signed.
texture_srv.is_sign_required = true;
}
return 1 + i; return 1 + i;
} }
} }
@ -3437,6 +3445,8 @@ uint32_t DxbcShaderTranslator::FindOrAddTextureSRV(uint32_t fetch_constant,
TextureSRV new_texture_srv; TextureSRV new_texture_srv;
new_texture_srv.fetch_constant = fetch_constant; new_texture_srv.fetch_constant = fetch_constant;
new_texture_srv.dimension = dimension; new_texture_srv.dimension = dimension;
new_texture_srv.is_signed = is_signed;
new_texture_srv.is_sign_required = is_sign_required;
const char* dimension_name; const char* dimension_name;
switch (dimension) { switch (dimension) {
case TextureDimension::k3D: case TextureDimension::k3D:
@ -3449,7 +3459,8 @@ uint32_t DxbcShaderTranslator::FindOrAddTextureSRV(uint32_t fetch_constant,
dimension_name = "2d"; dimension_name = "2d";
} }
new_texture_srv.name = new_texture_srv.name =
xe::format_string("xe_texture%u_%s", fetch_constant, dimension_name); xe::format_string("xe_texture%u_%s_%c", fetch_constant, dimension_name,
is_signed ? 's' : 'u');
uint32_t srv_register = 1 + uint32_t(texture_srvs_.size()); uint32_t srv_register = 1 + uint32_t(texture_srvs_.size());
texture_srvs_.emplace_back(std::move(new_texture_srv)); texture_srvs_.emplace_back(std::move(new_texture_srv));
return srv_register; return srv_register;
@ -3839,7 +3850,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// tf1[2] tf1[3] tf1[4] tf1[5] // tf1[2] tf1[3] tf1[4] tf1[5]
uint32_t tfetch_pair_offset = (tfetch_index >> 1) * 3; uint32_t tfetch_pair_offset = (tfetch_index >> 1) * 3;
// TODO(Triang3l): kGetTextureBorderColorFrac, kGetTextureGradients. // TODO(Triang3l): kGetTextureBorderColorFrac.
if (!is_pixel_shader() && if (!is_pixel_shader() &&
(instr.opcode == FetchOpcode::kGetTextureComputedLod || (instr.opcode == FetchOpcode::kGetTextureComputedLod ||
instr.opcode == FetchOpcode::kGetTextureGradients)) { instr.opcode == FetchOpcode::kGetTextureGradients)) {
@ -3863,22 +3874,35 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
instr.opcode == FetchOpcode::kGetTextureWeights) { instr.opcode == FetchOpcode::kGetTextureWeights) {
store_result = true; store_result = true;
uint32_t srv_register; // 0 is unsigned, 1 is signed.
uint32_t srv_register_stacked; uint32_t srv_registers[2] = {UINT32_MAX, UINT32_MAX};
uint32_t sampler_register; uint32_t srv_registers_stacked[2] = {UINT32_MAX, UINT32_MAX};
if (instr.opcode == FetchOpcode::kGetTextureWeights) { uint32_t sampler_register = UINT32_MAX;
// Only the fetch constant needed. // Only the fetch constant needed for kGetTextureWeights.
srv_register = UINT32_MAX; if (instr.opcode != FetchOpcode::kGetTextureWeights) {
srv_register_stacked = UINT32_MAX; if (instr.opcode == FetchOpcode::kGetTextureComputedLod) {
sampler_register = UINT32_MAX; // The LOD is a scalar and it doesn't depend on the texture contents, so
} else { // require any variant - unsigned in this case because more texture
srv_register = FindOrAddTextureSRV(tfetch_index, instr.dimension); // formats support it.
// 3D or 2D stacked is selected dynamically. srv_registers[0] =
if (instr.dimension == TextureDimension::k3D) { FindOrAddTextureSRV(tfetch_index, instr.dimension, false, true);
srv_register_stacked = if (instr.dimension == TextureDimension::k3D) {
FindOrAddTextureSRV(tfetch_index, TextureDimension::k2D); // 3D or 2D stacked is selected dynamically.
srv_registers_stacked[0] = FindOrAddTextureSRV(
tfetch_index, TextureDimension::k2D, false, true);
}
} else { } else {
srv_register_stacked = UINT32_MAX; srv_registers[0] =
FindOrAddTextureSRV(tfetch_index, instr.dimension, false);
srv_registers[1] =
FindOrAddTextureSRV(tfetch_index, instr.dimension, true);
if (instr.dimension == TextureDimension::k3D) {
// 3D or 2D stacked is selected dynamically.
srv_registers_stacked[0] =
FindOrAddTextureSRV(tfetch_index, TextureDimension::k2D, false);
srv_registers_stacked[1] =
FindOrAddTextureSRV(tfetch_index, TextureDimension::k2D, true);
}
} }
sampler_register = FindOrAddSamplerBinding( sampler_register = FindOrAddSamplerBinding(
tfetch_index, instr.attributes.mag_filter, tfetch_index, instr.attributes.mag_filter,
@ -3886,6 +3910,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
instr.attributes.aniso_filter); instr.attributes.aniso_filter);
} }
uint32_t coord_temp = PushSystemTemp();
// Move coordinates to pv temporarily so zeros can be added to expand them // Move coordinates to pv temporarily so zeros can be added to expand them
// to Texture2DArray coordinates and to apply offset. Or, if the instruction // to Texture2DArray coordinates and to apply offset. Or, if the instruction
// is getWeights, move them to pv because their fractional part will be // is getWeights, move them to pv because their fractional part will be
@ -3913,7 +3938,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + operand_length)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + operand_length));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
UseDxbcSourceOperand(operand); UseDxbcSourceOperand(operand);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.mov_instruction_count; ++stat_.mov_instruction_count;
@ -3929,7 +3954,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
shader_code_.push_back(EncodeVectorMaskedOperand( shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, coord_zero_mask, 1)); D3D10_SB_OPERAND_TYPE_TEMP, coord_zero_mask, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0); shader_code_.push_back(0);
@ -4144,10 +4169,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(EncodeVectorMaskedOperand( shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1)); D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back( shader_code_.push_back(
@ -4169,10 +4194,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
: 7)); : 7));
shader_code_.push_back(EncodeVectorMaskedOperand( shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1)); D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(size_and_is_3d_temp); shader_code_.push_back(size_and_is_3d_temp);
@ -4200,10 +4225,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back( shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
shader_code_.push_back(size_and_is_3d_temp); shader_code_.push_back(size_and_is_3d_temp);
@ -4235,10 +4260,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
} }
shader_code_.push_back(EncodeVectorMaskedOperand( shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1)); D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
if (offset_z != 0.0f) { if (offset_z != 0.0f) {
shader_code_.push_back( shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
@ -4257,10 +4282,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
} }
shader_code_.push_back(EncodeVectorMaskedOperand( shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1)); D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
shader_code_.push_back( shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(size_and_is_3d_temp); shader_code_.push_back(size_and_is_3d_temp);
@ -4299,10 +4324,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(EncodeVectorMaskedOperand( shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1)); D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(size_and_is_3d_temp); shader_code_.push_back(size_and_is_3d_temp);
@ -4317,7 +4342,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
shader_code_.push_back(EncodeVectorMaskedOperand( shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1)); D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back( shader_code_.push_back(
@ -4332,7 +4357,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
shader_code_.push_back(size_and_is_3d_temp); shader_code_.push_back(size_and_is_3d_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
} }
@ -4341,8 +4366,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
} }
if (instr.opcode == FetchOpcode::kGetTextureWeights) { if (instr.opcode == FetchOpcode::kGetTextureWeights) {
// Return the fractional part of unnormalized coordinates (already in pv) // Return the fractional part of unnormalized coordinates as bilinear
// as bilinear filtering weights. // filtering weights.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_FRC) | shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_FRC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back( shader_code_.push_back(
@ -4350,7 +4375,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.float_instruction_count;
} else { } else {
@ -4360,9 +4385,40 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// at the edges, especially in pixel shader helper invocations, the // at the edges, especially in pixel shader helper invocations, the
// major axis changes, causing S/T to jump between 0 and 1, breaking // major axis changes, causing S/T to jump between 0 and 1, breaking
// gradient calculation and causing the 1x1 mipmap to be sampled. // gradient calculation and causing the 1x1 mipmap to be sampled.
ArrayCoordToCubeDirection(system_temp_pv_); ArrayCoordToCubeDirection(coord_temp);
} }
// Bias the register LOD if fetching with explicit LOD (so this is not
// done two or four times due to 3D/stacked and unsigned/signed).
uint32_t lod_temp = system_temp_grad_h_lod_, lod_temp_component = 3;
if (instr.opcode == FetchOpcode::kTextureFetch &&
instr.attributes.use_register_lod &&
instr.attributes.lod_bias != 0.0f) {
lod_temp = PushSystemTemp();
lod_temp_component = 0;
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(lod_temp);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1));
shader_code_.push_back(system_temp_grad_h_lod_);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(
*reinterpret_cast<const uint32_t*>(&instr.attributes.lod_bias));
++stat_.instruction_count;
++stat_.float_instruction_count;
}
// Allocate the register for the value from the signed texture, and later
// for biasing and gamma correction.
uint32_t signs_value_temp = instr.opcode == FetchOpcode::kTextureFetch
? PushSystemTemp()
: UINT32_MAX;
// tfetch1D/2D/Cube just fetch directly. tfetch3D needs to fetch either // tfetch1D/2D/Cube just fetch directly. tfetch3D needs to fetch either
// the 3D texture or the 2D stacked texture, so two sample instructions // the 3D texture or the 2D stacked texture, so two sample instructions
// selected conditionally are used in this case. // selected conditionally are used in this case.
@ -4378,6 +4434,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.dynamic_flow_control_count; ++stat_.dynamic_flow_control_count;
} }
// Sample both 3D and 2D array bindings for tfetch3D.
for (uint32_t i = 0; for (uint32_t i = 0;
i < (instr.dimension == TextureDimension::k3D ? 2u : 1u); ++i) { i < (instr.dimension == TextureDimension::k3D ? 2u : 1u); ++i) {
if (i != 0) { if (i != 0) {
@ -4386,180 +4443,160 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
++stat_.instruction_count; ++stat_.instruction_count;
} }
uint32_t srv_register_current = // Sample both unsigned and signed.
i != 0 ? srv_register_stacked : srv_register; for (uint32_t j = 0; j < 2; ++j) {
if (instr.opcode == FetchOpcode::kGetTextureComputedLod) { uint32_t srv_register_current =
// The non-pixel-shader case should be handled before because it just i != 0 ? srv_registers_stacked[j] : srv_registers[j];
// returns a constant in this case. uint32_t target_temp_current =
assert_true(is_pixel_shader()); j != 0 ? signs_value_temp : system_temp_pv_;
replicate_result = true; if (instr.opcode == FetchOpcode::kGetTextureComputedLod) {
shader_code_.push_back( // The non-pixel-shader case should be handled before because it
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_1_SB_OPCODE_LOD) | // just returns a constant in this case.
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); assert_true(is_pixel_shader());
shader_code_.push_back( replicate_result = true;
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 2));
shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register);
++stat_.instruction_count;
++stat_.lod_instructions;
// Apply the LOD bias if used.
if (instr.attributes.lod_bias != 0.0f) {
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_1_SB_OPCODE_LOD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
shader_code_.push_back(EncodeVectorMaskedOperand( shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(target_temp_current);
shader_code_.push_back( shader_code_.push_back(EncodeVectorSwizzledOperand(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(coord_temp);
shader_code_.push_back( shader_code_.push_back(EncodeVectorSwizzledOperand(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back( shader_code_.push_back(srv_register_current);
*reinterpret_cast<const uint32_t*>(&instr.attributes.lod_bias)); shader_code_.push_back(srv_register_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 2));
shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register);
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.float_instruction_count; ++stat_.lod_instructions;
} // Apply the LOD bias if used.
} else if (instr.attributes.use_register_lod) { if (instr.attributes.lod_bias != 0.0f) {
uint32_t lod_register, lod_component; shader_code_.push_back(
if (instr.attributes.lod_bias != 0.0f) { ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) |
// Bias the LOD in the register. ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
lod_register = PushSystemTemp(); shader_code_.push_back(EncodeVectorMaskedOperand(
lod_component = 0; D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back( shader_code_.push_back(target_temp_current);
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | shader_code_.push_back(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(EncodeVectorMaskedOperand( shader_code_.push_back(target_temp_current);
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); shader_code_.push_back(
shader_code_.push_back(lod_register); EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back( shader_code_.push_back(*reinterpret_cast<const uint32_t*>(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); &instr.attributes.lod_bias));
shader_code_.push_back(system_temp_grad_h_lod_); ++stat_.instruction_count;
shader_code_.push_back( ++stat_.float_instruction_count;
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); }
shader_code_.push_back( // In this case, only the unsigned variant is accessed because data
*reinterpret_cast<const uint32_t*>(&instr.attributes.lod_bias)); // doesn't matter.
++stat_.instruction_count; break;
++stat_.float_instruction_count; } else if (instr.attributes.use_register_lod) {
} else {
lod_register = system_temp_grad_h_lod_;
lod_component = 3;
}
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 2));
shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, lod_component, 1));
shader_code_.push_back(lod_register);
++stat_.instruction_count;
++stat_.texture_normal_instructions;
if (instr.attributes.lod_bias != 0.0f) {
// Release the allocated lod_register.
PopSystemTemp();
}
} else if (instr.attributes.use_register_gradients) {
// TODO(Triang3l): Apply the LOD bias somehow for register gradients
// (possibly will require moving the bias to the sampler, which may be
// not very good considering the sampler count is very limited).
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_D) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 2));
shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_grad_h_lod_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_grad_v_);
++stat_.instruction_count;
++stat_.texture_gradient_instructions;
} else {
// 3 different DXBC opcodes handled here:
// - sample_l, when not using a computed LOD or not in a pixel shader,
// in this case, LOD (0 + bias) is sampled.
// - sample, when sampling in a pixel shader (thus with derivatives)
// with a computed LOD.
// - sample_b, when sampling in a pixel shader with a biased computed
// LOD.
// Both sample_l and sample_b should add the LOD bias as the last
// operand in our case.
bool explicit_lod =
!instr.attributes.use_computed_lod || !is_pixel_shader();
if (explicit_lod) {
shader_code_.push_back( shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) | ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13));
} else if (instr.attributes.lod_bias != 0.0f) { shader_code_.push_back(EncodeVectorMaskedOperand(
shader_code_.push_back( D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_B) | shader_code_.push_back(target_temp_current);
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); shader_code_.push_back(EncodeVectorSwizzledOperand(
} else { D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back( shader_code_.push_back(coord_temp);
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE) | shader_code_.push_back(EncodeVectorSwizzledOperand(
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
} shader_code_.push_back(srv_register_current);
shader_code_.push_back( shader_code_.push_back(srv_register_current);
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1)); shader_code_.push_back(EncodeVectorSwizzledOperand(
shader_code_.push_back(system_temp_pv_); D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 2));
shader_code_.push_back(EncodeVectorSwizzledOperand( shader_code_.push_back(sampler_register);
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); shader_code_.push_back(sampler_register);
shader_code_.push_back(system_temp_pv_); shader_code_.push_back(EncodeVectorSelectOperand(
shader_code_.push_back(EncodeVectorSwizzledOperand( D3D10_SB_OPERAND_TYPE_TEMP, lod_temp_component, 1));
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2)); shader_code_.push_back(lod_temp);
shader_code_.push_back(srv_register_current); ++stat_.instruction_count;
shader_code_.push_back(srv_register_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 2));
shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register);
if (explicit_lod || instr.attributes.lod_bias != 0.0f) {
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(
*reinterpret_cast<const uint32_t*>(&instr.attributes.lod_bias));
}
++stat_.instruction_count;
if (!explicit_lod && instr.attributes.lod_bias != 0.0f) {
++stat_.texture_bias_instructions;
} else {
++stat_.texture_normal_instructions; ++stat_.texture_normal_instructions;
} else if (instr.attributes.use_register_gradients) {
// TODO(Triang3l): Apply the LOD bias somehow for register gradients
// (possibly will require moving the bias to the sampler, which may
// be not very good considering the sampler count is very limited).
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_D) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15));
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 2));
shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_grad_h_lod_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_grad_v_);
++stat_.instruction_count;
++stat_.texture_gradient_instructions;
} else {
// 3 different DXBC opcodes handled here:
// - sample_l, when not using a computed LOD or not in a pixel
// shader, in this case, LOD (0 + bias) is sampled.
// - sample, when sampling in a pixel shader (thus with derivatives)
// with a computed LOD.
// - sample_b, when sampling in a pixel shader with a biased
// computed LOD.
// Both sample_l and sample_b should add the LOD bias as the last
// operand in our case.
bool explicit_lod =
!instr.attributes.use_computed_lod || !is_pixel_shader();
if (explicit_lod) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_L) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13));
} else if (instr.attributes.lod_bias != 0.0f) {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE_B) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13));
} else {
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_SAMPLE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
}
shader_code_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(target_temp_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(coord_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_RESOURCE, kSwizzleXYZW, 2));
shader_code_.push_back(srv_register_current);
shader_code_.push_back(srv_register_current);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 2));
shader_code_.push_back(sampler_register);
shader_code_.push_back(sampler_register);
if (explicit_lod || instr.attributes.lod_bias != 0.0f) {
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(*reinterpret_cast<const uint32_t*>(
&instr.attributes.lod_bias));
}
++stat_.instruction_count;
if (!explicit_lod && instr.attributes.lod_bias != 0.0f) {
++stat_.texture_bias_instructions;
} else {
++stat_.texture_normal_instructions;
}
} }
} }
} }
@ -4576,14 +4613,15 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
cbuffer_index_fetch_constants_ = cbuffer_count_++; cbuffer_index_fetch_constants_ = cbuffer_count_++;
} }
// Apply sign bias (2 * color - 1) and linearize gamma textures. This is assert_true(signs_value_temp != UINT32_MAX);
// done before applying the exponent bias because this must be done on
// color values in 0...1 range, and this is closer to the storage
// format, while exponent bias is closer to the actual usage in shaders.
uint32_t signs_temp = PushSystemTemp(); uint32_t signs_temp = PushSystemTemp();
// Additionally allocate some temps needed to apply this.
uint32_t signs_value_temp = PushSystemTemp();
uint32_t signs_select_temp = PushSystemTemp(); uint32_t signs_select_temp = PushSystemTemp();
// Multiplex unsigned and signed SRVs, apply sign bias (2 * color - 1)
// and linearize gamma textures. This is done before applying the
// exponent bias because biasing and linearization must be done on color
// values in 0...1 range, and this is closer to the storage format,
// while exponent bias is closer to the actual usage in shaders.
// Extract the sign values from dword 0 ([0].x or [1].z) of the fetch // Extract the sign values from dword 0 ([0].x or [1].z) of the fetch
// constant, in bits 2:3, 4:5, 6:7 and 8:9. // constant, in bits 2:3, 4:5, 6:7 and 8:9.
shader_code_.push_back( shader_code_.push_back(
@ -4612,9 +4650,46 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.uint_instruction_count; ++stat_.uint_instruction_count;
// TODO(Triang3l): Handle TextureSign::kSigned somehow - would possibly // Replace the components fetched from the unsigned texture with those
// require conditionally sampling unsigned and signed versions of the // fetched from the signed one where needed (the signed values are already
// texture. // loaded to signs_value_temp).
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IEQ) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(signs_select_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(signs_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(uint32_t(TextureSign::kSigned));
shader_code_.push_back(uint32_t(TextureSign::kSigned));
shader_code_.push_back(uint32_t(TextureSign::kSigned));
shader_code_.push_back(uint32_t(TextureSign::kSigned));
++stat_.instruction_count;
++stat_.int_instruction_count;
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(signs_select_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(signs_value_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_pv_);
++stat_.instruction_count;
++stat_.movc_instruction_count;
// Reusing signs_value_temp from now on because the value from the
// signed texture has already been copied.
// Expand 0...1 to -1...1 (for normal and DuDv maps, for instance). // Expand 0...1 to -1...1 (for normal and DuDv maps, for instance).
shader_code_.push_back( shader_code_.push_back(
@ -4772,8 +4847,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
++stat_.instruction_count; ++stat_.instruction_count;
++stat_.movc_instruction_count; ++stat_.movc_instruction_count;
// Release signs_temp, signs_value_temp and signs_select_temp. // Release signs_temp and signs_select_temp.
PopSystemTemp(3); PopSystemTemp(2);
// Apply exponent bias. // Apply exponent bias.
uint32_t exp_adjust_temp = PushSystemTemp(); uint32_t exp_adjust_temp = PushSystemTemp();
@ -4846,11 +4921,20 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// Release exp_adjust_temp. // Release exp_adjust_temp.
PopSystemTemp(); PopSystemTemp();
} }
if (signs_value_temp != UINT32_MAX) {
PopSystemTemp();
}
if (lod_temp != system_temp_grad_h_lod_) {
PopSystemTemp();
}
} }
if (size_and_is_3d_temp != UINT32_MAX) { if (size_and_is_3d_temp != UINT32_MAX) {
PopSystemTemp(); PopSystemTemp();
} }
// Release coord_temp.
PopSystemTemp();
} else if (instr.opcode == FetchOpcode::kGetTextureGradients) { } else if (instr.opcode == FetchOpcode::kGetTextureGradients) {
assert_true(is_pixel_shader()); assert_true(is_pixel_shader());
store_result = true; store_result = true;

View File

@ -95,6 +95,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
struct TextureSRV { struct TextureSRV {
uint32_t fetch_constant; uint32_t fetch_constant;
TextureDimension dimension; TextureDimension dimension;
bool is_signed;
// Whether this SRV must be bound even if it's signed while all components
// are unsigned, or unsigned while all are signed (for kGetTextureComputedLod).
bool is_sign_required;
std::string name; std::string name;
}; };
// The first binding returned is at t1 because t0 is shared memory. // The first binding returned is at t1 because t0 is shared memory.
@ -393,7 +397,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
// Returns T#/t# index (they are the same in this translator). // Returns T#/t# index (they are the same in this translator).
uint32_t FindOrAddTextureSRV(uint32_t fetch_constant, uint32_t FindOrAddTextureSRV(uint32_t fetch_constant,
TextureDimension dimension); TextureDimension dimension, bool is_signed,
bool is_sign_required = false);
// Returns S#/s# index (they are the same in this translator). // Returns S#/s# index (they are the same in this translator).
uint32_t FindOrAddSamplerBinding(uint32_t fetch_constant, uint32_t FindOrAddSamplerBinding(uint32_t fetch_constant,
TextureFilter mag_filter, TextureFilter mag_filter,