[DXBC] ALU vector ops to new codegen
This commit is contained in:
parent
3aa0ce3096
commit
b79ba69548
|
@ -875,7 +875,7 @@ void DxbcShaderTranslator::StartTranslation() {
|
|||
// depends on the guest code (thus no guarantees), initialize everything
|
||||
// now (except for pv, it's an internal temporary variable, not accessible
|
||||
// by the guest).
|
||||
system_temp_pv_ = PushSystemTemp();
|
||||
system_temp_result_ = PushSystemTemp();
|
||||
system_temp_ps_pc_p0_a0_ = PushSystemTemp(0b1111);
|
||||
system_temp_aL_ = PushSystemTemp(0b1111);
|
||||
system_temp_loop_count_ = PushSystemTemp(0b1111);
|
||||
|
@ -1089,7 +1089,7 @@ void DxbcShaderTranslator::CompleteShaderCode() {
|
|||
DxbcOpEndLoop();
|
||||
|
||||
// Release the following system temporary values so epilogue can reuse them:
|
||||
// - system_temp_pv_.
|
||||
// - system_temp_result_.
|
||||
// - system_temp_ps_pc_p0_a0_.
|
||||
// - system_temp_aL_.
|
||||
// - system_temp_loop_count_.
|
||||
|
@ -1306,6 +1306,96 @@ void DxbcShaderTranslator::EmitInstructionDisassembly() {
|
|||
length_dwords * sizeof(uint32_t) - length - 1);
|
||||
}
|
||||
|
||||
// Converts a translator source operand into a DXBC emitter source, covering
// only the components selected by needed_components. temp_pushed_out is set to
// true only when a system temp is pushed here (the dynamically addressed
// register path); the returned source then refers to that temp.
DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::LoadOperand(
|
||||
const InstructionOperand& operand, uint32_t needed_components,
|
||||
bool& temp_pushed_out) {
|
||||
temp_pushed_out = false;
|
||||
|
||||
// With no needed components there is nothing to load - return a zero literal
// (assumes xe::bit_scan_forward returns false for an empty mask - TODO
// confirm).
uint32_t first_needed_component;
|
||||
if (!xe::bit_scan_forward(needed_components, &first_needed_component)) {
|
||||
return DxbcSrc::LF(0.0f);
|
||||
}
|
||||
|
||||
// Build the storage index: the static index alone, or the static index
// combined with component 3 of system_temp_ps_pc_p0_a0_ (absolute addressing)
// or component 0 of system_temp_aL_ (relative addressing).
DxbcIndex index(operand.storage_index);
|
||||
switch (operand.storage_addressing_mode) {
|
||||
case InstructionStorageAddressingMode::kStatic:
|
||||
break;
|
||||
case InstructionStorageAddressingMode::kAddressAbsolute:
|
||||
index = DxbcIndex(system_temp_ps_pc_p0_a0_, 3, operand.storage_index);
|
||||
break;
|
||||
case InstructionStorageAddressingMode::kAddressRelative:
|
||||
index = DxbcIndex(system_temp_aL_, 0, operand.storage_index);
|
||||
break;
|
||||
}
|
||||
|
||||
// Placeholder value, replaced by the storage-source-specific operand below.
DxbcSrc src(DxbcSrc::LF(0.0f));
|
||||
switch (operand.storage_source) {
|
||||
case InstructionStorageSource::kRegister: {
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
// Load x#[#] to r# because x#[#] can be used only with mov.
|
||||
uint32_t temp = PushSystemTemp();
|
||||
temp_pushed_out = true;
|
||||
// Collect the set of source components referenced through the operand's
// swizzle by the needed result components, so only those are copied.
uint32_t used_swizzle_components = 0;
|
||||
for (uint32_t i = 0; i < uint32_t(operand.component_count); ++i) {
|
||||
if (!(needed_components & (1 << i))) {
|
||||
continue;
|
||||
}
|
||||
SwizzleSource component = operand.GetComponent(i);
|
||||
assert_true(component >= SwizzleSource::kX &&
|
||||
component <= SwizzleSource::kW);
|
||||
used_swizzle_components |=
|
||||
1 << (uint32_t(component) - uint32_t(SwizzleSource::kX));
|
||||
}
|
||||
assert_not_zero(used_swizzle_components);
|
||||
DxbcOpMov(DxbcDest::R(temp, used_swizzle_components),
|
||||
DxbcSrc::X(0, index));
|
||||
src = DxbcSrc::R(temp);
|
||||
} else {
|
||||
// Without dynamic register addressing, only static indices are expected,
// and the register is referenced directly.
assert_true(operand.storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic);
|
||||
src = DxbcSrc::R(index.index_);
|
||||
}
|
||||
} break;
|
||||
case InstructionStorageSource::kConstantFloat: {
|
||||
// Assign the float constant buffer its index on first use.
if (cbuffer_index_float_constants_ == kCbufferIndexUnallocated) {
|
||||
cbuffer_index_float_constants_ = cbuffer_count_++;
|
||||
}
|
||||
if (operand.storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic) {
|
||||
// Statically addressed constants are remapped through the constant
// register map; UINT32_MAX from the lookup is asserted against and, in
// builds where the assertion doesn't stop execution, handled by returning
// a zero literal.
uint32_t float_constant_index =
|
||||
constant_register_map().GetPackedFloatConstantIndex(
|
||||
operand.storage_index);
|
||||
assert_true(float_constant_index != UINT32_MAX);
|
||||
if (float_constant_index == UINT32_MAX) {
|
||||
return DxbcSrc::LF(0.0f);
|
||||
}
|
||||
index.index_ = float_constant_index;
|
||||
} else {
|
||||
assert_true(constant_register_map().float_dynamic_addressing);
|
||||
}
|
||||
src = DxbcSrc::CB(cbuffer_index_float_constants_,
|
||||
uint32_t(CbufferRegister::kFloatConstants), index);
|
||||
} break;
|
||||
default:
|
||||
// Any other storage source is unexpected here; fall back to a zero literal.
assert_unhandled_case(operand.storage_source);
|
||||
return DxbcSrc::LF(0.0f);
|
||||
}
|
||||
|
||||
// Swizzle, skipping unneeded components similar to how FXC skips components,
|
||||
// by replacing them with the leftmost used one.
|
||||
// The swizzle is packed as 2 bits per destination component.
uint32_t swizzle = 0;
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
SwizzleSource component = operand.GetComponent(
|
||||
(needed_components & (1 << i)) ? i : first_needed_component);
|
||||
assert_true(component >= SwizzleSource::kX &&
|
||||
component <= SwizzleSource::kW);
|
||||
swizzle |= (uint32_t(component) - uint32_t(SwizzleSource::kX)) << (i * 2);
|
||||
}
|
||||
src = src.Swizzle(swizzle);
|
||||
|
||||
// Apply the operand's absolute value and negation flags last.
return src.WithModifiers(operand.is_absolute_value, operand.is_negated);
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::LoadDxbcSourceOperand(
|
||||
const InstructionOperand& operand, DxbcSourceOperand& dxbc_operand) {
|
||||
// Initialize the values to their defaults.
|
||||
|
@ -1693,306 +1783,151 @@ void DxbcShaderTranslator::UnloadDxbcSourceOperand(
|
|||
}
|
||||
|
||||
void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
|
||||
uint32_t reg, bool replicate_x,
|
||||
const DxbcSrc& src,
|
||||
bool can_store_memexport_address) {
|
||||
uint32_t used_write_mask = result.GetUsedWriteMask();
|
||||
if (result.storage_target == InstructionStorageTarget::kNone ||
|
||||
!result.GetUsedWriteMask()) {
|
||||
if (!used_write_mask) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Validate memexport writes (Halo 3 has some weird invalid ones).
|
||||
if (result.storage_target == InstructionStorageTarget::kExportAddress) {
|
||||
if (!can_store_memexport_address || memexport_alloc_current_count_ == 0 ||
|
||||
memexport_alloc_current_count_ > kMaxMemExports ||
|
||||
system_temps_memexport_address_[memexport_alloc_current_count_ - 1] ==
|
||||
UINT32_MAX) {
|
||||
// Get the destination address and type.
|
||||
DxbcDest dest(DxbcDest::Null());
|
||||
bool is_clamped = result.is_clamped;
|
||||
switch (result.storage_target) {
|
||||
case InstructionStorageTarget::kNone:
|
||||
return;
|
||||
}
|
||||
} else if (result.storage_target == InstructionStorageTarget::kExportData) {
|
||||
if (memexport_alloc_current_count_ == 0 ||
|
||||
memexport_alloc_current_count_ > kMaxMemExports ||
|
||||
system_temps_memexport_data_[memexport_alloc_current_count_ - 1]
|
||||
[result.storage_index] == UINT32_MAX) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t saturate_bit =
|
||||
ENCODE_D3D10_SB_INSTRUCTION_SATURATE(result.is_clamped);
|
||||
|
||||
// Scalar targets get only one component.
|
||||
// TODO(Triang3l): It's not replicated, it's X specifically.
|
||||
if (result.storage_target == InstructionStorageTarget::kDepth) {
|
||||
assert_not_zero(used_write_mask & 0b0001);
|
||||
SwizzleSource component = result.components[0];
|
||||
if (replicate_x && component <= SwizzleSource::kW) {
|
||||
component = SwizzleSource::kX;
|
||||
}
|
||||
// Both r[imm32] and imm32 operands are 2 tokens long.
|
||||
switch (result.storage_target) {
|
||||
case InstructionStorageTarget::kDepth:
|
||||
assert_true(writes_depth());
|
||||
if (writes_depth()) {
|
||||
if (edram_rov_used_) {
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_INSTRUCTION_SATURATE(1) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(system_temp_rov_depth_stencil_);
|
||||
} else {
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_INSTRUCTION_SATURATE(1) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH, 0));
|
||||
}
|
||||
case InstructionStorageTarget::kRegister:
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
DxbcIndex register_index(result.storage_index);
|
||||
switch (result.storage_addressing_mode) {
|
||||
case InstructionStorageAddressingMode::kStatic:
|
||||
break;
|
||||
case InstructionStorageAddressingMode::kAddressAbsolute:
|
||||
register_index =
|
||||
DxbcIndex(system_temp_ps_pc_p0_a0_, 3, result.storage_index);
|
||||
break;
|
||||
case InstructionStorageAddressingMode::kAddressRelative:
|
||||
register_index =
|
||||
DxbcIndex(system_temp_aL_, 0, result.storage_index);
|
||||
break;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
assert_unhandled_case(result.storage_target);
|
||||
dest = DxbcDest::X(0, register_index);
|
||||
} else {
|
||||
assert_true(result.storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic);
|
||||
dest = DxbcDest::R(result.storage_index);
|
||||
}
|
||||
break;
|
||||
case InstructionStorageTarget::kInterpolator:
|
||||
dest = DxbcDest::O(uint32_t(InOutRegister::kVSDSOutInterpolators) +
|
||||
result.storage_index);
|
||||
break;
|
||||
case InstructionStorageTarget::kPosition:
|
||||
dest = DxbcDest::R(system_temp_position_);
|
||||
break;
|
||||
case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex:
|
||||
assert_zero(used_write_mask & 0b1000);
|
||||
dest = DxbcDest::R(system_temp_point_size_edge_flag_kill_vertex_);
|
||||
break;
|
||||
case InstructionStorageTarget::kExportAddress:
|
||||
// Validate memexport writes (Halo 3 has some weird invalid ones).
|
||||
if (!can_store_memexport_address || memexport_alloc_current_count_ == 0 ||
|
||||
memexport_alloc_current_count_ > kMaxMemExports ||
|
||||
system_temps_memexport_address_[memexport_alloc_current_count_ - 1] ==
|
||||
UINT32_MAX) {
|
||||
return;
|
||||
}
|
||||
if (component <= SwizzleSource::kW) {
|
||||
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, uint32_t(component), 1));
|
||||
shader_code_.push_back(reg);
|
||||
} else {
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(component == SwizzleSource::k1 ? 0x3F800000 : 0);
|
||||
}
|
||||
++stat_.instruction_count;
|
||||
++stat_.mov_instruction_count;
|
||||
}
|
||||
dest = DxbcDest::R(
|
||||
system_temps_memexport_address_[memexport_alloc_current_count_ - 1]);
|
||||
break;
|
||||
case InstructionStorageTarget::kExportData: {
|
||||
// Validate memexport writes (Halo 3 has some weird invalid ones).
|
||||
if (memexport_alloc_current_count_ == 0 ||
|
||||
memexport_alloc_current_count_ > kMaxMemExports ||
|
||||
system_temps_memexport_data_[memexport_alloc_current_count_ - 1]
|
||||
[result.storage_index] == UINT32_MAX) {
|
||||
return;
|
||||
}
|
||||
dest = DxbcDest::R(
|
||||
system_temps_memexport_data_[memexport_alloc_current_count_ - 1]
|
||||
[result.storage_index]);
|
||||
// Mark that the eM# has been written to and needs to be exported.
|
||||
assert_not_zero(used_write_mask);
|
||||
uint32_t memexport_index = memexport_alloc_current_count_ - 1;
|
||||
DxbcOpOr(DxbcDest::R(system_temp_memexport_written_,
|
||||
1 << (memexport_index >> 2)),
|
||||
DxbcSrc::R(system_temp_memexport_written_)
|
||||
.Select(memexport_index >> 2),
|
||||
DxbcSrc::LU(uint32_t(1) << (result.storage_index +
|
||||
((memexport_index & 3) << 3))));
|
||||
} break;
|
||||
case InstructionStorageTarget::kColor:
|
||||
assert_not_zero(used_write_mask);
|
||||
assert_true(writes_color_target(result.storage_index));
|
||||
dest = DxbcDest::R(system_temps_color_[result.storage_index]);
|
||||
if (edram_rov_used_) {
|
||||
// For ROV output, mark that the color has been written to.
|
||||
// According to:
|
||||
// https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/dx9-graphics-reference-asm-ps-registers-output-color
|
||||
// if a color target hasn't been written to - including due to flow
|
||||
// control - the render target must not be modified (the unwritten
|
||||
// components of a written target are undefined, not sure if this
|
||||
// behavior is respected on the real GPU, but the ROV code currently
|
||||
// doesn't preserve unmodified components).
|
||||
DxbcOpOr(DxbcDest::R(system_temp_rov_params_, 0b0001),
|
||||
DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX),
|
||||
DxbcSrc::LU(uint32_t(1) << (8 + result.storage_index)));
|
||||
}
|
||||
break;
|
||||
case InstructionStorageTarget::kDepth:
|
||||
// Writes X to scalar oDepth or to X of system_temp_rov_depth_stencil_, no
|
||||
// additional swizzling needed.
|
||||
assert_true(used_write_mask == 0b0001);
|
||||
assert_true(writes_depth());
|
||||
if (edram_rov_used_) {
|
||||
dest = DxbcDest::R(system_temp_rov_depth_stencil_);
|
||||
} else {
|
||||
dest = DxbcDest::ODepth();
|
||||
}
|
||||
// Depth outside [0, 1] is not safe for use with the ROV code. Though 20e4
|
||||
// float depth can store values below 2, it's a very unusual case.
|
||||
// Direct3D 10+ SV_Depth, however, can accept any values, including
|
||||
// specials, when the depth buffer is floating-point.
|
||||
is_clamped = true;
|
||||
break;
|
||||
}
|
||||
if (dest.type_ == DxbcOperandType::kNull) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Get the write masks and data required for loading of both the swizzled part
|
||||
// and the constant (zero/one) part. The write mask is treated also as a read
|
||||
// mask in DXBC, and `mov r0.zw, r1.xyzw` actually means r0.zw = r1.zw, not
|
||||
// r0.zw = r1.xy.
|
||||
uint32_t swizzle_mask = 0;
|
||||
uint32_t swizzle_components = 0;
|
||||
uint32_t constant_mask = 0;
|
||||
uint32_t constant_values = 0;
|
||||
// Write.
|
||||
uint32_t src_additional_swizzle = 0;
|
||||
uint32_t constant_mask = 0, constant_1_mask = 0;
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (!(used_write_mask & (1 << i))) {
|
||||
continue;
|
||||
}
|
||||
SwizzleSource component = result.components[i];
|
||||
if (component <= SwizzleSource::kW) {
|
||||
swizzle_mask |= 1 << i;
|
||||
// If replicating X, just keep zero swizzle (XXXX).
|
||||
if (!replicate_x) {
|
||||
swizzle_components |= uint32_t(component) << (i * 2);
|
||||
}
|
||||
if (component >= SwizzleSource::kX && component <= SwizzleSource::kW) {
|
||||
src_additional_swizzle |=
|
||||
(uint32_t(component) - uint32_t(SwizzleSource::kX)) << (i * 2);
|
||||
} else {
|
||||
constant_mask |= 1 << i;
|
||||
constant_values |= (component == SwizzleSource::k1 ? 1 : 0) << i;
|
||||
}
|
||||
}
|
||||
|
||||
bool is_static = result.storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic;
|
||||
// If the index is dynamic, choose where it's taken from.
|
||||
uint32_t dynamic_address_register, dynamic_address_component;
|
||||
if (result.storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kAddressRelative) {
|
||||
// Addressed by aL.x.
|
||||
dynamic_address_register = system_temp_aL_;
|
||||
dynamic_address_component = 0;
|
||||
} else {
|
||||
// Addressed by a0.
|
||||
dynamic_address_register = system_temp_ps_pc_p0_a0_;
|
||||
dynamic_address_component = 3;
|
||||
}
|
||||
|
||||
// Store both parts of the write (i == 0 - swizzled, i == 1 - constant).
|
||||
for (uint32_t i = 0; i < 2; ++i) {
|
||||
uint32_t mask = i == 0 ? swizzle_mask : constant_mask;
|
||||
if (mask == 0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// r# for the swizzled part, 4-component imm32 for the constant part.
|
||||
uint32_t source_length = i != 0 ? 5 : 2;
|
||||
switch (result.storage_target) {
|
||||
case InstructionStorageTarget::kRegister:
|
||||
if (uses_register_dynamic_addressing()) {
|
||||
++stat_.instruction_count;
|
||||
++stat_.array_instruction_count;
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH((is_static ? 4 : 6) +
|
||||
source_length) |
|
||||
saturate_bit);
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, mask, 2,
|
||||
D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
|
||||
is_static ? D3D10_SB_OPERAND_INDEX_IMMEDIATE32
|
||||
: D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE));
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(result.storage_index);
|
||||
if (!is_static) {
|
||||
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, dynamic_address_component, 1));
|
||||
shader_code_.push_back(dynamic_address_register);
|
||||
}
|
||||
} else {
|
||||
assert_true(is_static);
|
||||
++stat_.instruction_count;
|
||||
++stat_.mov_instruction_count;
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + source_length) |
|
||||
saturate_bit);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, mask, 1));
|
||||
shader_code_.push_back(result.storage_index);
|
||||
}
|
||||
break;
|
||||
|
||||
case InstructionStorageTarget::kInterpolator:
|
||||
++stat_.instruction_count;
|
||||
++stat_.mov_instruction_count;
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + source_length) |
|
||||
saturate_bit);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, mask, 1));
|
||||
shader_code_.push_back(uint32_t(InOutRegister::kVSDSOutInterpolators) +
|
||||
uint32_t(result.storage_index));
|
||||
break;
|
||||
|
||||
case InstructionStorageTarget::kPosition:
|
||||
++stat_.instruction_count;
|
||||
++stat_.mov_instruction_count;
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + source_length) |
|
||||
saturate_bit);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, mask, 1));
|
||||
shader_code_.push_back(system_temp_position_);
|
||||
break;
|
||||
|
||||
case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex:
|
||||
++stat_.instruction_count;
|
||||
++stat_.mov_instruction_count;
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + source_length) |
|
||||
saturate_bit);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, mask, 1));
|
||||
shader_code_.push_back(system_temp_point_size_edge_flag_kill_vertex_);
|
||||
break;
|
||||
|
||||
case InstructionStorageTarget::kExportAddress:
|
||||
++stat_.instruction_count;
|
||||
++stat_.mov_instruction_count;
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + source_length) |
|
||||
saturate_bit);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, mask, 1));
|
||||
shader_code_.push_back(
|
||||
system_temps_memexport_address_[memexport_alloc_current_count_ -
|
||||
1]);
|
||||
break;
|
||||
|
||||
case InstructionStorageTarget::kExportData:
|
||||
++stat_.instruction_count;
|
||||
++stat_.mov_instruction_count;
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + source_length) |
|
||||
saturate_bit);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, mask, 1));
|
||||
shader_code_.push_back(
|
||||
system_temps_memexport_data_[memexport_alloc_current_count_ - 1]
|
||||
[uint32_t(result.storage_index)]);
|
||||
break;
|
||||
|
||||
case InstructionStorageTarget::kColor:
|
||||
++stat_.instruction_count;
|
||||
++stat_.mov_instruction_count;
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + source_length) |
|
||||
saturate_bit);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, mask, 1));
|
||||
shader_code_.push_back(system_temps_color_[result.storage_index]);
|
||||
break;
|
||||
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
||||
if (i == 0) {
|
||||
// Copy from the source r#.
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, swizzle_components, 1));
|
||||
shader_code_.push_back(reg);
|
||||
} else {
|
||||
// Load constants.
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
for (uint32_t j = 0; j < 4; ++j) {
|
||||
shader_code_.push_back((constant_values & (1 << j)) ? 0x3F800000 : 0);
|
||||
if (component == SwizzleSource::k1) {
|
||||
constant_1_mask |= 1 << i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (result.storage_target == InstructionStorageTarget::kExportData) {
|
||||
// Mark that the eM# has been written to and needs to be exported.
|
||||
uint32_t memexport_index = memexport_alloc_current_count_ - 1;
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, 1 << (memexport_index >> 2), 1));
|
||||
shader_code_.push_back(system_temp_memexport_written_);
|
||||
shader_code_.push_back(EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP,
|
||||
memexport_index >> 2, 1));
|
||||
shader_code_.push_back(system_temp_memexport_written_);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(
|
||||
uint32_t(1) << (result.storage_index + ((memexport_index & 3) << 3)));
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
if (used_write_mask != constant_mask) {
|
||||
DxbcOpMov(dest.Mask(used_write_mask & ~constant_mask),
|
||||
src.SwizzleSwizzled(src_additional_swizzle), is_clamped);
|
||||
}
|
||||
|
||||
if (edram_rov_used_ &&
|
||||
result.storage_target == InstructionStorageTarget::kColor) {
|
||||
// For ROV output, mark that the color has been written to.
|
||||
// According to:
|
||||
// https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/dx9-graphics-reference-asm-ps-registers-output-color
|
||||
// if a color target hasn't been written to - including due to flow control -
|
||||
// the render target must not be modified (the unwritten components of a
|
||||
// written target are undefined, not sure if this behavior is respected on
|
||||
// the real GPU, but the ROV code currently uses pre-packed masks to keep
|
||||
// the old values, so preservation of components is not done).
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_OR) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(system_temp_rov_params_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(system_temp_rov_params_);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(1 << (8 + result.storage_index));
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
if (constant_mask) {
|
||||
DxbcOpMov(dest.Mask(constant_mask),
|
||||
DxbcSrc::LF(float(constant_1_mask & 1),
|
||||
float((constant_1_mask >> 1) & 1),
|
||||
float((constant_1_mask >> 2) & 1),
|
||||
float((constant_1_mask >> 3) & 1)));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2192,8 +2127,8 @@ void DxbcShaderTranslator::ProcessLoopStartInstruction(
|
|||
EmitInstructionDisassembly();
|
||||
}
|
||||
|
||||
// Count (as uint) in bits 0:7 of the loop constant, initial aL in 8:15.
|
||||
// Starting from vector 2 because of bool constants.
|
||||
// Count (unsigned) in bits 0:7 of the loop constant, initial aL (unsigned) in
|
||||
// 8:15. Starting from vector 2 because of bool constants.
|
||||
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
|
||||
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
|
||||
}
|
||||
|
@ -2280,12 +2215,12 @@ void DxbcShaderTranslator::ProcessLoopEndInstruction(
|
|||
{
|
||||
// Continue case.
|
||||
uint32_t aL_add_temp = PushSystemTemp();
|
||||
// Extract the value to add to aL (in bits 16:23 of the loop constant).
|
||||
// Starting from vector 2 because of bool constants.
|
||||
// Extract the value to add to aL (signed, in bits 16:23 of the loop
|
||||
// constant). Starting from vector 2 because of bool constants.
|
||||
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
|
||||
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
|
||||
}
|
||||
DxbcOpUBFE(DxbcDest::R(aL_add_temp, 0b0001), DxbcSrc::LU(8),
|
||||
DxbcOpIBFE(DxbcDest::R(aL_add_temp, 0b0001), DxbcSrc::LU(8),
|
||||
DxbcSrc::LU(16),
|
||||
DxbcSrc::CB(cbuffer_index_bool_loop_constants_,
|
||||
uint32_t(CbufferRegister::kBoolLoopConstants),
|
||||
|
|
|
@ -764,7 +764,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
if (index_dimension > 1) {
|
||||
operand_token |= uint32_t(index_2d_.GetRepresentation()) << 25;
|
||||
if (index_dimension > 2) {
|
||||
operand_token |= uint32_t(index_2d_.GetRepresentation()) << 28;
|
||||
operand_token |= uint32_t(index_3d_.GetRepresentation()) << 28;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1084,12 +1084,15 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
kDefault = 10,
|
||||
kDiscard = 13,
|
||||
kDiv = 14,
|
||||
kDP2 = 15,
|
||||
kDP3 = 16,
|
||||
kDP4 = 17,
|
||||
kElse = 18,
|
||||
kEndIf = 21,
|
||||
kEndLoop = 22,
|
||||
kEndSwitch = 23,
|
||||
kEq = 24,
|
||||
kFrc = 26,
|
||||
kFToI = 27,
|
||||
kFToU = 28,
|
||||
kGE = 29,
|
||||
|
@ -1118,6 +1121,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
kRet = 62,
|
||||
kRetC = 63,
|
||||
kRoundNE = 64,
|
||||
kRoundNI = 65,
|
||||
kRoundZ = 67,
|
||||
kSwitch = 76,
|
||||
kULT = 79,
|
||||
|
@ -1291,6 +1295,32 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
DxbcEmitAluOp(DxbcOpcode::kDiv, 0b00, dest, src0, src1, saturate);
|
||||
++stat_.float_instruction_count;
|
||||
}
|
||||
// Emits a dp2 instruction: appends the opcode token (with the optional
// saturate flag), the destination and both sources to shader_code_, then
// updates the instruction statistics.
void DxbcOpDP2(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1,
|
||||
bool saturate = false) {
|
||||
// Source lengths are queried with the same 0b0011 mask the sources are
// written with below (presumably limiting them to X and Y - TODO confirm).
uint32_t operands_length =
|
||||
dest.GetLength() + src0.GetLength(0b0011) + src1.GetLength(0b0011);
|
||||
// Reserve room for the opcode token plus all operand tokens up front.
shader_code_.reserve(shader_code_.size() + 1 + operands_length);
|
||||
shader_code_.push_back(
|
||||
DxbcOpcodeToken(DxbcOpcode::kDP2, operands_length, saturate));
|
||||
dest.Write(shader_code_);
|
||||
src0.Write(shader_code_, false, 0b0011);
|
||||
src1.Write(shader_code_, false, 0b0011);
|
||||
++stat_.instruction_count;
|
||||
++stat_.float_instruction_count;
|
||||
}
|
||||
// Emits a dp3 instruction: appends the opcode token (with the optional
// saturate flag), the destination and both sources to shader_code_, then
// updates the instruction statistics.
void DxbcOpDP3(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1,
|
||||
bool saturate = false) {
|
||||
// Source lengths are queried with the same 0b0111 mask the sources are
// written with below (presumably limiting them to X, Y and Z - TODO confirm).
uint32_t operands_length =
|
||||
dest.GetLength() + src0.GetLength(0b0111) + src1.GetLength(0b0111);
|
||||
// Reserve room for the opcode token plus all operand tokens up front.
shader_code_.reserve(shader_code_.size() + 1 + operands_length);
|
||||
shader_code_.push_back(
|
||||
DxbcOpcodeToken(DxbcOpcode::kDP3, operands_length, saturate));
|
||||
dest.Write(shader_code_);
|
||||
src0.Write(shader_code_, false, 0b0111);
|
||||
src1.Write(shader_code_, false, 0b0111);
|
||||
++stat_.instruction_count;
|
||||
++stat_.float_instruction_count;
|
||||
}
|
||||
void DxbcOpDP4(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1,
|
||||
bool saturate = false) {
|
||||
uint32_t operands_length =
|
||||
|
@ -1325,6 +1355,11 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
DxbcEmitAluOp(DxbcOpcode::kEq, 0b00, dest, src0, src1);
|
||||
++stat_.float_instruction_count;
|
||||
}
|
||||
// Emits a frc instruction from src to dest through the shared ALU emission
// helper, with an optional saturate flag, and counts it as a float
// instruction.
void DxbcOpFrc(const DxbcDest& dest, const DxbcSrc& src,
|
||||
bool saturate = false) {
|
||||
DxbcEmitAluOp(DxbcOpcode::kFrc, 0b0, dest, src, saturate);
|
||||
++stat_.float_instruction_count;
|
||||
}
|
||||
void DxbcOpFToI(const DxbcDest& dest, const DxbcSrc& src) {
|
||||
DxbcEmitAluOp(DxbcOpcode::kFToI, 0b0, dest, src);
|
||||
++stat_.conversion_instruction_count;
|
||||
|
@ -1471,6 +1506,11 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
DxbcEmitAluOp(DxbcOpcode::kRoundNE, 0b0, dest, src, saturate);
|
||||
++stat_.float_instruction_count;
|
||||
}
|
||||
// Emits a round_ni instruction from src to dest through the shared ALU
// emission helper, with an optional saturate flag, and counts it as a float
// instruction.
void DxbcOpRoundNI(const DxbcDest& dest, const DxbcSrc& src,
|
||||
bool saturate = false) {
|
||||
DxbcEmitAluOp(DxbcOpcode::kRoundNI, 0b0, dest, src, saturate);
|
||||
++stat_.float_instruction_count;
|
||||
}
|
||||
void DxbcOpRoundZ(const DxbcDest& dest, const DxbcSrc& src,
|
||||
bool saturate = false) {
|
||||
DxbcEmitAluOp(DxbcOpcode::kRoundZ, 0b0, dest, src, saturate);
|
||||
|
@ -2027,6 +2067,14 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// as shader messages, from instruction_disassembly_buffer_.
|
||||
void EmitInstructionDisassembly();
|
||||
|
||||
// Converts a shader translator source operand to a DXBC emitter operand, or
|
||||
// returns a zero literal operand if it's not going to be referenced. This may
|
||||
// allocate a temporary register and emit instructions if the operand can't be
|
||||
// used directly with most DXBC instructions (like, if it's an indexable GPR),
|
||||
// in this case, temp_pushed_out will be set to true, and PopSystemTemp must
|
||||
// be done when the operand is not needed anymore.
|
||||
DxbcSrc LoadOperand(const InstructionOperand& operand,
|
||||
uint32_t needed_components, bool& temp_pushed_out);
|
||||
// Abstract 4-component vector source operand.
|
||||
// TODO(Triang3l): Remove after fully moving to the new emitter.
|
||||
struct DxbcSourceOperand {
|
||||
|
@ -2085,11 +2133,12 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// TODO(Triang3l): Remove after fully moving to the new emitter.
|
||||
void UnloadDxbcSourceOperand(const DxbcSourceOperand& operand);
|
||||
|
||||
// Writes xyzw or xxxx of the specified r# to the destination.
|
||||
// can_store_memexport_address is for safety, to allow only proper MADs with
|
||||
// a stream constant to write to eA.
|
||||
void StoreResult(const InstructionResult& result, uint32_t reg,
|
||||
bool replicate_x, bool can_store_memexport_address = false);
|
||||
// Writes the specified source (src must be usable as a vector `mov` source,
|
||||
// including to x#) to an instruction storage target.
|
||||
// can_store_memexport_address is for safety, to allow only proper MADs with a
|
||||
// stream constant to write to eA.
|
||||
void StoreResult(const InstructionResult& result, const DxbcSrc& src,
|
||||
bool can_store_memexport_address = false);
|
||||
|
||||
// The nesting of `if` instructions is the following:
|
||||
// - pc checks (labels).
|
||||
|
@ -2150,12 +2199,12 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
TextureFilter min_filter,
|
||||
TextureFilter mip_filter,
|
||||
AnisoFilter aniso_filter);
|
||||
// Converts (S, T, face index) in the specified temporary register to a 3D
|
||||
// cubemap coordinate.
|
||||
void ArrayCoordToCubeDirection(uint32_t reg);
|
||||
// Converts (array S + 1, array T + 1, face index) in the specified temporary
|
||||
// register to a 3D cubemap coordinate.
|
||||
void TfetchCubeCoordToCubeDirection(uint32_t reg);
|
||||
|
||||
bool ProcessVectorAluOperation(const ParsedAluInstruction& instr,
|
||||
bool& replicate_result_x,
|
||||
void ProcessVectorAluOperation(const ParsedAluInstruction& instr,
|
||||
uint32_t& result_swizzle,
|
||||
bool& predicate_written);
|
||||
bool ProcessScalarAluOperation(const ParsedAluInstruction& instr,
|
||||
bool& predicate_written);
|
||||
|
@ -2334,9 +2383,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// eM# in each `alloc export`, or UINT32_MAX if not used.
|
||||
uint32_t system_temps_memexport_data_[kMaxMemExports][5];
|
||||
|
||||
// Vector ALU result or fetch scratch (since Xenos write masks can contain
|
||||
// Vector ALU or fetch result/scratch (since Xenos write masks can contain
|
||||
// swizzles).
|
||||
uint32_t system_temp_pv_;
|
||||
uint32_t system_temp_result_;
|
||||
// Temporary register ID for previous scalar result, program counter,
|
||||
// predicate and absolute address register.
|
||||
uint32_t system_temp_ps_pc_p0_a0_;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -42,7 +42,7 @@ void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index,
|
|||
shader_code_.push_back(temp1);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(8);
|
||||
|
@ -74,7 +74,7 @@ void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index,
|
|||
shader_code_.push_back(8);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(temp1);
|
||||
|
@ -91,7 +91,7 @@ void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index,
|
|||
shader_code_.push_back(temp2);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(16);
|
||||
|
@ -189,7 +189,7 @@ void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index,
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, write_mask, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(temp2);
|
||||
|
@ -198,7 +198,7 @@ void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index,
|
|||
shader_code_.push_back(temp1);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
++stat_.instruction_count;
|
||||
++stat_.movc_instruction_count;
|
||||
|
||||
|
@ -212,7 +212,7 @@ void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index,
|
|||
shader_code_.push_back(temp1);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(16);
|
||||
|
@ -244,7 +244,7 @@ void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index,
|
|||
shader_code_.push_back(16);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(temp1);
|
||||
|
@ -257,7 +257,7 @@ void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index,
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, write_mask, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
|
||||
shader_code_.push_back(temp2);
|
||||
|
@ -266,7 +266,7 @@ void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index,
|
|||
shader_code_.push_back(temp1);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
++stat_.instruction_count;
|
||||
++stat_.movc_instruction_count;
|
||||
|
||||
|
@ -342,7 +342,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
5 + DxbcSourceOperandLength(index_operand)));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
UseDxbcSourceOperand(index_operand, kSwizzleXYZW, 0);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
|
@ -353,10 +353,10 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
++stat_.instruction_count;
|
||||
++stat_.conversion_instruction_count;
|
||||
} else {
|
||||
|
@ -365,7 +365,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
3 + DxbcSourceOperandLength(index_operand)));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
UseDxbcSourceOperand(index_operand, kSwizzleXYZW, 0);
|
||||
++stat_.instruction_count;
|
||||
++stat_.conversion_instruction_count;
|
||||
|
@ -390,7 +390,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, (vfetch_index & 1) * 2, 3));
|
||||
shader_code_.push_back(cbuffer_index_fetch_constants_);
|
||||
|
@ -407,16 +407,16 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(instr.attributes.stride * 4);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
|
@ -426,10 +426,10 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(instr.attributes.offset * 4);
|
||||
|
@ -444,7 +444,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3));
|
||||
shader_code_.push_back(cbuffer_index_system_constants_);
|
||||
|
@ -462,7 +462,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
++stat_.instruction_count;
|
||||
++stat_.dynamic_flow_control_count;
|
||||
|
||||
|
@ -471,10 +471,10 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, (1 << load_dword_count) - 1, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D11_SB_OPERAND_TYPE_UNORDERED_ACCESS_VIEW,
|
||||
kSwizzleXYZW & ((1 << (load_dword_count * 2)) - 1), 2));
|
||||
|
@ -492,10 +492,10 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, (1 << load_dword_count) - 1, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_RESOURCE,
|
||||
kSwizzleXYZW & ((1 << (load_dword_count * 2)) - 1), 2));
|
||||
|
@ -607,7 +607,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP,
|
||||
result_write_mask, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(extract_widths[0]);
|
||||
|
@ -622,7 +622,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
shader_code_.push_back(extract_offsets[3]);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, extract_swizzle, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
++stat_.instruction_count;
|
||||
if (extract_signed) {
|
||||
++stat_.int_instruction_count;
|
||||
|
@ -639,10 +639,10 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP,
|
||||
result_write_mask, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
++stat_.instruction_count;
|
||||
++stat_.conversion_instruction_count;
|
||||
} else if (normalize_scales[0] != 0.0f) {
|
||||
|
@ -655,10 +655,10 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP,
|
||||
result_write_mask, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
++stat_.instruction_count;
|
||||
++stat_.conversion_instruction_count;
|
||||
if (!instr.attributes.is_integer) {
|
||||
|
@ -667,10 +667,10 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, result_write_mask, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
|
@ -687,10 +687,10 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, result_write_mask, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(0xBF800000u);
|
||||
|
@ -710,7 +710,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, 0b1111 & ~result_write_mask, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(0);
|
||||
|
@ -727,10 +727,10 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP,
|
||||
result_write_mask, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
uint32_t exp_adjust_scale =
|
||||
|
@ -743,7 +743,7 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
++stat_.float_instruction_count;
|
||||
}
|
||||
|
||||
StoreResult(instr.result, system_temp_pv_, false);
|
||||
StoreResult(instr.result, DxbcSrc::R(system_temp_result_));
|
||||
}
|
||||
|
||||
uint32_t DxbcShaderTranslator::FindOrAddTextureSRV(uint32_t fetch_constant,
|
||||
|
@ -852,9 +852,9 @@ uint32_t DxbcShaderTranslator::FindOrAddSamplerBinding(
|
|||
return sampler_register;
|
||||
}
|
||||
|
||||
void DxbcShaderTranslator::ArrayCoordToCubeDirection(uint32_t reg) {
|
||||
// This does the reverse of what the cube vector ALU instruction does, but
|
||||
// assuming S and T are normalized.
|
||||
void DxbcShaderTranslator::TfetchCubeCoordToCubeDirection(uint32_t reg) {
|
||||
// This does the reverse of what's done by the ALU sequence for cubemap
|
||||
// coordinate calculation.
|
||||
//
|
||||
// The major axis depends on the face index (passed as a float in reg.z):
|
||||
// +X for 0, -X for 1, +Y for 2, -Y for 3, +Z for 4, -Z for 5.
|
||||
|
@ -872,8 +872,8 @@ void DxbcShaderTranslator::ArrayCoordToCubeDirection(uint32_t reg) {
|
|||
// * Y is -T.
|
||||
// * Z is 1.0 or -1.0.
|
||||
|
||||
// Make 0, not 0.5, the center of S and T.
|
||||
// mad reg.xy__, reg.xy__, l(2.0, 2.0, _, _), l(-1.0, -1.0, _, _)
|
||||
// Make 0, not 1.5, the center of S and T.
|
||||
// mad reg.xy__, reg.xy__, l(2.0, 2.0, _, _), l(-3.0, -3.0, _, _)
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15));
|
||||
shader_code_.push_back(
|
||||
|
@ -890,8 +890,8 @@ void DxbcShaderTranslator::ArrayCoordToCubeDirection(uint32_t reg) {
|
|||
shader_code_.push_back(0x3F800000u);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(0xBF800000u);
|
||||
shader_code_.push_back(0xBF800000u);
|
||||
shader_code_.push_back(0xC0400000u);
|
||||
shader_code_.push_back(0xC0400000u);
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(0);
|
||||
++stat_.instruction_count;
|
||||
|
@ -1194,7 +1194,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(0);
|
||||
|
@ -2149,7 +2149,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, coord_mask, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(coord_temp);
|
||||
|
@ -2157,12 +2157,13 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
++stat_.float_instruction_count;
|
||||
} else {
|
||||
if (instr.dimension == TextureDimension::kCube) {
|
||||
// Convert cubemap coordinates passed as 2D array texture coordinates to
|
||||
// a 3D direction. We can't use a 2D array to emulate cubemaps because
|
||||
// at the edges, especially in pixel shader helper invocations, the
|
||||
// major axis changes, causing S/T to jump between 0 and 1, breaking
|
||||
// gradient calculation and causing the 1x1 mipmap to be sampled.
|
||||
ArrayCoordToCubeDirection(coord_temp);
|
||||
// Convert cubemap coordinates passed as 2D array texture coordinates
|
||||
// plus 1 in ST to a 3D direction. We can't use a 2D array to emulate
|
||||
// cubemaps because at the edges, especially in pixel shader helper
|
||||
// invocations, the major axis changes, causing S/T to jump between 0
|
||||
// and 1, breaking gradient calculation and causing the 1x1 mipmap to be
|
||||
// sampled.
|
||||
TfetchCubeCoordToCubeDirection(coord_temp);
|
||||
}
|
||||
|
||||
// Bias the register LOD if fetching with explicit LOD (so this is not
|
||||
|
@ -2237,7 +2238,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(coord_temp);
|
||||
|
@ -2260,10 +2261,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(EncodeVectorMaskedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(
|
||||
|
@ -2277,7 +2278,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
for (uint32_t j = 0; j < 2; ++j) {
|
||||
uint32_t srv_index_current =
|
||||
i ? srv_indices_stacked[j] : srv_indices[j];
|
||||
uint32_t target_temp_sign = j ? signed_value_temp : system_temp_pv_;
|
||||
uint32_t target_temp_sign =
|
||||
j ? signed_value_temp : system_temp_result_;
|
||||
for (uint32_t k = 0;
|
||||
k < (vol_filter_lerp_temp != UINT32_MAX ? 2u : 1u); ++k) {
|
||||
uint32_t target_temp_current =
|
||||
|
@ -2564,7 +2566,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1 << i, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
|
||||
shader_code_.push_back(sign_temp);
|
||||
|
@ -2573,7 +2575,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
shader_code_.push_back(signed_value_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
++stat_.instruction_count;
|
||||
++stat_.movc_instruction_count;
|
||||
|
||||
|
@ -2603,7 +2605,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
shader_code_.push_back(sign_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(0x40000000u);
|
||||
|
@ -2619,7 +2621,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1 << i, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
|
||||
shader_code_.push_back(sign_temp);
|
||||
|
@ -2628,7 +2630,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
shader_code_.push_back(sign_temp);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, i, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
++stat_.instruction_count;
|
||||
++stat_.movc_instruction_count;
|
||||
|
||||
|
@ -2661,7 +2663,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
++stat_.dynamic_flow_control_count;
|
||||
|
||||
// Degamma the channel.
|
||||
ConvertPWLGamma(false, system_temp_pv_, i, system_temp_pv_, i,
|
||||
ConvertPWLGamma(false, system_temp_result_, i, system_temp_result_, i,
|
||||
sign_temp, 0, sign_temp, 1);
|
||||
|
||||
// Close the gamma conditional.
|
||||
|
@ -2733,10 +2735,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(exp_adjust_temp);
|
||||
|
@ -2774,7 +2776,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + operand_length));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0101, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
UseDxbcSourceOperand(operand, 0b01010000);
|
||||
++stat_.instruction_count;
|
||||
++stat_.float_instruction_count;
|
||||
|
@ -2784,7 +2786,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3 + operand_length));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1010, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
UseDxbcSourceOperand(operand, 0b01010000);
|
||||
++stat_.instruction_count;
|
||||
++stat_.float_instruction_count;
|
||||
|
@ -2857,10 +2859,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
|
||||
shader_code_.push_back(system_temp_pv_);
|
||||
shader_code_.push_back(system_temp_result_);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b01000100, 1));
|
||||
shader_code_.push_back(exp_bias_temp);
|
||||
|
@ -2898,7 +2900,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
}
|
||||
|
||||
if (store_result) {
|
||||
StoreResult(instr.result, system_temp_pv_, replicate_result);
|
||||
StoreResult(instr.result,
|
||||
DxbcSrc::R(system_temp_result_,
|
||||
replicate_result ? DxbcSrc::kXXXX : DxbcSrc::kXYZW));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -289,7 +289,7 @@ struct ParsedLoopStartInstruction {
|
|||
uint32_t dword_index = 0;
|
||||
|
||||
// Integer constant register that holds the loop parameters.
|
||||
// Byte-wise: [loop count, start, step [-128, 127], ?]
|
||||
// 0:7 - uint8 loop count, 8:15 - uint8 start aL, 16:23 - int8 aL step.
|
||||
uint32_t loop_constant_index = 0;
|
||||
// Whether to reuse the current aL instead of reset it to loop start.
|
||||
bool is_repeat = false;
|
||||
|
@ -311,7 +311,7 @@ struct ParsedLoopEndInstruction {
|
|||
bool predicate_condition = false;
|
||||
|
||||
// Integer constant register that holds the loop parameters.
|
||||
// Byte-wise: [loop count, start, step [-128, 127], ?]
|
||||
// 0:7 - uint8 loop count, 8:15 - uint8 start aL, 16:23 - int8 aL step.
|
||||
uint32_t loop_constant_index = 0;
|
||||
|
||||
// Target address of the start of the loop body.
|
||||
|
|
|
@ -256,7 +256,7 @@ struct ControlFlowLoopStartInstruction {
|
|||
// Whether to reuse the current aL instead of reset it to loop start.
|
||||
bool is_repeat() const { return is_repeat_; }
|
||||
// Integer constant register that holds the loop parameters.
|
||||
// Byte-wise: [loop count, start, step [-128, 127], ?]
|
||||
// 0:7 - uint8 loop count, 8:15 - uint8 start aL, 16:23 - int8 aL step.
|
||||
uint32_t loop_id() const { return loop_id_; }
|
||||
|
||||
private:
|
||||
|
@ -281,7 +281,7 @@ struct ControlFlowLoopEndInstruction {
|
|||
// Target address of the start of the loop body.
|
||||
uint32_t address() const { return address_; }
|
||||
// Integer constant register that holds the loop parameters.
|
||||
// Byte-wise: [loop count, start, step [-128, 127], ?]
|
||||
// 0:7 - uint8 loop count, 8:15 - uint8 start aL, 16:23 - int8 aL step.
|
||||
uint32_t loop_id() const { return loop_id_; }
|
||||
// Break from the loop if the predicate matches the expected value.
|
||||
bool is_predicated_break() const { return is_predicated_break_; }
|
||||
|
@ -667,11 +667,13 @@ static_assert_size(TextureFetchInstruction, 12);
|
|||
// Both are valid only within the current ALU clause. They are not modified
|
||||
// when the instruction that would write them fails its predication check.
|
||||
// - Direct3D 9 rules (like in GCN v_*_legacy_f32 instructions) for
|
||||
// multiplication (0 * anything = 0) wherever it's present (mul, mad, dp,
|
||||
// etc.) and for NaN in min/max. It's very important to respect this rule for
|
||||
// multiplication, as games often rely on it in vector normalization (rcp and
|
||||
// mul), Infinity * 0 resulting in NaN breaks a lot of things in games -
|
||||
// causes white screen in Halo 3, white specular on characters in GTA IV.
|
||||
// multiplication (0 or denormal * anything = 0) wherever it's present (mul,
|
||||
// mad, dp, etc.) and for NaN in min/max. It's very important to respect this
|
||||
// rule for multiplication, as games often rely on it in vector normalization
|
||||
// (rcp and mul), Infinity * 0 resulting in NaN breaks a lot of things in
|
||||
// games - causes white screen in Halo 3, white specular on characters in GTA
|
||||
// IV.
|
||||
// TODO(Triang3l): Investigate signed zero handling in multiplication.
|
||||
|
||||
enum class AluScalarOpcode : uint32_t {
|
||||
// Floating-Point Add
|
||||
|
@ -1145,7 +1147,7 @@ enum class AluVectorOpcode : uint32_t {
|
|||
// cube/CUBEv dest, src0, src1
|
||||
// dest.x = T cube coordinate;
|
||||
// dest.y = S cube coordinate;
|
||||
// dest.z = 2.0 * MajorAxis;
|
||||
// dest.z = 2.0 * major axis;
|
||||
// dest.w = FaceID;
|
||||
// https://developer.amd.com/wordpress/media/2012/12/AMD_Southern_Islands_Instruction_Set_Architecture.pdf
|
||||
// if (abs(z) >= abs(x) && abs(z) >= abs(y)) {
|
||||
|
@ -1167,6 +1169,16 @@ enum class AluVectorOpcode : uint32_t {
|
|||
// Expects src0.zzxy and src1.yxzz swizzles.
|
||||
// FaceID is D3DCUBEMAP_FACES:
|
||||
// https://msdn.microsoft.com/en-us/library/windows/desktop/bb172528(v=vs.85).aspx
|
||||
// Used like:
|
||||
// cube r0, source.zzxy, source.yxz
|
||||
// rcp r0.z, r0_abs.z
|
||||
// mad r0.xy, r0, r0.zzzw, 1.5f
|
||||
// tfetchCube r0, r0.yxw, tf0
|
||||
// http://web.archive.org/web/20100705154143/http://msdn.microsoft.com/en-us/library/bb313921.aspx
|
||||
// On GCN, the sequence is the same, so GCN documentation can be used as a
|
||||
// reference (tfetchCube doesn't accept the UV as if the texture was a 2D
|
||||
// array in XY exactly, to get texture array UV, 1 must be subtracted from its
|
||||
// XY inputs).
|
||||
kCube = 18,
|
||||
|
||||
// Four-Element Maximum
|
||||
|
@ -1293,12 +1305,20 @@ enum class AluVectorOpcode : uint32_t {
|
|||
// Per-Component Floating-Point Maximum with Copy To Integer in AR
|
||||
// maxa dest, src0, src1
|
||||
// This is a combined max + mova/MOVAv.
|
||||
// int result = (int)floor(src0.w + 0.5);
|
||||
// a0 = clamp(result, -256, 255);
|
||||
// a0 = (int)clamp(floor(src0.w + 0.5), -256.0, 255.0);
|
||||
// dest.x = src0.x >= src1.x ? src0.x : src1.x;
|
||||
// dest.y = src0.y >= src1.y ? src0.y : src1.y;
|
||||
// dest.z = src0.z >= src1.z ? src0.z : src1.z;
|
||||
// dest.w = src0.w >= src1.w ? src0.w : src1.w;
|
||||
// The MSDN documentation specifies clamp as:
|
||||
// if (!(SQResultF >= -256.0)) {
|
||||
// SQResultF = -256.0;
|
||||
// }
|
||||
// if (SQResultF > 255.0) {
|
||||
// SQResultF = 255.0;
|
||||
// }
|
||||
// http://web.archive.org/web/20100705151335/http://msdn.microsoft.com:80/en-us/library/bb313931.aspx
|
||||
// However, using NaN as an address would be unusual.
|
||||
kMaxA = 29,
|
||||
};
|
||||
|
||||
|
@ -1329,6 +1349,7 @@ constexpr bool AluVectorOpHasSideEffects(AluVectorOpcode vector_opcode) {
|
|||
// (doesn't check the operand count though).
|
||||
constexpr uint32_t GetAluVectorOpUsedSourceComponents(
|
||||
AluVectorOpcode vector_opcode, uint32_t src_index) {
|
||||
assert_not_zero(src_index);
|
||||
switch (vector_opcode) {
|
||||
case AluVectorOpcode::kDp3:
|
||||
return 0b0111;
|
||||
|
@ -1353,27 +1374,30 @@ constexpr uint32_t GetAluVectorOpUsedSourceComponents(
|
|||
// components specified in the write mask are needed, but there are instructions
|
||||
// with special behavior for certain components.
|
||||
constexpr uint32_t GetAluVectorOpNeededSourceComponents(
|
||||
AluVectorOpcode vector_opcode, uint32_t src_index, uint32_t write_mask) {
|
||||
uint32_t components = write_mask;
|
||||
AluVectorOpcode vector_opcode, uint32_t src_index,
|
||||
uint32_t used_result_components) {
|
||||
assert_not_zero(src_index);
|
||||
uint32_t components = used_result_components;
|
||||
switch (vector_opcode) {
|
||||
case AluVectorOpcode::kDp4:
|
||||
case AluVectorOpcode::kMax4:
|
||||
components = write_mask ? 0b1111 : 0;
|
||||
components = used_result_components ? 0b1111 : 0;
|
||||
break;
|
||||
case AluVectorOpcode::kDp3:
|
||||
components = write_mask ? 0b0111 : 0;
|
||||
components = used_result_components ? 0b0111 : 0;
|
||||
break;
|
||||
case AluVectorOpcode::kDp2Add:
|
||||
components = write_mask ? (src_index == 3 ? 0b0001 : 0b0011) : 0;
|
||||
components =
|
||||
used_result_components ? (src_index == 3 ? 0b0001 : 0b0011) : 0;
|
||||
break;
|
||||
case AluVectorOpcode::kCube:
|
||||
components = write_mask ? 0b1111 : 0;
|
||||
components = used_result_components ? 0b1111 : 0;
|
||||
break;
|
||||
case AluVectorOpcode::kSetpEqPush:
|
||||
case AluVectorOpcode::kSetpNePush:
|
||||
case AluVectorOpcode::kSetpGtPush:
|
||||
case AluVectorOpcode::kSetpGePush:
|
||||
components = write_mask ? 0b1001 : 0b1000;
|
||||
components = used_result_components ? 0b1001 : 0b1000;
|
||||
break;
|
||||
case AluVectorOpcode::kKillEq:
|
||||
case AluVectorOpcode::kKillGt:
|
||||
|
|
Loading…
Reference in New Issue