[GPU] Shader translator refactoring (mostly ALU), fixes for disassembly round trip and write masks
parent 8f91e580f4
commit 3aa0ce3096
@@ -2961,6 +2961,14 @@ bool D3D12CommandProcessor::UpdateBindings(
                  (!samplers_written_pixel_ ||
                   current_samplers_hash_pixel_ != samplers_hash_pixel);
 
+  // These are the constant base addresses/ranges for shaders.
+  // We have these hardcoded right now cause nothing seems to differ on the Xbox
+  // 360 (however, OpenGL ES on Adreno 200 on Android has different ranges).
+  assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
+              regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
+  assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
+              regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
+
   // Check if the float constant layout is still the same and get the counts.
   const Shader::ConstantRegisterMap& float_constant_map_vertex =
       vertex_shader->constant_register_map();
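The asserted magic values look like packed base/size pairs that place vertex constants at c0 and pixel constants at c256. The sketch below is a hypothetical decode, not taken from the commit: the field widths and the meaning of the bits are assumptions chosen so the two known values come out consistent.

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical decode of the SQ_VS/PS_CONST values, assuming BASE in the low
// 12 bits and SIZE (minus one, in vec4 registers) in the bits above it.
struct ConstBaseRange {
  uint32_t base, size;
};
ConstBaseRange Decode(uint32_t reg) {
  return {reg & 0xFFF, ((reg >> 12) & 0x1FF) + 1};
}

int main() {
  // 0x000FF000: vertex constants at c0, 256 registers.
  assert(Decode(0x000FF000).base == 0 && Decode(0x000FF000).size == 256);
  // 0x000FF100: pixel constants at c256, 256 registers.
  assert(Decode(0x000FF100).base == 256 && Decode(0x000FF100).size == 256);
}
```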
@@ -809,14 +809,6 @@ bool PipelineCache::EnsureShadersTranslated(
     D3D12Shader* vertex_shader, D3D12Shader* pixel_shader,
     Shader::HostVertexShaderType host_vertex_shader_type) {
   auto& regs = *register_file_;
-
-  // These are the constant base addresses/ranges for shaders.
-  // We have these hardcoded right now cause nothing seems to differ.
-  assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
-              regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
-  assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
-              regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
 
   auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
 
   // Edge flags are not supported yet (because polygon primitives are not).
@@ -18,6 +18,7 @@
 
+#include "xenia/base/assert.h"
 #include "xenia/base/cvar.h"
 #include "xenia/base/math.h"
 
 DEFINE_bool(dxbc_switch, true,
             "Use switch rather than if for flow control. Turning this off or "
@@ -86,7 +87,6 @@ DxbcShaderTranslator::DxbcShaderTranslator(uint32_t vendor_id,
   // Don't allocate again and again for the first shader.
   shader_code_.reserve(8192);
   shader_object_.reserve(16384);
-  float_constant_index_offsets_.reserve(512);
 }
 DxbcShaderTranslator::~DxbcShaderTranslator() = default;
@@ -161,8 +161,6 @@ void DxbcShaderTranslator::Reset() {
   cbuffer_index_fetch_constants_ = kCbufferIndexUnallocated;
 
   system_constants_used_ = 0;
-  float_constants_dynamic_indexed_ = false;
-  float_constant_index_offsets_.clear();
 
   in_control_point_index_used_ = false;
@@ -1166,29 +1164,6 @@ void DxbcShaderTranslator::CompleteShaderCode() {
 
   // Release system_temps_subroutine_.
   PopSystemTemp(system_temps_subroutine_count_);
-
-  // Remap float constant indices if not indexed dynamically.
-  if (!float_constants_dynamic_indexed_ &&
-      !float_constant_index_offsets_.empty()) {
-    uint8_t float_constant_map[256] = {};
-    uint32_t float_constant_count = 0;
-    for (uint32_t i = 0; i < 4; ++i) {
-      uint64_t float_constants_used = constant_register_map().float_bitmap[i];
-      uint32_t float_constant_index;
-      while (
-          xe::bit_scan_forward(float_constants_used, &float_constant_index)) {
-        float_constants_used &= ~(1ull << float_constant_index);
-        float_constant_map[i * 64 + float_constant_index] =
-            float_constant_count++;
-      }
-    }
-    size_t index_count = float_constant_index_offsets_.size();
-    for (size_t i = 0; i < index_count; ++i) {
-      uint32_t index_offset = float_constant_index_offsets_[i];
-      shader_code_[index_offset] =
-          float_constant_map[shader_code_[index_offset] & 255];
-    }
-  }
 }
 
 std::vector<uint8_t> DxbcShaderTranslator::CompleteTranslation() {
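The block deleted above was a post-pass: it patched guest float-constant indices in the already-emitted DXBC token stream once the usage bitmap was final. The replacement (visible in the LoadDxbcSourceOperand hunks below) computes the packed index up front from the bitmap. A minimal standalone sketch of that bitmap-compaction idea, assuming a 256-bit usage map stored as four uint64_t words like ConstantRegisterMap::float_bitmap; `__builtin_popcountll` stands in for the project's xe::bit_count:

```cpp
#include <cstdint>
#include <cstdio>

// Counts how many used constants precede float_constant in the bitmap; that
// count is the index the constant gets once used constants are tightly packed.
uint32_t PackedIndex(const uint64_t bitmap[4], uint32_t float_constant) {
  uint32_t block = float_constant / 64, bit = float_constant % 64;
  if (!(bitmap[block] & (uint64_t(1) << bit))) {
    return UINT32_MAX;  // Not referenced by the shader at all.
  }
  uint32_t offset = 0;
  for (uint32_t i = 0; i < block; ++i) {
    offset += uint32_t(__builtin_popcountll(bitmap[i]));
  }
  return offset + uint32_t(__builtin_popcountll(
                      bitmap[block] & ((uint64_t(1) << bit) - 1)));
}

int main() {
  // A shader reading only c2, c5 and c130 packs them to slots 0, 1 and 2.
  uint64_t bitmap[4] = {(1ull << 2) | (1ull << 5), 0, 1ull << 2, 0};
  std::printf("%u %u %u\n", PackedIndex(bitmap, 2), PackedIndex(bitmap, 5),
              PackedIndex(bitmap, 130));  // Prints: 0 1 2
}
```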
@@ -1420,7 +1395,7 @@ void DxbcShaderTranslator::LoadDxbcSourceOperand(
         shader_code_.push_back(EncodeVectorSwizzledOperand(
             D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP, kSwizzleXYZW, 2));
         shader_code_.push_back(0);
-        shader_code_.push_back(uint32_t(operand.storage_index));
+        shader_code_.push_back(operand.storage_index);
       } else {
         shader_code_.push_back(
             ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
@@ -1433,7 +1408,7 @@ void DxbcShaderTranslator::LoadDxbcSourceOperand(
             D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
             D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE));
         shader_code_.push_back(0);
-        shader_code_.push_back(uint32_t(operand.storage_index));
+        shader_code_.push_back(operand.storage_index);
         shader_code_.push_back(EncodeVectorSelectOperand(
             D3D10_SB_OPERAND_TYPE_TEMP, dynamic_address_component, 1));
         shader_code_.push_back(dynamic_address_register);
@@ -1445,7 +1420,7 @@ void DxbcShaderTranslator::LoadDxbcSourceOperand(
         assert_true(operand.storage_addressing_mode ==
                     InstructionStorageAddressingMode::kStatic);
         dxbc_operand.type = DxbcSourceOperand::Type::kRegister;
-        dxbc_operand.index = uint32_t(operand.storage_index);
+        dxbc_operand.index = operand.storage_index;
       }
       break;
@@ -1457,11 +1432,18 @@ void DxbcShaderTranslator::LoadDxbcSourceOperand(
         cbuffer_index_float_constants_ = cbuffer_count_++;
       }
       dxbc_operand.type = DxbcSourceOperand::Type::kConstantFloat;
-      dxbc_operand.index = uint32_t(operand.storage_index);
       dxbc_operand.addressing_mode = operand.storage_addressing_mode;
-      if (operand.storage_addressing_mode !=
+      if (operand.storage_addressing_mode ==
           InstructionStorageAddressingMode::kStatic) {
-        float_constants_dynamic_indexed_ = true;
+        uint32_t float_constant_index =
+            constant_register_map().GetPackedFloatConstantIndex(
+                operand.storage_index);
+        assert_true(float_constant_index != UINT32_MAX);
+        dxbc_operand.index =
+            float_constant_index != UINT32_MAX ? float_constant_index : 0;
+      } else {
+        assert_true(constant_register_map().float_dynamic_addressing);
+        dxbc_operand.index = operand.storage_index;
       }
       break;
@@ -1652,11 +1634,6 @@ void DxbcShaderTranslator::UseDxbcSourceOperand(
       }
       shader_code_.push_back(cbuffer_index_float_constants_);
       shader_code_.push_back(uint32_t(CbufferRegister::kFloatConstants));
-      if (!float_constants_dynamic_indexed_) {
-        // If there's no dynamic indexing in the shader, constants are compacted
-        // and remapped. Store where the index has been written.
-        float_constant_index_offsets_.push_back(uint32_t(shader_code_.size()));
-      }
       shader_code_.push_back(operand.index);
       if (!is_static) {
         uint32_t dynamic_address_register, dynamic_address_component;
@@ -1718,8 +1695,9 @@ void DxbcShaderTranslator::UnloadDxbcSourceOperand(
 void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
                                        uint32_t reg, bool replicate_x,
                                        bool can_store_memexport_address) {
+  uint32_t used_write_mask = result.GetUsedWriteMask();
   if (result.storage_target == InstructionStorageTarget::kNone ||
-      !result.has_any_writes()) {
+      !result.GetUsedWriteMask()) {
     return;
   }
@@ -1744,10 +1722,9 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
       ENCODE_D3D10_SB_INSTRUCTION_SATURATE(result.is_clamped);
 
   // Scalar targets get only one component.
+  // TODO(Triang3l): It's not replicated, it's X specifically.
   if (result.storage_target == InstructionStorageTarget::kDepth) {
-    if (!result.write_mask[0]) {
-      return;
-    }
+    assert_not_zero(used_write_mask & 0b0001);
     SwizzleSource component = result.components[0];
     if (replicate_x && component <= SwizzleSource::kW) {
       component = SwizzleSource::kX;
@@ -1802,7 +1779,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
     uint32_t constant_mask = 0;
     uint32_t constant_values = 0;
     for (uint32_t i = 0; i < 4; ++i) {
-      if (!result.write_mask[i]) {
+      if (!(used_write_mask & (1 << i))) {
         continue;
       }
       SwizzleSource component = result.components[i];
@@ -1858,7 +1835,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
           is_static ? D3D10_SB_OPERAND_INDEX_IMMEDIATE32
                     : D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE));
       shader_code_.push_back(0);
-      shader_code_.push_back(uint32_t(result.storage_index));
+      shader_code_.push_back(result.storage_index);
       if (!is_static) {
         shader_code_.push_back(EncodeVectorSelectOperand(
             D3D10_SB_OPERAND_TYPE_TEMP, dynamic_address_component, 1));
@@ -1874,11 +1851,11 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
             saturate_bit);
         shader_code_.push_back(
             EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, mask, 1));
-        shader_code_.push_back(uint32_t(result.storage_index));
+        shader_code_.push_back(result.storage_index);
       }
       break;
 
-    case InstructionStorageTarget::kInterpolant:
+    case InstructionStorageTarget::kInterpolator:
       ++stat_.instruction_count;
       ++stat_.mov_instruction_count;
       shader_code_.push_back(
@@ -1943,7 +1920,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
               [uint32_t(result.storage_index)]);
       break;
 
-    case InstructionStorageTarget::kColorTarget:
+    case InstructionStorageTarget::kColor:
       ++stat_.instruction_count;
       ++stat_.mov_instruction_count;
       shader_code_.push_back(
@@ -1952,8 +1929,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
           saturate_bit);
       shader_code_.push_back(
           EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, mask, 1));
-      shader_code_.push_back(
-          system_temps_color_[uint32_t(result.storage_index)]);
+      shader_code_.push_back(system_temps_color_[result.storage_index]);
       break;
 
     default:
@@ -1989,13 +1965,13 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
     shader_code_.push_back(
         EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
     shader_code_.push_back(
-        1u << (uint32_t(result.storage_index) + ((memexport_index & 3) << 3)));
+        uint32_t(1) << (result.storage_index + ((memexport_index & 3) << 3)));
     ++stat_.instruction_count;
     ++stat_.uint_instruction_count;
   }
 
   if (edram_rov_used_ &&
-      result.storage_target == InstructionStorageTarget::kColorTarget) {
+      result.storage_target == InstructionStorageTarget::kColor) {
     // For ROV output, mark that the color has been written to.
     // According to:
     // https://docs.microsoft.com/en-us/windows/desktop/direct3dhlsl/dx9-graphics-reference-asm-ps-registers-output-color
@@ -2014,7 +1990,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
     shader_code_.push_back(system_temp_rov_params_);
     shader_code_.push_back(
         EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
-    shader_code_.push_back(1 << (8 + uint32_t(result.storage_index)));
+    shader_code_.push_back(1 << (8 + result.storage_index));
     ++stat_.instruction_count;
     ++stat_.uint_instruction_count;
   }
@@ -2479,19 +2455,6 @@ const DxbcShaderTranslator::SystemConstantRdef DxbcShaderTranslator::
 };
 
 void DxbcShaderTranslator::WriteResourceDefinitions() {
   // ***************************************************************************
   // Preparation
   // ***************************************************************************
-
-  // Float constant count.
-  uint32_t float_constant_count = 0;
-  if (cbuffer_index_float_constants_ != kCbufferIndexUnallocated) {
-    for (uint32_t i = 0; i < 4; ++i) {
-      float_constant_count +=
-          xe::bit_count(constant_register_map().float_bitmap[i]);
-    }
-  }
 
   uint32_t chunk_position_dwords = uint32_t(shader_object_.size());
   uint32_t new_offset;
@@ -2583,7 +2546,8 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
     if (RdefTypeIndex(i) == RdefTypeIndex::kFloat4ConstantArray) {
       // Declaring a 0-sized array may not be safe, so write something valid
       // even if they aren't used.
-      shader_object_.push_back(std::max(float_constant_count, 1u));
+      shader_object_.push_back(
+          std::max(constant_register_map().float_count, uint32_t(1)));
     } else {
       shader_object_.push_back(type.element_count |
                                (type.struct_member_count << 16));
@@ -2692,8 +2656,9 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
   if (cbuffer_index_float_constants_ != kCbufferIndexUnallocated) {
     shader_object_.push_back(constant_name_offset_float);
     shader_object_.push_back(0);
-    shader_object_.push_back(std::max(float_constant_count, 1u) * 4 *
-                             sizeof(float));
+    shader_object_.push_back(
+        std::max(constant_register_map().float_count, uint32_t(1)) * 4 *
+        sizeof(float));
     shader_object_.push_back(kDxbcRdefVariableFlagUsed);
     shader_object_.push_back(types_offset +
                              uint32_t(RdefTypeIndex::kFloat4ConstantArray) *
@@ -2795,8 +2760,9 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
       shader_object_.push_back(cbuffer_name_offset_float);
       shader_object_.push_back(1);
       shader_object_.push_back(constant_offset_float);
-      shader_object_.push_back(std::max(float_constant_count, 1u) * 4 *
-                               sizeof(float));
+      shader_object_.push_back(
+          std::max(constant_register_map().float_count, uint32_t(1)) * 4 *
+          sizeof(float));
       shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer));
       shader_object_.push_back(0);
     } else if (i == cbuffer_index_bool_loop_constants_) {
@@ -3646,15 +3612,10 @@ void DxbcShaderTranslator::WriteShaderCode() {
   // Constant buffers, from most frequenly accessed to least frequently accessed
   // (the order is a hint to the driver according to the DXBC header).
   if (cbuffer_index_float_constants_ != kCbufferIndexUnallocated) {
-    uint32_t float_constant_count = 0;
-    for (uint32_t i = 0; i < 4; ++i) {
-      float_constant_count +=
-          xe::bit_count(constant_register_map().float_bitmap[i]);
-    }
     shader_object_.push_back(
         ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
         ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
-            float_constants_dynamic_indexed_
+            constant_register_map().float_dynamic_addressing
                 ? D3D10_SB_CONSTANT_BUFFER_DYNAMIC_INDEXED
                 : D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) |
         ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
@@ -3663,7 +3624,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
     shader_object_.push_back(cbuffer_index_float_constants_);
     shader_object_.push_back(uint32_t(CbufferRegister::kFloatConstants));
     shader_object_.push_back(uint32_t(CbufferRegister::kFloatConstants));
-    shader_object_.push_back(float_constant_count);
+    shader_object_.push_back(constant_register_map().float_count);
     shader_object_.push_back(0);
   }
   if (cbuffer_index_system_constants_ != kCbufferIndexUnallocated) {
@@ -857,10 +857,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
           return 0b0000;
       }
     }
-    DxbcDest Mask(uint32_t write_mask) const {
+    [[nodiscard]] DxbcDest Mask(uint32_t write_mask) const {
      return DxbcDest(type_, write_mask, index_1d_, index_2d_, index_3d_);
    }
-    DxbcDest MaskMasked(uint32_t write_mask) const {
+    [[nodiscard]] DxbcDest MaskMasked(uint32_t write_mask) const {
      return DxbcDest(type_, write_mask_ & write_mask, index_1d_, index_2d_,
                      index_3d_);
    }
@@ -991,26 +991,28 @@ class DxbcShaderTranslator : public ShaderTranslator {
       return DxbcSrc(DxbcOperandType::kInputCoverageMask, kXXXX);
     }
 
-    DxbcSrc WithModifiers(bool absolute, bool negate) const {
+    [[nodiscard]] DxbcSrc WithModifiers(bool absolute, bool negate) const {
       DxbcSrc new_src(*this);
       new_src.absolute_ = absolute;
       new_src.negate_ = negate;
       return new_src;
     }
-    DxbcSrc WithAbs(bool absolute) const {
+    [[nodiscard]] DxbcSrc WithAbs(bool absolute) const {
       return WithModifiers(absolute, negate_);
     }
-    DxbcSrc WithNeg(bool negate) const {
+    [[nodiscard]] DxbcSrc WithNeg(bool negate) const {
       return WithModifiers(absolute_, negate);
     }
-    DxbcSrc Abs() const { return WithModifiers(true, false); }
-    DxbcSrc operator-() const { return WithModifiers(absolute_, !negate_); }
-    DxbcSrc Swizzle(uint32_t swizzle) const {
+    [[nodiscard]] DxbcSrc Abs() const { return WithModifiers(true, false); }
+    [[nodiscard]] DxbcSrc operator-() const {
+      return WithModifiers(absolute_, !negate_);
+    }
+    [[nodiscard]] DxbcSrc Swizzle(uint32_t swizzle) const {
       DxbcSrc new_src(*this);
       new_src.swizzle_ = swizzle;
       return new_src;
     }
-    DxbcSrc SwizzleSwizzled(uint32_t swizzle) const {
+    [[nodiscard]] DxbcSrc SwizzleSwizzled(uint32_t swizzle) const {
       DxbcSrc new_src(*this);
       new_src.swizzle_ = 0;
       for (uint32_t i = 0; i < 4; ++i) {
@@ -1019,12 +1021,12 @@ class DxbcShaderTranslator : public ShaderTranslator {
       }
       return new_src;
     }
-    DxbcSrc Select(uint32_t component) const {
+    [[nodiscard]] DxbcSrc Select(uint32_t component) const {
       DxbcSrc new_src(*this);
       new_src.swizzle_ = component * 0b01010101;
       return new_src;
     }
-    DxbcSrc SelectFromSwizzled(uint32_t component) const {
+    [[nodiscard]] DxbcSrc SelectFromSwizzled(uint32_t component) const {
       DxbcSrc new_src(*this);
       new_src.swizzle_ = ((swizzle_ >> (component * 2)) & 3) * 0b01010101;
       return new_src;
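Select and SelectFromSwizzled rely on DXBC-style swizzles being packed as four 2-bit component selectors (x=0, y=1, z=2, w=3), low bits first. Multiplying a selector by 0b01010101 replicates it into all four fields; SelectFromSwizzled first extracts the selector at `component` from the existing swizzle. A small self-contained check of the bit trick (standalone sketch, not code from the commit):

```cpp
#include <cassert>
#include <cstdint>

// Replicates one 2-bit component selector into all four swizzle fields.
uint32_t ReplicateComponent(uint32_t component) {
  return component * 0b01010101;  // c | c << 2 | c << 4 | c << 6
}

// Extracts the selector used at the given position of an existing swizzle.
uint32_t SwizzleAt(uint32_t swizzle, uint32_t position) {
  return (swizzle >> (position * 2)) & 3;
}

int main() {
  const uint32_t kSwizzleXYZW = 0b11100100;  // w z y x, low bits first
  assert(ReplicateComponent(2) == 0b10101010);  // .zzzz
  assert(SwizzleAt(kSwizzleXYZW, 3) == 3);      // Position w selects w.
  // SelectFromSwizzled(1) on .xyzw yields .yyyy:
  assert(ReplicateComponent(SwizzleAt(kSwizzleXYZW, 1)) == 0b01010101);
}
```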
@@ -2026,6 +2028,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
   void EmitInstructionDisassembly();
 
   // Abstract 4-component vector source operand.
+  // TODO(Triang3l): Remove after fully moving to the new emitter.
   struct DxbcSourceOperand {
     enum class Type {
       // GPR number in the index - used only when GPRs are not dynamically
@@ -2064,18 +2067,22 @@ class DxbcShaderTranslator : public ShaderTranslator {
   };
   // Each Load must be followed by Unload, otherwise there may be a temporary
   // register leak.
+  // TODO(Triang3l): Remove after fully moving to the new emitter.
   void LoadDxbcSourceOperand(const InstructionOperand& operand,
                              DxbcSourceOperand& dxbc_operand);
   // Number of tokens this operand adds to the instruction length when used.
+  // TODO(Triang3l): Remove after fully moving to the new emitter.
   uint32_t DxbcSourceOperandLength(const DxbcSourceOperand& operand,
                                    bool negate = false,
                                    bool absolute = false) const;
   // Writes the operand access tokens to the instruction (either for a scalar if
   // select_component is <= 3, or for a vector).
+  // TODO(Triang3l): Remove after fully moving to the new emitter.
   void UseDxbcSourceOperand(const DxbcSourceOperand& operand,
                             uint32_t additional_swizzle = kSwizzleXYZW,
                             uint32_t select_component = 4, bool negate = false,
                             bool absolute = false);
+  // TODO(Triang3l): Remove after fully moving to the new emitter.
   void UnloadDxbcSourceOperand(const DxbcSourceOperand& operand);
 
   // Writes xyzw or xxxx of the specified r# to the destination.
@@ -2258,15 +2265,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
   // the remaining ones can be marked as unused in RDEF.
   uint64_t system_constants_used_;
 
-  // Whether constants are dynamically indexed and need to be marked as such in
-  // dcl_constantBuffer.
-  bool float_constants_dynamic_indexed_;
-
-  // Offsets of float constant indices in shader_code_, for remapping in
-  // CompleteTranslation (initially, at these offsets, guest float constant
-  // indices are written).
-  std::vector<uint32_t> float_constant_index_offsets_;
-
   // Whether InOutRegister::kDSInControlPointIndex has been used in the shader.
   bool in_control_point_index_used_;
@@ -23,7 +23,8 @@ bool DxbcShaderTranslator::ProcessVectorAluOperation(
   replicate_result_x = false;
   predicate_written = false;
 
-  if (!instr.has_vector_op) {
+  if (!instr.vector_and_constant_result.GetUsedWriteMask() &&
+      !AluVectorOpHasSideEffects(instr.vector_opcode)) {
     return false;
   }
@@ -32,7 +33,7 @@ bool DxbcShaderTranslator::ProcessVectorAluOperation(
   if (instr.vector_opcode == AluVectorOpcode::kCube) {
     operand_count = 1;
   } else {
-    operand_count = uint32_t(instr.vector_operand_count);
+    operand_count = instr.vector_operand_count;
   }
   DxbcSourceOperand dxbc_operands[3];
   // Whether the operand is the same as any previous operand, and thus is loaded
@@ -42,7 +43,7 @@ bool DxbcShaderTranslator::ProcessVectorAluOperation(
   for (uint32_t i = 0; i < operand_count; ++i) {
     const InstructionOperand& operand = instr.vector_operands[i];
     for (uint32_t j = 0; j < i; ++j) {
-      if (operand == instr.vector_operands[j]) {
+      if (operand.GetIdenticalComponents(instr.vector_operands[j]) == 0b1111) {
        operands_duplicate[i] = true;
        dxbc_operands[i] = dxbc_operands[j];
        break;
@@ -117,7 +118,8 @@ bool DxbcShaderTranslator::ProcessVectorAluOperation(
       UseDxbcSourceOperand(dxbc_operands[1]);
       ++stat_.instruction_count;
       ++stat_.float_instruction_count;
-      if (!instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) {
+      if (instr.vector_operands[0].GetAbsoluteIdenticalComponents(
+              instr.vector_operands[1]) != 0b1111) {
         // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0),
         // flushing denormals (must be done using eq - doing bitwise comparison
         // doesn't flush denormals).
@@ -281,7 +283,8 @@ bool DxbcShaderTranslator::ProcessVectorAluOperation(
       UseDxbcSourceOperand(dxbc_operands[2]);
       ++stat_.instruction_count;
       ++stat_.float_instruction_count;
-      if (!instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) {
+      if (instr.vector_operands[0].GetAbsoluteIdenticalComponents(
+              instr.vector_operands[1]) != 0b1111) {
         // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
         // If any operand is zero or denormalized, just leave the addition part.
         uint32_t is_subnormal_temp = PushSystemTemp();
@@ -388,7 +391,8 @@ bool DxbcShaderTranslator::ProcessVectorAluOperation(
     case AluVectorOpcode::kDp4:
     case AluVectorOpcode::kDp3:
     case AluVectorOpcode::kDp2Add: {
-      if (instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) {
+      if (instr.vector_operands[0].GetAbsoluteIdenticalComponents(
+              instr.vector_operands[1]) != 0b1111) {
         // The operands are the same when calculating vector length, no need to
         // emulate 0 * anything = 0 in this case.
         shader_code_.push_back(
@@ -1092,7 +1096,9 @@ bool DxbcShaderTranslator::ProcessVectorAluOperation(
       UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 1);
       ++stat_.instruction_count;
       ++stat_.float_instruction_count;
-      if (!instr.vector_operands[0].EqualsAbsolute(instr.vector_operands[1])) {
+      if (!(instr.vector_operands[0].GetAbsoluteIdenticalComponents(
+                instr.vector_operands[1]) &
+            0b0010)) {
         // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
         // This is an attenuation calculation function, so infinity is probably
         // not very unlikely.
@@ -1294,7 +1300,8 @@ bool DxbcShaderTranslator::ProcessScalarAluOperation(
     const ParsedAluInstruction& instr, bool& predicate_written) {
   predicate_written = false;
 
-  if (!instr.has_scalar_op) {
+  if (instr.scalar_opcode == ucode::AluScalarOpcode::kRetainPrev &&
+      !instr.scalar_result.GetUsedWriteMask()) {
     return false;
   }
@@ -1306,7 +1313,7 @@ bool DxbcShaderTranslator::ProcessScalarAluOperation(
   for (uint32_t i = 0; i < uint32_t(instr.scalar_operand_count); ++i) {
     const InstructionOperand& operand = instr.scalar_operands[i];
     for (uint32_t j = 0; j < i; ++j) {
-      if (operand == instr.scalar_operands[j]) {
+      if (operand.GetIdenticalComponents(instr.scalar_operands[j]) == 0b1111) {
        operands_duplicate[i] = true;
        dxbc_operands[i] = dxbc_operands[j];
        break;
@@ -2303,7 +2310,9 @@ bool DxbcShaderTranslator::ProcessScalarAluOperation(
       UseDxbcSourceOperand(dxbc_operands[1], kSwizzleXYZW, 0);
       ++stat_.instruction_count;
       ++stat_.float_instruction_count;
-      if (!instr.scalar_operands[0].EqualsAbsolute(instr.scalar_operands[1])) {
+      if (!(instr.scalar_operands[0].GetAbsoluteIdenticalComponents(
+                instr.scalar_operands[1]) &
+            0b0001)) {
         // Reproduce Shader Model 3 multiplication behavior (0 * anything = 0).
         uint32_t is_subnormal_temp = PushSystemTemp();
         // Get the non-NaN multiplicand closer to zero to check if any of them
@@ -2421,7 +2430,7 @@ bool DxbcShaderTranslator::ProcessScalarAluOperation(
 
 void DxbcShaderTranslator::ProcessAluInstruction(
     const ParsedAluInstruction& instr) {
-  if (instr.is_nop()) {
+  if (instr.IsNop()) {
     return;
   }
@@ -2445,7 +2454,8 @@ void DxbcShaderTranslator::ProcessAluInstruction(
       ProcessScalarAluOperation(instr, predicate_written_scalar);
 
   if (store_vector) {
-    StoreResult(instr.vector_result, system_temp_pv_, replicate_vector_x,
+    StoreResult(instr.vector_and_constant_result, system_temp_pv_,
+                replicate_vector_x,
                 instr.GetMemExportStreamConstant() != UINT32_MAX);
   }
   if (store_scalar) {
@@ -10,10 +10,12 @@
 #ifndef XENIA_GPU_SHADER_H_
 #define XENIA_GPU_SHADER_H_
 
+#include <algorithm>
 #include <filesystem>
 #include <string>
 #include <vector>
 
+#include "xenia/base/math.h"
 #include "xenia/base/string_buffer.h"
 #include "xenia/gpu/ucode.h"
 #include "xenia/gpu/xenos.h"
@@ -21,23 +23,32 @@
 namespace xe {
 namespace gpu {
 
+// The structures here are used for both translation and disassembly.
+//
+// Because disassembly uses them too, to make sure "assemble -> disassemble ->
+// reassemble" round trip is always successful with the XNA assembler (as it is
+// the accuracy benchmark for translation), only generalization - not
+// optimization like nop skipping/replacement - must be done while converting
+// microcode to these structures (in other words, parsed shader code should be
+// enough to accurately reconstruct the microcode for any shader that could be
+// written by a human in assembly).
+//
+// During the "parsed -> host" part of the translation, however, translators are
+// free to make any optimizations (as long as they don't affect the result, of
+// course) they find appropriate.
+
 enum class InstructionStorageTarget {
   // Result is not stored.
   kNone,
   // Result is stored to a temporary register indexed by storage_index [0-31].
   kRegister,
-  // Result is stored into a vertex shader interpolant export [0-15].
-  kInterpolant,
+  // Result is stored into a vertex shader interpolator export [0-15].
+  kInterpolator,
   // Result is stored to the position export (gl_Position).
   kPosition,
-  // Result is stored to the vertex shader misc export register.
-  // See R6xx/R7xx registers for details (USE_VTX_POINT_SIZE, USE_VTX_EDGE_FLAG,
-  // USE_VTX_KILL_FLAG).
-  // X - PSIZE (gl_PointSize).
-  // Y - EDGEFLAG (glEdgeFlag) for PrimitiveType::kPolygon wireframe/point
-  // drawing.
-  // Z - KILLVERTEX flag (used in Banjo-Kazooie: Nuts & Bolts for grass), set
-  // for killing primitives based on PA_CL_CLIP_CNTL::VTX_KILL_OR condition.
+  // Result is stored to the vertex shader misc export register, see
+  // ucode::ExportRegister::kVSPointSizeEdgeFlagKillVertex for description of
+  // components.
   kPointSizeEdgeFlagKillVertex,
   // Result is stored as memexport destination address
   // (see xenos::xe_gpu_memexport_stream_t).
@@ -45,11 +56,29 @@ enum class InstructionStorageTarget {
   // Result is stored to memexport destination data.
   kExportData,
   // Result is stored to a color target export indexed by storage_index [0-3].
-  kColorTarget,
-  // Result is stored to the depth export (gl_FragDepth).
+  kColor,
+  // X of the result is stored to the depth export (gl_FragDepth).
   kDepth,
 };
 
+// Must be used only in translation to skip unused components, but not in
+// disassembly (because oPts.x000 will be assembled, but oPts.x00_ has both
+// skipped components and zeros, which cannot be encoded, and therefore it will
+// not).
+constexpr uint32_t GetInstructionStorageTargetUsedComponents(
+    InstructionStorageTarget target) {
+  switch (target) {
+    case InstructionStorageTarget::kNone:
+      return 0b0000;
+    case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex:
+      return 0b0111;
+    case InstructionStorageTarget::kDepth:
+      return 0b0001;
+    default:
+      return 0b1111;
+  }
+}
+
 enum class InstructionStorageAddressingMode {
   // The storage index is not dynamically addressed.
   kStatic,
@@ -75,71 +104,63 @@ enum class SwizzleSource {
   k1,
 };
 
-constexpr SwizzleSource GetSwizzleFromComponentIndex(int i) {
+constexpr SwizzleSource GetSwizzleFromComponentIndex(uint32_t i) {
   return static_cast<SwizzleSource>(i);
 }
-inline char GetCharForComponentIndex(int i) {
+inline char GetCharForComponentIndex(uint32_t i) {
   const static char kChars[] = {'x', 'y', 'z', 'w'};
   return kChars[i];
 }
 inline char GetCharForSwizzle(SwizzleSource swizzle_source) {
   const static char kChars[] = {'x', 'y', 'z', 'w', '0', '1'};
-  return kChars[static_cast<int>(swizzle_source)];
+  return kChars[static_cast<uint32_t>(swizzle_source)];
 }
 
 struct InstructionResult {
   // Where the result is going.
   InstructionStorageTarget storage_target = InstructionStorageTarget::kNone;
   // Index into the storage_target, if it is indexed.
-  int storage_index = 0;
+  uint32_t storage_index = 0;
   // How the storage index is dynamically addressed, if it is.
   InstructionStorageAddressingMode storage_addressing_mode =
       InstructionStorageAddressingMode::kStatic;
   // True if the result is exporting from the shader.
   bool is_export = false;
   // True to clamp the result value to [0-1].
   bool is_clamped = false;
-  // Defines whether each output component is written.
-  bool write_mask[4] = {false, false, false, false};
+  // Defines whether each output component is written, though this is from the
+  // original microcode, not taking into account whether such components
+  // actually exist in the target.
+  uint32_t original_write_mask = 0b0000;
   // Defines the source for each output component xyzw.
   SwizzleSource components[4] = {SwizzleSource::kX, SwizzleSource::kY,
                                  SwizzleSource::kZ, SwizzleSource::kW};
-  // Returns true if any component is written to.
-  bool has_any_writes() const {
-    return write_mask[0] || write_mask[1] || write_mask[2] || write_mask[3];
-  }
-  // Returns true if all components are written to.
-  bool has_all_writes() const {
-    return write_mask[0] && write_mask[1] && write_mask[2] && write_mask[3];
-  }
-  // Returns number of components written
-  uint32_t num_writes() const {
-    uint32_t total = 0;
-    for (int i = 0; i < 4; i++) {
-      if (write_mask[i]) {
-        total++;
-      }
-    }
-
-    return total;
-  }
-  // Returns true if any non-constant components are written.
-  bool stores_non_constants() const {
-    for (int i = 0; i < 4; ++i) {
-      if (write_mask[i] && components[i] != SwizzleSource::k0 &&
-          components[i] != SwizzleSource::k1) {
-        return true;
-      }
-    }
-    return false;
+  // Returns the write mask containing only components actually present in the
+  // target.
+  uint32_t GetUsedWriteMask() const {
+    return original_write_mask &
+           GetInstructionStorageTargetUsedComponents(storage_target);
   }
   // True if the components are in their 'standard' swizzle arrangement (xyzw).
-  bool is_standard_swizzle() const {
-    return has_all_writes() && components[0] == SwizzleSource::kX &&
+  bool IsStandardSwizzle() const {
+    return (GetUsedWriteMask() == 0b1111) &&
+           components[0] == SwizzleSource::kX &&
           components[1] == SwizzleSource::kY &&
           components[2] == SwizzleSource::kZ &&
           components[3] == SwizzleSource::kW;
   }
+  // Returns the components of the result, before swizzling, that won't be
+  // discarded or replaced with a constant.
+  uint32_t GetUsedResultComponents() const {
+    uint32_t used_write_mask = GetUsedWriteMask();
+    uint32_t used_components = 0b0000;
+    for (uint32_t i = 0; i < 4; ++i) {
+      if ((used_write_mask & (1 << i)) && components[i] >= SwizzleSource::kX &&
+          components[i] <= SwizzleSource::kW) {
+        used_components |=
+            1 << (uint32_t(components[i]) - uint32_t(SwizzleSource::kX));
+      }
+    }
+    return used_components;
+  }
 };
 
 enum class InstructionStorageSource {
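To make the two new mask queries concrete: GetUsedWriteMask drops written components that do not exist in the target (for example W of the oPts misc export), and GetUsedResultComponents then reports which pre-swizzle source components still matter. The following is a hedged standalone rework with simplified stand-ins for the real enums, not the commit's own code:

```cpp
#include <cassert>
#include <cstdint>

// Simplified stand-ins for InstructionStorageTarget / SwizzleSource.
enum Target { kPointSizeEdgeFlagKillVertex, kDepth, kColor };
enum Swizzle { kX, kY, kZ, kW, k0, k1 };

uint32_t TargetComponents(Target t) {
  if (t == kPointSizeEdgeFlagKillVertex) return 0b0111;
  if (t == kDepth) return 0b0001;
  return 0b1111;
}

struct Result {
  Target target;
  uint32_t original_write_mask;
  Swizzle components[4];

  uint32_t UsedWriteMask() const {
    return original_write_mask & TargetComponents(target);
  }
  // Source components (before swizzling) that are neither discarded by the
  // target nor replaced with the constants 0/1.
  uint32_t UsedResultComponents() const {
    uint32_t used = 0;
    for (uint32_t i = 0; i < 4; ++i) {
      if ((UsedWriteMask() & (1u << i)) && components[i] <= kW) {
        used |= 1u << components[i];
      }
    }
    return used;
  }
};

int main() {
  // oPts.xyzw in the microcode: W doesn't exist in the target, and the Z slot
  // is fed by constant 0, so only source components X and Y are really used.
  Result r{kPointSizeEdgeFlagKillVertex, 0b1111, {kX, kY, k0, kW}};
  assert(r.UsedWriteMask() == 0b0111);
  assert(r.UsedResultComponents() == 0b0011);
}
```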
@@ -159,7 +180,7 @@ struct InstructionOperand {
   // Where the source comes from.
   InstructionStorageSource storage_source = InstructionStorageSource::kRegister;
   // Index into the storage_target, if it is indexed.
-  int storage_index = 0;
+  uint32_t storage_index = 0;
   // How the storage index is dynamically addressed, if it is.
   InstructionStorageAddressingMode storage_addressing_mode =
       InstructionStorageAddressingMode::kStatic;
@@ -168,13 +189,19 @@ struct InstructionOperand {
   // True to take the absolute value of the source (before any negation).
   bool is_absolute_value = false;
   // Number of components taken from the source operand.
-  int component_count = 0;
+  uint32_t component_count = 4;
   // Defines the source for each component xyzw (up to the given
   // component_count).
   SwizzleSource components[4] = {SwizzleSource::kX, SwizzleSource::kY,
                                  SwizzleSource::kZ, SwizzleSource::kW};
+  // Returns the swizzle source for the component, replicating the rightmost
+  // component if there are less than 4 components (similar to what the Xbox 360
+  // shader compiler does as a general rule for unspecified components).
+  SwizzleSource GetComponent(uint32_t index) const {
+    return components[std::min(index, component_count - 1)];
+  }
   // True if the components are in their 'standard' swizzle arrangement (xyzw).
-  bool is_standard_swizzle() const {
+  bool IsStandardSwizzle() const {
     switch (component_count) {
       case 4:
         return components[0] == SwizzleSource::kX &&
@@ -185,26 +212,32 @@ struct InstructionOperand {
         return false;
   }
 
-  // Whether absolute values of two operands are identical (useful for emulating
-  // Shader Model 3 0*anything=0 multiplication behavior).
-  bool EqualsAbsolute(const InstructionOperand& other) const {
+  // Returns which components of two operands are identical, but may have
+  // different signs (for simplicity of usage with GetComponent, treating the
+  // rightmost component as replicated).
+  uint32_t GetAbsoluteIdenticalComponents(
+      const InstructionOperand& other) const {
     if (storage_source != other.storage_source ||
         storage_index != other.storage_index ||
-        storage_addressing_mode != other.storage_addressing_mode ||
-        component_count != other.component_count) {
-      return false;
+        storage_addressing_mode != other.storage_addressing_mode) {
+      return 0;
     }
-    for (int i = 0; i < component_count; ++i) {
-      if (components[i] != other.components[i]) {
-        return false;
-      }
+    uint32_t identical_components = 0;
+    for (uint32_t i = 0; i < 4; ++i) {
+      identical_components |= uint32_t(GetComponent(i) == other.GetComponent(i))
+                              << i;
     }
-    return true;
+    return identical_components;
   }
 
-  bool operator==(const InstructionOperand& other) const {
-    return EqualsAbsolute(other) && is_negated == other.is_negated &&
-           is_absolute_value == other.is_absolute_value;
+  // Returns which components of two operands will always be bitwise equal, but
+  // may have different signs (disregarding component_count for simplicity of
+  // usage with GetComponent, treating the rightmost component as replicated).
+  uint32_t GetIdenticalComponents(const InstructionOperand& other) const {
+    if (is_negated != other.is_negated ||
+        is_absolute_value != other.is_absolute_value) {
+      return 0;
+    }
+    return GetAbsoluteIdenticalComponents(other);
   }
 };
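The per-component identity masks replace the old all-or-nothing EqualsAbsolute: the ALU hunks above (such as the multiply and kDp2Add cases) only need to know whether the specific components being multiplied read the same register component, since that is when the Shader Model 3 "0 * anything = 0" emulation can be skipped. A toy sketch of the call pattern, with an operand type reduced to what the check needs (an assumption-laden illustration, not the project's type):

```cpp
#include <cassert>
#include <cstdint>

// Toy stand-in for InstructionOperand with a 4-component swizzle.
struct Operand {
  uint32_t reg;
  uint32_t swizzle[4];  // Source component per output component.

  uint32_t IdenticalComponents(const Operand& other) const {
    if (reg != other.reg) return 0;
    uint32_t mask = 0;
    for (uint32_t i = 0; i < 4; ++i) {
      mask |= uint32_t(swizzle[i] == other.swizzle[i]) << i;
    }
    return mask;
  }
};

int main() {
  Operand a{0, {0, 1, 2, 3}};  // r0.xyzw
  Operand b{0, {0, 3, 2, 1}};  // r0.xwzy
  // X and Z come from the same component of the same register, so a
  // translator only has to emulate 0 * anything = 0 for Y and W.
  assert(a.IdenticalComponents(b) == 0b0101);
  // Per-component check, as used for e.g. a scalar multiply reading .y:
  bool need_zero_emulation_y = !(a.IdenticalComponents(b) & 0b0010);
  assert(need_zero_emulation_y);
}
```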
@@ -365,9 +398,6 @@ struct ParsedAllocInstruction {
 };
 
 struct ParsedVertexFetchInstruction {
-  // Index into the ucode dword source.
-  uint32_t dword_index = 0;
-
   // Opcode for the instruction.
   ucode::FetchOpcode opcode;
   // Friendly name of the instruction.
@@ -409,9 +439,6 @@ struct ParsedVertexFetchInstruction {
 };
 
 struct ParsedTextureFetchInstruction {
-  // Index into the ucode dword source.
-  uint32_t dword_index = 0;
-
   // Opcode for the instruction.
   ucode::FetchOpcode opcode;
   // Friendly name of the instruction.
@@ -462,17 +489,6 @@ struct ParsedTextureFetchInstruction {
 };
 
 struct ParsedAluInstruction {
-  // Index into the ucode dword source.
-  uint32_t dword_index = 0;
-
-  // True if the vector part of the instruction needs to be executed and data
-  // about it in this structure is valid.
-  bool has_vector_op = false;
-  // True if the scalar part of the instruction needs to be executed and data
-  // about it in this structure is valid.
-  bool has_scalar_op = false;
-  bool is_nop() const { return !has_vector_op && !has_scalar_op; }
-
   // Opcode for the vector part of the instruction.
   ucode::AluVectorOpcode vector_opcode = ucode::AluVectorOpcode::kAdd;
   // Opcode for the scalar part of the instruction.
@@ -488,8 +504,20 @@ struct ParsedAluInstruction {
   // Expected predication condition value if predicated.
   bool predicate_condition = false;
 
-  // Describes how the vector operation result is stored.
-  InstructionResult vector_result;
+  // Describes how the vector operation result and, for exports, constant 0/1
+  // are stored. For simplicity of translation and disassembly, treating
+  // constant 0/1 writes as a part of the vector operation - they need to be
+  // expressed somehow in the disassembly anyway with a properly disassembled
+  // instruction even if only constants are being exported. The XNA disassembler
+  // falls back to displaying the whole vector operation, even if only constant
+  // components are written, if the scalar operation is a nop or if the vector
+  // operation has side effects (but if the scalar operation isn't nop, it
+  // outputs the entire constant mask in the scalar operation destination).
+  // Normally the XNA disassembler outputs the constant mask in both vector and
+  // scalar operations, but that's not required by assembler, so it doesn't
+  // really matter whether it's specified in the vector operation, in the scalar
+  // operation, or in both.
+  InstructionResult vector_and_constant_result;
   // Describes how the scalar operation result is stored.
   InstructionResult scalar_result;
   // Both operations must be executed before any result is stored if vector and
@@ -499,27 +527,109 @@ struct ParsedAluInstruction {
   // operations.
 
   // Number of source operands of the vector operation.
-  size_t vector_operand_count = 0;
+  uint32_t vector_operand_count = 0;
   // Describes each source operand of the vector operation.
   InstructionOperand vector_operands[3];
   // Number of source operands of the scalar operation.
-  size_t scalar_operand_count = 0;
+  uint32_t scalar_operand_count = 0;
   // Describes each source operand of the scalar operation.
   InstructionOperand scalar_operands[2];
 
-  // If this is a valid eA write (MAD with a stream constant), returns the index
-  // of the stream float constant, otherwise returns UINT32_MAX.
+  // Whether the vector part of the instruction is the same as if it was omitted
+  // in the assembly (if compiled or assembled with the Xbox 360 shader
+  // compiler), and thus reassembling the shader with this instruction omitted
+  // will result in the same microcode (since instructions with just an empty
+  // write mask may have different values in other fields).
+  // This is for disassembly! Translators should use the write masks and
+  // AluVectorOpHasSideEffects to skip operations, as this only covers one very
+  // specific nop format!
+  bool IsVectorOpDefaultNop() const {
+    if (vector_opcode != ucode::AluVectorOpcode::kMax ||
+        vector_and_constant_result.original_write_mask ||
+        vector_and_constant_result.is_clamped ||
+        vector_operands[0].storage_source !=
+            InstructionStorageSource::kRegister ||
+        vector_operands[0].storage_index != 0 ||
+        vector_operands[0].storage_addressing_mode !=
+            InstructionStorageAddressingMode::kStatic ||
+        vector_operands[0].is_negated || vector_operands[0].is_absolute_value ||
+        !vector_operands[0].IsStandardSwizzle() ||
+        vector_operands[1].storage_source !=
+            InstructionStorageSource::kRegister ||
+        vector_operands[1].storage_index != 0 ||
+        vector_operands[1].storage_addressing_mode !=
+            InstructionStorageAddressingMode::kStatic ||
+        vector_operands[1].is_negated || vector_operands[1].is_absolute_value ||
+        !vector_operands[1].IsStandardSwizzle()) {
+      return false;
+    }
+    if (vector_and_constant_result.storage_target ==
+        InstructionStorageTarget::kRegister) {
+      if (vector_and_constant_result.storage_index != 0 ||
+          vector_and_constant_result.storage_addressing_mode !=
+              InstructionStorageAddressingMode::kStatic) {
+        return false;
+      }
+    } else {
+      // In case both vector and scalar operations are nop, still need to write
+      // somewhere that it's an export, not mov r0._, r0 + retain_prev r0._.
+      // Accurate round trip is possible only if the target is o0 or oC0,
+      // because if the total write mask is empty, the XNA assembler forces the
+      // destination to be o0/oC0, but this doesn't really matter in this case.
+      if (IsScalarOpDefaultNop()) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  // Whether the scalar part of the instruction is the same as if it was omitted
+  // in the assembly (if compiled or assembled with the Xbox 360 shader
+  // compiler), and thus reassembling the shader with this instruction omitted
+  // will result in the same microcode (since instructions with just an empty
+  // write mask may have different values in other fields).
+  bool IsScalarOpDefaultNop() const {
+    if (scalar_opcode != ucode::AluScalarOpcode::kRetainPrev ||
+        scalar_result.original_write_mask || scalar_result.is_clamped) {
+      return false;
+    }
+    if (scalar_result.storage_target == InstructionStorageTarget::kRegister) {
+      if (scalar_result.storage_index != 0 ||
+          scalar_result.storage_addressing_mode !=
+              InstructionStorageAddressingMode::kStatic) {
+        return false;
+      }
+    }
+    // For exports, if both are nop, the vector operation will be kept to state
+    // in the microcode that the destination in the microcode is an export.
+    return true;
+  }
+
+  // For translation (not disassembly) - whether this instruction has totally no
+  // effect.
+  bool IsNop() const {
+    return scalar_opcode == ucode::AluScalarOpcode::kRetainPrev &&
+           !scalar_result.GetUsedWriteMask() &&
+           !vector_and_constant_result.GetUsedWriteMask() &&
+           !ucode::AluVectorOpHasSideEffects(vector_opcode);
+  }
+
+  // If this is a "normal" eA write recognized by Xenia (MAD with a stream
+  // constant), returns the index of the stream float constant, otherwise
+  // returns UINT32_MAX.
   uint32_t GetMemExportStreamConstant() const {
-    if (has_vector_op &&
-        vector_result.storage_target ==
+    if (vector_and_constant_result.storage_target ==
             InstructionStorageTarget::kExportAddress &&
         vector_opcode == ucode::AluVectorOpcode::kMad &&
-        vector_result.has_all_writes() &&
+        vector_and_constant_result.GetUsedResultComponents() == 0b1111 &&
+        !vector_and_constant_result.is_clamped &&
         vector_operands[2].storage_source ==
            InstructionStorageSource::kConstantFloat &&
        vector_operands[2].storage_addressing_mode ==
            InstructionStorageAddressingMode::kStatic &&
-        vector_operands[2].is_standard_swizzle()) {
+        vector_operands[2].IsStandardSwizzle() &&
+        !vector_operands[2].is_negated &&
+        !vector_operands[2].is_absolute_value) {
       return vector_operands[2].storage_index;
     }
     return UINT32_MAX;
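For context on the two default-nop predicates above: when only one half of an ALU instruction is written in assembly, the Xbox 360 shader compiler fills the other half with `max r0, r0, r0` (vector) or `retain_prev r0` (scalar) with an empty write mask, so exactly those patterns, and only those, may be omitted when disassembling without breaking the round trip. A toy matcher over a deliberately simplified instruction record (field names loosely modeled on ParsedAluInstruction, not the real type):

```cpp
#include <cassert>
#include <cstdint>

enum VectorOp { kAdd, kMax, kMad };
enum ScalarOp { kAdds, kRetainPrev };

struct ToyAlu {
  VectorOp vector_op;
  uint32_t vector_write_mask;  // Empty for compiler-inserted filler.
  uint32_t vector_src_reg[2];
  ScalarOp scalar_op;
  uint32_t scalar_write_mask;
};

// The compiler's filler for an omitted vector half: max r0, r0, r0 with an
// empty write mask - nothing else round-trips to the same microcode.
bool IsVectorDefaultNop(const ToyAlu& i) {
  return i.vector_op == kMax && i.vector_write_mask == 0 &&
         i.vector_src_reg[0] == 0 && i.vector_src_reg[1] == 0;
}

bool IsScalarDefaultNop(const ToyAlu& i) {
  return i.scalar_op == kRetainPrev && i.scalar_write_mask == 0;
}

int main() {
  ToyAlu filler{kMax, 0, {0, 0}, kRetainPrev, 0};
  assert(IsVectorDefaultNop(filler) && IsScalarDefaultNop(filler));
  ToyAlu real{kMad, 0b1111, {1, 2}, kRetainPrev, 0};
  assert(!IsVectorDefaultNop(real) && IsScalarDefaultNop(real));
}
```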
@@ -581,9 +691,8 @@ class Shader {
   struct ConstantRegisterMap {
     // Bitmap of all kConstantFloat registers read by the shader.
     // Any shader can only read up to 256 of the 512, and the base is dependent
-    // on the shader type. Each bit corresponds to a storage index from the type
-    // base, so bit 0 in a vertex shader is register 0, and bit 0 in a fragment
-    // shader is register 256.
+    // on the shader type and SQ_VS/PS_CONST registers. Each bit corresponds to
+    // a storage index from the type base.
     uint64_t float_bitmap[256 / 64];
     // Bitmap of all loop constants read by the shader.
     // Each bit corresponds to a storage index [0-31].
@@ -595,8 +704,33 @@ class Shader {
     // Total number of kConstantFloat registers read by the shader.
     uint32_t float_count;
 
-    // Computed byte count of all registers required when packed.
-    uint32_t packed_byte_length;
+    // Whether kConstantFloat registers are indexed dynamically - in this case,
+    // float_bitmap must be set to all 1, and tight packing must not be done.
+    bool float_dynamic_addressing;
+
+    // Returns the index of the float4 constant as if all float4 constant
+    // registers actually referenced were tightly packed in a buffer, or
+    // UINT32_MAX if not found.
+    uint32_t GetPackedFloatConstantIndex(uint32_t float_constant) const {
+      if (float_constant >= 256) {
+        return UINT32_MAX;
+      }
+      if (float_dynamic_addressing) {
+        // Any can potentially be read - not packing.
+        return float_constant;
+      }
+      uint32_t block_index = float_constant / 64;
+      uint32_t bit_index = float_constant % 64;
+      if (!(float_bitmap[block_index] & (uint64_t(1) << bit_index))) {
+        return UINT32_MAX;
+      }
+      uint32_t offset = 0;
+      for (uint32_t i = 0; i < block_index; ++i) {
+        offset += xe::bit_count(float_bitmap[i]);
+      }
+      return offset + xe::bit_count(float_bitmap[block_index] &
+                                    ((uint64_t(1) << bit_index) - 1));
+    }
   };
 
   Shader(ShaderType shader_type, uint64_t ucode_data_hash,
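One subtlety worth spelling out: with float_dynamic_addressing the bitmap is all ones and packing degenerates to the identity, because `c[aL + k]` can touch any register at runtime. A trivial sketch of that branch in isolation (illustrative only):

```cpp
#include <cassert>
#include <cstdint>

// With dynamic addressing every constant is potentially live, so the "packed"
// buffer must keep guest indexing - the packed index is the guest index.
uint32_t PackedIndexDynamic(uint32_t float_constant) {
  return float_constant < 256 ? float_constant : UINT32_MAX;
}

int main() {
  assert(PackedIndexDynamic(130) == 130);  // No compaction possible.
  assert(PackedIndexDynamic(300) == UINT32_MAX);
}
```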
@@ -642,7 +776,9 @@ class Shader {
   }
 
   // Returns true if the given color target index [0-3].
-  bool writes_color_target(int i) const { return writes_color_targets_[i]; }
+  bool writes_color_target(uint32_t i) const {
+    return writes_color_targets_[i];
+  }
 
   // True if the shader overrides the pixel depth.
   bool writes_depth() const { return writes_depth_; }
(File diff suppressed because it is too large.)
@@ -57,15 +57,19 @@ class ShaderTranslator {
   }
   // True if the current shader is a pixel shader.
   bool is_pixel_shader() const { return shader_type_ == ShaderType::kPixel; }
+  // Used constant register info, populated before translation.
   const Shader::ConstantRegisterMap& constant_register_map() const {
     return constant_register_map_;
   }
   // True if the current shader addresses general-purpose registers with dynamic
-  // indices.
+  // indices, set before translation. Doesn't include writes to r[#+a#] with an
+  // empty used write mask.
   bool uses_register_dynamic_addressing() const {
     return uses_register_dynamic_addressing_;
   }
-  // True if the current shader writes to a color target on any execution path.
+  // True if the current shader writes to a color target on any execution path,
+  // set before translation. Doesn't include writes with an empty used write
+  // mask.
   bool writes_color_target(int i) const { return writes_color_targets_[i]; }
   bool writes_any_color_target() const {
     for (size_t i = 0; i < xe::countof(writes_color_targets_); ++i) {
@@ -75,7 +79,8 @@ class ShaderTranslator {
     }
     return false;
   }
-  // True if the current shader overrides the pixel depth.
+  // True if the current shader overrides the pixel depth, set before
+  // translation. Doesn't include writes with an empty used write mask.
   bool writes_depth() const { return writes_depth_; }
   // True if Xenia can automatically enable early depth/stencil for the pixel
   // shader when RB_DEPTHCONTROL EARLY_Z_ENABLE is not set, provided alpha
@@ -181,8 +186,8 @@ class ShaderTranslator {
  private:
  struct AluOpcodeInfo {
    const char* name;
-    size_t argument_count;
-    int src_swizzle_component_count;
+    uint32_t argument_count;
+    uint32_t src_swizzle_component_count;
    bool disable_implicit_early_z;
  };
@@ -229,10 +234,16 @@ class ShaderTranslator {
                                     ParsedTextureFetchInstruction* out_instr);
 
   void TranslateAluInstruction(const ucode::AluInstruction& op);
-  void ParseAluVectorOperation(const ucode::AluInstruction& op,
-                               ParsedAluInstruction& instr);
-  void ParseAluScalarOperation(const ucode::AluInstruction& op,
-                               ParsedAluInstruction& instr);
+  void ParseAluInstruction(const ucode::AluInstruction& op,
+                           ParsedAluInstruction& out_instr) const;
+  static void ParseAluInstructionOperand(const ucode::AluInstruction& op,
+                                         uint32_t i,
+                                         uint32_t swizzle_component_count,
+                                         InstructionOperand& out_op);
+  static void ParseAluInstructionOperandSpecial(
+      const ucode::AluInstruction& op, InstructionStorageSource storage_source,
+      uint32_t reg, bool negate, int const_slot, uint32_t component_index,
+      InstructionOperand& out_op);
 
   // Input shader metadata and microcode.
   ShaderType shader_type_;
@@ -265,12 +276,16 @@ class ShaderTranslator {
   uint32_t unique_vertex_bindings_ = 0;
   uint32_t unique_texture_bindings_ = 0;
 
+  // These all are gathered before translation.
+  // uses_register_dynamic_addressing_ for writes, writes_color_targets_,
+  // writes_depth_ don't include empty used write masks.
   Shader::ConstantRegisterMap constant_register_map_ = {0};
   bool uses_register_dynamic_addressing_ = false;
   bool writes_color_targets_[4] = {false, false, false, false};
   bool writes_depth_ = false;
   bool implicit_early_z_allowed_ = true;
 
   // Memexport info is gathered before translation.
   uint32_t memexport_alloc_count_ = 0;
   // For register allocation in implementations - what was used after each
   // `alloc export`.
|
|
@ -28,7 +28,7 @@ void DisassembleResultOperand(const InstructionResult& result,
|
|||
out->Append('r');
|
||||
uses_storage_index = true;
|
||||
break;
|
||||
case InstructionStorageTarget::kInterpolant:
|
||||
case InstructionStorageTarget::kInterpolator:
|
||||
out->Append('o');
|
||||
uses_storage_index = true;
|
||||
break;
|
||||
|
@@ -45,7 +45,7 @@ void DisassembleResultOperand(const InstructionResult& result,
       out->Append("eM");
       uses_storage_index = true;
       break;
-    case InstructionStorageTarget::kColorTarget:
+    case InstructionStorageTarget::kColor:
       out->Append("oC");
       uses_storage_index = true;
       break;
@@ -68,12 +68,19 @@ void DisassembleResultOperand(const InstructionResult& result,
       break;
     }
   }
-  if (!result.has_any_writes()) {
+  // Not using GetUsedWriteMask/IsStandardSwizzle because they filter out
+  // components not having any runtime effect, but those components are still
+  // present in the microcode.
+  if (!result.original_write_mask) {
     out->Append("._");
-  } else if (!result.is_standard_swizzle()) {
+  } else if (result.original_write_mask != 0b1111 ||
+             result.components[0] != SwizzleSource::kX ||
+             result.components[1] != SwizzleSource::kY ||
+             result.components[2] != SwizzleSource::kZ ||
+             result.components[3] != SwizzleSource::kW) {
     out->Append('.');
     for (int i = 0; i < 4; ++i) {
-      if (result.write_mask[i]) {
+      if (result.original_write_mask & (1 << i)) {
        out->Append(GetCharForSwizzle(result.components[i]));
      } else {
        out->Append('_');
@@ -116,7 +123,7 @@ void DisassembleSourceOperand(const InstructionOperand& op, StringBuffer* out) {
       out->AppendFormat("[{}+aL]", op.storage_index);
       break;
   }
-  if (!op.is_standard_swizzle()) {
+  if (!op.IsStandardSwizzle()) {
     out->Append('.');
     if (op.component_count == 1) {
       out->Append(GetCharForSwizzle(op.components[0]));
@@ -124,7 +131,7 @@ void DisassembleSourceOperand(const InstructionOperand& op, StringBuffer* out) {
       out->Append(GetCharForSwizzle(op.components[0]));
       out->Append(GetCharForSwizzle(op.components[1]));
     } else {
-      for (int j = 0; j < op.component_count; ++j) {
+      for (uint32_t j = 0; j < op.component_count; ++j) {
        out->Append(GetCharForSwizzle(op.components[j]));
      }
    }
@@ -454,11 +461,19 @@ void ParsedTextureFetchInstruction::Disassemble(StringBuffer* out) const {
 }
 
 void ParsedAluInstruction::Disassemble(StringBuffer* out) const {
-  if (is_nop()) {
-    out->Append(" nop\n");
+  bool is_vector_op_default_nop = IsVectorOpDefaultNop();
+  bool is_scalar_op_default_nop = IsScalarOpDefaultNop();
+  if (is_vector_op_default_nop && is_scalar_op_default_nop) {
+    out->Append(" ");
+    if (is_predicated) {
+      out->Append(predicate_condition ? " (p0) " : "(!p0) ");
+    } else {
+      out->Append(" ");
+    }
+    out->Append("nop\n");
     return;
   }
-  if (has_vector_op) {
+  if (!is_vector_op_default_nop) {
     out->Append(" ");
     if (is_predicated) {
       out->Append(predicate_condition ? " (p0) " : "(!p0) ");
@@ -466,19 +481,19 @@ void ParsedAluInstruction::Disassemble(StringBuffer* out) const {
       out->Append("      ");
     }
     out->Append(vector_opcode_name);
-    if (vector_result.is_clamped) {
+    if (vector_and_constant_result.is_clamped) {
       out->Append("_sat");
     }
     out->Append(' ');
-    DisassembleResultOperand(vector_result, out);
-    for (int i = 0; i < vector_operand_count; ++i) {
+    DisassembleResultOperand(vector_and_constant_result, out);
+    for (uint32_t i = 0; i < vector_operand_count; ++i) {
       out->Append(", ");
       DisassembleSourceOperand(vector_operands[i], out);
     }
     out->Append('\n');
   }
-  if (has_scalar_op) {
-    out->Append(has_vector_op ? " + " : "   ");
+  if (!is_scalar_op_default_nop) {
+    out->Append(is_vector_op_default_nop ? "   " : " + ");
     if (is_predicated) {
       out->Append(predicate_condition ? " (p0) " : "(!p0) ");
     } else {
@@ -490,7 +505,7 @@ void ParsedAluInstruction::Disassemble(StringBuffer* out) const {
     }
     out->Append(' ');
     DisassembleResultOperand(scalar_result, out);
-    for (int i = 0; i < scalar_operand_count; ++i) {
+    for (uint32_t i = 0; i < scalar_operand_count; ++i) {
       out->Append(", ");
       DisassembleSourceOperand(scalar_operands[i], out);
     }
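For illustration only (the opcode and register names below are invented, not taken from this diff), the formatting above lines up co-issued vector and scalar operations in fixed columns:

        (p0) mul r0.x__w, r1, r2
     +       adds r3.x, r0
             nop

Three spaces of indent are followed by a six-character predicate field (" (p0) ", "(!p0) ", or spaces when unpredicated); a co-issued scalar operation replaces the indent with " + ", so opcodes always start in the same column and the text can be fed back to the assembler.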
@@ -2003,7 +2003,7 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
 
 void SpirvShaderTranslator::ProcessAluInstruction(
     const ParsedAluInstruction& instr) {
-  if (instr.is_nop()) {
+  if (instr.IsNop()) {
     return;
   }
 
@@ -2044,7 +2044,7 @@ void SpirvShaderTranslator::ProcessAluInstruction(
   ProcessScalarAluOperation(instr, close_predicated_block_scalar);
 
   if (store_vector) {
-    StoreToResult(b.createLoad(pv_), instr.vector_result);
+    StoreToResult(b.createLoad(pv_), instr.vector_and_constant_result);
   }
   if (store_scalar) {
     StoreToResult(b.createLoad(ps_), instr.scalar_result);
@@ -2252,7 +2252,8 @@ bool SpirvShaderTranslator::ProcessVectorAluOperation(
     const ParsedAluInstruction& instr, bool& close_predicated_block) {
   close_predicated_block = false;
 
-  if (!instr.has_vector_op) {
+  if (!instr.vector_and_constant_result.GetUsedWriteMask() &&
+      !AluVectorOpHasSideEffects(instr.vector_opcode)) {
     return false;
   }
 
@@ -2261,7 +2262,7 @@ bool SpirvShaderTranslator::ProcessVectorAluOperation(
   // TODO: If we have identical operands, reuse previous one.
   Id sources[3] = {0};
   Id dest = vec4_float_zero_;
-  for (size_t i = 0; i < instr.vector_operand_count; i++) {
+  for (uint32_t i = 0; i < instr.vector_operand_count; i++) {
     sources[i] = LoadFromOperand(instr.vector_operands[i]);
   }
 
@@ -2636,7 +2637,8 @@ bool SpirvShaderTranslator::ProcessScalarAluOperation(
     const ParsedAluInstruction& instr, bool& close_predicated_block) {
   close_predicated_block = false;
 
-  if (!instr.has_scalar_op) {
+  if (instr.scalar_opcode == ucode::AluScalarOpcode::kRetainPrev &&
+      !instr.scalar_result.GetUsedWriteMask()) {
     return false;
   }
 
@@ -2645,12 +2647,12 @@ bool SpirvShaderTranslator::ProcessScalarAluOperation(
   // TODO: If we have identical operands, reuse previous one.
   Id sources[3] = {0};
   Id dest = b.makeFloatConstant(0);
-  for (size_t i = 0, x = 0; i < instr.scalar_operand_count; i++) {
+  for (uint32_t i = 0, x = 0; i < instr.scalar_operand_count; i++) {
     auto src = LoadFromOperand(instr.scalar_operands[i]);
 
     // Pull components out of the vector operands and use them as sources.
     if (instr.scalar_operands[i].component_count > 1) {
-      for (int j = 0; j < instr.scalar_operands[i].component_count; j++) {
+      for (uint32_t j = 0; j < instr.scalar_operands[i].component_count; j++) {
         sources[x++] = b.createCompositeExtract(src, float_type_, j);
       }
     } else {
@@ -3191,7 +3193,7 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) {
   }
 
   // swizzle
-  if (op.component_count > 1 && !op.is_standard_swizzle()) {
+  if (op.component_count > 1 && !op.IsStandardSwizzle()) {
     std::vector<uint32_t> operands;
     operands.push_back(storage_value);
     operands.push_back(b.makeCompositeConstant(
@@ -3200,7 +3202,7 @@ Id SpirvShaderTranslator::LoadFromOperand(const InstructionOperand& op) {
 
     // Components start from left and are duplicated rightwards
     // e.g. count = 1, xxxx / count = 2, xyyy ...
-    for (int i = 0; i < 4; i++) {
+    for (uint32_t i = 0; i < 4; i++) {
       auto swiz = op.components[i];
       if (i > op.component_count - 1) {
         swiz = op.components[op.component_count - 1];
@@ -3244,7 +3246,8 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
     return;
   }
 
-  if (!result.has_any_writes()) {
+  uint32_t used_write_mask = result.GetUsedWriteMask();
+  if (!used_write_mask) {
     return;
   }
 
@@ -3285,7 +3288,7 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
       storage_array = true;
       assert_true(uint32_t(result.storage_index) < register_count());
       break;
-    case InstructionStorageTarget::kInterpolant:
+    case InstructionStorageTarget::kInterpolator:
       assert_true(is_vertex_shader());
       storage_pointer = interpolators_;
       storage_class = spv::StorageClass::StorageClassOutput;
@@ -3310,7 +3313,7 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
       storage_offsets.push_back(0);
       storage_array = false;
       break;
-    case InstructionStorageTarget::kColorTarget:
+    case InstructionStorageTarget::kColor:
       assert_true(is_pixel_shader());
       assert_not_zero(frag_outputs_);
       storage_pointer = frag_outputs_;
@@ -3351,7 +3354,7 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
 
   // Only load from storage if we need it later.
   Id storage_value = 0;
-  if ((source_is_scalar && !storage_is_scalar) || !result.has_all_writes()) {
+  if ((source_is_scalar && !storage_is_scalar) || used_write_mask != 0b1111) {
     storage_value = b.createLoad(storage_pointer);
   }
 
@@ -3366,7 +3369,7 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
   }
 
   // destination swizzle
-  if (!result.is_standard_swizzle() && !source_is_scalar) {
+  if (!result.IsStandardSwizzle() && !source_is_scalar) {
     std::vector<uint32_t> operands;
     operands.push_back(source_value_id);
     operands.push_back(b.makeCompositeConstant(
@@ -3377,7 +3380,7 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
     // e.g. count = 1, xxxx / count = 2, xyyy ...
     uint32_t source_components = b.getNumComponents(source_value_id);
     for (int i = 0; i < 4; i++) {
-      if (!result.write_mask[i]) {
+      if (!(used_write_mask & (1 << i))) {
         // Undefined / don't care.
         operands.push_back(0);
         continue;
@@ -3411,29 +3414,30 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
   }
 
   // write mask
-  if (!result.has_all_writes() && !source_is_scalar && !storage_is_scalar) {
+  if (used_write_mask != 0b1111 && !source_is_scalar && !storage_is_scalar) {
     std::vector<uint32_t> operands;
     operands.push_back(source_value_id);
     operands.push_back(storage_value);
 
     for (int i = 0; i < b.getNumTypeComponents(storage_type); i++) {
-      operands.push_back(
-          result.write_mask[i] ? i : b.getNumComponents(source_value_id) + i);
+      operands.push_back((used_write_mask & (1 << i))
+                             ? i
+                             : b.getNumComponents(source_value_id) + i);
     }
 
     source_value_id =
         b.createOp(spv::Op::OpVectorShuffle, storage_type, operands);
   } else if (source_is_scalar && !storage_is_scalar) {
-    assert_true(result.num_writes() >= 1);
+    assert_not_zero(used_write_mask);
 
-    if (result.has_all_writes()) {
+    if (used_write_mask == 0b1111) {
       source_value_id =
           b.smearScalar(spv::NoPrecision, source_value_id, storage_type);
     } else {
       // Find first enabled component
       uint32_t index = 0;
       for (uint32_t i = 0; i < 4; i++) {
-        if (result.write_mask[i]) {
+        if (used_write_mask & (1 << i)) {
           index = i;
           break;
         }
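A small sketch (not from the commit) of the OpVectorShuffle index pattern the loop above builds; indices 0..3 select components of the new value, 4..7 select components of the previously loaded storage value:

    // For used_write_mask == 0b0101 and a float4 source:
    uint32_t source_components = 4;  // b.getNumComponents(source_value_id)
    uint32_t shuffle_indices[4];
    for (uint32_t i = 0; i < 4; ++i) {
      shuffle_indices[i] = (0b0101u & (1u << i)) ? i : source_components + i;
    }
    // shuffle_indices == {0, 5, 2, 7}: X and Z come from the new value,
    // Y and W keep their previous storage contents.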
@@ -3443,10 +3447,10 @@ void SpirvShaderTranslator::StoreToResult(Id source_value_id,
     }
   } else if (!source_is_scalar && storage_is_scalar) {
     // Num writes /needs/ to be 1, and let's assume it's the first element.
-    assert_true(result.num_writes() == 1);
+    assert_true(xe::bit_count(used_write_mask) == 1);
 
     for (uint32_t i = 0; i < 4; i++) {
-      if (result.write_mask[i]) {
+      if (used_write_mask & (1 << i)) {
         source_value_id =
             b.createCompositeExtract(source_value_id, storage_type, 0);
         break;
@@ -667,7 +667,11 @@ static_assert_size(TextureFetchInstruction, 12);
 // Both are valid only within the current ALU clause. They are not modified
 // when the instruction that would write them fails its predication check.
 // - Direct3D 9 rules (like in GCN v_*_legacy_f32 instructions) for
-//   multiplication (0 * anything = 0) and for NaN in min/max.
+//   multiplication (0 * anything = 0) wherever it's present (mul, mad, dp,
+//   etc.) and for NaN in min/max. It's very important to respect this rule for
+//   multiplication, as games often rely on it in vector normalization (rcp and
+//   mul); Infinity * 0 resulting in NaN breaks a lot of things in games - it
+//   causes a white screen in Halo 3 and white specular on characters in GTA IV.
 
 enum class AluScalarOpcode : uint32_t {
   // Floating-Point Add
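A sketch of the multiplication rule described in the comment above (not from the commit; MulLegacy is a hypothetical name):

    // If either factor is +/-0, the result is 0 even when the other factor
    // is Infinity or NaN - like GCN v_mul_legacy_f32; otherwise IEEE multiply.
    inline float MulLegacy(float a, float b) {
      return (a == 0.0f || b == 0.0f) ? 0.0f : a * b;
    }
    // Why games depend on this: normalizing a zero-length vector via rcp/rsq
    // produces Infinity, and IEEE Infinity * 0 would be NaN, while
    // MulLegacy(infinity, 0.0f) stays 0.0f.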
@@ -1300,8 +1304,10 @@ enum class AluVectorOpcode : uint32_t {
 
 // Whether the vector instruction has side effects such as discarding a pixel or
 // setting the predicate and can't be ignored even if it doesn't write to
-// anywhere.
-inline bool AluVectorOpcodeHasSideEffects(AluVectorOpcode vector_opcode) {
+// anywhere. Note that all scalar operations except for retain_prev have a side
+// effect of modifying the previous scalar result register, so they must always
+// be executed even if not writing.
+constexpr bool AluVectorOpHasSideEffects(AluVectorOpcode vector_opcode) {
   switch (vector_opcode) {
     case AluVectorOpcode::kSetpEqPush:
     case AluVectorOpcode::kSetpNePush:
@@ -1319,7 +1325,126 @@ inline bool AluVectorOpcodeHasSideEffects(AluVectorOpcode vector_opcode) {
   return false;
 }
 
+// Whether each component of a source operand is used at all in the instruction
+// (doesn't check the operand count though).
+constexpr uint32_t GetAluVectorOpUsedSourceComponents(
+    AluVectorOpcode vector_opcode, uint32_t src_index) {
+  switch (vector_opcode) {
+    case AluVectorOpcode::kDp3:
+      return 0b0111;
+    case AluVectorOpcode::kDp2Add:
+      return src_index == 3 ? 0b0001 : 0b0011;
+    case AluVectorOpcode::kSetpEqPush:
+    case AluVectorOpcode::kSetpNePush:
+    case AluVectorOpcode::kSetpGtPush:
+    case AluVectorOpcode::kSetpGePush:
+      return 0b1001;
+    case AluVectorOpcode::kDst:
+      return src_index == 2 ? 0b1010 : 0b0110;
+    default:
+      break;
+  }
+  return 0b1111;
+}
+
+// Whether each component of a source operand is needed for the instruction if
+// executed with the specified write mask, and thus can't be thrown away or be
+// undefined in translation. For per-component operations, for example, only the
+// components specified in the write mask are needed, but there are instructions
+// with special behavior for certain components.
+constexpr uint32_t GetAluVectorOpNeededSourceComponents(
+    AluVectorOpcode vector_opcode, uint32_t src_index, uint32_t write_mask) {
+  uint32_t components = write_mask;
+  switch (vector_opcode) {
+    case AluVectorOpcode::kDp4:
+    case AluVectorOpcode::kMax4:
+      components = write_mask ? 0b1111 : 0;
+      break;
+    case AluVectorOpcode::kDp3:
+      components = write_mask ? 0b0111 : 0;
+      break;
+    case AluVectorOpcode::kDp2Add:
+      components = write_mask ? (src_index == 3 ? 0b0001 : 0b0011) : 0;
+      break;
+    case AluVectorOpcode::kCube:
+      components = write_mask ? 0b1111 : 0;
+      break;
+    case AluVectorOpcode::kSetpEqPush:
+    case AluVectorOpcode::kSetpNePush:
+    case AluVectorOpcode::kSetpGtPush:
+    case AluVectorOpcode::kSetpGePush:
+      components = write_mask ? 0b1001 : 0b1000;
+      break;
+    case AluVectorOpcode::kKillEq:
+    case AluVectorOpcode::kKillGt:
+    case AluVectorOpcode::kKillGe:
+    case AluVectorOpcode::kKillNe:
+      components = 0b1111;
+      break;
+    // kDst is per-component, but not all components are used -
+    // GetAluVectorOpUsedSourceComponents will filter out the unused ones.
+    case AluVectorOpcode::kMaxA:
+      if (src_index == 1) {
+        components |= 0b1000;
+      }
+      break;
+    default:
+      break;
+  }
+  return components &
+         GetAluVectorOpUsedSourceComponents(vector_opcode, src_index);
+}
+
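A brief usage sketch (not from the commit) of the two helpers above, for dp2add - dest = src0.x * src1.x + src0.y * src1.y + src2.x - where src_index is 1-based, matching the src_index == 3 checks:

    // With write mask .x, only X of the third operand has to be loaded;
    // Y/Z/W may be left undefined by the translator.
    static_assert(GetAluVectorOpNeededSourceComponents(
                      AluVectorOpcode::kDp2Add, 3, 0b0001) == 0b0001,
                  "dp2add src3 contributes only its X component");
    // With an empty write mask, no components of the operand are needed,
    // since dp2add has no side effects.
    static_assert(GetAluVectorOpNeededSourceComponents(
                      AluVectorOpcode::kDp2Add, 3, 0b0000) == 0b0000,
                  "nothing is needed when nothing is written");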
+enum class ExportRegister : uint32_t {
+  kVSInterpolator0 = 0,
+  kVSInterpolator1,
+  kVSInterpolator2,
+  kVSInterpolator3,
+  kVSInterpolator4,
+  kVSInterpolator5,
+  kVSInterpolator6,
+  kVSInterpolator7,
+  kVSInterpolator8,
+  kVSInterpolator9,
+  kVSInterpolator10,
+  kVSInterpolator11,
+  kVSInterpolator12,
+  kVSInterpolator13,
+  kVSInterpolator14,
+  kVSInterpolator15,
+
+  kVSPosition = 62,
+
+  // See R6xx/R7xx registers for details (USE_VTX_POINT_SIZE, USE_VTX_EDGE_FLAG,
+  // USE_VTX_KILL_FLAG).
+  // X - PSIZE (gl_PointSize).
+  // Y - EDGEFLAG (glEdgeFlag) for PrimitiveType::kPolygon wireframe/point
+  //     drawing.
+  // Z - KILLVERTEX flag (used in Banjo-Kazooie: Nuts & Bolts for grass), set
+  //     for killing primitives based on PA_CL_CLIP_CNTL::VTX_KILL_OR condition.
+  kVSPointSizeEdgeFlagKillVertex = 63,
+
+  kPSColor0 = 0,
+  kPSColor1,
+  kPSColor2,
+  kPSColor3,
+
+  // In X.
+  kPSDepth = 61,
+
+  // Memory export: index.?y?? * 0100 + xe_gpu_memexport_stream_t.xyzw.
+  kExportAddress = 32,
+  // Memory export: values for texels [index+0], [index+1], ..., [index+4].
+  kExportData0 = 33,
+  kExportData1,
+  kExportData2,
+  kExportData3,
+  kExportData4,
+};
+
 struct AluInstruction {
   // Raw accessors.
 
   // Whether data is being exported (or written to local registers).
   bool is_export() const { return data_.export_data == 1; }
+  bool export_write_mask() const { return data_.scalar_dest_rel == 1; }
@@ -1334,20 +1459,12 @@ struct AluInstruction {
   bool is_const_1_addressed() const { return data_.const_1_rel_abs == 1; }
   bool is_address_relative() const { return data_.address_absolute == 1; }
 
-  bool has_vector_op() const {
-    return vector_write_mask() || is_export() ||
-           AluVectorOpcodeHasSideEffects(vector_opcode());
-  }
   AluVectorOpcode vector_opcode() const { return data_.vector_opc; }
   uint32_t vector_write_mask() const { return data_.vector_write_mask; }
   uint32_t vector_dest() const { return data_.vector_dest; }
   bool is_vector_dest_relative() const { return data_.vector_dest_rel == 1; }
   bool vector_clamp() const { return data_.vector_clamp == 1; }
 
-  bool has_scalar_op() const {
-    return scalar_opcode() != AluScalarOpcode::kRetainPrev ||
-           (!is_export() && scalar_write_mask() != 0);
-  }
   AluScalarOpcode scalar_opcode() const { return data_.scalar_opc; }
   uint32_t scalar_write_mask() const { return data_.scalar_write_mask; }
   uint32_t scalar_dest() const { return data_.scalar_dest; }
@@ -1407,14 +1524,62 @@ struct AluInstruction {
     }
   }
 
+  // Helpers.
+
+  // Note that even if the export component is unused (like W of the vertex
+  // shader misc register, YZW of pixel shader depth), it must still not be
+  // excluded - that may make the disassembly not reassemblable if there are
+  // constant 0 writes in the export; for example, oPts.x000 will be assembled,
+  // but oPts.x00_ will not, even though W has no effect on anything.
+  uint32_t GetVectorOpResultWriteMask() const {
+    uint32_t mask = vector_write_mask();
+    if (is_export()) {
+      mask &= ~scalar_write_mask();
+    }
+    return mask;
+  }
+  uint32_t GetScalarOpResultWriteMask() const {
+    uint32_t mask = scalar_write_mask();
+    if (is_export()) {
+      mask &= ~vector_write_mask();
+    }
+    return mask;
+  }
+  uint32_t GetConstant0WriteMask() const {
+    if (!is_export() || !is_scalar_dest_relative()) {
+      return 0b0000;
+    }
+    return 0b1111 & ~(vector_write_mask() | scalar_write_mask());
+  }
+  uint32_t GetConstant1WriteMask() const {
+    if (!is_export()) {
+      return 0b0000;
+    }
+    return vector_write_mask() & scalar_write_mask();
+  }
 
  private:
   XEPACKEDSTRUCT(Data, {
     XEPACKEDSTRUCTANONYMOUS({
+      // If exporting, both vector and scalar operations use the vector
+      // destination (which can't be relative in this case).
+      // Not very important note: If both scalar and vector operations exporting
+      // something have an empty write mask, the XNA assembler forces vector_dest
+      // to 0 (interpolator 0 or color 0) directly in the microcode.
       uint32_t vector_dest : 6;
       uint32_t vector_dest_rel : 1;
       uint32_t abs_constants : 1;
       uint32_t scalar_dest : 6;
       uint32_t scalar_dest_rel : 1;
+      // Exports have different write masking (export is done to vector_dest by
+      // both the vector and the scalar operation, and exports can write
+      // constant 0 and 1). For each component:
+      // - vector_write_mask 0, scalar_write_mask 0:
+      //   - scalar_dest_rel 0 - unchanged.
+      //   - scalar_dest_rel 1 - constant 0 (all components must be written).
+      // - vector_write_mask 1, scalar_write_mask 0 - from the vector operation.
+      // - vector_write_mask 0, scalar_write_mask 1 - from the scalar operation.
+      // - vector_write_mask 1, scalar_write_mask 1 - constant 1.
       uint32_t export_data : 1;
       uint32_t vector_write_mask : 4;
       uint32_t scalar_write_mask : 4;
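As a worked example of the export write-mask table above (constructed, not from the commit): for an export with vector_write_mask 0b0011, scalar_write_mask 0b0110, and scalar_dest_rel set, component 0 comes from the vector op, component 1 is constant 1, component 2 comes from the scalar op, and component 3 is constant 0, so the four helpers return 0b0001, 0b0010, 0b0100, and 0b1000 respectively. A hypothetical sanity check (ExportWriteMasksPartition is not in the commit):

    // When an export has scalar_dest_rel set, the four masks partition all
    // four components: each component is written by exactly one source.
    inline bool ExportWriteMasksPartition(const AluInstruction& instr) {
      uint32_t v = instr.GetVectorOpResultWriteMask();
      uint32_t s = instr.GetScalarOpResultWriteMask();
      uint32_t c0 = instr.GetConstant0WriteMask();
      uint32_t c1 = instr.GetConstant1WriteMask();
      // Pairwise disjoint...
      if ((v & s) || (v & c0) || (v & c1) || (s & c0) || (s & c1) ||
          (c0 & c1)) {
        return false;
      }
      // ...and covering all components.
      return (v | s | c0 | c1) == 0b1111;
    }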
@@ -267,6 +267,7 @@ namespace shader_playground {
         "--shader_output=" + translatedDisasmPath,
         "--shader_output_type=" + outputType,
         "--vertex_shader_output_type=" + vertexShaderType,
+        "--dxbc_source_map=true",
       };
       if (translationComboBox.SelectedIndex == 1) {
         startArguments.Add("--shader_output_dxbc_rov=true");