From 85fb028faae183e8dd2630b84d07633abd2edb7f Mon Sep 17 00:00:00 2001 From: Triang3l Date: Fri, 7 Feb 2020 09:42:43 +0300 Subject: [PATCH] [D3D12] Cleaner DXBC emission code, port ROV prologue to it --- src/xenia/gpu/dxbc_shader_translator.cc | 90 ++ src/xenia/gpu/dxbc_shader_translator.h | 770 ++++++++++++ src/xenia/gpu/dxbc_shader_translator_om.cc | 1238 +++++--------------- 3 files changed, 1123 insertions(+), 975 deletions(-) diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 927477516..66b607f6b 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -180,6 +180,87 @@ void DxbcShaderTranslator::Reset() { std::memset(&stat_, 0, sizeof(stat_)); } +void DxbcShaderTranslator::DxbcSrc::Write(std::vector& code, + uint32_t dest_write_mask, + bool is_integer) const { + uint32_t operand_token = GetOperandTokenTypeAndIndex(); + uint32_t dest_component = DxbcDest::GetMaskSingleComponent(dest_write_mask); + uint32_t select_component = dest_component != UINT32_MAX ? dest_component : 0; + bool dest_is_vector = + dest_write_mask != 0b0000 && dest_component == UINT32_MAX; + if (type_ == DxbcOperandType::kImmediate32) { + if (dest_is_vector) { + operand_token |= uint32_t(DxbcOperandDimension::kVector) | + (uint32_t(DxbcComponentSelection::kSwizzle) << 2) | + (DxbcSrc::kXYZW << 4); + } else { + operand_token |= uint32_t(DxbcOperandDimension::kScalar); + } + code.push_back(operand_token); + if (dest_is_vector) { + for (uint32_t i = 0; i < 4; ++i) { + code.push_back((dest_write_mask & (1 << i)) + ? 
GetModifiedImmediate(i, is_integer) + : 0); + } + } else { + code.push_back(GetModifiedImmediate(select_component, is_integer)); + } + } else { + switch (GetDimension()) { + case DxbcOperandDimension::kScalar: + if (dest_is_vector) { + operand_token |= uint32_t(DxbcOperandDimension::kVector) | + (uint32_t(DxbcComponentSelection::kSwizzle) << 2) | + (DxbcSrc::kXXXX << 4); + } else { + operand_token |= uint32_t(DxbcOperandDimension::kScalar); + } + break; + case DxbcOperandDimension::kVector: + operand_token |= uint32_t(DxbcOperandDimension::kVector); + if (dest_is_vector) { + operand_token |= uint32_t(DxbcComponentSelection::kSwizzle) << 2; + // Clear swizzle of unused components to a used value to avoid + // referencing potentially uninitialized register components. + uint32_t used_component; + if (!xe::bit_scan_forward(dest_write_mask, &used_component)) { + used_component = 0; + } + for (uint32_t i = 0; i < 4; ++i) { + uint32_t swizzle_index = + (dest_write_mask & (1 << i)) ? i : used_component; + operand_token |= + (((swizzle_ >> (swizzle_index * 2)) & 3) << (4 + i * 2)); + } + } else { + operand_token |= (uint32_t(DxbcComponentSelection::kSelect1) << 2) | + (((swizzle_ >> (select_component * 2)) & 3) << 4); + } + break; + default: + break; + } + DxbcOperandModifier modifier = DxbcOperandModifier::kNone; + if (absolute_ && negate_) { + modifier = DxbcOperandModifier::kAbsoluteNegate; + } else if (absolute_) { + modifier = DxbcOperandModifier::kAbsolute; + } else if (negate_) { + modifier = DxbcOperandModifier::kNegate; + } + if (modifier != DxbcOperandModifier::kNone) { + operand_token |= uint32_t(1) << 31; + } + code.push_back(operand_token); + if (modifier != DxbcOperandModifier::kNone) { + code.push_back(uint32_t(DxbcExtendedOperandType::kModifier) | + (uint32_t(modifier) << 6)); + } + DxbcOperandAddress::Write(code); + } +} + bool DxbcShaderTranslator::UseSwitchForControlFlow() const { // Xenia crashes on Intel HD Graphics 4000 with switch. 
return cvars::dxbc_switch && vendor_id_ != 0x8086; @@ -1306,6 +1387,15 @@ void DxbcShaderTranslator::StartTranslation() { system_temp_grad_v_ = PushSystemTemp(0b0111); } + // Zero general-purpose registers to prevent crashes when the game references + // them. + for (uint32_t i = IsDxbcPixelShader() ? kInterpolatorCount : 0; + i < register_count(); ++i) { + DxbcOpMov( + uses_register_dynamic_addressing() ? DxbcDest::X(0, i) : DxbcDest::R(i), + DxbcSrc::LU(uint32_t(0))); + } + // Write stage-specific prologue. if (IsDxbcVertexOrDomainShader()) { StartVertexOrDomainShader(); diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 19a46eae7..0d81910e4 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -10,10 +10,12 @@ #ifndef XENIA_GPU_DXBC_SHADER_TRANSLATOR_H_ #define XENIA_GPU_DXBC_SHADER_TRANSLATOR_H_ +#include #include #include #include +#include "xenia/base/assert.h" #include "xenia/base/cvar.h" #include "xenia/base/math.h" #include "xenia/base/string_buffer.h" @@ -25,6 +27,28 @@ namespace xe { namespace gpu { // Generates shader model 5_1 byte code (for Direct3D 12). +// +// IMPORTANT CONTRIBUTION NOTES: +// +// Not all DXBC instructions accept all kinds of operands equally! +// Refer to Shader Model 4 and 5 Assembly on MSDN to see if the needed +// swizzle/selection, absolute/negate modifiers and saturation are supported by +// the instruction. +// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx9-graphics-reference-asm +// Before adding anything that behaves in a way that doesn't follow patterns +// already used in Xenia, try to write the same logic in HLSL, compile it with +// FXC and see the resulting assembly *and preferably binary bytecode* as some +// instructions may, for example, require selection rather than swizzling for +// certain operands. For bytecode structure, see d3d12TokenizedProgramFormat.hpp +// from the Windows Driver Kit. 
+// +// Avoid using uninitialized register components - such as registers written to +// in "if" and not in "else", but then used outside unconditionally or with a +// different condition (or even with the same condition, but in a different "if" +// block). This will cause crashes on AMD drivers, and will also limit +// optimization possibilities as this may result in false dependencies. Always +// mov l(0, 0, 0, 0) to such components before potential branching - +// PushSystemTemp accepts a zero mask for this purpose. class DxbcShaderTranslator : public ShaderTranslator { public: DxbcShaderTranslator(uint32_t vendor_id, bool edram_rov_used); @@ -360,6 +384,752 @@ class DxbcShaderTranslator : public ShaderTranslator { void ProcessAluInstruction(const ParsedAluInstruction& instr) override; private: + // D3D10_SB_OPERAND_TYPE + enum class DxbcOperandType : uint32_t { + kTemp = 0, + kInput = 1, + kOutput = 2, + // Only usable as destination or source (but not both) in mov (and it + // becomes an array instruction this way). 
+ kIndexableTemp = 3, + kImmediate32 = 4, + kSampler = 6, + kResource = 7, + kConstantBuffer = 8, + kLabel = 10, + kInputPrimitiveID = 11, + kOutputDepth = 12, + kNull = 13, + kInputDomainPoint = 28, + kUnorderedAccessView = 30, + kInputCoverageMask = 35, + }; + + // D3D10_SB_OPERAND_INDEX_DIMENSION + static constexpr uint32_t GetDxbcOperandIndexDimension(DxbcOperandType type) { + switch (type) { + case DxbcOperandType::kTemp: + case DxbcOperandType::kInput: + case DxbcOperandType::kOutput: + case DxbcOperandType::kLabel: + return 1; + case DxbcOperandType::kIndexableTemp: + case DxbcOperandType::kSampler: + case DxbcOperandType::kResource: + case DxbcOperandType::kUnorderedAccessView: + return 2; + case DxbcOperandType::kConstantBuffer: + return 3; + default: + return 0; + } + } + + // D3D10_SB_OPERAND_NUM_COMPONENTS + enum class DxbcOperandDimension : uint32_t { + kNoData, // D3D10_SB_OPERAND_0_COMPONENT + kScalar, // D3D10_SB_OPERAND_1_COMPONENT + kVector, // D3D10_SB_OPERAND_4_COMPONENT + }; + + static constexpr DxbcOperandDimension GetDxbcOperandDimension( + DxbcOperandType type, bool dest_in_dcl = false) { + switch (type) { + case DxbcOperandType::kSampler: + case DxbcOperandType::kLabel: + case DxbcOperandType::kNull: + return DxbcOperandDimension::kNoData; + case DxbcOperandType::kInputPrimitiveID: + case DxbcOperandType::kOutputDepth: + return DxbcOperandDimension::kScalar; + case DxbcOperandType::kInputCoverageMask: + return dest_in_dcl ? DxbcOperandDimension::kScalar + : DxbcOperandDimension::kVector; + default: + return DxbcOperandDimension::kVector; + } + } + + // D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE + enum class DxbcComponentSelection { + kMask, + kSwizzle, + kSelect1, + }; + + struct DxbcIndex { + // D3D10_SB_OPERAND_INDEX_REPRESENTATION + enum class Representation : uint32_t { + kImmediate32 = 0, + kRelative = 2, + kImmediate32PlusRelative = 3, + }; + + uint32_t index_; + // UINT32_MAX if absolute. 
Lower 2 bits are the component index, upper bits + // are the temp register index. Applicable to indexable temps, inputs, + // outputs except for pixel shaders, constant buffers and bindings. + uint32_t relative_to_temp_; + + // Implicit constructor. + DxbcIndex(uint32_t index = 0) + : index_(index), relative_to_temp_(UINT32_MAX) {} + DxbcIndex(uint32_t temp, uint32_t temp_component, uint32_t offset = 0) + : index_(offset), relative_to_temp_((temp << 2) | temp_component) {} + + Representation GetRepresentation() const { + if (relative_to_temp_ != UINT32_MAX) { + return index_ != 0 ? Representation::kImmediate32PlusRelative + : Representation::kRelative; + } + return Representation::kImmediate32; + } + uint32_t GetLength() const { + return relative_to_temp_ != UINT32_MAX ? (index_ != 0 ? 3 : 2) : 1; + } + void Write(std::vector& code) const { + if (relative_to_temp_ == UINT32_MAX || index_ != 0) { + code.push_back(index_); + } + if (relative_to_temp_ != UINT32_MAX) { + // Encode selecting one component from absolute-indexed r#. 
+ code.push_back(uint32_t(DxbcOperandDimension::kVector) | + (uint32_t(DxbcComponentSelection::kSelect1) << 2) | + ((relative_to_temp_ & 3) << 4) | + (uint32_t(DxbcOperandType::kTemp) << 12) | (1 << 20) | + (uint32_t(Representation::kImmediate32) << 22)); + code.push_back(relative_to_temp_ >> 2); + } + } + }; + + struct DxbcOperandAddress { + DxbcOperandType type_; + DxbcIndex index_1d_, index_2d_, index_3d_; + + explicit DxbcOperandAddress(DxbcOperandType type, + DxbcIndex index_1d = DxbcIndex(), + DxbcIndex index_2d = DxbcIndex(), + DxbcIndex index_3d = DxbcIndex()) + : type_(type), + index_1d_(index_1d), + index_2d_(index_2d), + index_3d_(index_3d) {} + + DxbcOperandDimension GetDimension(bool dest_in_dcl = false) const { + return GetDxbcOperandDimension(type_, dest_in_dcl); + } + uint32_t GetIndexDimension() const { + return GetDxbcOperandIndexDimension(type_); + } + uint32_t GetOperandTokenTypeAndIndex() const { + uint32_t index_dimension = GetIndexDimension(); + uint32_t operand_token = + (uint32_t(type_) << 12) | (index_dimension << 20); + if (index_dimension > 0) { + operand_token |= uint32_t(index_1d_.GetRepresentation()) << 22; + if (index_dimension > 1) { + operand_token |= uint32_t(index_2d_.GetRepresentation()) << 25; + if (index_dimension > 2) { + operand_token |= uint32_t(index_3d_.GetRepresentation()) << 28; + } + } + } + return operand_token; + } + uint32_t GetLength() const { + uint32_t length = 0; + uint32_t index_dimension = GetIndexDimension(); + if (index_dimension > 0) { + length += index_1d_.GetLength(); + if (index_dimension > 1) { + length += index_2d_.GetLength(); + if (index_dimension > 2) { + length += index_3d_.GetLength(); + } + } + } + return length; + } + void Write(std::vector& code) const { + uint32_t index_dimension = GetIndexDimension(); + if (index_dimension > 0) { + index_1d_.Write(code); + if (index_dimension > 1) { + index_2d_.Write(code); + if (index_dimension > 2) { + index_3d_.Write(code); + } + } + } + } + }; + + // 
D3D10_SB_EXTENDED_OPERAND_TYPE + enum class DxbcExtendedOperandType : uint32_t { + kEmpty, + kModifier, + }; + + // D3D10_SB_OPERAND_MODIFIER + enum class DxbcOperandModifier : uint32_t { + kNone, + kNegate, + kAbsolute, + kAbsoluteNegate, + }; + + struct DxbcDest : DxbcOperandAddress { + // Ignored for 0-component and 1-component operand types. + uint32_t write_mask_; + + explicit DxbcDest(DxbcOperandType type, uint32_t write_mask = 0b1111, + DxbcIndex index_1d = DxbcIndex(), + DxbcIndex index_2d = DxbcIndex(), + DxbcIndex index_3d = DxbcIndex()) + : DxbcOperandAddress(type, index_1d, index_2d, index_3d), + write_mask_(write_mask) {} + + static DxbcDest R(uint32_t index, uint32_t write_mask = 0b1111) { + return DxbcDest(DxbcOperandType::kTemp, write_mask, index); + } + static DxbcDest O(DxbcIndex index, uint32_t write_mask = 0b1111) { + return DxbcDest(DxbcOperandType::kOutput, write_mask, index); + } + static DxbcDest X(uint32_t index_1d, DxbcIndex index_2d, + uint32_t write_mask = 0b1111) { + return DxbcDest(DxbcOperandType::kIndexableTemp, write_mask, index_1d, + index_2d); + } + static DxbcDest ODepth() { + return DxbcDest(DxbcOperandType::kOutputDepth, 0b0001); + } + static DxbcDest Null() { return DxbcDest(DxbcOperandType::kNull, 0b0000); } + // Must write to all 4 components. 
+ static DxbcDest U(uint32_t index_1d, DxbcIndex index_2d) { + return DxbcDest(DxbcOperandType::kUnorderedAccessView, 0b1111, index_1d, + index_2d); + } + + uint32_t GetMask() const { + switch (GetDimension()) { + case DxbcOperandDimension::kNoData: + return 0b0000; + case DxbcOperandDimension::kScalar: + return 0b0001; + case DxbcOperandDimension::kVector: + return write_mask_; + default: + assert_unhandled_case(GetDimension()); + return 0b0000; + } + } + DxbcDest Mask(uint32_t write_mask) const { + return DxbcDest(type_, write_mask, index_1d_, index_2d_, index_3d_); + } + DxbcDest MaskMasked(uint32_t write_mask) const { + return DxbcDest(type_, write_mask_ & write_mask, index_1d_, index_2d_, + index_3d_); + } + static uint32_t GetMaskSingleComponent(uint32_t write_mask) { + uint32_t component; + if (xe::bit_scan_forward(write_mask, &component)) { + if ((write_mask >> component) == 1) { + return component; + } + } + return UINT32_MAX; + } + uint32_t GetMaskSingleComponent() const { + return GetMaskSingleComponent(GetMask()); + } + + uint32_t GetLength() const { return 1 + DxbcOperandAddress::GetLength(); } + void Write(std::vector& code, bool in_dcl = false) const { + uint32_t operand_token = GetOperandTokenTypeAndIndex(); + DxbcOperandDimension dimension = GetDimension(in_dcl); + operand_token |= uint32_t(dimension); + if (dimension == DxbcOperandDimension::kVector) { + operand_token |= + (uint32_t(DxbcComponentSelection::kMask) << 2) | (write_mask_ << 4); + } + code.push_back(operand_token); + DxbcOperandAddress::Write(code); + } + }; + + struct DxbcSrc : DxbcOperandAddress { + enum : uint32_t { + kXYZW = 0b11100100, + kXXXX = 0b00000000, + kYYYY = 0b01010101, + kZZZZ = 0b10101010, + kWWWW = 0b11111111, + }; + + // Ignored for 0-component and 1-component operand types. + uint32_t swizzle_; + bool absolute_; + bool negate_; + // Only valid for DxbcOperandType::kImmediate32. 
+ uint32_t immediate_[4]; + + explicit DxbcSrc(DxbcOperandType type, uint32_t swizzle = kXYZW, + DxbcIndex index_1d = DxbcIndex(), + DxbcIndex index_2d = DxbcIndex(), + DxbcIndex index_3d = DxbcIndex()) + : DxbcOperandAddress(type, index_1d, index_2d, index_3d), + swizzle_(swizzle), + absolute_(false), + negate_(false) {} + + static DxbcSrc R(uint32_t index, uint32_t swizzle = kXYZW) { + return DxbcSrc(DxbcOperandType::kTemp, swizzle, index); + } + static DxbcSrc V(DxbcIndex index, uint32_t swizzle = kXYZW) { + return DxbcSrc(DxbcOperandType::kInput, swizzle, index); + } + static DxbcSrc X(uint32_t index_1d, DxbcIndex index_2d, + uint32_t swizzle = kXYZW) { + return DxbcSrc(DxbcOperandType::kIndexableTemp, swizzle, index_1d, + index_2d); + } + static DxbcSrc LU(uint32_t x, uint32_t y, uint32_t z, uint32_t w) { + DxbcSrc src(DxbcOperandType::kImmediate32, kXYZW); + src.immediate_[0] = x; + src.immediate_[1] = y; + src.immediate_[2] = z; + src.immediate_[3] = w; + return src; + } + static DxbcSrc LU(uint32_t x) { return LU(x, x, x, x); } + static DxbcSrc LI(int32_t x, int32_t y, int32_t z, int32_t w) { + return LU(uint32_t(x), uint32_t(y), uint32_t(z), uint32_t(w)); + } + static DxbcSrc LI(int32_t x) { return LI(x, x, x, x); } + static DxbcSrc LF(float x, float y, float z, float w) { + return LU(*reinterpret_cast(&x), + *reinterpret_cast(&y), + *reinterpret_cast(&z), + *reinterpret_cast(&w)); + } + static DxbcSrc LF(float x) { return LF(x, x, x, x); } + static DxbcSrc LP(const uint32_t* xyzw) { + return LU(xyzw[0], xyzw[1], xyzw[2], xyzw[3]); + } + static DxbcSrc LP(const int32_t* xyzw) { + return LI(xyzw[0], xyzw[1], xyzw[2], xyzw[3]); + } + static DxbcSrc LP(const float* xyzw) { + return LF(xyzw[0], xyzw[1], xyzw[2], xyzw[3]); + } + static DxbcSrc S(uint32_t index_1d, DxbcIndex index_2d) { + return DxbcSrc(DxbcOperandType::kSampler, kXXXX, index_1d, index_2d); + } + static DxbcSrc T(uint32_t index_1d, DxbcIndex index_2d, + uint32_t swizzle = kXYZW) { + return 
DxbcSrc(DxbcOperandType::kResource, swizzle, index_1d, index_2d); + } + static DxbcSrc CB(uint32_t index_1d, DxbcIndex index_2d, DxbcIndex index_3d, + uint32_t swizzle = kXYZW) { + return DxbcSrc(DxbcOperandType::kConstantBuffer, swizzle, index_1d, + index_2d, index_3d); + } + static DxbcSrc Label(uint32_t index) { + return DxbcSrc(DxbcOperandType::kLabel, kXXXX, index); + } + static DxbcSrc VPrim() { + return DxbcSrc(DxbcOperandType::kInputPrimitiveID, kXXXX); + } + static DxbcSrc VDomain(uint32_t swizzle = kXYZW) { + return DxbcSrc(DxbcOperandType::kInputDomainPoint, swizzle); + } + static DxbcSrc U(uint32_t index_1d, DxbcIndex index_2d, + uint32_t swizzle = kXYZW) { + return DxbcSrc(DxbcOperandType::kUnorderedAccessView, swizzle, index_1d, + index_2d); + } + static DxbcSrc VCoverage() { + return DxbcSrc(DxbcOperandType::kInputCoverageMask, kXXXX); + } + + DxbcSrc WithModifiers(bool absolute, bool negate) const { + DxbcSrc new_src(*this); + new_src.absolute_ = absolute; + new_src.negate_ = negate; + return new_src; + } + DxbcSrc WithAbs(bool absolute) const { + return WithModifiers(absolute, negate_); + } + DxbcSrc WithNeg(bool negate) const { + return WithModifiers(absolute_, negate); + } + DxbcSrc Abs() const { return WithModifiers(true, false); } + DxbcSrc operator-() const { return WithModifiers(absolute_, !negate_); } + DxbcSrc Swizzle(uint32_t swizzle) const { + DxbcSrc new_src(*this); + new_src.swizzle_ = swizzle; + return new_src; + } + DxbcSrc SwizzleSwizzled(uint32_t swizzle) const { + DxbcSrc new_src(*this); + new_src.swizzle_ = 0; + for (uint32_t i = 0; i < 4; ++i) { + new_src.swizzle_ |= ((swizzle_ >> (((swizzle >> (i * 2)) & 3) * 2)) & 3) + << (i * 2); + } + return new_src; + } + DxbcSrc Select(uint32_t component) const { + DxbcSrc new_src(*this); + new_src.swizzle_ = component * 0b01010101; + return new_src; + } + DxbcSrc SelectFromSwizzled(uint32_t component) const { + DxbcSrc new_src(*this); + new_src.swizzle_ = ((swizzle_ >> (component * 2)) & 
3) * 0b01010101; + return new_src; + } + + uint32_t GetLength(uint32_t dest_write_mask) const { + bool dest_is_vector = + dest_write_mask != 0b0000 && + DxbcDest::GetMaskSingleComponent(dest_write_mask) == UINT32_MAX; + if (type_ == DxbcOperandType::kImmediate32) { + return dest_is_vector ? 5 : 2; + } + return ((absolute_ || negate_) ? 2 : 1) + DxbcOperandAddress::GetLength(); + } + static uint32_t GetModifiedImmediate(uint32_t value, bool is_integer, + bool absolute, bool negate) { + if (is_integer) { + if (absolute) { + *reinterpret_cast(&value) = + std::abs(*reinterpret_cast(&value)); + } + if (negate) { + *reinterpret_cast(&value) = + -*reinterpret_cast(&value); + } + } else { + if (absolute) { + value &= uint32_t(INT32_MAX); + } + if (negate) { + value ^= uint32_t(INT32_MAX) + 1; + } + } + return value; + } + uint32_t GetModifiedImmediate(uint32_t swizzle_index, + bool is_integer) const { + return GetModifiedImmediate( + immediate_[(swizzle_ >> (swizzle_index * 2)) & 3], is_integer, + absolute_, negate_); + } + void Write(std::vector& code, uint32_t dest_write_mask, + bool is_integer) const; + }; + + // D3D10_SB_OPCODE_TYPE + enum class DxbcOpcode : uint32_t { + kAdd = 0, + kAnd = 1, + kCall = 4, + kDiv = 14, + kElse = 18, + kEndIf = 21, + kEndLoop = 22, + kEndSwitch = 23, + kFToU = 28, + kIAdd = 30, + kIf = 31, + kIMAd = 35, + kIShL = 41, + kMAd = 50, + kMin = 51, + kMax = 52, + kMov = 54, + kMovC = 55, + kRetC = 63, + kUGE = 80, + kUMul = 81, + kUMAd = 82, + kUShR = 85, + kXOr = 87, + kDerivRTXCoarse = 122, + kDerivRTXFine = 123, + kDerivRTYCoarse = 124, + kDerivRTYFine = 125, + kUBFE = 138, + kIBFE = 139, + kBFI = 140, + kEvalSampleIndex = 204, + }; + + static uint32_t DxbcOpcodeToken(DxbcOpcode opcode, uint32_t operands_length, + bool saturate = false) { + return uint32_t(opcode) | (saturate ? 
(1 << 13) : 0) | + ((1 + operands_length) << 24); + } + + void DxbcEmitAluOp(DxbcOpcode opcode, uint32_t src_are_integer, + const DxbcDest& dest, const DxbcSrc& src, + bool saturate = false) { + uint32_t dest_write_mask = dest.GetMask(); + uint32_t operands_length = + dest.GetLength() + src.GetLength(dest_write_mask); + shader_code_.reserve(shader_code_.size() + 1 + operands_length); + shader_code_.push_back(DxbcOpcodeToken(opcode, operands_length, saturate)); + dest.Write(shader_code_); + src.Write(shader_code_, dest_write_mask, (src_are_integer & 0b1) != 0); + ++stat_.instruction_count; + } + void DxbcEmitAluOp(DxbcOpcode opcode, uint32_t src_are_integer, + const DxbcDest& dest, const DxbcSrc& src0, + const DxbcSrc& src1, bool saturate = false) { + uint32_t dest_write_mask = dest.GetMask(); + uint32_t operands_length = dest.GetLength() + + src0.GetLength(dest_write_mask) + + src1.GetLength(dest_write_mask); + shader_code_.reserve(shader_code_.size() + 1 + operands_length); + shader_code_.push_back(DxbcOpcodeToken(opcode, operands_length, saturate)); + dest.Write(shader_code_); + src0.Write(shader_code_, dest_write_mask, (src_are_integer & 0b1) != 0); + src1.Write(shader_code_, dest_write_mask, (src_are_integer & 0b10) != 0); + ++stat_.instruction_count; + } + void DxbcEmitAluOp(DxbcOpcode opcode, uint32_t src_are_integer, + const DxbcDest& dest, const DxbcSrc& src0, + const DxbcSrc& src1, const DxbcSrc& src2, + bool saturate = false) { + uint32_t dest_write_mask = dest.GetMask(); + uint32_t operands_length = + dest.GetLength() + src0.GetLength(dest_write_mask) + + src1.GetLength(dest_write_mask) + src2.GetLength(dest_write_mask); + shader_code_.reserve(shader_code_.size() + 1 + operands_length); + shader_code_.push_back(DxbcOpcodeToken(opcode, operands_length, saturate)); + dest.Write(shader_code_); + src0.Write(shader_code_, dest_write_mask, (src_are_integer & 0b1) != 0); + src1.Write(shader_code_, dest_write_mask, (src_are_integer & 0b10) != 0); + 
src2.Write(shader_code_, dest_write_mask, (src_are_integer & 0b100) != 0); + ++stat_.instruction_count; + } + void DxbcEmitAluOp(DxbcOpcode opcode, uint32_t src_are_integer, + const DxbcDest& dest, const DxbcSrc& src0, + const DxbcSrc& src1, const DxbcSrc& src2, + const DxbcSrc& src3, bool saturate = false) { + uint32_t dest_write_mask = dest.GetMask(); + uint32_t operands_length = + dest.GetLength() + src0.GetLength(dest_write_mask) + + src1.GetLength(dest_write_mask) + src2.GetLength(dest_write_mask) + + src3.GetLength(dest_write_mask); + shader_code_.reserve(shader_code_.size() + 1 + operands_length); + shader_code_.push_back(DxbcOpcodeToken(opcode, operands_length, saturate)); + dest.Write(shader_code_); + src0.Write(shader_code_, dest_write_mask, (src_are_integer & 0b1) != 0); + src1.Write(shader_code_, dest_write_mask, (src_are_integer & 0b10) != 0); + src2.Write(shader_code_, dest_write_mask, (src_are_integer & 0b100) != 0); + src3.Write(shader_code_, dest_write_mask, (src_are_integer & 0b1000) != 0); + ++stat_.instruction_count; + } + void DxbcEmitAluOp(DxbcOpcode opcode, uint32_t src_are_integer, + const DxbcDest& dest0, const DxbcDest& dest1, + const DxbcSrc& src0, const DxbcSrc& src1, + bool saturate = false) { + uint32_t dest_write_mask = dest0.GetMask() | dest1.GetMask(); + uint32_t operands_length = dest0.GetLength() + dest1.GetLength() + + src0.GetLength(dest_write_mask) + + src1.GetLength(dest_write_mask); + shader_code_.reserve(shader_code_.size() + 1 + operands_length); + shader_code_.push_back(DxbcOpcodeToken(opcode, operands_length, saturate)); + dest0.Write(shader_code_); + dest1.Write(shader_code_); + src0.Write(shader_code_, dest_write_mask, (src_are_integer & 0b1) != 0); + src1.Write(shader_code_, dest_write_mask, (src_are_integer & 0b10) != 0); + ++stat_.instruction_count; + } + void DxbcEmitFlowOp(DxbcOpcode opcode, const DxbcSrc& src, + bool test = false) { + uint32_t operands_length = src.GetLength(0b0000); + 
shader_code_.reserve(shader_code_.size() + 1 + operands_length); + shader_code_.push_back(DxbcOpcodeToken(opcode, operands_length) | + (test ? (1 << 18) : 0)); + src.Write(shader_code_, 0b0000, true); + ++stat_.instruction_count; + } + + void DxbcOpAdd(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1, + bool saturate = false) { + DxbcEmitAluOp(DxbcOpcode::kAdd, 0b00, dest, src0, src1, saturate); + ++stat_.float_instruction_count; + } + void DxbcOpAnd(const DxbcDest& dest, const DxbcSrc& src0, + const DxbcSrc& src1) { + DxbcEmitAluOp(DxbcOpcode::kAnd, 0b11, dest, src0, src1); + ++stat_.uint_instruction_count; + } + void DxbcOpCall(const DxbcSrc& label) { + DxbcEmitFlowOp(DxbcOpcode::kCall, label); + ++stat_.static_flow_control_count; + } + void DxbcOpElse() { + shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kElse, 0)); + ++stat_.instruction_count; + } + void DxbcOpDiv(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1, + bool saturate = false) { + DxbcEmitAluOp(DxbcOpcode::kDiv, 0b00, dest, src0, src1, saturate); + ++stat_.float_instruction_count; + } + void DxbcOpEndIf() { + shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kEndIf, 0)); + ++stat_.instruction_count; + } + void DxbcOpEndLoop() { + shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kEndLoop, 0)); + ++stat_.instruction_count; + } + void DxbcOpEndSwitch() { + shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kEndSwitch, 0)); + ++stat_.instruction_count; + } + void DxbcOpFToU(const DxbcDest& dest, const DxbcSrc& src) { + DxbcEmitAluOp(DxbcOpcode::kFToU, 0b0, dest, src); + ++stat_.conversion_instruction_count; + } + void DxbcOpIAdd(const DxbcDest& dest, const DxbcSrc& src0, + const DxbcSrc& src1) { + DxbcEmitAluOp(DxbcOpcode::kIAdd, 0b11, dest, src0, src1); + ++stat_.int_instruction_count; + } + void DxbcOpIf(bool test, const DxbcSrc& src) { + DxbcEmitFlowOp(DxbcOpcode::kIf, src, test); + ++stat_.dynamic_flow_control_count; + } + void DxbcOpIMAd(const DxbcDest& dest, const 
DxbcSrc& mul0, + const DxbcSrc& mul1, const DxbcSrc& add) { + DxbcEmitAluOp(DxbcOpcode::kIMAd, 0b111, dest, mul0, mul1, add); + ++stat_.int_instruction_count; + } + void DxbcOpIShL(const DxbcDest& dest, const DxbcSrc& value, + const DxbcSrc& shift) { + DxbcEmitAluOp(DxbcOpcode::kIShL, 0b11, dest, value, shift); + ++stat_.int_instruction_count; + } + void DxbcOpMAd(const DxbcDest& dest, const DxbcSrc& mul0, const DxbcSrc& mul1, + const DxbcSrc& add, bool saturate = false) { + DxbcEmitAluOp(DxbcOpcode::kMAd, 0b000, dest, mul0, mul1, add, saturate); + ++stat_.float_instruction_count; + } + void DxbcOpMin(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1, + bool saturate = false) { + DxbcEmitAluOp(DxbcOpcode::kMin, 0b00, dest, src0, src1, saturate); + ++stat_.float_instruction_count; + } + void DxbcOpMax(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1, + bool saturate = false) { + DxbcEmitAluOp(DxbcOpcode::kMax, 0b00, dest, src0, src1, saturate); + ++stat_.float_instruction_count; + } + void DxbcOpMov(const DxbcDest& dest, const DxbcSrc& src, + bool saturate = false) { + DxbcEmitAluOp(DxbcOpcode::kMov, 0b0, dest, src, saturate); + if (dest.type_ == DxbcOperandType::kIndexableTemp || + src.type_ == DxbcOperandType::kIndexableTemp) { + ++stat_.array_instruction_count; + } else { + ++stat_.mov_instruction_count; + } + } + void DxbcOpMovC(const DxbcDest& dest, const DxbcSrc& test, + const DxbcSrc& src_nz, const DxbcSrc& src_z, + bool saturate = false) { + DxbcEmitAluOp(DxbcOpcode::kMovC, 0b001, dest, test, src_nz, src_z, + saturate); + ++stat_.movc_instruction_count; + } + void DxbcOpRetC(bool test, const DxbcSrc& src) { + DxbcEmitFlowOp(DxbcOpcode::kRetC, src, test); + ++stat_.dynamic_flow_control_count; + } + void DxbcOpUGE(const DxbcDest& dest, const DxbcSrc& src0, + const DxbcSrc& src1) { + DxbcEmitAluOp(DxbcOpcode::kUGE, 0b11, dest, src0, src1); + ++stat_.uint_instruction_count; + } + void DxbcOpUMul(const DxbcDest& dest_hi, const DxbcDest& 
dest_lo, + const DxbcSrc& src0, const DxbcSrc& src1) { + DxbcEmitAluOp(DxbcOpcode::kUMul, 0b11, dest_hi, dest_lo, src0, src1); + ++stat_.uint_instruction_count; + } + void DxbcOpUMAd(const DxbcDest& dest, const DxbcSrc& mul0, + const DxbcSrc& mul1, const DxbcSrc& add) { + DxbcEmitAluOp(DxbcOpcode::kUMAd, 0b111, dest, mul0, mul1, add); + ++stat_.uint_instruction_count; + } + void DxbcOpUShR(const DxbcDest& dest, const DxbcSrc& value, + const DxbcSrc& shift) { + DxbcEmitAluOp(DxbcOpcode::kUShR, 0b11, dest, value, shift); + ++stat_.uint_instruction_count; + } + void DxbcOpXOr(const DxbcDest& dest, const DxbcSrc& src0, + const DxbcSrc& src1) { + DxbcEmitAluOp(DxbcOpcode::kXOr, 0b11, dest, src0, src1); + ++stat_.uint_instruction_count; + } + void DxbcOpDerivRTXCoarse(const DxbcDest& dest, const DxbcSrc& src, + bool saturate = false) { + DxbcEmitAluOp(DxbcOpcode::kDerivRTXCoarse, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void DxbcOpDerivRTXFine(const DxbcDest& dest, const DxbcSrc& src, + bool saturate = false) { + DxbcEmitAluOp(DxbcOpcode::kDerivRTXFine, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void DxbcOpDerivRTYCoarse(const DxbcDest& dest, const DxbcSrc& src, + bool saturate = false) { + DxbcEmitAluOp(DxbcOpcode::kDerivRTYCoarse, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void DxbcOpDerivRTYFine(const DxbcDest& dest, const DxbcSrc& src, + bool saturate = false) { + DxbcEmitAluOp(DxbcOpcode::kDerivRTYFine, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void DxbcOpUBFE(const DxbcDest& dest, const DxbcSrc& width, + const DxbcSrc& offset, const DxbcSrc& src) { + DxbcEmitAluOp(DxbcOpcode::kUBFE, 0b111, dest, width, offset, src); + ++stat_.uint_instruction_count; + } + void DxbcOpIBFE(const DxbcDest& dest, const DxbcSrc& width, + const DxbcSrc& offset, const DxbcSrc& src) { + DxbcEmitAluOp(DxbcOpcode::kIBFE, 0b111, dest, width, offset, src); + ++stat_.int_instruction_count; + } 
+ void DxbcOpBFI(const DxbcDest& dest, const DxbcSrc& width, + const DxbcSrc& offset, const DxbcSrc& from, + const DxbcSrc& to) { + DxbcEmitAluOp(DxbcOpcode::kBFI, 0b1111, dest, width, offset, from, to); + ++stat_.uint_instruction_count; + } + void DxbcOpEvalSampleIndex(const DxbcDest& dest, const DxbcSrc& value, + const DxbcSrc& sample_index) { + uint32_t dest_write_mask = dest.GetMask(); + uint32_t operands_length = dest.GetLength() + + value.GetLength(dest_write_mask) + + sample_index.GetLength(0b0000); + shader_code_.reserve(shader_code_.size() + 1 + operands_length); + shader_code_.push_back( + DxbcOpcodeToken(DxbcOpcode::kEvalSampleIndex, operands_length)); + dest.Write(shader_code_); + value.Write(shader_code_, dest_write_mask, false); + sample_index.Write(shader_code_, 0b0000, true); + ++stat_.instruction_count; + } + enum : uint32_t { kSysConst_Flags_Index = 0, kSysConst_Flags_Vec = 0, diff --git a/src/xenia/gpu/dxbc_shader_translator_om.cc b/src/xenia/gpu/dxbc_shader_translator_om.cc index 5c0e8131a..fe0c9a9b2 100644 --- a/src/xenia/gpu/dxbc_shader_translator_om.cc +++ b/src/xenia/gpu/dxbc_shader_translator_om.cc @@ -154,228 +154,96 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() { // 4 (-> 1) to a temp SGPR. 
uint32_t resolution_scale_log2_temp = PushSystemTemp(); system_constants_used_ |= 1ull << kSysConst_EDRAMResolutionSquareScale_Index; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(resolution_scale_log2_temp); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - kSysConst_EDRAMResolutionSquareScale_Comp, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMResolutionSquareScale_Vec); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(2); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - + DxbcOpUShR(DxbcDest::R(resolution_scale_log2_temp, 0b0001), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EDRAMResolutionSquareScale_Vec) + .Select(kSysConst_EDRAMResolutionSquareScale_Comp), + DxbcSrc::LU(2)); // Convert the pixel position (if resolution scale is 4, this will be 2x2 // bigger) to integer to system_temp_rov_params_.zw. 
// system_temp_rov_params_.z = X host pixel position as uint // system_temp_rov_params_.w = Y host pixel position as uint - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_FTOU) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1100, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0b01000000, 1)); - shader_code_.push_back(uint32_t(InOutRegister::kPSInPosition)); - ++stat_.instruction_count; - ++stat_.conversion_instruction_count; - + DxbcOpFToU(DxbcDest::R(system_temp_rov_params_, 0b1100), + DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition), 0b01000000)); // Revert the resolution scale to convert the position to guest pixels. // system_temp_rov_params_.z = X guest pixel position / sample width // system_temp_rov_params_.w = Y guest pixel position / sample height - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1100, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(resolution_scale_log2_temp); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; + DxbcOpUShR(DxbcDest::R(system_temp_rov_params_, 0b1100), + DxbcSrc::R(system_temp_rov_params_), + DxbcSrc::R(resolution_scale_log2_temp, DxbcSrc::kXXXX)); // Convert the position from pixels to samples. 
// system_temp_rov_params_.z = X guest sample 0 position // system_temp_rov_params_.w = Y guest sample 0 position system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ISHL) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1100, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - (kSysConst_SampleCountLog2_Comp << 4) | - ((kSysConst_SampleCountLog2_Comp + 1) << 6), - 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_SampleCountLog2_Vec); - ++stat_.instruction_count; - ++stat_.int_instruction_count; - + DxbcOpIShL(DxbcDest::R(system_temp_rov_params_, 0b1100), + DxbcSrc::R(system_temp_rov_params_), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_SampleCountLog2_Vec, + (kSysConst_SampleCountLog2_Comp << 4) | + ((kSysConst_SampleCountLog2_Comp + 1) << 6))); // Get 80x16 samples tile index - start dividing X by 80 by getting the high // part of the result of multiplication of X by 0xCCCCCCCD into X. 
// system_temp_rov_params_.x = (X * 0xCCCCCCCD) >> 32, or X / 80 * 64 // system_temp_rov_params_.z = X guest sample 0 position // system_temp_rov_params_.w = Y guest sample 0 position - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMUL) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_NULL, 0)); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(0xCCCCCCCDu); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - + DxbcOpUMul(DxbcDest::R(system_temp_rov_params_, 0b0001), DxbcDest::Null(), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), + DxbcSrc::LU(0xCCCCCCCDu)); // Get 80x16 samples tile index - finish dividing X by 80 and divide Y by 16 // into system_temp_rov_params_.xy. 
// system_temp_rov_params_.x = X tile position // system_temp_rov_params_.y = Y tile position // system_temp_rov_params_.z = X guest sample 0 position // system_temp_rov_params_.w = Y guest sample 0 position - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_USHR) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b00001100, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0)); - shader_code_.push_back(6); - shader_code_.push_back(4); - shader_code_.push_back(0); - shader_code_.push_back(0); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - + DxbcOpUShR(DxbcDest::R(system_temp_rov_params_, 0b0011), + DxbcSrc::R(system_temp_rov_params_, 0b00001100), + DxbcSrc::LU(6, 4, 0, 0)); // Get the tile index to system_temp_rov_params_.y. 
// system_temp_rov_params_.x = X tile position // system_temp_rov_params_.y = tile index // system_temp_rov_params_.z = X guest sample 0 position // system_temp_rov_params_.w = Y guest sample 0 position system_constants_used_ |= 1ull << kSysConst_EDRAMPitchTiles_Index; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMAD) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - kSysConst_EDRAMPitchTiles_Comp, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMPitchTiles_Vec); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temp_rov_params_); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - + DxbcOpUMAd(DxbcDest::R(system_temp_rov_params_, 0b0010), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EDRAMPitchTiles_Vec) + .Select(kSysConst_EDRAMPitchTiles_Comp), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX)); // Convert the tile index into a tile offset. 
// system_temp_rov_params_.x = X tile position // system_temp_rov_params_.y = tile offset // system_temp_rov_params_.z = X guest sample 0 position // system_temp_rov_params_.w = Y guest sample 0 position - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UMUL) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); - shader_code_.push_back( - EncodeZeroComponentOperand(D3D10_SB_OPERAND_TYPE_NULL, 0)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(1280); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - + DxbcOpUMul(DxbcDest::Null(), DxbcDest::R(system_temp_rov_params_, 0b0010), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), + DxbcSrc::LU(1280)); // Get tile-local X sample index into system_temp_rov_params_.z. 
// system_temp_rov_params_.y = tile offset // system_temp_rov_params_.z = X sample 0 position within the tile // system_temp_rov_params_.w = Y guest sample 0 position - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IMAD) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(uint32_t(-80)); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(system_temp_rov_params_); - ++stat_.instruction_count; - ++stat_.int_instruction_count; - + DxbcOpIMAd(DxbcDest::R(system_temp_rov_params_, 0b0100), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), + DxbcSrc::LI(-80), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ)); // Get tile-local Y sample index into system_temp_rov_params_.w. 
// system_temp_rov_params_.y = tile offset // system_temp_rov_params_.z = X sample 0 position within the tile // system_temp_rov_params_.w = Y sample 0 position within the tile - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(15); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - + DxbcOpAnd(DxbcDest::R(system_temp_rov_params_, 0b1000), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW), + DxbcSrc::LU(15)); // Go to the target row within the tile in system_temp_rov_params_.y. // system_temp_rov_params_.y = row offset // system_temp_rov_params_.z = X sample 0 position within the tile - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IMAD) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 3, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(80); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(system_temp_rov_params_); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - + DxbcOpIMAd(DxbcDest::R(system_temp_rov_params_, 0b0010), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW), + DxbcSrc::LI(80), + DxbcSrc::R(system_temp_rov_params_, 
DxbcSrc::kYYYY)); // Choose in which 40-sample half of the tile the pixel is, for swapping // 40-sample columns when accessing the depth buffer - games expect this // behavior when writing depth back to the EDRAM via color writing (GTA IV, @@ -383,218 +251,78 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() { // system_temp_rov_params_.x = tile-local sample 0 X >= 40 // system_temp_rov_params_.y = row offset // system_temp_rov_params_.z = X sample 0 position within the tile - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UGE) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(40); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - + DxbcOpUGE(DxbcDest::R(system_temp_rov_params_, 0b0001), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), + DxbcSrc::LU(40)); // Choose what to add to the depth/stencil X position. 
// system_temp_rov_params_.x = 40 or -40 offset for the depth buffer // system_temp_rov_params_.y = row offset // system_temp_rov_params_.z = X sample 0 position within the tile - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(uint32_t(-40)); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(40); - ++stat_.instruction_count; - ++stat_.movc_instruction_count; - + DxbcOpMovC(DxbcDest::R(system_temp_rov_params_, 0b0001), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), + DxbcSrc::LI(-40), DxbcSrc::LI(40)); // Flip tile halves for the depth/stencil buffer. 
// system_temp_rov_params_.x = X sample 0 position within the depth tile // system_temp_rov_params_.y = row offset // system_temp_rov_params_.z = X sample 0 position within the tile - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temp_rov_params_); - ++stat_.instruction_count; - ++stat_.int_instruction_count; - + DxbcOpIAdd(DxbcDest::R(system_temp_rov_params_, 0b0001), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX)); if (color_targets_written) { // Write 32bpp color offset to system_temp_rov_params_.z. 
// system_temp_rov_params_.x = X sample 0 position within the depth tile // system_temp_rov_params_.y = row offset // system_temp_rov_params_.z = unscaled 32bpp color offset - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(system_temp_rov_params_); - ++stat_.instruction_count; - ++stat_.int_instruction_count; + DxbcOpIAdd(DxbcDest::R(system_temp_rov_params_, 0b0100), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ)); } - // Write depth/stencil offset to system_temp_rov_params_.y. // system_temp_rov_params_.y = unscaled 32bpp depth/stencil offset // system_temp_rov_params_.z = unscaled 32bpp color offset if needed - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temp_rov_params_); - ++stat_.instruction_count; - ++stat_.int_instruction_count; - + DxbcOpIAdd(DxbcDest::R(system_temp_rov_params_, 0b0010), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX)); // Add the EDRAM base for depth/stencil. 
// system_temp_rov_params_.y = unscaled 32bpp depth/stencil address // system_temp_rov_params_.z = unscaled 32bpp color offset if needed system_constants_used_ |= 1ull << kSysConst_EDRAMDepthBaseDwords_Index; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - kSysConst_EDRAMDepthBaseDwords_Comp, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMDepthBaseDwords_Vec); - ++stat_.instruction_count; - ++stat_.int_instruction_count; + DxbcOpIAdd(DxbcDest::R(system_temp_rov_params_, 0b0010), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EDRAMDepthBaseDwords_Vec) + .Select(kSysConst_EDRAMDepthBaseDwords_Comp)); // Apply the resolution scale. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | - ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( - D3D10_SB_INSTRUCTION_TEST_NONZERO) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(resolution_scale_log2_temp); - ++stat_.instruction_count; - ++stat_.dynamic_flow_control_count; - + DxbcOpIf(true, DxbcSrc::R(resolution_scale_log2_temp, DxbcSrc::kXXXX)); // Release resolution_scale_log2_temp. 
PopSystemTemp(); - - uint32_t offsets_masked, offsets_select; - uint32_t offsets_immediate, offsets_components; - if (color_targets_written) { - offsets_masked = - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0110, 1); - offsets_select = EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, - kSwizzleXYZW, 1); - offsets_immediate = EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0); - offsets_components = 4; - } else { - offsets_masked = - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1); - offsets_select = - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1); - offsets_immediate = - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0); - offsets_components = 1; - } - - // Scale the offsets by the resolution scale. - // system_temp_rov_params_.y = scaled 32bpp depth/stencil first host pixel - // address - // system_temp_rov_params_.z = scaled 32bpp color first host pixel offset if - // needed - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ISHL) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6 + offsets_components)); - shader_code_.push_back(offsets_masked); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back(offsets_select); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back(offsets_immediate); - for (uint32_t i = 0; i < offsets_components; ++i) { - shader_code_.push_back(2); - } - ++stat_.instruction_count; - ++stat_.int_instruction_count; - - // Add host pixel offsets. - // system_temp_rov_params_.y = scaled 32bpp depth/stencil address - // system_temp_rov_params_.z = scaled 32bpp color offset if needed - for (uint32_t i = 0; i < 2; ++i) { - // Convert a position component to integer. 
- shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_FTOU) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_INPUT, i, 1)); - shader_code_.push_back(uint32_t(InOutRegister::kPSInPosition)); - ++stat_.instruction_count; - ++stat_.conversion_instruction_count; - - // Insert the host pixel offset on each axis. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_BFI) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH( - 9 + offsets_components * 2)); - shader_code_.push_back(offsets_masked); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back(offsets_immediate); - for (uint32_t j = 0; j < offsets_components; ++j) { - shader_code_.push_back(1); + { + DxbcDest offsets_dest(DxbcDest::R(system_temp_rov_params_, + color_targets_written ? 0b0110 : 0b0010)); + // Scale the offsets by the resolution scale. + // system_temp_rov_params_.y = scaled 32bpp depth/stencil first host pixel + // address + // system_temp_rov_params_.z = scaled 32bpp color first host pixel offset if + // needed + DxbcOpIShL(offsets_dest, DxbcSrc::R(system_temp_rov_params_), + DxbcSrc::LU(2)); + // Add host pixel offsets. + // system_temp_rov_params_.y = scaled 32bpp depth/stencil address + // system_temp_rov_params_.z = scaled 32bpp color offset if needed + for (uint32_t i = 0; i < 2; ++i) { + // Convert a position component to integer. + DxbcOpFToU(DxbcDest::R(system_temp_rov_params_, 0b0001), + DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition)).Select(i)); + // Insert the host pixel offset on each axis. 
+ DxbcOpBFI(offsets_dest, DxbcSrc::LU(1), DxbcSrc::LU(i), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), + DxbcSrc::R(system_temp_rov_params_)); } - shader_code_.push_back(offsets_immediate); - for (uint32_t j = 0; j < offsets_components; ++j) { - shader_code_.push_back(i); - } - if (color_targets_written) { - shader_code_.push_back( - EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - } else { - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - } - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back(offsets_select); - shader_code_.push_back(system_temp_rov_params_); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; } - // Close the resolution scale conditional. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); - ++stat_.instruction_count; + DxbcOpEndIf(); if (color_targets_written) { // Get the 64bpp color offset to system_temp_rov_params_.w. 
@@ -603,19 +331,9 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() { // system_temp_rov_params_.y = scaled 32bpp depth/stencil address // system_temp_rov_params_.z = scaled 32bpp color offset // system_temp_rov_params_.w = scaled 64bpp color offset - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ISHL) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1000, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(1); - ++stat_.instruction_count; - ++stat_.int_instruction_count; + DxbcOpIShL(DxbcDest::R(system_temp_rov_params_, 0b1000), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), + DxbcSrc::LU(1)); } // *************************************************************************** @@ -627,80 +345,30 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() { // Check if 4x MSAA is enabled. system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | - ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( - D3D10_SB_INSTRUCTION_TEST_NONZERO) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - kSysConst_SampleCountLog2_Comp, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_SampleCountLog2_Vec); - ++stat_.instruction_count; - ++stat_.dynamic_flow_control_count; - - // Copy the 4x AA coverage to system_temp_rov_params_.x. 
- shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back(EncodeVectorSelectOperand( - D3D11_SB_OPERAND_TYPE_INPUT_COVERAGE_MASK, 0, 0)); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back((1 << 4) - 1); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - + DxbcOpIf(true, DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_SampleCountLog2_Vec) + .Select(kSysConst_SampleCountLog2_Comp)); + { + // Copy the 4x AA coverage to system_temp_rov_params_.x. + DxbcOpAnd(DxbcDest::R(system_temp_rov_params_, 0b0001), + DxbcSrc::VCoverage(), DxbcSrc::LU((1 << 4) - 1)); + } // Handle 1 or 2 samples. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ELSE) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); - ++stat_.instruction_count; - - // Extract sample 3 coverage, which will be used as sample 1. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(1); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(3); - shader_code_.push_back(EncodeVectorSelectOperand( - D3D11_SB_OPERAND_TYPE_INPUT_COVERAGE_MASK, 0, 0)); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - - // Combine coverage of samples 0 (in bit 0 of vCoverage) and 3 (in bit 0 of - // system_temp_rov_params_.x). 
- shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_BFI) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(10)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(31); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(1); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back(EncodeVectorSelectOperand( - D3D11_SB_OPERAND_TYPE_INPUT_COVERAGE_MASK, 0, 0)); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - + DxbcOpElse(); + { + // Extract sample 3 coverage, which will be used as sample 1. + DxbcOpUBFE(DxbcDest::R(system_temp_rov_params_, 0b0001), DxbcSrc::LU(1), + DxbcSrc::LU(3), DxbcSrc::VCoverage()); + // Combine coverage of samples 0 (in bit 0 of vCoverage) and 3 (in bit 0 of + // system_temp_rov_params_.x). + DxbcOpBFI(DxbcDest::R(system_temp_rov_params_, 0b0001), DxbcSrc::LU(31), + DxbcSrc::LU(1), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), + DxbcSrc::VCoverage()); + } // Close the 4x MSAA conditional. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); - ++stat_.instruction_count; + DxbcOpEndIf(); } void DxbcShaderTranslator::ROV_DepthStencilTest() { @@ -709,132 +377,55 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // Check whether depth/stencil is enabled. 1 SGPR taken. 
// temp1.x = kSysFlag_ROVDepthStencil system_constants_used_ |= 1ull << kSysConst_Flags_Index; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(temp1); - shader_code_.push_back(EncodeVectorSelectOperand( - D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSysConst_Flags_Comp, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_Flags_Vec); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(kSysFlag_ROVDepthStencil); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - + DxbcOpAnd(DxbcDest::R(temp1, 0b0001), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp), + DxbcSrc::LU(kSysFlag_ROVDepthStencil)); // Open the depth/stencil enabled conditional. 1 SGPR released. // temp1.x = free - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | - ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( - D3D10_SB_INSTRUCTION_TEST_NONZERO) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(temp1); - ++stat_.instruction_count; - ++stat_.dynamic_flow_control_count; + DxbcOpIf(true, DxbcSrc::R(temp1, DxbcSrc::kXXXX)); if (writes_depth()) { // Convert the shader-generated depth to 24-bit - move the 32-bit depth to // the conversion subroutine's argument. 
- shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temp_rov_depth_stencil_); - ++stat_.instruction_count; - ++stat_.mov_instruction_count; - + DxbcOpMov(DxbcDest::R(system_temps_subroutine_, 0b0001), + DxbcSrc::R(system_temp_rov_depth_stencil_, DxbcSrc::kXXXX)); // Convert the shader-generated depth to 24-bit. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_CALL) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); - shader_code_.push_back(EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_LABEL, 1)); - shader_code_.push_back(label_rov_depth_to_24bit_); - ++stat_.instruction_count; - ++stat_.static_flow_control_count; - + DxbcOpCall(DxbcSrc::Label(label_rov_depth_to_24bit_)); // Store a copy of the depth in temp1.x to reload later. // temp1.x = 24-bit oDepth - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temps_subroutine_); - ++stat_.instruction_count; - ++stat_.mov_instruction_count; + DxbcOpMov(DxbcDest::R(temp1, 0b0001), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX)); } else { // Load the first sample's Z and W to system_temps_subroutine_[0] - need // this regardless of coverage for polygon offset. 
- shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_EVAL_SAMPLE_INDEX) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_INPUT, kSwizzleXYZW, 1)); - shader_code_.push_back(uint32_t(InOutRegister::kPSInClipSpaceZW)); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(0); - ++stat_.instruction_count; - + DxbcOpEvalSampleIndex(DxbcDest::R(system_temps_subroutine_, 0b0011), + DxbcSrc::V(uint32_t(InOutRegister::kPSInClipSpaceZW)), + DxbcSrc::LU(0)); // Calculate the first sample's Z/W to system_temps_subroutine_[0].x for // conversion to 24-bit and depth test. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DIV) | - ENCODE_D3D10_SB_INSTRUCTION_SATURATE(1) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(system_temps_subroutine_); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - + DxbcOpDiv(DxbcDest::R(system_temps_subroutine_, 0b0001), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kYYYY), true); // Apply viewport Z range to the first sample because this would affect the // slope-scaled depth bias (tested on PC on Direct3D 12, by comparing the // fraction of the polygon's area with depth clamped - affected by the // constant bias, but not affected by the slope-scaled 
bias, also depth // range clamping should be done after applying the offset as well). system_constants_used_ |= 1ull << kSysConst_EDRAMDepthRange_Index; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) | - ENCODE_D3D10_SB_INSTRUCTION_SATURATE(1) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - kSysConst_EDRAMDepthRangeScale_Comp, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMDepthRange_Vec); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - kSysConst_EDRAMDepthRangeOffset_Comp, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMDepthRange_Vec); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - + DxbcOpMAd(DxbcDest::R(system_temps_subroutine_, 0b0001), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EDRAMDepthRange_Vec) + .Select(kSysConst_EDRAMDepthRangeScale_Comp), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EDRAMDepthRange_Vec) + .Select(kSysConst_EDRAMDepthRangeOffset_Comp), + true); // Get the derivatives of a sample's depth, for the slope-scaled polygon // offset. 
Probably not very significant that it's for the sample 0 rather // than for the center, likely neither is accurate because Xenos probably @@ -842,125 +433,59 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // the slope-scaled polygon offset in R5xx Acceleration. Take 2 VGPRs. // temp1.x = ddx(z) // temp1.y = ddy(z) - for (uint32_t i = 0; i < 2; ++i) { - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(i ? D3D11_SB_OPCODE_DERIV_RTY_COARSE - : D3D11_SB_OPCODE_DERIV_RTX_COARSE) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); - shader_code_.push_back(EncodeVectorMaskedOperand( - D3D10_SB_OPERAND_TYPE_TEMP, 0b0001 << i, 1)); - shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temps_subroutine_); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - } - + DxbcOpDerivRTXCoarse(DxbcDest::R(temp1, 0b0001), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX)); + DxbcOpDerivRTYCoarse(DxbcDest::R(temp1, 0b0010), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX)); // Get the maximum depth slope for polygon offset to temp1.y. // Release 1 VGPR (Y derivative). 
// temp1.x = max(|ddx(z)|, |ddy(z)|) // temp1.y = free // https://docs.microsoft.com/en-us/windows/desktop/direct3d9/depth-bias - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1) | - ENCODE_D3D10_SB_OPERAND_EXTENDED(1)); - shader_code_.push_back(ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER( - D3D10_SB_OPERAND_MODIFIER_ABS)); - shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1) | - ENCODE_D3D10_SB_OPERAND_EXTENDED(1)); - shader_code_.push_back(ENCODE_D3D10_SB_EXTENDED_OPERAND_MODIFIER( - D3D10_SB_OPERAND_MODIFIER_ABS)); - shader_code_.push_back(temp1); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - + DxbcOpMax(DxbcDest::R(temp1, 0b0001), + DxbcSrc::R(temp1, DxbcSrc::kXXXX).Abs(), + DxbcSrc::R(temp1, DxbcSrc::kYYYY).Abs()); // Copy the needed polygon offset values to temp1.yz. Take 2 VGPRs. 
// temp1.x = max(|ddx(z)|, |ddy(z)|) // temp1.y = polygon offset scale // temp1.z = polygon offset bias system_constants_used_ |= (1ull << kSysConst_EDRAMPolyOffsetFront_Index) | (1ull << kSysConst_EDRAMPolyOffsetBack_Index); - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0110, 1)); - shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0, 1)); - shader_code_.push_back(uint32_t(InOutRegister::kPSInFrontFace)); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - (kSysConst_EDRAMPolyOffsetFrontScale_Comp << 2) | - (kSysConst_EDRAMPolyOffsetFrontOffset_Comp << 4), - 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMPolyOffsetFront_Vec); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - (kSysConst_EDRAMPolyOffsetBackScale_Comp << 2) | - (kSysConst_EDRAMPolyOffsetBackOffset_Comp << 4), - 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMPolyOffsetBack_Vec); - ++stat_.instruction_count; - ++stat_.movc_instruction_count; - + DxbcOpMovC( + DxbcDest::R(temp1, 0b0110), + DxbcSrc::V(uint32_t(InOutRegister::kPSInFrontFace), DxbcSrc::kXXXX), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EDRAMPolyOffsetFront_Vec, + (kSysConst_EDRAMPolyOffsetFrontScale_Comp << 2) | + (kSysConst_EDRAMPolyOffsetFrontOffset_Comp << 4)), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EDRAMPolyOffsetBack_Vec, + 
(kSysConst_EDRAMPolyOffsetBackScale_Comp << 2) | + (kSysConst_EDRAMPolyOffsetBackOffset_Comp << 4))); // Apply the slope scale and the constant bias to the offset, and release 2 // VGPRs. // temp1.x = polygon offset // temp1.y = free // temp1.z = free - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(temp1); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - + DxbcOpMAd(DxbcDest::R(temp1, 0b0001), DxbcSrc::R(temp1, DxbcSrc::kYYYY), + DxbcSrc::R(temp1, DxbcSrc::kXXXX), + DxbcSrc::R(temp1, DxbcSrc::kZZZZ)); // Calculate the upper Z range bound to temp1.y for clamping after biasing, // taking 1 SGPR. 
// temp1.x = polygon offset // temp1.y = viewport maximum depth system_constants_used_ |= 1ull << kSysConst_EDRAMDepthRange_Index; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); - shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - kSysConst_EDRAMDepthRangeOffset_Comp, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMDepthRange_Vec); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - kSysConst_EDRAMDepthRangeScale_Comp, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMDepthRange_Vec); - ++stat_.instruction_count; - ++stat_.float_instruction_count; + DxbcOpAdd(DxbcDest::R(temp1, 0b0010), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EDRAMDepthRange_Vec) + .Select(kSysConst_EDRAMDepthRangeOffset_Comp), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EDRAMDepthRange_Vec) + .Select(kSysConst_EDRAMDepthRangeScale_Comp)); } for (uint32_t i = 0; i < 4; ++i) { @@ -968,33 +493,14 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // temp1.x = polygon offset or 24-bit oDepth // temp1.y = viewport maximum depth if not writing to oDepth // temp1.z = coverage of the current sample - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0100, 1)); - 
shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(1 << i); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - + DxbcOpAnd(DxbcDest::R(temp1, 0b0100), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), + DxbcSrc::LU(1 << i)); // Check if the current sample is covered. Release 1 VGPR. // temp1.x = polygon offset or 24-bit oDepth // temp1.y = viewport maximum depth if not writing to oDepth // temp1.z = free - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) | - ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( - D3D10_SB_INSTRUCTION_TEST_NONZERO) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1)); - shader_code_.push_back(temp1); - ++stat_.instruction_count; - ++stat_.dynamic_flow_control_count; + DxbcOpIf(true, DxbcSrc::R(temp1, DxbcSrc::kZZZZ)); if (writes_depth()) { // Same depth for all samples, already converted to 24-bit - only move it @@ -1002,17 +508,8 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // not already there (it's there for the first sample - returned from the // conversion to 24-bit). 
if (i) { - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(temp1); - ++stat_.instruction_count; - ++stat_.mov_instruction_count; + DxbcOpMov(DxbcDest::R(system_temps_subroutine_, 0b0001), + DxbcSrc::R(temp1, DxbcSrc::kXXXX)); } } else { if (i) { @@ -1028,247 +525,96 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // 2x MSAA, handling samples 0 and 3 (upper-left and lower-right) as 0 // and 1. Thus, evaluate Z/W at sample 3 when 4x is not enabled. system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - kSysConst_SampleCountLog2_Comp, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_SampleCountLog2_Vec); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(3); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(1); - ++stat_.instruction_count; - ++stat_.movc_instruction_count; - - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_EVAL_SAMPLE_INDEX) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 
0b0011, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_INPUT, kSwizzleXYZW, 1)); - shader_code_.push_back(uint32_t(InOutRegister::kPSInClipSpaceZW)); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temps_subroutine_); - ++stat_.instruction_count; + DxbcOpMovC(DxbcDest::R(system_temps_subroutine_, 0b0001), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_SampleCountLog2_Vec) + .Select(kSysConst_SampleCountLog2_Comp), + DxbcSrc::LU(3), DxbcSrc::LU(1)); + DxbcOpEvalSampleIndex( + DxbcDest::R(system_temps_subroutine_, 0b0011), + DxbcSrc::V(uint32_t(InOutRegister::kPSInClipSpaceZW)), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX)); } else { - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_EVAL_SAMPLE_INDEX) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back(EncodeVectorSwizzledOperand( - D3D10_SB_OPERAND_TYPE_INPUT, kSwizzleXYZW, 1)); - shader_code_.push_back(uint32_t(InOutRegister::kPSInClipSpaceZW)); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(1 << i); - ++stat_.instruction_count; + DxbcOpEvalSampleIndex( + DxbcDest::R(system_temps_subroutine_, 0b0011), + DxbcSrc::V(uint32_t(InOutRegister::kPSInClipSpaceZW)), + DxbcSrc::LU(i)); } - // Calculate Z/W for the current sample from the evaluated Z and W. 
- shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DIV) | - ENCODE_D3D10_SB_INSTRUCTION_SATURATE(1) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(system_temps_subroutine_); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - + DxbcOpDiv(DxbcDest::R(system_temps_subroutine_, 0b0001), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kYYYY), true); // Apply viewport Z range the same way as it was applied to sample 0. system_constants_used_ |= 1ull << kSysConst_EDRAMDepthRange_Index; - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAD) | - ENCODE_D3D10_SB_INSTRUCTION_SATURATE(1) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(13)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - kSysConst_EDRAMDepthRangeScale_Comp, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMDepthRange_Vec); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - kSysConst_EDRAMDepthRangeOffset_Comp, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - 
shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMDepthRange_Vec); - ++stat_.instruction_count; - ++stat_.float_instruction_count; + DxbcOpMAd(DxbcDest::R(system_temps_subroutine_, 0b0001), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EDRAMDepthRange_Vec) + .Select(kSysConst_EDRAMDepthRangeScale_Comp), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EDRAMDepthRange_Vec) + .Select(kSysConst_EDRAMDepthRangeOffset_Comp), + true); } - // Add the bias to the depth of the sample. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ADD) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(temp1); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - + DxbcOpAdd(DxbcDest::R(system_temps_subroutine_, 0b0001), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX), + DxbcSrc::R(temp1, DxbcSrc::kXXXX)); // Clamp the biased depth to the lower viewport depth bound. 
system_constants_used_ |= 1ull << kSysConst_EDRAMDepthRange_Index; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MAX) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - kSysConst_EDRAMDepthRangeOffset_Comp, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMDepthRange_Vec); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - + DxbcOpMax(DxbcDest::R(system_temps_subroutine_, 0b0001), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EDRAMDepthRange_Vec) + .Select(kSysConst_EDRAMDepthRangeOffset_Comp)); // Clamp the biased depth to the upper viewport depth bound. 
- shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MIN) | - ENCODE_D3D10_SB_INSTRUCTION_SATURATE(1) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(temp1); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - + DxbcOpMin(DxbcDest::R(system_temps_subroutine_, 0b0001), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX), + DxbcSrc::R(temp1, DxbcSrc::kYYYY), true); // Convert the depth to 24-bit - takes system_temps_subroutine_[0].x, // returns also in system_temps_subroutine_[0].x. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_CALL) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_LABEL, 1)); - shader_code_.push_back(label_rov_depth_to_24bit_); - ++stat_.instruction_count; - ++stat_.static_flow_control_count; + DxbcOpCall(DxbcSrc::Label(label_rov_depth_to_24bit_)); } // Perform depth/stencil test for the sample, get the result in bits 4 // (passed) and 8 (new depth/stencil buffer value is different). - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_CALL) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); - shader_code_.push_back(EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_LABEL, 1)); - shader_code_.push_back(label_rov_depth_stencil_sample_); - ++stat_.instruction_count; - ++stat_.static_flow_control_count; - + DxbcOpCall(DxbcSrc::Label(label_rov_depth_stencil_sample_)); // Write the resulting depth/stencil value in system_temps_subroutine_[0].x // to the sample's depth in system_temp_rov_depth_stencil_. 
- shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1 << i, 1)); - shader_code_.push_back(system_temp_rov_depth_stencil_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temps_subroutine_); - ++stat_.instruction_count; - ++stat_.mov_instruction_count; - + DxbcOpMov(DxbcDest::R(system_temp_rov_depth_stencil_, 1 << i), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kXXXX)); if (i) { // Shift the result bits to the correct position. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ISHL) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(system_temps_subroutine_); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(i); - ++stat_.instruction_count; - ++stat_.int_instruction_count; + DxbcOpIShL(DxbcDest::R(system_temps_subroutine_, 0b0010), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kYYYY), + DxbcSrc::LU(i)); } - // Add the result in system_temps_subroutine_[0].y to // system_temp_rov_params_.x. Bits 0:3 will be cleared in case of test // failure (only doing this for covered samples), bits 4:7 will be added if // need to defer writing. 
- shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_XOR) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(system_temps_subroutine_); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; + DxbcOpXOr(DxbcDest::R(system_temp_rov_params_, 0b0001), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), + DxbcSrc::R(system_temps_subroutine_, DxbcSrc::kYYYY)); // Close the sample conditional. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); - ++stat_.instruction_count; + DxbcOpEndIf(); // Go to the next sample (samples are at +0, +80, +1, +81, so need to do // +80, -79, +80 and -81 after each sample). system_constants_used_ |= 1ull << kSysConst_EDRAMResolutionSquareScale_Index; - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IMAD) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back((i & 1) ? 
-78 - i : 80); - shader_code_.push_back(EncodeVectorSelectOperand( - D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, - kSysConst_EDRAMResolutionSquareScale_Comp, 3)); - shader_code_.push_back(cbuffer_index_system_constants_); - shader_code_.push_back(uint32_t(CbufferRegister::kSystemConstants)); - shader_code_.push_back(kSysConst_EDRAMResolutionSquareScale_Vec); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(system_temp_rov_params_); - ++stat_.instruction_count; - ++stat_.int_instruction_count; + DxbcOpIMAd(DxbcDest::R(system_temp_rov_params_, 0b0010), + DxbcSrc::LI((i & 1) ? -78 - i : 80), + DxbcSrc::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EDRAMResolutionSquareScale_Vec) + .Select(kSysConst_EDRAMResolutionSquareScale_Comp), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY)); } if (ROV_IsDepthStencilEarly()) { @@ -1279,96 +625,38 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // reject at 2x2 quad granularity because texture fetches need derivatives. 
// temp1.x = coverage | deferred depth/stencil write - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_AND) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(system_temp_rov_params_); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(0b11111111); - ++stat_.instruction_count; - ++stat_.uint_instruction_count; - + DxbcOpAnd(DxbcDest::R(temp1, 0b0001), + DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), + DxbcSrc::LU(0b11111111)); // temp1.x = 1.0 if any sample is covered or potentially needs stencil write // in the end of the shader in the current pixel - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(0x3F800000); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(0); - ++stat_.instruction_count; - ++stat_.movc_instruction_count; - - for (uint32_t i = 0; i < 2; ++i) { - // temp1.x = 1.0 if anything is covered in the current pixel (i = 0) / - // the current half of the quad (i = 1) - // temp1.y = non-zero if anything is covered in the pixel across X - // (i = 0) / the two pixels across Y (i = 1) - shader_code_.push_back( - ENCODE_D3D10_SB_OPCODE_TYPE(i ? 
D3D11_SB_OPCODE_DERIV_RTY_COARSE - : D3D11_SB_OPCODE_DERIV_RTX_FINE) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0010, 1)); - shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(temp1); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - - // temp1.x = 1.0 if anything is covered in the current half of the quad - // (i = 0) / the whole quad (i = 1) - // temp1.y = free - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOVC) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(9)); - shader_code_.push_back( - EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1)); - shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1)); - shader_code_.push_back(temp1); - shader_code_.push_back( - EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0)); - shader_code_.push_back(0x3F800000); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(temp1); - ++stat_.instruction_count; - ++stat_.movc_instruction_count; - } - + DxbcOpMovC(DxbcDest::R(temp1, 0b0001), DxbcSrc::R(temp1, DxbcSrc::kXXXX), + DxbcSrc::LF(1.0f), DxbcSrc::LF(0.0f)); + // temp1.x = 1.0 if any sample is covered or potentially needs stencil write + // in the end of the shader in the current pixel + // temp1.y = non-zero if anything is covered in the pixel across X + DxbcOpDerivRTXFine(DxbcDest::R(temp1, 0b0010), + DxbcSrc::R(temp1, DxbcSrc::kXXXX)); + // temp1.x = 1.0 if anything is covered in the current half of the quad + // temp1.y = free + DxbcOpMovC(DxbcDest::R(temp1, 0b0001), DxbcSrc::R(temp1, DxbcSrc::kYYYY), + DxbcSrc::LF(1.0f), DxbcSrc::R(temp1, DxbcSrc::kXXXX)); + // temp1.x = 1.0 if anything is covered in the current half of the quad + // temp1.y = non-zero 
if anything is covered in the two pixels across Y + DxbcOpDerivRTYCoarse(DxbcDest::R(temp1, 0b0010), + DxbcSrc::R(temp1, DxbcSrc::kXXXX)); + // temp1.x = 1.0 if anything is covered in the current whole quad + // temp1.y = free + DxbcOpMovC(DxbcDest::R(temp1, 0b0001), DxbcSrc::R(temp1, DxbcSrc::kYYYY), + DxbcSrc::LF(1.0f), DxbcSrc::R(temp1, DxbcSrc::kXXXX)); // End the shader if nothing is covered in the 2x2 quad after early // depth/stencil. // temp1.x = free - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_RETC) | - ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN( - D3D10_SB_INSTRUCTION_TEST_ZERO) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3)); - shader_code_.push_back( - EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1)); - shader_code_.push_back(temp1); - ++stat_.instruction_count; - ++stat_.dynamic_flow_control_count; + DxbcOpRetC(false, DxbcSrc::R(temp1, DxbcSrc::kXXXX)); } // Close the large depth/stencil conditional. - shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ENDIF) | - ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1)); - ++stat_.instruction_count; + DxbcOpEndIf(); // Release temp1. PopSystemTemp();