From 9a74df491ffd0189ddd26bc2030e0eae6a775f4e Mon Sep 17 00:00:00 2001 From: Triang3l Date: Mon, 4 Jan 2021 16:15:34 +0300 Subject: [PATCH] [DXBC] dxbc.h with non-translator-specific parts --- src/xenia/gpu/dxbc.h | 1611 +++++++++++ src/xenia/gpu/dxbc_shader_translator.cc | 1388 +++++---- src/xenia/gpu/dxbc_shader_translator.h | 1603 +---------- src/xenia/gpu/dxbc_shader_translator_alu.cc | 934 +++--- src/xenia/gpu/dxbc_shader_translator_fetch.cc | 1127 ++++---- .../gpu/dxbc_shader_translator_memexport.cc | 381 +-- src/xenia/gpu/dxbc_shader_translator_om.cc | 2512 +++++++++-------- 7 files changed, 4784 insertions(+), 4772 deletions(-) create mode 100644 src/xenia/gpu/dxbc.h diff --git a/src/xenia/gpu/dxbc.h b/src/xenia/gpu/dxbc.h new file mode 100644 index 000000000..ea4c448e5 --- /dev/null +++ b/src/xenia/gpu/dxbc.h @@ -0,0 +1,1611 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2021 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_DXBC_H_ +#define XENIA_GPU_DXBC_H_ + +#include <cstdint> +#include <cstdlib> +#include <cstring> +#include <vector> + +#include "xenia/base/assert.h" +#include "xenia/base/math.h" + +namespace xe { +namespace gpu { +namespace dxbc { + +// Utilities for generating shader model 5_1 byte code (for Direct3D 12). +// +// IMPORTANT CONTRIBUTION NOTES: +// +// While DXBC may look like a flexible and high-level representation with highly +// generalized building blocks, actually it has a lot of restrictions on operand +// usage! +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 
+// !!!DO NOT ADD ANYTHING THAT FXC WOULD NOT PRODUCE!!! +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! +// Before adding any sequence that you haven't seen in Xenia, try writing +// equivalent code in HLSL and running it through FXC, try with /Od, try with +// full optimization, but if you see that FXC follows a different pattern than +// what you are expecting, do what FXC does!!! +// Most important limitations: +// - Absolute, negate and saturate are only supported by instructions that +// explicitly support them. See MSDN pages of the specific instructions you +// want to use with modifiers: +// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx9-graphics-reference-asm +// - Component selection in the general case (ALU instructions - things like +// resource access and flow control mostly explicitly need a specific +// component selection mode defined in the specification of the instruction): +// - 0-component - for operand types with no data (samplers, labels). +// - 1-component - for scalar destination operand types, and for scalar source +// operand types when the destination vector has 1 component masked +// (including scalar immediates). +// - Mask - for vector destination operand types. +// - Swizzle - for both vector and scalar (replicated in this case) source +// operand types, when the destination vector has 2 or more components +// masked. Immediates in this case have XYZW swizzle. +// - Select 1 - for vector source operand types, when the destination has 1 +// component masked or is of a scalar type. +// - Input operands (v#) can be used only as sources, output operands (o#) can +// be used only as destinations. +// - Indexable temporaries (x#) can only be used as a destination or a source +// operand (but not both at once) of a mov instruction - a load/store pattern +// here. 
Also, movs involving x# are counted as ArrayInstructions rather than +// MovInstructions in STAT. The other operand can be anything that most other +// instructions accept, but it still must be a mov with x# on one side. +// !NOTE!: The D3D11.3 Functional Specification on Microsoft's GitHub profile, +// as of March 27th, 2020, is NOT a reliable reference, even though it contains +// many DXBC details! There are multiple places where it clearly contradicts +// what FXC does, even when targeting old shader models like 4_0: +// - The limit of 1 immediate or constant buffer source operand per instruction +// is totally ignored by FXC - in simple tests, it can emit an instruction +// with two constant buffer sources, or one constant buffer source and one +// immediate, or a multiply-add with two immediate operands. +// - It says x# can be used wherever r# can be used - in synthetic tests, FXC +// always accesses x# in a load/store way via mov. +// - It says x# can be used for indexing, including nested indexing of x# (one +// level deep), however, FXC moves the inner index operand to r# first in this +// case. +// +// For bytecode structure, see d3d12TokenizedProgramFormat.hpp from the Windows +// Driver Kit, and DXILConv from DirectX Shader Compiler. +// +// Avoid using uninitialized register components - such as registers written to +// in "if" and not in "else", but then used outside unconditionally or with a +// different condition (or even with the same condition, but in a different "if" +// block). This will cause crashes on AMD drivers, and will also limit +// optimization possibilities as this may result in false dependencies. Always +// mov l(0, 0, 0, 0) to such components before potential branching - +// PushSystemTemp accepts a zero mask for this purpose. +// +// Clamping of non-negative values must be done first to the lower bound (using +// max), then to the upper bound (using min), to match the saturate modifier +// behavior, which results in 0 for NaN. 
+ +constexpr uint8_t kAlignmentPadding = 0xAB; + +// D3D_SHADER_VARIABLE_CLASS +enum class RdefVariableClass : uint32_t { + kScalar, + kVector, + kMatrixRows, + kMatrixColumns, + kObject, + kStruct, + kInterfaceClass, + kInterfacePointer, +}; + +// D3D_SHADER_VARIABLE_TYPE subset +enum class RdefVariableType : uint32_t { + kInt = 2, + kFloat = 3, + kUInt = 19, +}; + +// D3D_SHADER_VARIABLE_FLAGS +enum RdefVariableFlags : uint32_t { + kRdefVariableFlagUserPacked = 1 << 0, + kRdefVariableFlagUsed = 1 << 1, + kRdefVariableFlagInterfacePointer = 1 << 2, + kRdefVariableFlagInterfaceParameter = 1 << 3, +}; + +// D3D_CBUFFER_TYPE +enum class RdefCbufferType : uint32_t { + kCbuffer, + kTbuffer, + kInterfacePointers, + kResourceBindInfo, +}; + +// D3D_SHADER_INPUT_TYPE +enum class RdefInputType : uint32_t { + kCbuffer, + kTbuffer, + kTexture, + kSampler, + kUAVRWTyped, + kStructured, + kUAVRWStructured, + kByteAddress, + kUAVRWByteAddress, + kUAVAppendStructured, + kUAVConsumeStructured, + kUAVRWStructuredWithCounter, +}; + +// D3D_RESOURCE_RETURN_TYPE +enum class RdefReturnType : uint32_t { + kVoid, + kUNorm, + kSNorm, + kSInt, + kUInt, + kFloat, + kMixed, + kDouble, + kContinued, +}; + +// D3D12_SRV_DIMENSION/D3D12_UAV_DIMENSION +enum class RdefDimension : uint32_t { + kUnknown = 0, + + kSRVBuffer = 1, + kSRVTexture1D, + kSRVTexture1DArray, + kSRVTexture2D, + kSRVTexture2DArray, + kSRVTexture2DMS, + kSRVTexture2DMSArray, + kSRVTexture3D, + kSRVTextureCube, + kSRVTextureCubeArray, + + kUAVBuffer = 1, + kUAVTexture1D, + kUAVTexture1DArray, + kUAVTexture2D, + kUAVTexture2DArray, + kUAVTexture3D, +}; + +// D3D_SHADER_INPUT_FLAGS +enum RdefInputFlags : uint32_t { + // For constant buffers, UserPacked is set if it was declared as `cbuffer` + // rather than `ConstantBuffer` (not dynamically indexable; though + // non-uniform dynamic indexing of constant buffers also didn't work on AMD + // drivers in 2018). 
+ kRdefInputFlagUserPacked = 1 << 0, + kRdefInputFlagComparisonSampler = 1 << 1, + kRdefInputFlagComponent0 = 1 << 2, + kRdefInputFlagComponent1 = 1 << 3, + kRdefInputFlagsComponents = + kRdefInputFlagComponent0 | kRdefInputFlagComponent1, + kRdefInputFlagUnused = 1 << 4, +}; + +// D3D_NAME subset +enum class Name : uint32_t { + kUndefined = 0, + kPosition = 1, + kClipDistance = 2, + kCullDistance = 3, + kVertexID = 6, + kIsFrontFace = 9, + kFinalQuadEdgeTessFactor = 11, + kFinalQuadInsideTessFactor = 12, + kFinalTriEdgeTessFactor = 13, + kFinalTriInsideTessFactor = 14, +}; + +// D3D_REGISTER_COMPONENT_TYPE +enum class SignatureRegisterComponentType : uint32_t { + kUnknown, + kUInt32, + kSInt32, + kFloat32, +}; + +// D3D10_INTERNALSHADER_PARAMETER +struct SignatureParameter { + // Offset in bytes from the start of the chunk. + uint32_t semantic_name; + uint32_t semantic_index; + // kUndefined for pixel shader outputs - inferred from the component type and + // what is used in the shader. + Name system_value; + SignatureRegisterComponentType component_type; + // o#/v# when there's linkage, SV_Target index or -1 in pixel shader output. + uint32_t register_index; + uint8_t mask; + union { + // For an output signature. + uint8_t never_writes_mask; + // For an input signature. + uint8_t always_reads_mask; + }; +}; +static_assert(alignof(SignatureParameter) <= sizeof(uint32_t)); + +// D3D10_INTERNALSHADER_SIGNATURE +struct Signature { + uint32_t parameter_count; + // Offset in bytes from the start of the chunk. + uint32_t parameter_info_offset; +}; +static_assert(alignof(Signature) <= sizeof(uint32_t)); + +// D3D11_SB_TESSELLATOR_DOMAIN +enum class TessellatorDomain : uint32_t { + kUndefined, + kIsoline, + kTriangle, + kQuad, +}; + +// The STAT chunk (based on Wine d3dcompiler_parse_stat). +struct Statistics { + uint32_t instruction_count; + uint32_t temp_register_count; + // Unknown in Wine. + uint32_t def_count; + // Only inputs and outputs. 
+ uint32_t dcl_count; + uint32_t float_instruction_count; + uint32_t int_instruction_count; + uint32_t uint_instruction_count; + // endif, ret. + uint32_t static_flow_control_count; + // if (but not else). + uint32_t dynamic_flow_control_count; + // Unknown in Wine. + uint32_t macro_instruction_count; + uint32_t temp_array_count; + uint32_t array_instruction_count; + uint32_t cut_instruction_count; + uint32_t emit_instruction_count; + uint32_t texture_normal_instructions; + uint32_t texture_load_instructions; + uint32_t texture_comp_instructions; + uint32_t texture_bias_instructions; + uint32_t texture_gradient_instructions; + // Not including indexable temp load/store. + uint32_t mov_instruction_count; + // Unknown in Wine. + uint32_t movc_instruction_count; + uint32_t conversion_instruction_count; + // Unknown in Wine. + uint32_t unknown_22; + uint32_t input_primitive; + uint32_t gs_output_topology; + uint32_t gs_max_output_vertex_count; + uint32_t unknown_26; + // Unknown in Wine, but confirmed by testing. + uint32_t lod_instructions; + uint32_t unknown_28; + uint32_t unknown_29; + uint32_t c_control_points; + uint32_t hs_output_primitive; + uint32_t hs_partitioning; + TessellatorDomain tessellator_domain; + // Unknown in Wine. + uint32_t c_barrier_instructions; + // Unknown in Wine. + uint32_t c_interlocked_instructions; + // Unknown in Wine, but confirmed by testing. + uint32_t c_texture_store_instructions; +}; + +// D3D10_SB_OPERAND_TYPE subset +enum class OperandType : uint32_t { + kTemp = 0, + kInput = 1, + kOutput = 2, + // Only usable as destination or source (but not both) in mov (and it + // becomes an array instruction this way). 
+ kIndexableTemp = 3, + kImmediate32 = 4, + kSampler = 6, + kResource = 7, + kConstantBuffer = 8, + kLabel = 10, + kInputPrimitiveID = 11, + kOutputDepth = 12, + kNull = 13, + kInputControlPoint = 25, + kInputDomainPoint = 28, + kUnorderedAccessView = 30, + kInputCoverageMask = 35, + kOutputDepthLessEqual = 39, +}; + +// D3D10_SB_OPERAND_INDEX_DIMENSION +constexpr uint32_t GetOperandIndexDimension(OperandType type) { + switch (type) { + case OperandType::kTemp: + case OperandType::kInput: + case OperandType::kOutput: + case OperandType::kLabel: + return 1; + case OperandType::kIndexableTemp: + case OperandType::kSampler: + case OperandType::kResource: + case OperandType::kInputControlPoint: + case OperandType::kUnorderedAccessView: + return 2; + case OperandType::kConstantBuffer: + return 3; + default: + return 0; + } +} + +// D3D10_SB_OPERAND_NUM_COMPONENTS +enum class OperandDimension : uint32_t { + kNoData, // D3D10_SB_OPERAND_0_COMPONENT + kScalar, // D3D10_SB_OPERAND_1_COMPONENT + kVector, // D3D10_SB_OPERAND_4_COMPONENT +}; + +constexpr OperandDimension GetOperandDimension(OperandType type, + bool dest_in_dcl = false) { + switch (type) { + case OperandType::kSampler: + case OperandType::kLabel: + case OperandType::kNull: + return OperandDimension::kNoData; + case OperandType::kInputPrimitiveID: + case OperandType::kOutputDepth: + case OperandType::kOutputDepthLessEqual: + return OperandDimension::kScalar; + case OperandType::kInputCoverageMask: + return dest_in_dcl ? OperandDimension::kScalar + : OperandDimension::kVector; + default: + return OperandDimension::kVector; + } +} + +// D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE +enum class ComponentSelection { + kMask, + kSwizzle, + kSelect1, +}; + +struct Index { + // D3D10_SB_OPERAND_INDEX_REPRESENTATION + enum class Representation : uint32_t { + kImmediate32, + kImmediate64, + kRelative, + kImmediate32PlusRelative, + kImmediate64PlusRelative, + }; + + uint32_t index_; + // UINT32_MAX if absolute. 
Lower 2 bits are the component index, upper bits + // are the temp register index. Applicable to indexable temps, inputs, + // outputs except for pixel shaders, constant buffers and bindings. + uint32_t relative_to_temp_; + + // Implicit constructor. + Index(uint32_t index = 0) : index_(index), relative_to_temp_(UINT32_MAX) {} + Index(uint32_t temp, uint32_t temp_component, uint32_t offset = 0) + : index_(offset), relative_to_temp_((temp << 2) | temp_component) {} + + Representation GetRepresentation() const { + if (relative_to_temp_ != UINT32_MAX) { + return index_ != 0 ? Representation::kImmediate32PlusRelative + : Representation::kRelative; + } + return Representation::kImmediate32; + } + uint32_t GetLength() const { + return relative_to_temp_ != UINT32_MAX ? (index_ != 0 ? 3 : 2) : 1; + } + void Write(std::vector& code) const { + if (relative_to_temp_ == UINT32_MAX || index_ != 0) { + code.push_back(index_); + } + if (relative_to_temp_ != UINT32_MAX) { + // Encode selecting one component from absolute-indexed r#. 
+ code.push_back(uint32_t(OperandDimension::kVector) | + (uint32_t(ComponentSelection::kSelect1) << 2) | + ((relative_to_temp_ & 3) << 4) | + (uint32_t(OperandType::kTemp) << 12) | (1 << 20) | + (uint32_t(Representation::kImmediate32) << 22)); + code.push_back(relative_to_temp_ >> 2); + } + } +}; + +struct OperandAddress { + OperandType type_; + Index index_1d_, index_2d_, index_3d_; + + explicit OperandAddress(OperandType type, Index index_1d = Index(), + Index index_2d = Index(), Index index_3d = Index()) + : type_(type), + index_1d_(index_1d), + index_2d_(index_2d), + index_3d_(index_3d) {} + + OperandDimension GetDimension(bool dest_in_dcl = false) const { + return GetOperandDimension(type_, dest_in_dcl); + } + uint32_t GetIndexDimension() const { return GetOperandIndexDimension(type_); } + uint32_t GetOperandTokenTypeAndIndex() const { + uint32_t index_dimension = GetIndexDimension(); + uint32_t operand_token = (uint32_t(type_) << 12) | (index_dimension << 20); + if (index_dimension > 0) { + operand_token |= uint32_t(index_1d_.GetRepresentation()) << 22; + if (index_dimension > 1) { + operand_token |= uint32_t(index_2d_.GetRepresentation()) << 25; + if (index_dimension > 2) { + operand_token |= uint32_t(index_3d_.GetRepresentation()) << 28; + } + } + } + return operand_token; + } + uint32_t GetLength() const { + uint32_t length = 0; + uint32_t index_dimension = GetIndexDimension(); + if (index_dimension > 0) { + length += index_1d_.GetLength(); + if (index_dimension > 1) { + length += index_2d_.GetLength(); + if (index_dimension > 2) { + length += index_3d_.GetLength(); + } + } + } + return length; + } + void Write(std::vector& code) const { + uint32_t index_dimension = GetIndexDimension(); + if (index_dimension > 0) { + index_1d_.Write(code); + if (index_dimension > 1) { + index_2d_.Write(code); + if (index_dimension > 2) { + index_3d_.Write(code); + } + } + } + } +}; + +// D3D10_SB_EXTENDED_OPERAND_TYPE +enum class ExtendedOperandType : uint32_t { + kEmpty, 
+ kModifier, +}; + +// D3D10_SB_OPERAND_MODIFIER +enum class OperandModifier : uint32_t { + kNone, + kNegate, + kAbsolute, + kAbsoluteNegate, +}; + +struct Dest : OperandAddress { + // Ignored for 0-component and 1-component operand types. + uint32_t write_mask_; + + explicit Dest(OperandType type, uint32_t write_mask = 0b1111, + Index index_1d = Index(), Index index_2d = Index(), + Index index_3d = Index()) + : OperandAddress(type, index_1d, index_2d, index_3d), + write_mask_(write_mask) {} + + static Dest R(uint32_t index, uint32_t write_mask = 0b1111) { + return Dest(OperandType::kTemp, write_mask, index); + } + static Dest O(Index index, uint32_t write_mask = 0b1111) { + return Dest(OperandType::kOutput, write_mask, index); + } + static Dest X(uint32_t index_1d, Index index_2d, + uint32_t write_mask = 0b1111) { + return Dest(OperandType::kIndexableTemp, write_mask, index_1d, index_2d); + } + static Dest ODepth() { return Dest(OperandType::kOutputDepth, 0b0001); } + static Dest Null() { return Dest(OperandType::kNull, 0b0000); } + static Dest U(uint32_t index_1d, Index index_2d, + uint32_t write_mask = 0b1111) { + return Dest(OperandType::kUnorderedAccessView, write_mask, index_1d, + index_2d); + } + static Dest ODepthLE() { + return Dest(OperandType::kOutputDepthLessEqual, 0b0001); + } + + uint32_t GetMask() const { + switch (GetDimension()) { + case OperandDimension::kNoData: + return 0b0000; + case OperandDimension::kScalar: + return 0b0001; + case OperandDimension::kVector: + return write_mask_; + default: + assert_unhandled_case(GetDimension()); + return 0b0000; + } + } + [[nodiscard]] Dest Mask(uint32_t write_mask) const { + return Dest(type_, write_mask, index_1d_, index_2d_, index_3d_); + } + [[nodiscard]] Dest MaskMasked(uint32_t write_mask) const { + return Dest(type_, write_mask_ & write_mask, index_1d_, index_2d_, + index_3d_); + } + static uint32_t GetMaskSingleComponent(uint32_t write_mask) { + uint32_t component; + if 
(xe::bit_scan_forward(write_mask, &component)) { + if ((write_mask >> component) == 1) { + return component; + } + } + return UINT32_MAX; + } + uint32_t GetMaskSingleComponent() const { + return GetMaskSingleComponent(GetMask()); + } + + uint32_t GetLength() const { return 1 + OperandAddress::GetLength(); } + void Write(std::vector& code, bool in_dcl = false) const { + uint32_t operand_token = GetOperandTokenTypeAndIndex(); + OperandDimension dimension = GetDimension(in_dcl); + operand_token |= uint32_t(dimension); + if (dimension == OperandDimension::kVector) { + assert_true(write_mask_ > 0b0000 && write_mask_ <= 0b1111); + operand_token |= + (uint32_t(ComponentSelection::kMask) << 2) | (write_mask_ << 4); + } + code.push_back(operand_token); + OperandAddress::Write(code); + } +}; + +struct Src : OperandAddress { + enum : uint32_t { + kXYZW = 0b11100100, + kXXXX = 0b00000000, + kYYYY = 0b01010101, + kZZZZ = 0b10101010, + kWWWW = 0b11111111, + }; + + // Ignored for 0-component and 1-component operand types. + uint32_t swizzle_; + bool absolute_; + bool negate_; + // Only valid for OperandType::kImmediate32. 
+ uint32_t immediate_[4]; + + explicit Src(OperandType type, uint32_t swizzle = kXYZW, + Index index_1d = Index(), Index index_2d = Index(), + Index index_3d = Index()) + : OperandAddress(type, index_1d, index_2d, index_3d), + swizzle_(swizzle), + absolute_(false), + negate_(false) {} + + static Src R(uint32_t index, uint32_t swizzle = kXYZW) { + return Src(OperandType::kTemp, swizzle, index); + } + static Src V(Index index, uint32_t swizzle = kXYZW) { + return Src(OperandType::kInput, swizzle, index); + } + static Src X(uint32_t index_1d, Index index_2d, uint32_t swizzle = kXYZW) { + return Src(OperandType::kIndexableTemp, swizzle, index_1d, index_2d); + } + static Src LU(uint32_t x, uint32_t y, uint32_t z, uint32_t w) { + Src src(OperandType::kImmediate32, kXYZW); + src.immediate_[0] = x; + src.immediate_[1] = y; + src.immediate_[2] = z; + src.immediate_[3] = w; + return src; + } + static Src LU(uint32_t x) { return LU(x, x, x, x); } + static Src LI(int32_t x, int32_t y, int32_t z, int32_t w) { + return LU(uint32_t(x), uint32_t(y), uint32_t(z), uint32_t(w)); + } + static Src LI(int32_t x) { return LI(x, x, x, x); } + static Src LF(float x, float y, float z, float w) { + return LU(*reinterpret_cast(&x), + *reinterpret_cast(&y), + *reinterpret_cast(&z), + *reinterpret_cast(&w)); + } + static Src LF(float x) { return LF(x, x, x, x); } + static Src LP(const uint32_t* xyzw) { + return LU(xyzw[0], xyzw[1], xyzw[2], xyzw[3]); + } + static Src LP(const int32_t* xyzw) { + return LI(xyzw[0], xyzw[1], xyzw[2], xyzw[3]); + } + static Src LP(const float* xyzw) { + return LF(xyzw[0], xyzw[1], xyzw[2], xyzw[3]); + } + static Src S(uint32_t index_1d, Index index_2d) { + return Src(OperandType::kSampler, kXXXX, index_1d, index_2d); + } + static Src T(uint32_t index_1d, Index index_2d, uint32_t swizzle = kXYZW) { + return Src(OperandType::kResource, swizzle, index_1d, index_2d); + } + static Src CB(uint32_t index_1d, Index index_2d, Index index_3d, + uint32_t swizzle = kXYZW) { + 
return Src(OperandType::kConstantBuffer, swizzle, index_1d, index_2d, + index_3d); + } + static Src Label(uint32_t index) { + return Src(OperandType::kLabel, kXXXX, index); + } + static Src VPrim() { return Src(OperandType::kInputPrimitiveID, kXXXX); } + static Src VICP(Index index_1d, Index index_2d, uint32_t swizzle = kXYZW) { + return Src(OperandType::kInputControlPoint, swizzle, index_1d, index_2d); + } + static Src VDomain(uint32_t swizzle = kXYZW) { + return Src(OperandType::kInputDomainPoint, swizzle); + } + static Src U(uint32_t index_1d, Index index_2d, uint32_t swizzle = kXYZW) { + return Src(OperandType::kUnorderedAccessView, swizzle, index_1d, index_2d); + } + static Src VCoverage() { return Src(OperandType::kInputCoverageMask, kXXXX); } + + [[nodiscard]] Src WithModifiers(bool absolute, bool negate) const { + Src new_src(*this); + new_src.absolute_ = absolute; + new_src.negate_ = negate; + return new_src; + } + [[nodiscard]] Src WithAbs(bool absolute) const { + return WithModifiers(absolute, negate_); + } + [[nodiscard]] Src WithNeg(bool negate) const { + return WithModifiers(absolute_, negate); + } + [[nodiscard]] Src Abs() const { return WithModifiers(true, false); } + [[nodiscard]] Src operator-() const { + return WithModifiers(absolute_, !negate_); + } + [[nodiscard]] Src Swizzle(uint32_t swizzle) const { + Src new_src(*this); + new_src.swizzle_ = swizzle; + return new_src; + } + [[nodiscard]] Src SwizzleSwizzled(uint32_t swizzle) const { + Src new_src(*this); + new_src.swizzle_ = 0; + for (uint32_t i = 0; i < 4; ++i) { + new_src.swizzle_ |= ((swizzle_ >> (((swizzle >> (i * 2)) & 3) * 2)) & 3) + << (i * 2); + } + return new_src; + } + [[nodiscard]] Src Select(uint32_t component) const { + Src new_src(*this); + new_src.swizzle_ = component * 0b01010101; + return new_src; + } + [[nodiscard]] Src SelectFromSwizzled(uint32_t component) const { + Src new_src(*this); + new_src.swizzle_ = ((swizzle_ >> (component * 2)) & 3) * 0b01010101; + return new_src; 
+ } + + uint32_t GetLength(uint32_t mask, bool force_vector = false) const { + bool is_vector = + force_vector || + (mask != 0b0000 && Dest::GetMaskSingleComponent(mask) == UINT32_MAX); + if (type_ == OperandType::kImmediate32) { + return is_vector ? 5 : 2; + } + return ((absolute_ || negate_) ? 2 : 1) + OperandAddress::GetLength(); + } + static constexpr uint32_t GetModifiedImmediate(uint32_t value, + bool is_integer, bool absolute, + bool negate) { + if (is_integer) { + if (absolute) { + *reinterpret_cast(&value) = + std::abs(*reinterpret_cast(&value)); + } + if (negate) { + *reinterpret_cast(&value) = + -*reinterpret_cast(&value); + } + } else { + if (absolute) { + value &= uint32_t(INT32_MAX); + } + if (negate) { + value ^= uint32_t(INT32_MAX) + 1; + } + } + return value; + } + uint32_t GetModifiedImmediate(uint32_t swizzle_index, bool is_integer) const { + return GetModifiedImmediate( + immediate_[(swizzle_ >> (swizzle_index * 2)) & 3], is_integer, + absolute_, negate_); + } + void Write(std::vector& code, bool is_integer, uint32_t mask, + bool force_vector = false) const { + uint32_t operand_token = GetOperandTokenTypeAndIndex(); + uint32_t mask_single_component = Dest::GetMaskSingleComponent(mask); + uint32_t select_component = + mask_single_component != UINT32_MAX ? mask_single_component : 0; + bool is_vector = + force_vector || (mask != 0b0000 && mask_single_component == UINT32_MAX); + if (type_ == OperandType::kImmediate32) { + if (is_vector) { + operand_token |= uint32_t(OperandDimension::kVector) | + (uint32_t(ComponentSelection::kSwizzle) << 2) | + (Src::kXYZW << 4); + } else { + operand_token |= uint32_t(OperandDimension::kScalar); + } + code.push_back(operand_token); + if (is_vector) { + for (uint32_t i = 0; i < 4; ++i) { + code.push_back((mask & (1 << i)) ? 
GetModifiedImmediate(i, is_integer) + : 0); + } + } else { + code.push_back(GetModifiedImmediate(select_component, is_integer)); + } + } else { + switch (GetDimension()) { + case OperandDimension::kScalar: + if (is_vector) { + operand_token |= uint32_t(OperandDimension::kVector) | + (uint32_t(ComponentSelection::kSwizzle) << 2) | + (Src::kXXXX << 4); + } else { + operand_token |= uint32_t(OperandDimension::kScalar); + } + break; + case OperandDimension::kVector: + operand_token |= uint32_t(OperandDimension::kVector); + if (is_vector) { + operand_token |= uint32_t(ComponentSelection::kSwizzle) << 2; + // Clear swizzle of unused components to a used value to avoid + // referencing potentially uninitialized register components. + uint32_t used_component; + if (!xe::bit_scan_forward(mask, &used_component)) { + used_component = 0; + } + for (uint32_t i = 0; i < 4; ++i) { + uint32_t swizzle_index = (mask & (1 << i)) ? i : used_component; + operand_token |= + (((swizzle_ >> (swizzle_index * 2)) & 3) << (4 + i * 2)); + } + } else { + operand_token |= (uint32_t(ComponentSelection::kSelect1) << 2) | + (((swizzle_ >> (select_component * 2)) & 3) << 4); + } + break; + default: + break; + } + OperandModifier modifier = OperandModifier::kNone; + if (absolute_ && negate_) { + modifier = OperandModifier::kAbsoluteNegate; + } else if (absolute_) { + modifier = OperandModifier::kAbsolute; + } else if (negate_) { + modifier = OperandModifier::kNegate; + } + if (modifier != OperandModifier::kNone) { + operand_token |= uint32_t(1) << 31; + } + code.push_back(operand_token); + if (modifier != OperandModifier::kNone) { + code.push_back(uint32_t(ExtendedOperandType::kModifier) | + (uint32_t(modifier) << 6)); + } + OperandAddress::Write(code); + } + } +}; + +// D3D10_SB_OPCODE_TYPE subset +enum class Opcode : uint32_t { + kAdd = 0, + kAnd = 1, + kBreak = 2, + kCall = 4, + kCallC = 5, + kCase = 6, + kContinue = 7, + kDefault = 10, + kDiscard = 13, + kDiv = 14, + kDP2 = 15, + kDP3 = 16, + 
kDP4 = 17, + kElse = 18, + kEndIf = 21, + kEndLoop = 22, + kEndSwitch = 23, + kEq = 24, + kExp = 25, + kFrc = 26, + kFToI = 27, + kFToU = 28, + kGE = 29, + kIAdd = 30, + kIf = 31, + kIEq = 32, + kIGE = 33, + kILT = 34, + kIMAd = 35, + kIMax = 36, + kIMin = 37, + kIMul = 38, + kINE = 39, + kIShL = 41, + kIToF = 43, + kLabel = 44, + kLog = 47, + kLoop = 48, + kLT = 49, + kMAd = 50, + kMin = 51, + kMax = 52, + kMov = 54, + kMovC = 55, + kMul = 56, + kNE = 57, + kNot = 59, + kOr = 60, + kRet = 62, + kRetC = 63, + kRoundNE = 64, + kRoundNI = 65, + kRoundZ = 67, + kRSq = 68, + kSampleL = 72, + kSampleD = 73, + kSqRt = 75, + kSwitch = 76, + kSinCos = 77, + kULT = 79, + kUGE = 80, + kUMul = 81, + kUMAd = 82, + kUMax = 83, + kUMin = 84, + kUShR = 85, + kUToF = 86, + kXOr = 87, + kLOD = 108, + kDerivRTXCoarse = 122, + kDerivRTXFine = 123, + kDerivRTYCoarse = 124, + kDerivRTYFine = 125, + kRcp = 129, + kF32ToF16 = 130, + kF16ToF32 = 131, + kFirstBitHi = 135, + kUBFE = 138, + kIBFE = 139, + kBFI = 140, + kBFRev = 141, + kLdUAVTyped = 163, + kStoreUAVTyped = 164, + kLdRaw = 165, + kStoreRaw = 166, + kEvalSampleIndex = 204, + kEvalCentroid = 205, +}; + +// D3D10_SB_EXTENDED_OPCODE_TYPE +enum class ExtendedOpcodeType : uint32_t { + kEmpty, + kSampleControls, + kResourceDim, + kResourceReturnType, +}; + +constexpr uint32_t OpcodeToken(Opcode opcode, uint32_t operands_length, + bool saturate = false, + uint32_t extended_opcode_count = 0) { + return uint32_t(opcode) | (saturate ? (uint32_t(1) << 13) : 0) | + ((uint32_t(1) + extended_opcode_count + operands_length) << 24) | + (extended_opcode_count ? 
(uint32_t(1) << 31) : 0); +} + +constexpr uint32_t SampleControlsExtendedOpcodeToken(int32_t aoffimmi_u, + int32_t aoffimmi_v, + int32_t aoffimmi_w, + bool extended = false) { + return uint32_t(ExtendedOpcodeType::kSampleControls) | + ((uint32_t(aoffimmi_u) & uint32_t(0b1111)) << 9) | + ((uint32_t(aoffimmi_v) & uint32_t(0b1111)) << 13) | + ((uint32_t(aoffimmi_w) & uint32_t(0b1111)) << 17) | + (extended ? (uint32_t(1) << 31) : 0); +} + +// Assembler appending to the shader program code vector. +class Assembler { + public: + Assembler(std::vector& code, Statistics& stat) + : code_(code), stat_(stat) {} + + void OpAdd(const Dest& dest, const Src& src0, const Src& src1, + bool saturate = false) { + EmitAluOp(Opcode::kAdd, 0b00, dest, src0, src1, saturate); + ++stat_.float_instruction_count; + } + void OpAnd(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kAnd, 0b11, dest, src0, src1); + ++stat_.uint_instruction_count; + } + void OpBreak() { + code_.push_back(OpcodeToken(Opcode::kBreak, 0)); + ++stat_.instruction_count; + } + void OpCall(const Src& label) { + EmitFlowOp(Opcode::kCall, label); + ++stat_.static_flow_control_count; + } + void OpCallC(bool test, const Src& src, const Src& label) { + EmitFlowOp(Opcode::kCallC, src, label, test); + ++stat_.dynamic_flow_control_count; + } + void OpCase(const Src& src) { + EmitFlowOp(Opcode::kCase, src); + ++stat_.static_flow_control_count; + } + void OpContinue() { + code_.push_back(OpcodeToken(Opcode::kContinue, 0)); + ++stat_.instruction_count; + } + void OpDefault() { + code_.push_back(OpcodeToken(Opcode::kDefault, 0)); + ++stat_.instruction_count; + ++stat_.static_flow_control_count; + } + void OpDiscard(bool test, const Src& src) { + EmitFlowOp(Opcode::kDiscard, src, test); + } + void OpDiv(const Dest& dest, const Src& src0, const Src& src1, + bool saturate = false) { + EmitAluOp(Opcode::kDiv, 0b00, dest, src0, src1, saturate); + ++stat_.float_instruction_count; + } + void OpDP2(const Dest& dest, 
const Src& src0, const Src& src1, + bool saturate = false) { + uint32_t operands_length = + dest.GetLength() + src0.GetLength(0b0011) + src1.GetLength(0b0011); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(Opcode::kDP2, operands_length, saturate)); + dest.Write(code_); + src0.Write(code_, false, 0b0011); + src1.Write(code_, false, 0b0011); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } + void OpDP3(const Dest& dest, const Src& src0, const Src& src1, + bool saturate = false) { + uint32_t operands_length = + dest.GetLength() + src0.GetLength(0b0111) + src1.GetLength(0b0111); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(Opcode::kDP3, operands_length, saturate)); + dest.Write(code_); + src0.Write(code_, false, 0b0111); + src1.Write(code_, false, 0b0111); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } + void OpDP4(const Dest& dest, const Src& src0, const Src& src1, + bool saturate = false) { + uint32_t operands_length = + dest.GetLength() + src0.GetLength(0b1111) + src1.GetLength(0b1111); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(Opcode::kDP4, operands_length, saturate)); + dest.Write(code_); + src0.Write(code_, false, 0b1111); + src1.Write(code_, false, 0b1111); + ++stat_.instruction_count; + ++stat_.float_instruction_count; + } + void OpElse() { + code_.push_back(OpcodeToken(Opcode::kElse, 0)); + ++stat_.instruction_count; + } + void OpEndIf() { + code_.push_back(OpcodeToken(Opcode::kEndIf, 0)); + ++stat_.instruction_count; + } + void OpEndLoop() { + code_.push_back(OpcodeToken(Opcode::kEndLoop, 0)); + ++stat_.instruction_count; + } + void OpEndSwitch() { + code_.push_back(OpcodeToken(Opcode::kEndSwitch, 0)); + ++stat_.instruction_count; + } + void OpEq(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kEq, 0b00, dest, src0, src1); + ++stat_.float_instruction_count; + } + void OpExp(const 
Dest& dest, const Src& src, bool saturate = false) { + EmitAluOp(Opcode::kExp, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void OpFrc(const Dest& dest, const Src& src, bool saturate = false) { + EmitAluOp(Opcode::kFrc, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void OpFToI(const Dest& dest, const Src& src) { + EmitAluOp(Opcode::kFToI, 0b0, dest, src); + ++stat_.conversion_instruction_count; + } + void OpFToU(const Dest& dest, const Src& src) { + EmitAluOp(Opcode::kFToU, 0b0, dest, src); + ++stat_.conversion_instruction_count; + } + void OpGE(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kGE, 0b00, dest, src0, src1); + ++stat_.float_instruction_count; + } + void OpIAdd(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kIAdd, 0b11, dest, src0, src1); + ++stat_.int_instruction_count; + } + void OpIf(bool test, const Src& src) { + EmitFlowOp(Opcode::kIf, src, test); + ++stat_.dynamic_flow_control_count; + } + void OpIEq(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kIEq, 0b11, dest, src0, src1); + ++stat_.int_instruction_count; + } + void OpIGE(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kIGE, 0b11, dest, src0, src1); + ++stat_.int_instruction_count; + } + void OpILT(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kILT, 0b11, dest, src0, src1); + ++stat_.int_instruction_count; + } + void OpIMAd(const Dest& dest, const Src& mul0, const Src& mul1, + const Src& add) { + EmitAluOp(Opcode::kIMAd, 0b111, dest, mul0, mul1, add); + ++stat_.int_instruction_count; + } + void OpIMax(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kIMax, 0b11, dest, src0, src1); + ++stat_.int_instruction_count; + } + void OpIMin(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kIMin, 0b11, dest, src0, src1); + ++stat_.int_instruction_count; + } + void OpIMul(const 
Dest& dest_hi, const Dest& dest_lo, const Src& src0, + const Src& src1) { + EmitAluOp(Opcode::kIMul, 0b11, dest_hi, dest_lo, src0, src1); + ++stat_.int_instruction_count; + } + void OpINE(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kINE, 0b11, dest, src0, src1); + ++stat_.int_instruction_count; + } + void OpIShL(const Dest& dest, const Src& value, const Src& shift) { + EmitAluOp(Opcode::kIShL, 0b11, dest, value, shift); + ++stat_.int_instruction_count; + } + void OpIToF(const Dest& dest, const Src& src) { + EmitAluOp(Opcode::kIToF, 0b1, dest, src); + ++stat_.conversion_instruction_count; + } + void OpLabel(const Src& label) { + // The label is source, not destination, for simplicity, to unify it with + // call/callc (in DXBC it's just a zero-component label operand). + uint32_t operands_length = label.GetLength(0b0000); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(Opcode::kLabel, operands_length)); + label.Write(code_, true, 0b0000); + // Doesn't count towards stat_.instruction_count. 
+ } + void OpLog(const Dest& dest, const Src& src, bool saturate = false) { + EmitAluOp(Opcode::kLog, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void OpLoop() { + code_.push_back(OpcodeToken(Opcode::kLoop, 0)); + ++stat_.instruction_count; + ++stat_.dynamic_flow_control_count; + } + void OpLT(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kLT, 0b00, dest, src0, src1); + ++stat_.float_instruction_count; + } + void OpMAd(const Dest& dest, const Src& mul0, const Src& mul1, const Src& add, + bool saturate = false) { + EmitAluOp(Opcode::kMAd, 0b000, dest, mul0, mul1, add, saturate); + ++stat_.float_instruction_count; + } + void OpMin(const Dest& dest, const Src& src0, const Src& src1, + bool saturate = false) { + EmitAluOp(Opcode::kMin, 0b00, dest, src0, src1, saturate); + ++stat_.float_instruction_count; + } + void OpMax(const Dest& dest, const Src& src0, const Src& src1, + bool saturate = false) { + EmitAluOp(Opcode::kMax, 0b00, dest, src0, src1, saturate); + ++stat_.float_instruction_count; + } + void OpMov(const Dest& dest, const Src& src, bool saturate = false) { + EmitAluOp(Opcode::kMov, 0b0, dest, src, saturate); + if (dest.type_ == OperandType::kIndexableTemp || + src.type_ == OperandType::kIndexableTemp) { + ++stat_.array_instruction_count; + } else { + ++stat_.mov_instruction_count; + } + } + void OpMovC(const Dest& dest, const Src& test, const Src& src_nz, + const Src& src_z, bool saturate = false) { + EmitAluOp(Opcode::kMovC, 0b001, dest, test, src_nz, src_z, saturate); + ++stat_.movc_instruction_count; + } + void OpMul(const Dest& dest, const Src& src0, const Src& src1, + bool saturate = false) { + EmitAluOp(Opcode::kMul, 0b00, dest, src0, src1, saturate); + ++stat_.float_instruction_count; + } + void OpNE(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kNE, 0b00, dest, src0, src1); + ++stat_.float_instruction_count; + } + void OpNot(const Dest& dest, const Src& src) { + 
EmitAluOp(Opcode::kNot, 0b1, dest, src); + ++stat_.uint_instruction_count; + } + void OpOr(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kOr, 0b11, dest, src0, src1); + ++stat_.uint_instruction_count; + } + void OpRet() { + code_.push_back(OpcodeToken(Opcode::kRet, 0)); + ++stat_.instruction_count; + ++stat_.static_flow_control_count; + } + void OpRetC(bool test, const Src& src) { + EmitFlowOp(Opcode::kRetC, src, test); + ++stat_.dynamic_flow_control_count; + } + void OpRoundNE(const Dest& dest, const Src& src, bool saturate = false) { + EmitAluOp(Opcode::kRoundNE, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void OpRoundNI(const Dest& dest, const Src& src, bool saturate = false) { + EmitAluOp(Opcode::kRoundNI, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void OpRoundZ(const Dest& dest, const Src& src, bool saturate = false) { + EmitAluOp(Opcode::kRoundZ, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void OpRSq(const Dest& dest, const Src& src, bool saturate = false) { + EmitAluOp(Opcode::kRSq, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void OpSampleL(const Dest& dest, const Src& address, + uint32_t address_components, const Src& resource, + const Src& sampler, const Src& lod, int32_t aoffimmi_u = 0, + int32_t aoffimmi_v = 0, int32_t aoffimmi_w = 0) { + uint32_t dest_write_mask = dest.GetMask(); + uint32_t sample_controls = 0; + if (aoffimmi_u || aoffimmi_v || aoffimmi_w) { + sample_controls = + SampleControlsExtendedOpcodeToken(aoffimmi_u, aoffimmi_v, aoffimmi_w); + } + uint32_t address_mask = (1 << address_components) - 1; + uint32_t operands_length = + dest.GetLength() + address.GetLength(address_mask) + + resource.GetLength(dest_write_mask, true) + sampler.GetLength(0b0000) + + lod.GetLength(0b0000); + code_.reserve(code_.size() + 1 + (sample_controls ? 
1 : 0) + + operands_length); + code_.push_back(OpcodeToken(Opcode::kSampleL, operands_length, false, + sample_controls ? 1 : 0)); + if (sample_controls) { + code_.push_back(sample_controls); + } + dest.Write(code_); + address.Write(code_, false, address_mask); + resource.Write(code_, false, dest_write_mask, true); + sampler.Write(code_, false, 0b0000); + lod.Write(code_, false, 0b0000); + ++stat_.instruction_count; + ++stat_.texture_normal_instructions; + } + void OpSampleD(const Dest& dest, const Src& address, + uint32_t address_components, const Src& resource, + const Src& sampler, const Src& x_derivatives, + const Src& y_derivatives, uint32_t derivatives_components, + int32_t aoffimmi_u = 0, int32_t aoffimmi_v = 0, + int32_t aoffimmi_w = 0) { + // If the address is 1-component, the derivatives are 1-component, if the + // address is 4-component, the derivatives are 4-component. + assert_true(derivatives_components <= address_components); + uint32_t dest_write_mask = dest.GetMask(); + uint32_t sample_controls = 0; + if (aoffimmi_u || aoffimmi_v || aoffimmi_w) { + sample_controls = + SampleControlsExtendedOpcodeToken(aoffimmi_u, aoffimmi_v, aoffimmi_w); + } + uint32_t address_mask = (1 << address_components) - 1; + uint32_t derivatives_mask = (1 << derivatives_components) - 1; + uint32_t operands_length = + dest.GetLength() + address.GetLength(address_mask) + + resource.GetLength(dest_write_mask, true) + sampler.GetLength(0b0000) + + x_derivatives.GetLength(derivatives_mask, address_components > 1) + + y_derivatives.GetLength(derivatives_mask, address_components > 1); + code_.reserve(code_.size() + 1 + (sample_controls ? 1 : 0) + + operands_length); + code_.push_back(OpcodeToken(Opcode::kSampleD, operands_length, false, + sample_controls ? 
1 : 0)); + if (sample_controls) { + code_.push_back(sample_controls); + } + dest.Write(code_); + address.Write(code_, false, address_mask); + resource.Write(code_, false, dest_write_mask, true); + sampler.Write(code_, false, 0b0000); + x_derivatives.Write(code_, false, derivatives_mask, address_components > 1); + y_derivatives.Write(code_, false, derivatives_mask, address_components > 1); + ++stat_.instruction_count; + ++stat_.texture_gradient_instructions; + } + void OpSqRt(const Dest& dest, const Src& src, bool saturate = false) { + EmitAluOp(Opcode::kSqRt, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void OpSwitch(const Src& src) { + EmitFlowOp(Opcode::kSwitch, src); + ++stat_.dynamic_flow_control_count; + } + void OpSinCos(const Dest& dest_sin, const Dest& dest_cos, const Src& src, + bool saturate = false) { + EmitAluOp(Opcode::kSinCos, 0b0, dest_sin, dest_cos, src, saturate); + ++stat_.float_instruction_count; + } + void OpULT(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kULT, 0b11, dest, src0, src1); + ++stat_.uint_instruction_count; + } + void OpUGE(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kUGE, 0b11, dest, src0, src1); + ++stat_.uint_instruction_count; + } + void OpUMul(const Dest& dest_hi, const Dest& dest_lo, const Src& src0, + const Src& src1) { + EmitAluOp(Opcode::kUMul, 0b11, dest_hi, dest_lo, src0, src1); + ++stat_.uint_instruction_count; + } + void OpUMAd(const Dest& dest, const Src& mul0, const Src& mul1, + const Src& add) { + EmitAluOp(Opcode::kUMAd, 0b111, dest, mul0, mul1, add); + ++stat_.uint_instruction_count; + } + void OpUMax(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kUMax, 0b11, dest, src0, src1); + ++stat_.uint_instruction_count; + } + void OpUMin(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kUMin, 0b11, dest, src0, src1); + ++stat_.uint_instruction_count; + } + void OpUShR(const Dest& dest, const 
Src& value, const Src& shift) { + EmitAluOp(Opcode::kUShR, 0b11, dest, value, shift); + ++stat_.uint_instruction_count; + } + void OpUToF(const Dest& dest, const Src& src) { + EmitAluOp(Opcode::kUToF, 0b1, dest, src); + ++stat_.conversion_instruction_count; + } + void OpXOr(const Dest& dest, const Src& src0, const Src& src1) { + EmitAluOp(Opcode::kXOr, 0b11, dest, src0, src1); + ++stat_.uint_instruction_count; + } + void OpLOD(const Dest& dest, const Src& address, uint32_t address_components, + const Src& resource, const Src& sampler) { + uint32_t dest_write_mask = dest.GetMask(); + uint32_t address_mask = (1 << address_components) - 1; + uint32_t operands_length = + dest.GetLength() + address.GetLength(address_mask) + + resource.GetLength(dest_write_mask) + sampler.GetLength(0b0000); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(Opcode::kLOD, operands_length)); + dest.Write(code_); + address.Write(code_, false, address_mask); + resource.Write(code_, false, dest_write_mask); + sampler.Write(code_, false, 0b0000); + ++stat_.instruction_count; + ++stat_.lod_instructions; + } + void OpDerivRTXCoarse(const Dest& dest, const Src& src, + bool saturate = false) { + EmitAluOp(Opcode::kDerivRTXCoarse, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void OpDerivRTXFine(const Dest& dest, const Src& src, bool saturate = false) { + EmitAluOp(Opcode::kDerivRTXFine, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void OpDerivRTYCoarse(const Dest& dest, const Src& src, + bool saturate = false) { + EmitAluOp(Opcode::kDerivRTYCoarse, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void OpDerivRTYFine(const Dest& dest, const Src& src, bool saturate = false) { + EmitAluOp(Opcode::kDerivRTYFine, 0b0, dest, src, saturate); + ++stat_.float_instruction_count; + } + void OpRcp(const Dest& dest, const Src& src, bool saturate = false) { + EmitAluOp(Opcode::kRcp, 0b0, dest, src, saturate); + 
++stat_.float_instruction_count; + } + void OpF32ToF16(const Dest& dest, const Src& src) { + EmitAluOp(Opcode::kF32ToF16, 0b0, dest, src); + ++stat_.conversion_instruction_count; + } + void OpF16ToF32(const Dest& dest, const Src& src) { + EmitAluOp(Opcode::kF16ToF32, 0b1, dest, src); + ++stat_.conversion_instruction_count; + } + void OpFirstBitHi(const Dest& dest, const Src& src) { + EmitAluOp(Opcode::kFirstBitHi, 0b1, dest, src); + ++stat_.uint_instruction_count; + } + void OpUBFE(const Dest& dest, const Src& width, const Src& offset, + const Src& src) { + EmitAluOp(Opcode::kUBFE, 0b111, dest, width, offset, src); + ++stat_.uint_instruction_count; + } + void OpIBFE(const Dest& dest, const Src& width, const Src& offset, + const Src& src) { + EmitAluOp(Opcode::kIBFE, 0b111, dest, width, offset, src); + ++stat_.int_instruction_count; + } + void OpBFI(const Dest& dest, const Src& width, const Src& offset, + const Src& from, const Src& to) { + EmitAluOp(Opcode::kBFI, 0b1111, dest, width, offset, from, to); + ++stat_.uint_instruction_count; + } + void OpBFRev(const Dest& dest, const Src& src) { + EmitAluOp(Opcode::kBFRev, 0b1, dest, src); + ++stat_.uint_instruction_count; + } + void OpLdUAVTyped(const Dest& dest, const Src& address, + uint32_t address_components, const Src& uav) { + uint32_t dest_write_mask = dest.GetMask(); + uint32_t address_mask = (1 << address_components) - 1; + uint32_t operands_length = dest.GetLength() + + address.GetLength(address_mask, true) + + uav.GetLength(dest_write_mask, true); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(Opcode::kLdUAVTyped, operands_length)); + dest.Write(code_); + address.Write(code_, true, address_mask, true); + uav.Write(code_, false, dest_write_mask, true); + ++stat_.instruction_count; + ++stat_.texture_load_instructions; + } + void OpStoreUAVTyped(const Dest& dest, const Src& address, + uint32_t address_components, const Src& value) { + uint32_t dest_write_mask = dest.GetMask(); 
+ // Typed UAV writes don't support write masking. + assert_true(dest_write_mask == 0b1111); + uint32_t address_mask = (1 << address_components) - 1; + uint32_t operands_length = dest.GetLength() + + address.GetLength(address_mask, true) + + value.GetLength(dest_write_mask); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(Opcode::kStoreUAVTyped, operands_length)); + dest.Write(code_); + address.Write(code_, true, address_mask, true); + value.Write(code_, false, dest_write_mask); + ++stat_.instruction_count; + ++stat_.c_texture_store_instructions; + } + void OpLdRaw(const Dest& dest, const Src& byte_offset, const Src& src) { + // For Load, FXC emits code for writing to any component of the destination, + // with xxxx swizzle of the source SRV/UAV. + // For Load2/Load3/Load4, it's xy/xyz/xyzw write mask and xyxx/xyzx/xyzw + // swizzle. + uint32_t dest_write_mask = dest.GetMask(); + assert_true(dest_write_mask == 0b0001 || dest_write_mask == 0b0010 || + dest_write_mask == 0b0100 || dest_write_mask == 0b1000 || + dest_write_mask == 0b0011 || dest_write_mask == 0b0111 || + dest_write_mask == 0b1111); + uint32_t component_count = xe::bit_count(dest_write_mask); + assert_true((src.swizzle_ & ((1 << (component_count * 2)) - 1)) == + (Src::kXYZW & ((1 << (component_count * 2)) - 1))); + uint32_t src_mask = (1 << component_count) - 1; + uint32_t operands_length = dest.GetLength() + + byte_offset.GetLength(0b0000) + + src.GetLength(src_mask, true); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(Opcode::kLdRaw, operands_length)); + dest.Write(code_); + byte_offset.Write(code_, true, 0b0000); + src.Write(code_, true, src_mask, true); + ++stat_.instruction_count; + ++stat_.texture_load_instructions; + } + void OpStoreRaw(const Dest& dest, const Src& byte_offset, const Src& value) { + uint32_t dest_write_mask = dest.GetMask(); + assert_true(dest_write_mask == 0b0001 || dest_write_mask == 0b0011 || + 
dest_write_mask == 0b0111 || dest_write_mask == 0b1111); + uint32_t operands_length = dest.GetLength() + + byte_offset.GetLength(0b0000) + + value.GetLength(dest_write_mask); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(Opcode::kStoreRaw, operands_length)); + dest.Write(code_); + byte_offset.Write(code_, true, 0b0000); + value.Write(code_, true, dest_write_mask); + ++stat_.instruction_count; + ++stat_.c_texture_store_instructions; + } + void OpEvalSampleIndex(const Dest& dest, const Src& value, + const Src& sample_index) { + uint32_t dest_write_mask = dest.GetMask(); + uint32_t operands_length = dest.GetLength() + + value.GetLength(dest_write_mask) + + sample_index.GetLength(0b0000); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(Opcode::kEvalSampleIndex, operands_length)); + dest.Write(code_); + value.Write(code_, false, dest_write_mask); + sample_index.Write(code_, true, 0b0000); + ++stat_.instruction_count; + } + void OpEvalCentroid(const Dest& dest, const Src& value) { + uint32_t dest_write_mask = dest.GetMask(); + uint32_t operands_length = + dest.GetLength() + value.GetLength(dest_write_mask); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(Opcode::kEvalCentroid, operands_length)); + dest.Write(code_); + value.Write(code_, false, dest_write_mask); + ++stat_.instruction_count; + } + + private: + void EmitAluOp(Opcode opcode, uint32_t src_are_integer, const Dest& dest, + const Src& src, bool saturate = false) { + uint32_t dest_write_mask = dest.GetMask(); + uint32_t operands_length = + dest.GetLength() + src.GetLength(dest_write_mask); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(opcode, operands_length, saturate)); + dest.Write(code_); + src.Write(code_, (src_are_integer & 0b1) != 0, dest_write_mask); + ++stat_.instruction_count; + } + void EmitAluOp(Opcode opcode, uint32_t src_are_integer, const Dest& dest, + const 
Src& src0, const Src& src1, bool saturate = false) { + uint32_t dest_write_mask = dest.GetMask(); + uint32_t operands_length = dest.GetLength() + + src0.GetLength(dest_write_mask) + + src1.GetLength(dest_write_mask); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(opcode, operands_length, saturate)); + dest.Write(code_); + src0.Write(code_, (src_are_integer & 0b1) != 0, dest_write_mask); + src1.Write(code_, (src_are_integer & 0b10) != 0, dest_write_mask); + ++stat_.instruction_count; + } + void EmitAluOp(Opcode opcode, uint32_t src_are_integer, const Dest& dest, + const Src& src0, const Src& src1, const Src& src2, + bool saturate = false) { + uint32_t dest_write_mask = dest.GetMask(); + uint32_t operands_length = + dest.GetLength() + src0.GetLength(dest_write_mask) + + src1.GetLength(dest_write_mask) + src2.GetLength(dest_write_mask); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(opcode, operands_length, saturate)); + dest.Write(code_); + src0.Write(code_, (src_are_integer & 0b1) != 0, dest_write_mask); + src1.Write(code_, (src_are_integer & 0b10) != 0, dest_write_mask); + src2.Write(code_, (src_are_integer & 0b100) != 0, dest_write_mask); + ++stat_.instruction_count; + } + void EmitAluOp(Opcode opcode, uint32_t src_are_integer, const Dest& dest, + const Src& src0, const Src& src1, const Src& src2, + const Src& src3, bool saturate = false) { + uint32_t dest_write_mask = dest.GetMask(); + uint32_t operands_length = + dest.GetLength() + src0.GetLength(dest_write_mask) + + src1.GetLength(dest_write_mask) + src2.GetLength(dest_write_mask) + + src3.GetLength(dest_write_mask); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(opcode, operands_length, saturate)); + dest.Write(code_); + src0.Write(code_, (src_are_integer & 0b1) != 0, dest_write_mask); + src1.Write(code_, (src_are_integer & 0b10) != 0, dest_write_mask); + src2.Write(code_, (src_are_integer & 0b100) != 0, 
dest_write_mask); + src3.Write(code_, (src_are_integer & 0b1000) != 0, dest_write_mask); + ++stat_.instruction_count; + } + void EmitAluOp(Opcode opcode, uint32_t src_are_integer, const Dest& dest0, + const Dest& dest1, const Src& src, bool saturate = false) { + uint32_t dest_write_mask = dest0.GetMask() | dest1.GetMask(); + uint32_t operands_length = + dest0.GetLength() + dest1.GetLength() + src.GetLength(dest_write_mask); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(opcode, operands_length, saturate)); + dest0.Write(code_); + dest1.Write(code_); + src.Write(code_, (src_are_integer & 0b1) != 0, dest_write_mask); + ++stat_.instruction_count; + } + void EmitAluOp(Opcode opcode, uint32_t src_are_integer, const Dest& dest0, + const Dest& dest1, const Src& src0, const Src& src1, + bool saturate = false) { + uint32_t dest_write_mask = dest0.GetMask() | dest1.GetMask(); + uint32_t operands_length = dest0.GetLength() + dest1.GetLength() + + src0.GetLength(dest_write_mask) + + src1.GetLength(dest_write_mask); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(opcode, operands_length, saturate)); + dest0.Write(code_); + dest1.Write(code_); + src0.Write(code_, (src_are_integer & 0b1) != 0, dest_write_mask); + src1.Write(code_, (src_are_integer & 0b10) != 0, dest_write_mask); + ++stat_.instruction_count; + } + void EmitFlowOp(Opcode opcode, const Src& src, bool test = false) { + uint32_t operands_length = src.GetLength(0b0000); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(opcode, operands_length) | + (test ? 
(1 << 18) : 0)); + src.Write(code_, true, 0b0000); + ++stat_.instruction_count; + } + void EmitFlowOp(Opcode opcode, const Src& src0, const Src& src1, + bool test = false) { + uint32_t operands_length = src0.GetLength(0b0000) + src1.GetLength(0b0000); + code_.reserve(code_.size() + 1 + operands_length); + code_.push_back(OpcodeToken(opcode, operands_length) | + (test ? (1 << 18) : 0)); + src0.Write(code_, true, 0b0000); + src1.Write(code_, true, 0b0000); + ++stat_.instruction_count; + } + + std::vector& code_; + Statistics& stat_; +}; + +} // namespace dxbc +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_DXBC_H_ diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 534355ce3..f813b46cf 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -68,7 +68,8 @@ DxbcShaderTranslator::DxbcShaderTranslator(uint32_t vendor_id, bool bindless_resources_used, bool edram_rov_used, bool force_emit_source_map) - : vendor_id_(vendor_id), + : a_(shader_code_, stat_), + vendor_id_(vendor_id), bindless_resources_used_(bindless_resources_used), edram_rov_used_(edram_rov_used) { emit_source_map_ = force_emit_source_map || cvars::dxbc_source_map; @@ -157,86 +158,6 @@ void DxbcShaderTranslator::Reset() { std::memset(&stat_, 0, sizeof(stat_)); } -void DxbcShaderTranslator::DxbcSrc::Write(std::vector& code, - bool is_integer, uint32_t mask, - bool force_vector) const { - uint32_t operand_token = GetOperandTokenTypeAndIndex(); - uint32_t mask_single_component = DxbcDest::GetMaskSingleComponent(mask); - uint32_t select_component = - mask_single_component != UINT32_MAX ? 
mask_single_component : 0; - bool is_vector = - force_vector || (mask != 0b0000 && mask_single_component == UINT32_MAX); - if (type_ == DxbcOperandType::kImmediate32) { - if (is_vector) { - operand_token |= uint32_t(DxbcOperandDimension::kVector) | - (uint32_t(DxbcComponentSelection::kSwizzle) << 2) | - (DxbcSrc::kXYZW << 4); - } else { - operand_token |= uint32_t(DxbcOperandDimension::kScalar); - } - code.push_back(operand_token); - if (is_vector) { - for (uint32_t i = 0; i < 4; ++i) { - code.push_back((mask & (1 << i)) ? GetModifiedImmediate(i, is_integer) - : 0); - } - } else { - code.push_back(GetModifiedImmediate(select_component, is_integer)); - } - } else { - switch (GetDimension()) { - case DxbcOperandDimension::kScalar: - if (is_vector) { - operand_token |= uint32_t(DxbcOperandDimension::kVector) | - (uint32_t(DxbcComponentSelection::kSwizzle) << 2) | - (DxbcSrc::kXXXX << 4); - } else { - operand_token |= uint32_t(DxbcOperandDimension::kScalar); - } - break; - case DxbcOperandDimension::kVector: - operand_token |= uint32_t(DxbcOperandDimension::kVector); - if (is_vector) { - operand_token |= uint32_t(DxbcComponentSelection::kSwizzle) << 2; - // Clear swizzle of unused components to a used value to avoid - // referencing potentially uninitialized register components. - uint32_t used_component; - if (!xe::bit_scan_forward(mask, &used_component)) { - used_component = 0; - } - for (uint32_t i = 0; i < 4; ++i) { - uint32_t swizzle_index = (mask & (1 << i)) ? 
i : used_component; - operand_token |= - (((swizzle_ >> (swizzle_index * 2)) & 3) << (4 + i * 2)); - } - } else { - operand_token |= (uint32_t(DxbcComponentSelection::kSelect1) << 2) | - (((swizzle_ >> (select_component * 2)) & 3) << 4); - } - break; - default: - break; - } - DxbcOperandModifier modifier = DxbcOperandModifier::kNone; - if (absolute_ && negate_) { - modifier = DxbcOperandModifier::kAbsoluteNegate; - } else if (absolute_) { - modifier = DxbcOperandModifier::kAbsolute; - } else if (negate_) { - modifier = DxbcOperandModifier::kNegate; - } - if (modifier != DxbcOperandModifier::kNone) { - operand_token |= uint32_t(1) << 31; - } - code.push_back(operand_token); - if (modifier != DxbcOperandModifier::kNone) { - code.push_back(uint32_t(DxbcExtendedOperandType::kModifier) | - (uint32_t(modifier) << 6)); - } - DxbcOperandAddress::Write(code); - } -} - uint32_t DxbcShaderTranslator::GetModificationRegisterCount() const { return GetDxbcShaderModification().dynamic_addressable_register_count; } @@ -263,7 +184,7 @@ uint32_t DxbcShaderTranslator::PushSystemTemp(uint32_t zero_mask, zero_mask &= 0b1111; if (zero_mask) { for (uint32_t i = 0; i < count; ++i) { - DxbcOpMov(DxbcDest::R(register_index + i, zero_mask), DxbcSrc::LU(0)); + a_.OpMov(dxbc::Dest::R(register_index + i, zero_mask), dxbc::Src::LU(0)); } } return register_index; @@ -291,46 +212,46 @@ void DxbcShaderTranslator::ConvertPWLGamma( accumulator_temp_component != source_temp_component); assert_true(piece_temp != accumulator_temp || piece_temp_component != accumulator_temp_component); - DxbcSrc source_src(DxbcSrc::R(source_temp).Select(source_temp_component)); - DxbcDest piece_dest(DxbcDest::R(piece_temp, 1 << piece_temp_component)); - DxbcSrc piece_src(DxbcSrc::R(piece_temp).Select(piece_temp_component)); - DxbcDest accumulator_dest( - DxbcDest::R(accumulator_temp, 1 << accumulator_temp_component)); - DxbcSrc accumulator_src( - DxbcSrc::R(accumulator_temp).Select(accumulator_temp_component)); + 
dxbc::Src source_src(dxbc::Src::R(source_temp).Select(source_temp_component)); + dxbc::Dest piece_dest(dxbc::Dest::R(piece_temp, 1 << piece_temp_component)); + dxbc::Src piece_src(dxbc::Src::R(piece_temp).Select(piece_temp_component)); + dxbc::Dest accumulator_dest( + dxbc::Dest::R(accumulator_temp, 1 << accumulator_temp_component)); + dxbc::Src accumulator_src( + dxbc::Src::R(accumulator_temp).Select(accumulator_temp_component)); // For each piece: // 1) Calculate how far we are on it. Multiply by 1/width, subtract // start/width and saturate. // 2) Add the contribution of the piece - multiply the position on the piece // by its slope*width and accumulate. // Piece 1. - DxbcOpMul(piece_dest, source_src, - DxbcSrc::LF(to_gamma ? (1.0f / 0.0625f) : (1.0f / 0.25f)), true); - DxbcOpMul(accumulator_dest, piece_src, - DxbcSrc::LF(to_gamma ? (4.0f * 0.0625f) : (0.25f * 0.25f))); + a_.OpMul(piece_dest, source_src, + dxbc::Src::LF(to_gamma ? (1.0f / 0.0625f) : (1.0f / 0.25f)), true); + a_.OpMul(accumulator_dest, piece_src, + dxbc::Src::LF(to_gamma ? (4.0f * 0.0625f) : (0.25f * 0.25f))); // Piece 2. - DxbcOpMAd(piece_dest, source_src, - DxbcSrc::LF(to_gamma ? (1.0f / 0.0625f) : (1.0f / 0.125f)), - DxbcSrc::LF(to_gamma ? (-0.0625f / 0.0625f) : (-0.25f / 0.125f)), - true); - DxbcOpMAd(accumulator_dest, piece_src, - DxbcSrc::LF(to_gamma ? (2.0f * 0.0625f) : (0.5f * 0.125f)), - accumulator_src); + a_.OpMAd(piece_dest, source_src, + dxbc::Src::LF(to_gamma ? (1.0f / 0.0625f) : (1.0f / 0.125f)), + dxbc::Src::LF(to_gamma ? (-0.0625f / 0.0625f) : (-0.25f / 0.125f)), + true); + a_.OpMAd(accumulator_dest, piece_src, + dxbc::Src::LF(to_gamma ? (2.0f * 0.0625f) : (0.5f * 0.125f)), + accumulator_src); // Piece 3. - DxbcOpMAd(piece_dest, source_src, - DxbcSrc::LF(to_gamma ? (1.0f / 0.375f) : (1.0f / 0.375f)), - DxbcSrc::LF(to_gamma ? (-0.125f / 0.375f) : (-0.375f / 0.375f)), - true); - DxbcOpMAd(accumulator_dest, piece_src, - DxbcSrc::LF(to_gamma ? 
(1.0f * 0.375f) : (1.0f * 0.375f)), - accumulator_src); + a_.OpMAd(piece_dest, source_src, + dxbc::Src::LF(to_gamma ? (1.0f / 0.375f) : (1.0f / 0.375f)), + dxbc::Src::LF(to_gamma ? (-0.125f / 0.375f) : (-0.375f / 0.375f)), + true); + a_.OpMAd(accumulator_dest, piece_src, + dxbc::Src::LF(to_gamma ? (1.0f * 0.375f) : (1.0f * 0.375f)), + accumulator_src); // Piece 4. - DxbcOpMAd(piece_dest, source_src, - DxbcSrc::LF(to_gamma ? (1.0f / 0.5f) : (1.0f / 0.25f)), - DxbcSrc::LF(to_gamma ? (-0.5f / 0.5f) : (-0.75f / 0.25f)), true); - DxbcOpMAd(DxbcDest::R(target_temp, 1 << target_temp_component), piece_src, - DxbcSrc::LF(to_gamma ? (0.5f * 0.5f) : (2.0f * 0.25f)), - accumulator_src); + a_.OpMAd(piece_dest, source_src, + dxbc::Src::LF(to_gamma ? (1.0f / 0.5f) : (1.0f / 0.25f)), + dxbc::Src::LF(to_gamma ? (-0.5f / 0.5f) : (-0.75f / 0.25f)), true); + a_.OpMAd(dxbc::Dest::R(target_temp, 1 << target_temp_component), piece_src, + dxbc::Src::LF(to_gamma ? (0.5f * 0.5f) : (2.0f * 0.25f)), + accumulator_src); } void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() { @@ -350,82 +271,83 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() { reg = 0; } - DxbcDest index_dest(DxbcDest::R(reg, 0b0001)); - DxbcSrc index_src(DxbcSrc::R(reg, DxbcSrc::kXXXX)); + dxbc::Dest index_dest(dxbc::Dest::R(reg, 0b0001)); + dxbc::Src index_src(dxbc::Src::R(reg, dxbc::Src::kXXXX)); // Check if the closing vertex of a non-indexed line loop is being processed. 
system_constants_used_ |= 1ull << kSysConst_LineLoopClosingIndex_Index; - DxbcOpINE( + a_.OpINE( index_dest, - DxbcSrc::V(uint32_t(InOutRegister::kVSInVertexIndex), DxbcSrc::kXXXX), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_LineLoopClosingIndex_Vec) + dxbc::Src::V(uint32_t(InOutRegister::kVSInVertexIndex), dxbc::Src::kXXXX), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_LineLoopClosingIndex_Vec) .Select(kSysConst_LineLoopClosingIndex_Comp)); // Zero the index if processing the closing vertex of a line loop, or do // nothing (replace 0 with 0) if not needed. - DxbcOpAnd( + a_.OpAnd( index_dest, - DxbcSrc::V(uint32_t(InOutRegister::kVSInVertexIndex), DxbcSrc::kXXXX), + dxbc::Src::V(uint32_t(InOutRegister::kVSInVertexIndex), dxbc::Src::kXXXX), index_src); { // Swap the vertex index's endianness. system_constants_used_ |= 1ull << kSysConst_VertexIndexEndian_Index; - DxbcSrc endian_src(DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_VertexIndexEndian_Vec) - .Select(kSysConst_VertexIndexEndian_Comp)); - DxbcDest swap_temp_dest(DxbcDest::R(reg, 0b0010)); - DxbcSrc swap_temp_src(DxbcSrc::R(reg, DxbcSrc::kYYYY)); + dxbc::Src endian_src( + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_VertexIndexEndian_Vec) + .Select(kSysConst_VertexIndexEndian_Comp)); + dxbc::Dest swap_temp_dest(dxbc::Dest::R(reg, 0b0010)); + dxbc::Src swap_temp_src(dxbc::Src::R(reg, dxbc::Src::kYYYY)); // 8-in-16 or one half of 8-in-32. - DxbcOpSwitch(endian_src); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian::k8in16))); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian::k8in32))); + a_.OpSwitch(endian_src); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian::k8in16))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian::k8in32))); // Temp = X0Z0. 
- DxbcOpAnd(swap_temp_dest, index_src, DxbcSrc::LU(0x00FF00FF)); + a_.OpAnd(swap_temp_dest, index_src, dxbc::Src::LU(0x00FF00FF)); // Index = YZW0. - DxbcOpUShR(index_dest, index_src, DxbcSrc::LU(8)); + a_.OpUShR(index_dest, index_src, dxbc::Src::LU(8)); // Index = Y0W0. - DxbcOpAnd(index_dest, index_src, DxbcSrc::LU(0x00FF00FF)); + a_.OpAnd(index_dest, index_src, dxbc::Src::LU(0x00FF00FF)); // Index = YXWZ. - DxbcOpUMAd(index_dest, swap_temp_src, DxbcSrc::LU(256), index_src); - DxbcOpBreak(); - DxbcOpEndSwitch(); + a_.OpUMAd(index_dest, swap_temp_src, dxbc::Src::LU(256), index_src); + a_.OpBreak(); + a_.OpEndSwitch(); // 16-in-32 or another half of 8-in-32. - DxbcOpSwitch(endian_src); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian::k8in32))); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian::k16in32))); + a_.OpSwitch(endian_src); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian::k8in32))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian::k16in32))); // Temp = ZW00. - DxbcOpUShR(swap_temp_dest, index_src, DxbcSrc::LU(16)); + a_.OpUShR(swap_temp_dest, index_src, dxbc::Src::LU(16)); // Index = ZWXY. - DxbcOpBFI(index_dest, DxbcSrc::LU(16), DxbcSrc::LU(16), index_src, - swap_temp_src); - DxbcOpBreak(); - DxbcOpEndSwitch(); + a_.OpBFI(index_dest, dxbc::Src::LU(16), dxbc::Src::LU(16), index_src, + swap_temp_src); + a_.OpBreak(); + a_.OpEndSwitch(); if (!uses_register_dynamic_addressing) { // Break register dependency. - DxbcOpMov(swap_temp_dest, DxbcSrc::LF(0.0f)); + a_.OpMov(swap_temp_dest, dxbc::Src::LF(0.0f)); } } // Add the base vertex index. 
system_constants_used_ |= 1ull << kSysConst_VertexBaseIndex_Index; - DxbcOpIAdd(index_dest, index_src, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_VertexBaseIndex_Vec) - .Select(kSysConst_VertexBaseIndex_Comp)); + a_.OpIAdd(index_dest, index_src, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_VertexBaseIndex_Vec) + .Select(kSysConst_VertexBaseIndex_Comp)); // Convert to float. - DxbcOpIToF(index_dest, index_src); + a_.OpIToF(index_dest, index_src); if (uses_register_dynamic_addressing) { // Store to indexed GPR 0 in x0[0]. - DxbcOpMov(DxbcDest::X(0, 0, 0b0001), index_src); + a_.OpMov(dxbc::Dest::X(0, 0, 0b0001), index_src); PopSystemTemp(); } } @@ -436,8 +358,8 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { // Zero the interpolators. for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { - DxbcOpMov(DxbcDest::O(uint32_t(InOutRegister::kVSDSOutInterpolators) + i), - DxbcSrc::LF(0.0f)); + a_.OpMov(dxbc::Dest::O(uint32_t(InOutRegister::kVSDSOutInterpolators) + i), + dxbc::Src::LF(0.0f)); } // Remember that x# are only accessible via mov load or store - use a @@ -455,21 +377,21 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { // Copy the domain location to r0.xyz. // ZYX swizzle according to Call of Duty 3 and Viva Pinata. in_domain_location_used_ |= 0b0111; - DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 0, 0b0111) - : DxbcDest::R(0, 0b0111), - DxbcSrc::VDomain(0b000110)); + a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0111) + : dxbc::Dest::R(0, 0b0111), + dxbc::Src::VDomain(0b000110)); if (register_count() >= 2) { // Copy the control point indices (already swapped and converted to // float by the host vertex and hull shaders) to r1.xyz. - DxbcDest control_point_index_dest(uses_register_dynamic_addressing - ? 
DxbcDest::X(0, 1) - : DxbcDest::R(1)); + dxbc::Dest control_point_index_dest(uses_register_dynamic_addressing + ? dxbc::Dest::X(0, 1) + : dxbc::Dest::R(1)); in_control_point_index_used_ = true; for (uint32_t i = 0; i < 3; ++i) { - DxbcOpMov(control_point_index_dest.Mask(1 << i), - DxbcSrc::VICP( - i, uint32_t(InOutRegister::kDSInControlPointIndex), - DxbcSrc::kXXXX)); + a_.OpMov(control_point_index_dest.Mask(1 << i), + dxbc::Src::VICP( + i, uint32_t(InOutRegister::kDSInControlPointIndex), + dxbc::Src::kXXXX)); } } } @@ -482,18 +404,19 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { // ZYX swizzle with r1.y == 0, according to the water shader in // Banjo-Kazooie: Nuts & Bolts. in_domain_location_used_ |= 0b0111; - DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 0, 0b0111) - : DxbcDest::R(0, 0b0111), - DxbcSrc::VDomain(0b000110)); + a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0111) + : dxbc::Dest::R(0, 0b0111), + dxbc::Src::VDomain(0b000110)); if (register_count() >= 2) { // Copy the primitive index to r1.x as a float. uint32_t primitive_id_temp = uses_register_dynamic_addressing ? PushSystemTemp() : 1; in_primitive_id_used_ = true; - DxbcOpUToF(DxbcDest::R(primitive_id_temp, 0b0001), DxbcSrc::VPrim()); + a_.OpUToF(dxbc::Dest::R(primitive_id_temp, 0b0001), + dxbc::Src::VPrim()); if (uses_register_dynamic_addressing) { - DxbcOpMov(DxbcDest::X(0, 1, 0b0001), - DxbcSrc::R(primitive_id_temp, DxbcSrc::kXXXX)); + a_.OpMov(dxbc::Dest::X(0, 1, 0b0001), + dxbc::Src::R(primitive_id_temp, dxbc::Src::kXXXX)); // Release primitive_id_temp. PopSystemTemp(); } @@ -516,9 +439,10 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { // // Direct3D 12 passes the coordinates in a consistent order, so can // just use the identity swizzle. - DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 1, 0b0010) - : DxbcDest::R(1, 0b0010), - DxbcSrc::LF(0.0f)); + a_.OpMov(uses_register_dynamic_addressing + ? 
dxbc::Dest::X(0, 1, 0b0010) + : dxbc::Dest::R(1, 0b0010), + dxbc::Src::LF(0.0f)); } } break; @@ -528,9 +452,9 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { if (register_count() >= 1) { // Copy the domain location to r0.xy. in_domain_location_used_ |= 0b0011; - DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 0, 0b0011) - : DxbcDest::R(0, 0b0011), - DxbcSrc::VDomain()); + a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0011) + : dxbc::Dest::R(0, 0b0011), + dxbc::Src::VDomain()); // Control point indices according to the shader from the main menu of // Defender, which starts from `cndeq r2, c255.xxxy, r1.xyzz, r0.zzzz`, // where c255.x is 0, and c255.y is 1. @@ -539,20 +463,20 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { // r1.y for r0.x * r0.y // r1.z for (1 - r0.x) * r0.y in_control_point_index_used_ = true; - DxbcOpMov( - uses_register_dynamic_addressing ? DxbcDest::X(0, 0, 0b0100) - : DxbcDest::R(0, 0b0100), - DxbcSrc::VICP(0, uint32_t(InOutRegister::kDSInControlPointIndex), - DxbcSrc::kXXXX)); + a_.OpMov( + uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0100) + : dxbc::Dest::R(0, 0b0100), + dxbc::Src::VICP(0, uint32_t(InOutRegister::kDSInControlPointIndex), + dxbc::Src::kXXXX)); if (register_count() >= 2) { - DxbcDest r1_dest(uses_register_dynamic_addressing ? DxbcDest::X(0, 1) - : DxbcDest::R(1)); + dxbc::Dest r1_dest(uses_register_dynamic_addressing + ? dxbc::Dest::X(0, 1) + : dxbc::Dest::R(1)); for (uint32_t i = 0; i < 3; ++i) { - DxbcOpMov( - r1_dest.Mask(1 << i), - DxbcSrc::VICP(1 + i, - uint32_t(InOutRegister::kDSInControlPointIndex), - DxbcSrc::kXXXX)); + a_.OpMov(r1_dest.Mask(1 << i), + dxbc::Src::VICP( + 1 + i, uint32_t(InOutRegister::kDSInControlPointIndex), + dxbc::Src::kXXXX)); } } } @@ -564,17 +488,17 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { // Copy the domain location to r0.yz. // XY swizzle according to the ground shader in Viva Pinata. 
in_domain_location_used_ |= 0b0011; - DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 0, 0b0110) - : DxbcDest::R(0, 0b0110), - DxbcSrc::VDomain(0b010000)); + a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, 0, 0b0110) + : dxbc::Dest::R(0, 0b0110), + dxbc::Src::VDomain(0b010000)); // Copy the primitive index to r0.x as a float. uint32_t primitive_id_temp = uses_register_dynamic_addressing ? PushSystemTemp() : 0; in_primitive_id_used_ = true; - DxbcOpUToF(DxbcDest::R(primitive_id_temp, 0b0001), DxbcSrc::VPrim()); + a_.OpUToF(dxbc::Dest::R(primitive_id_temp, 0b0001), dxbc::Src::VPrim()); if (uses_register_dynamic_addressing) { - DxbcOpMov(DxbcDest::X(0, 0, 0b0001), - DxbcSrc::R(primitive_id_temp, DxbcSrc::kXXXX)); + a_.OpMov(dxbc::Dest::X(0, 0, 0b0001), + dxbc::Src::R(primitive_id_temp, dxbc::Src::kXXXX)); // Release primitive_id_temp. PopSystemTemp(); } @@ -593,9 +517,10 @@ void DxbcShaderTranslator::StartVertexOrDomainShader() { // // Direct3D 12 passes the coordinates in a consistent order, so can // just use the identity swizzle. - DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, 1, 0b0001) - : DxbcDest::R(1, 0b0001), - DxbcSrc::LF(0.0f)); + a_.OpMov(uses_register_dynamic_addressing + ? dxbc::Dest::X(0, 1, 0b0001) + : dxbc::Dest::R(1, 0b0001), + dxbc::Src::LF(0.0f)); } } break; @@ -631,7 +556,7 @@ void DxbcShaderTranslator::StartPixelShader() { if (!edram_rov_used_ && current_shader().writes_depth()) { // Initialize the depth output if used, which must be written to regardless // of the taken execution path. - DxbcOpMov(DxbcDest::ODepth(), DxbcSrc::LF(0.0f)); + a_.OpMov(dxbc::Dest::ODepth(), dxbc::Src::LF(0.0f)); } uint32_t interpolator_count = @@ -643,10 +568,10 @@ void DxbcShaderTranslator::StartPixelShader() { uses_register_dynamic_addressing ? 
PushSystemTemp() : UINT32_MAX; system_constants_used_ |= 1ull << kSysConst_InterpolatorSamplingPattern_Index; - DxbcSrc sampling_pattern_src( - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_InterpolatorSamplingPattern_Vec) + dxbc::Src sampling_pattern_src( + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_InterpolatorSamplingPattern_Vec) .Select(kSysConst_InterpolatorSamplingPattern_Comp)); for (uint32_t i = 0; i < interpolator_count; ++i) { // With GPR dynamic addressing, first evaluate to centroid_temp r#, then @@ -655,25 +580,25 @@ void DxbcShaderTranslator::StartPixelShader() { uses_register_dynamic_addressing ? centroid_temp : i; // Check if the input needs to be interpolated at center (if the bit is // set). - DxbcOpAnd(DxbcDest::R(centroid_register, 0b0001), sampling_pattern_src, - DxbcSrc::LU(uint32_t(1) << i)); - DxbcOpIf(bool(xenos::SampleLocation::kCenter), - DxbcSrc::R(centroid_register, DxbcSrc::kXXXX)); + a_.OpAnd(dxbc::Dest::R(centroid_register, 0b0001), sampling_pattern_src, + dxbc::Src::LU(uint32_t(1) << i)); + a_.OpIf(bool(xenos::SampleLocation::kCenter), + dxbc::Src::R(centroid_register, dxbc::Src::kXXXX)); // At center. - DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, i) - : DxbcDest::R(i), - DxbcSrc::V(uint32_t(InOutRegister::kPSInInterpolators) + i)); - DxbcOpElse(); + a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, i) + : dxbc::Dest::R(i), + dxbc::Src::V(uint32_t(InOutRegister::kPSInInterpolators) + i)); + a_.OpElse(); // At centroid. Not really important that 2x MSAA is emulated using // ForcedSampleCount 4 - what matters is that the sample position will // be within the primitive, and the value will not be extrapolated. 
- DxbcOpEvalCentroid( - DxbcDest::R(centroid_register), - DxbcSrc::V(uint32_t(InOutRegister::kPSInInterpolators) + i)); + a_.OpEvalCentroid( + dxbc::Dest::R(centroid_register), + dxbc::Src::V(uint32_t(InOutRegister::kPSInInterpolators) + i)); if (uses_register_dynamic_addressing) { - DxbcOpMov(DxbcDest::X(0, i), DxbcSrc::R(centroid_register)); + a_.OpMov(dxbc::Dest::X(0, i), dxbc::Src::R(centroid_register)); } - DxbcOpEndIf(); + a_.OpEndIf(); } if (centroid_temp != UINT32_MAX) { PopSystemTemp(); @@ -682,9 +607,9 @@ void DxbcShaderTranslator::StartPixelShader() { // SSAA instead of MSAA without ROV - everything is interpolated at // samples, can't extrapolate. for (uint32_t i = 0; i < interpolator_count; ++i) { - DxbcOpMov(uses_register_dynamic_addressing ? DxbcDest::X(0, i) - : DxbcDest::R(i), - DxbcSrc::V(uint32_t(InOutRegister::kPSInInterpolators) + i)); + a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, i) + : dxbc::Dest::R(i), + dxbc::Src::V(uint32_t(InOutRegister::kPSInInterpolators) + i)); } } @@ -692,128 +617,128 @@ void DxbcShaderTranslator::StartPixelShader() { // absolute value) coordinates, facing (X sign bit) - to the specified // interpolator register (ps_param_gen). system_constants_used_ |= 1ull << kSysConst_PSParamGen_Index; - DxbcSrc param_gen_index_src( - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_PSParamGen_Vec) + dxbc::Src param_gen_index_src( + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_PSParamGen_Vec) .Select(kSysConst_PSParamGen_Comp)); uint32_t param_gen_temp = PushSystemTemp(); // Check if pixel parameters need to be written. 
- DxbcOpULT(DxbcDest::R(param_gen_temp, 0b0001), param_gen_index_src, - DxbcSrc::LU(interpolator_count)); - DxbcOpIf(true, DxbcSrc::R(param_gen_temp, DxbcSrc::kXXXX)); + a_.OpULT(dxbc::Dest::R(param_gen_temp, 0b0001), param_gen_index_src, + dxbc::Src::LU(interpolator_count)); + a_.OpIf(true, dxbc::Src::R(param_gen_temp, dxbc::Src::kXXXX)); { // XY - floored pixel position (Direct3D VPOS) in the absolute value, // faceness as X sign bit. Using Z as scratch register now. if (edram_rov_used_) { // Get XY address of the current host pixel as float. in_position_used_ |= 0b0011; - DxbcOpRoundZ(DxbcDest::R(param_gen_temp, 0b0011), - DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition))); + a_.OpRoundZ(dxbc::Dest::R(param_gen_temp, 0b0011), + dxbc::Src::V(uint32_t(InOutRegister::kPSInPosition))); // Revert resolution scale - after truncating, so if the pixel position // is passed to tfetch (assuming the game doesn't round it by itself), // it will be sampled with higher resolution too. // Check if resolution scale is 2x2 and multiply by 0.5 in this case. 
system_constants_used_ |= 1ull << kSysConst_EdramResolutionSquareScale_Index; - DxbcOpIEq(DxbcDest::R(param_gen_temp, 0b0100), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramResolutionSquareScale_Vec) - .Select(kSysConst_EdramResolutionSquareScale_Comp), - DxbcSrc::LU(4)); - DxbcOpIf(true, DxbcSrc::R(param_gen_temp, DxbcSrc::kZZZZ)); + a_.OpIEq(dxbc::Dest::R(param_gen_temp, 0b0100), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramResolutionSquareScale_Vec) + .Select(kSysConst_EdramResolutionSquareScale_Comp), + dxbc::Src::LU(4)); + a_.OpIf(true, dxbc::Src::R(param_gen_temp, dxbc::Src::kZZZZ)); { - DxbcOpMul(DxbcDest::R(param_gen_temp, 0b0011), - DxbcSrc::R(param_gen_temp), DxbcSrc::LF(0.5f)); + a_.OpMul(dxbc::Dest::R(param_gen_temp, 0b0011), + dxbc::Src::R(param_gen_temp), dxbc::Src::LF(0.5f)); } - DxbcOpEndIf(); + a_.OpEndIf(); } else { // Get XY address of the current SSAA sample by converting // SV_Position.xy to an integer. in_position_used_ |= 0b0011; - DxbcOpFToU(DxbcDest::R(param_gen_temp, 0b0011), - DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition))); + a_.OpFToU(dxbc::Dest::R(param_gen_temp, 0b0011), + dxbc::Src::V(uint32_t(InOutRegister::kPSInPosition))); // Undo SSAA that is used instead of MSAA - since it's used as a // workaround for MSAA emulation, guest pixel position must be the same // for all samples, so this should be done is integers (or before // truncating). 
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - DxbcOpUShR( - DxbcDest::R(param_gen_temp, 0b0011), DxbcSrc::R(param_gen_temp), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_SampleCountLog2_Vec, - kSysConst_SampleCountLog2_Comp | - ((kSysConst_SampleCountLog2_Comp + 1) << 2))); + a_.OpUShR( + dxbc::Dest::R(param_gen_temp, 0b0011), dxbc::Src::R(param_gen_temp), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_SampleCountLog2_Vec, + kSysConst_SampleCountLog2_Comp | + ((kSysConst_SampleCountLog2_Comp + 1) << 2))); // Convert the integer position to float Direct3D 9 VPOS. - DxbcOpUToF(DxbcDest::R(param_gen_temp, 0b0011), - DxbcSrc::R(param_gen_temp)); + a_.OpUToF(dxbc::Dest::R(param_gen_temp, 0b0011), + dxbc::Src::R(param_gen_temp)); } // Check if faceness applies to the current primitive type. system_constants_used_ |= 1ull << kSysConst_Flags_Index; - DxbcOpAnd(DxbcDest::R(param_gen_temp, 0b0100), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_Flags_Vec) - .Select(kSysConst_Flags_Comp), - DxbcSrc::LU(kSysFlag_PrimitivePolygonal)); - DxbcOpIf(true, DxbcSrc::R(param_gen_temp, DxbcSrc::kZZZZ)); + a_.OpAnd(dxbc::Dest::R(param_gen_temp, 0b0100), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp), + dxbc::Src::LU(kSysFlag_PrimitivePolygonal)); + a_.OpIf(true, dxbc::Src::R(param_gen_temp, dxbc::Src::kZZZZ)); { // Negate modifier flips the sign bit even for 0 - set it to minus for // backfaces. 
in_front_face_used_ = true; - DxbcOpMovC( - DxbcDest::R(param_gen_temp, 0b0001), - DxbcSrc::V(uint32_t(InOutRegister::kPSInFrontFace), DxbcSrc::kXXXX), - DxbcSrc::R(param_gen_temp, DxbcSrc::kXXXX), - -DxbcSrc::R(param_gen_temp, DxbcSrc::kXXXX)); + a_.OpMovC(dxbc::Dest::R(param_gen_temp, 0b0001), + dxbc::Src::V(uint32_t(InOutRegister::kPSInFrontFace), + dxbc::Src::kXXXX), + dxbc::Src::R(param_gen_temp, dxbc::Src::kXXXX), + -dxbc::Src::R(param_gen_temp, dxbc::Src::kXXXX)); } - DxbcOpEndIf(); + a_.OpEndIf(); // ZW - UV within a point sprite in the absolute value, at centroid if // requested for the interpolator. - DxbcDest point_coord_r_zw_dest(DxbcDest::R(param_gen_temp, 0b1100)); - DxbcSrc point_coord_v_xxxy_src(DxbcSrc::V( + dxbc::Dest point_coord_r_zw_dest(dxbc::Dest::R(param_gen_temp, 0b1100)); + dxbc::Src point_coord_v_xxxy_src(dxbc::Src::V( uint32_t(InOutRegister::kPSInPointParameters), 0b01000000)); if (edram_rov_used_) { system_constants_used_ |= 1ull << kSysConst_InterpolatorSamplingPattern_Index; - DxbcOpUBFE(DxbcDest::R(param_gen_temp, 0b0100), DxbcSrc::LU(1), - param_gen_index_src, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_InterpolatorSamplingPattern_Vec) - .Select(kSysConst_InterpolatorSamplingPattern_Comp)); - DxbcOpIf(bool(xenos::SampleLocation::kCenter), - DxbcSrc::R(param_gen_temp, DxbcSrc::kZZZZ)); + a_.OpUBFE(dxbc::Dest::R(param_gen_temp, 0b0100), dxbc::Src::LU(1), + param_gen_index_src, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_InterpolatorSamplingPattern_Vec) + .Select(kSysConst_InterpolatorSamplingPattern_Comp)); + a_.OpIf(bool(xenos::SampleLocation::kCenter), + dxbc::Src::R(param_gen_temp, dxbc::Src::kZZZZ)); // At center. - DxbcOpMov(point_coord_r_zw_dest, point_coord_v_xxxy_src); - DxbcOpElse(); + a_.OpMov(point_coord_r_zw_dest, point_coord_v_xxxy_src); + a_.OpElse(); // At centroid. 
- DxbcOpEvalCentroid(point_coord_r_zw_dest, point_coord_v_xxxy_src); - DxbcOpEndIf(); + a_.OpEvalCentroid(point_coord_r_zw_dest, point_coord_v_xxxy_src); + a_.OpEndIf(); } else { // At the SSAA sample. - DxbcOpMov(point_coord_r_zw_dest, point_coord_v_xxxy_src); + a_.OpMov(point_coord_r_zw_dest, point_coord_v_xxxy_src); } // Write ps_param_gen to the specified GPR. - DxbcSrc param_gen_src(DxbcSrc::R(param_gen_temp)); + dxbc::Src param_gen_src(dxbc::Src::R(param_gen_temp)); if (uses_register_dynamic_addressing) { // Copy the GPR number to r# for relative addressing. uint32_t param_gen_copy_temp = PushSystemTemp(); - DxbcOpMov(DxbcDest::R(param_gen_copy_temp, 0b0001), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_PSParamGen_Vec) - .Select(kSysConst_PSParamGen_Comp)); + a_.OpMov(dxbc::Dest::R(param_gen_copy_temp, 0b0001), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_PSParamGen_Vec) + .Select(kSysConst_PSParamGen_Comp)); // Write to the GPR. - DxbcOpMov(DxbcDest::X(0, DxbcIndex(param_gen_copy_temp, 0)), - param_gen_src); + a_.OpMov(dxbc::Dest::X(0, dxbc::Index(param_gen_copy_temp, 0)), + param_gen_src); // Release param_gen_copy_temp. PopSystemTemp(); } else { if (interpolator_count == 1) { - DxbcOpMov(DxbcDest::R(0), param_gen_src); + a_.OpMov(dxbc::Dest::R(0), param_gen_src); } else { // Write to the r# using binary search. 
uint32_t param_gen_copy_temp = PushSystemTemp(); @@ -821,25 +746,25 @@ void DxbcShaderTranslator::StartPixelShader() { const auto& self) -> void { assert_true(low < high); uint32_t mid = low + (high - low + 1) / 2; - DxbcOpULT(DxbcDest::R(param_gen_copy_temp, 0b0001), - param_gen_index_src, DxbcSrc::LU(mid)); - DxbcOpIf(true, DxbcSrc::R(param_gen_copy_temp, DxbcSrc::kXXXX)); + a_.OpULT(dxbc::Dest::R(param_gen_copy_temp, 0b0001), + param_gen_index_src, dxbc::Src::LU(mid)); + a_.OpIf(true, dxbc::Src::R(param_gen_copy_temp, dxbc::Src::kXXXX)); { if (low + 1 == mid) { - DxbcOpMov(DxbcDest::R(low), param_gen_src); + a_.OpMov(dxbc::Dest::R(low), param_gen_src); } else { self(low, mid - 1, self); } } - DxbcOpElse(); + a_.OpElse(); { if (mid == high) { - DxbcOpMov(DxbcDest::R(mid), param_gen_src); + a_.OpMov(dxbc::Dest::R(mid), param_gen_src); } else { self(mid, high, self); } } - DxbcOpEndIf(); + a_.OpEndIf(); }; param_gen_copy_node(0, interpolator_count - 1, param_gen_copy_node); // Release param_gen_copy_temp. @@ -848,7 +773,7 @@ void DxbcShaderTranslator::StartPixelShader() { } } // Close the ps_param_gen check. - DxbcOpEndIf(); + a_.OpEndIf(); // Release param_gen_temp. PopSystemTemp(); } @@ -863,9 +788,9 @@ void DxbcShaderTranslator::StartTranslation() { // Set the point size to a negative value to tell the geometry shader that // it should use the global point size if the vertex shader does not // override it. - DxbcOpMov( - DxbcDest::R(system_temp_point_size_edge_flag_kill_vertex_, 0b0001), - DxbcSrc::LF(-1.0f)); + a_.OpMov( + dxbc::Dest::R(system_temp_point_size_edge_flag_kill_vertex_, 0b0001), + dxbc::Src::LF(-1.0f)); } else if (is_pixel_shader()) { if (edram_rov_used_) { // Will be initialized unconditionally. @@ -934,10 +859,10 @@ void DxbcShaderTranslator::StartTranslation() { // references them after only initializing them conditionally. for (uint32_t i = is_pixel_shader() ? 
xenos::kMaxInterpolators : 0; i < register_count(); ++i) { - DxbcOpMov(current_shader().uses_register_dynamic_addressing() - ? DxbcDest::X(0, i) - : DxbcDest::R(i), - DxbcSrc::LF(0.0f)); + a_.OpMov(current_shader().uses_register_dynamic_addressing() + ? dxbc::Dest::X(0, i) + : dxbc::Dest::R(i), + dxbc::Src::LF(0.0f)); } } @@ -954,64 +879,64 @@ void DxbcShaderTranslator::StartTranslation() { } // Start the main loop (for jumping to labels by setting pc and continuing). - DxbcOpLoop(); + a_.OpLoop(); // Switch and the first label (pc == 0). if (UseSwitchForControlFlow()) { - DxbcOpSwitch(DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kYYYY)); - DxbcOpCase(DxbcSrc::LU(0)); + a_.OpSwitch(dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kYYYY)); + a_.OpCase(dxbc::Src::LU(0)); } else { - DxbcOpIf(false, DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kYYYY)); + a_.OpIf(false, dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kYYYY)); } } void DxbcShaderTranslator::CompleteVertexOrDomainShader() { uint32_t temp = PushSystemTemp(); - DxbcDest temp_x_dest(DxbcDest::R(temp, 0b0001)); - DxbcSrc temp_x_src(DxbcSrc::R(temp, DxbcSrc::kXXXX)); + dxbc::Dest temp_x_dest(dxbc::Dest::R(temp, 0b0001)); + dxbc::Src temp_x_src(dxbc::Src::R(temp, dxbc::Src::kXXXX)); system_constants_used_ |= 1ull << kSysConst_Flags_Index; - DxbcSrc flags_src(DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_Flags_Vec) - .Select(kSysConst_Flags_Comp)); + dxbc::Src flags_src(dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp)); // Check if the shader already returns W, not 1/W, and if it doesn't, turn 1/W // into W. Using div rather than relaxed-precision rcp for safety. 
- DxbcOpAnd(temp_x_dest, flags_src, DxbcSrc::LU(kSysFlag_WNotReciprocal)); - DxbcOpIf(false, temp_x_src); - DxbcOpDiv(DxbcDest::R(system_temp_position_, 0b1000), DxbcSrc::LF(1.0f), - DxbcSrc::R(system_temp_position_, DxbcSrc::kWWWW)); - DxbcOpEndIf(); + a_.OpAnd(temp_x_dest, flags_src, dxbc::Src::LU(kSysFlag_WNotReciprocal)); + a_.OpIf(false, temp_x_src); + a_.OpDiv(dxbc::Dest::R(system_temp_position_, 0b1000), dxbc::Src::LF(1.0f), + dxbc::Src::R(system_temp_position_, dxbc::Src::kWWWW)); + a_.OpEndIf(); // Check if the shader returns XY/W rather than XY, and if it does, revert // that. // TODO(Triang3l): Check if having XY or Z pre-divided by W should result in // affine interpolation. - DxbcOpAnd(temp_x_dest, flags_src, DxbcSrc::LU(kSysFlag_XYDividedByW)); - DxbcOpIf(true, temp_x_src); - DxbcOpMul(DxbcDest::R(system_temp_position_, 0b0011), - DxbcSrc::R(system_temp_position_), - DxbcSrc::R(system_temp_position_, DxbcSrc::kWWWW)); - DxbcOpEndIf(); + a_.OpAnd(temp_x_dest, flags_src, dxbc::Src::LU(kSysFlag_XYDividedByW)); + a_.OpIf(true, temp_x_src); + a_.OpMul(dxbc::Dest::R(system_temp_position_, 0b0011), + dxbc::Src::R(system_temp_position_), + dxbc::Src::R(system_temp_position_, dxbc::Src::kWWWW)); + a_.OpEndIf(); // Check if the shader returns Z/W rather than Z, and if it does, revert that. // TODO(Triang3l): Check if having XY or Z pre-divided by W should result in // affine interpolation. 
- DxbcOpAnd(temp_x_dest, flags_src, DxbcSrc::LU(kSysFlag_ZDividedByW)); - DxbcOpIf(true, temp_x_src); - DxbcOpMul(DxbcDest::R(system_temp_position_, 0b0100), - DxbcSrc::R(system_temp_position_, DxbcSrc::kZZZZ), - DxbcSrc::R(system_temp_position_, DxbcSrc::kWWWW)); - DxbcOpEndIf(); + a_.OpAnd(temp_x_dest, flags_src, dxbc::Src::LU(kSysFlag_ZDividedByW)); + a_.OpIf(true, temp_x_src); + a_.OpMul(dxbc::Dest::R(system_temp_position_, 0b0100), + dxbc::Src::R(system_temp_position_, dxbc::Src::kZZZZ), + dxbc::Src::R(system_temp_position_, dxbc::Src::kWWWW)); + a_.OpEndIf(); // Zero-initialize SV_ClipDistance# (for user clip planes) and SV_CullDistance // (for vertex kill) in case they're not needed. - DxbcOpMov(DxbcDest::O(uint32_t(InOutRegister::kVSDSOutClipDistance0123)), - DxbcSrc::LF(0.0f)); - DxbcOpMov(DxbcDest::O( - uint32_t(InOutRegister::kVSDSOutClipDistance45AndCullDistance), - 0b0111), - DxbcSrc::LF(0.0f)); + a_.OpMov(dxbc::Dest::O(uint32_t(InOutRegister::kVSDSOutClipDistance0123)), + dxbc::Src::LF(0.0f)); + a_.OpMov(dxbc::Dest::O( + uint32_t(InOutRegister::kVSDSOutClipDistance45AndCullDistance), + 0b0111), + dxbc::Src::LF(0.0f)); // Clip against user clip planes. // Not possible to handle UCP_CULL_ONLY_ENA with the same shader though, since // there can be only 8 SV_ClipDistance + SV_CullDistance values at most, but @@ -1022,90 +947,90 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() { // just zeroing the clip planes in the constants, so Infinity and NaN in the // position won't have any effect caused by this if clip planes are // disabled. 
- DxbcOpAnd(temp_x_dest, flags_src, - DxbcSrc::LU(kSysFlag_UserClipPlane0 << i)); - DxbcOpIf(true, temp_x_src); - DxbcOpDP4(DxbcDest::O( - uint32_t(InOutRegister::kVSDSOutClipDistance0123) + (i >> 2), - 1 << (i & 3)), - DxbcSrc::R(system_temp_position_), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_UserClipPlanes_Vec + i)); - DxbcOpEndIf(); + a_.OpAnd(temp_x_dest, flags_src, + dxbc::Src::LU(kSysFlag_UserClipPlane0 << i)); + a_.OpIf(true, temp_x_src); + a_.OpDP4(dxbc::Dest::O( + uint32_t(InOutRegister::kVSDSOutClipDistance0123) + (i >> 2), + 1 << (i & 3)), + dxbc::Src::R(system_temp_position_), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_UserClipPlanes_Vec + i)); + a_.OpEndIf(); } // Apply scale for guest to host viewport and clip space conversion. Also, if // the vertex shader is multipass, the NDC scale constant can be used to set // position to NaN to kill all primitives. system_constants_used_ |= 1ull << kSysConst_NDCScale_Index; - DxbcOpMul(DxbcDest::R(system_temp_position_, 0b0111), - DxbcSrc::R(system_temp_position_), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_NDCScale_Vec, - kSysConst_NDCScale_Comp * 0b010101 + 0b100100)); + a_.OpMul(dxbc::Dest::R(system_temp_position_, 0b0111), + dxbc::Src::R(system_temp_position_), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_NDCScale_Vec, + kSysConst_NDCScale_Comp * 0b010101 + 0b100100)); // Apply offset (multiplied by W) used for the same purposes. 
system_constants_used_ |= 1ull << kSysConst_NDCOffset_Index; - DxbcOpMAd(DxbcDest::R(system_temp_position_, 0b0111), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_NDCOffset_Vec, - kSysConst_NDCOffset_Comp * 0b010101 + 0b100100), - DxbcSrc::R(system_temp_position_, DxbcSrc::kWWWW), - DxbcSrc::R(system_temp_position_)); + a_.OpMAd(dxbc::Dest::R(system_temp_position_, 0b0111), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_NDCOffset_Vec, + kSysConst_NDCOffset_Comp * 0b010101 + 0b100100), + dxbc::Src::R(system_temp_position_, dxbc::Src::kWWWW), + dxbc::Src::R(system_temp_position_)); // Write Z and W of the position to a separate attribute so ROV output can get // per-sample depth. - DxbcOpMov(DxbcDest::O(uint32_t(InOutRegister::kVSDSOutClipSpaceZW), 0b0011), - DxbcSrc::R(system_temp_position_, 0b1110)); + a_.OpMov(dxbc::Dest::O(uint32_t(InOutRegister::kVSDSOutClipSpaceZW), 0b0011), + dxbc::Src::R(system_temp_position_, 0b1110)); // Assuming SV_CullDistance was zeroed earlier in this function. // Kill the primitive if needed - check if the shader wants to kill. // TODO(Triang3l): Find if the condition is actually the flag being non-zero. - DxbcOpNE( - temp_x_dest, - DxbcSrc::R(system_temp_point_size_edge_flag_kill_vertex_, DxbcSrc::kZZZZ), - DxbcSrc::LF(0.0f)); - DxbcOpIf(true, temp_x_src); + a_.OpNE(temp_x_dest, + dxbc::Src::R(system_temp_point_size_edge_flag_kill_vertex_, + dxbc::Src::kZZZZ), + dxbc::Src::LF(0.0f)); + a_.OpIf(true, temp_x_src); { // Extract the killing condition. - DxbcOpAnd(temp_x_dest, flags_src, - DxbcSrc::LU(kSysFlag_KillIfAnyVertexKilled)); - DxbcOpIf(true, temp_x_src); + a_.OpAnd(temp_x_dest, flags_src, + dxbc::Src::LU(kSysFlag_KillIfAnyVertexKilled)); + a_.OpIf(true, temp_x_src); { // Kill the primitive if any vertex is killed - write NaN to position. 
- DxbcOpMov(DxbcDest::R(system_temp_position_, 0b1000), - DxbcSrc::LF(std::nanf(""))); + a_.OpMov(dxbc::Dest::R(system_temp_position_, 0b1000), + dxbc::Src::LF(std::nanf(""))); } - DxbcOpElse(); + a_.OpElse(); { // Kill the primitive if all vertices are killed - set SV_CullDistance to // negative. - DxbcOpMov( - DxbcDest::O( + a_.OpMov( + dxbc::Dest::O( uint32_t(InOutRegister::kVSDSOutClipDistance45AndCullDistance), 0b0100), - DxbcSrc::LF(-1.0f)); + dxbc::Src::LF(-1.0f)); } - DxbcOpEndIf(); + a_.OpEndIf(); } - DxbcOpEndIf(); + a_.OpEndIf(); // Write the position to the output. - DxbcOpMov(DxbcDest::O(uint32_t(InOutRegister::kVSDSOutPosition)), - DxbcSrc::R(system_temp_position_)); + a_.OpMov(dxbc::Dest::O(uint32_t(InOutRegister::kVSDSOutPosition)), + dxbc::Src::R(system_temp_position_)); // Zero the point coordinate (will be set in the geometry shader if needed) // and write the point size. - DxbcOpMov( - DxbcDest::O(uint32_t(InOutRegister::kVSDSOutPointParameters), 0b0011), - DxbcSrc::LF(0.0f)); - DxbcOpMov( - DxbcDest::O(uint32_t(InOutRegister::kVSDSOutPointParameters), 0b0100), - DxbcSrc::R(system_temp_point_size_edge_flag_kill_vertex_, - DxbcSrc::kXXXX)); + a_.OpMov( + dxbc::Dest::O(uint32_t(InOutRegister::kVSDSOutPointParameters), 0b0011), + dxbc::Src::LF(0.0f)); + a_.OpMov( + dxbc::Dest::O(uint32_t(InOutRegister::kVSDSOutPointParameters), 0b0100), + dxbc::Src::R(system_temp_point_size_edge_flag_kill_vertex_, + dxbc::Src::kXXXX)); // Release temp. PopSystemTemp(); @@ -1118,14 +1043,14 @@ void DxbcShaderTranslator::CompleteShaderCode() { CloseExecConditionals(); // Close the last label and the switch. if (UseSwitchForControlFlow()) { - DxbcOpBreak(); - DxbcOpEndSwitch(); + a_.OpBreak(); + a_.OpEndSwitch(); } else { - DxbcOpEndIf(); + a_.OpEndIf(); } // End the main loop. - DxbcOpBreak(); - DxbcOpEndLoop(); + a_.OpBreak(); + a_.OpEndLoop(); // Release the following system temporary values so epilogue can reuse them: // - system_temp_result_. 
@@ -1166,7 +1091,7 @@ void DxbcShaderTranslator::CompleteShaderCode() { } // Return from `main`. - DxbcOpRet(); + a_.OpRet(); if (is_vertex_shader()) { // Release system_temp_position_ and @@ -1368,33 +1293,33 @@ void DxbcShaderTranslator::EmitInstructionDisassembly() { target[length] = '\0'; // Don't leave uninitialized data, and make sure multiple invocations of the // translator for the same Xenos shader give the same DXBC. - std::memset(target + length + 1, 0xAB, + std::memset(target + length + 1, dxbc::kAlignmentPadding, length_dwords * sizeof(uint32_t) - length - 1); } -DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::LoadOperand( - const InstructionOperand& operand, uint32_t needed_components, - bool& temp_pushed_out) { +dxbc::Src DxbcShaderTranslator::LoadOperand(const InstructionOperand& operand, + uint32_t needed_components, + bool& temp_pushed_out) { temp_pushed_out = false; uint32_t first_needed_component; if (!xe::bit_scan_forward(needed_components, &first_needed_component)) { - return DxbcSrc::LF(0.0f); + return dxbc::Src::LF(0.0f); } - DxbcIndex index(operand.storage_index); + dxbc::Index index(operand.storage_index); switch (operand.storage_addressing_mode) { case InstructionStorageAddressingMode::kStatic: break; case InstructionStorageAddressingMode::kAddressAbsolute: - index = DxbcIndex(system_temp_ps_pc_p0_a0_, 3, operand.storage_index); + index = dxbc::Index(system_temp_ps_pc_p0_a0_, 3, operand.storage_index); break; case InstructionStorageAddressingMode::kAddressRelative: - index = DxbcIndex(system_temp_aL_, 0, operand.storage_index); + index = dxbc::Index(system_temp_aL_, 0, operand.storage_index); break; } - DxbcSrc src(DxbcSrc::LF(0.0f)); + dxbc::Src src(dxbc::Src::LF(0.0f)); switch (operand.storage_source) { case InstructionStorageSource::kRegister: { if (current_shader().uses_register_dynamic_addressing()) { @@ -1413,13 +1338,13 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::LoadOperand( 1 << (uint32_t(component) - 
uint32_t(SwizzleSource::kX)); } assert_not_zero(used_swizzle_components); - DxbcOpMov(DxbcDest::R(temp, used_swizzle_components), - DxbcSrc::X(0, index)); - src = DxbcSrc::R(temp); + a_.OpMov(dxbc::Dest::R(temp, used_swizzle_components), + dxbc::Src::X(0, index)); + src = dxbc::Src::R(temp); } else { assert_true(operand.storage_addressing_mode == InstructionStorageAddressingMode::kStatic); - src = DxbcSrc::R(index.index_); + src = dxbc::Src::R(index.index_); } } break; case InstructionStorageSource::kConstantFloat: { @@ -1435,18 +1360,18 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::LoadOperand( operand.storage_index); assert_true(float_constant_index != UINT32_MAX); if (float_constant_index == UINT32_MAX) { - return DxbcSrc::LF(0.0f); + return dxbc::Src::LF(0.0f); } index.index_ = float_constant_index; } else { assert_true(constant_register_map.float_dynamic_addressing); } - src = DxbcSrc::CB(cbuffer_index_float_constants_, - uint32_t(CbufferRegister::kFloatConstants), index); + src = dxbc::Src::CB(cbuffer_index_float_constants_, + uint32_t(CbufferRegister::kFloatConstants), index); } break; default: assert_unhandled_case(operand.storage_source); - return DxbcSrc::LF(0.0f); + return dxbc::Src::LF(0.0f); } // Swizzle, skipping unneeded components similar to how FXC skips components, @@ -1465,7 +1390,7 @@ DxbcShaderTranslator::DxbcSrc DxbcShaderTranslator::LoadOperand( } void DxbcShaderTranslator::StoreResult(const InstructionResult& result, - const DxbcSrc& src, + const dxbc::Src& src, bool can_store_memexport_address) { uint32_t used_write_mask = result.GetUsedWriteMask(); if (!used_write_mask) { @@ -1473,43 +1398,43 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result, } // Get the destination address and type. 
- DxbcDest dest(DxbcDest::Null()); + dxbc::Dest dest(dxbc::Dest::Null()); bool is_clamped = result.is_clamped; switch (result.storage_target) { case InstructionStorageTarget::kNone: return; case InstructionStorageTarget::kRegister: if (current_shader().uses_register_dynamic_addressing()) { - DxbcIndex register_index(result.storage_index); + dxbc::Index register_index(result.storage_index); switch (result.storage_addressing_mode) { case InstructionStorageAddressingMode::kStatic: break; case InstructionStorageAddressingMode::kAddressAbsolute: register_index = - DxbcIndex(system_temp_ps_pc_p0_a0_, 3, result.storage_index); + dxbc::Index(system_temp_ps_pc_p0_a0_, 3, result.storage_index); break; case InstructionStorageAddressingMode::kAddressRelative: register_index = - DxbcIndex(system_temp_aL_, 0, result.storage_index); + dxbc::Index(system_temp_aL_, 0, result.storage_index); break; } - dest = DxbcDest::X(0, register_index); + dest = dxbc::Dest::X(0, register_index); } else { assert_true(result.storage_addressing_mode == InstructionStorageAddressingMode::kStatic); - dest = DxbcDest::R(result.storage_index); + dest = dxbc::Dest::R(result.storage_index); } break; case InstructionStorageTarget::kInterpolator: - dest = DxbcDest::O(uint32_t(InOutRegister::kVSDSOutInterpolators) + - result.storage_index); + dest = dxbc::Dest::O(uint32_t(InOutRegister::kVSDSOutInterpolators) + + result.storage_index); break; case InstructionStorageTarget::kPosition: - dest = DxbcDest::R(system_temp_position_); + dest = dxbc::Dest::R(system_temp_position_); break; case InstructionStorageTarget::kPointSizeEdgeFlagKillVertex: assert_zero(used_write_mask & 0b1000); - dest = DxbcDest::R(system_temp_point_size_edge_flag_kill_vertex_); + dest = dxbc::Dest::R(system_temp_point_size_edge_flag_kill_vertex_); break; case InstructionStorageTarget::kExportAddress: // Validate memexport writes (Halo 3 has some weird invalid ones). 
@@ -1519,7 +1444,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result, UINT32_MAX) { return; } - dest = DxbcDest::R( + dest = dxbc::Dest::R( system_temps_memexport_address_[memexport_alloc_current_count_ - 1]); break; case InstructionStorageTarget::kExportData: { @@ -1530,23 +1455,23 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result, [result.storage_index] == UINT32_MAX) { return; } - dest = DxbcDest::R( + dest = dxbc::Dest::R( system_temps_memexport_data_[memexport_alloc_current_count_ - 1] [result.storage_index]); // Mark that the eM# has been written to and needs to be exported. assert_not_zero(used_write_mask); uint32_t memexport_index = memexport_alloc_current_count_ - 1; - DxbcOpOr(DxbcDest::R(system_temp_memexport_written_, - 1 << (memexport_index >> 2)), - DxbcSrc::R(system_temp_memexport_written_) - .Select(memexport_index >> 2), - DxbcSrc::LU(uint32_t(1) << (result.storage_index + - ((memexport_index & 3) << 3)))); + a_.OpOr(dxbc::Dest::R(system_temp_memexport_written_, + 1 << (memexport_index >> 2)), + dxbc::Src::R(system_temp_memexport_written_) + .Select(memexport_index >> 2), + dxbc::Src::LU(uint32_t(1) << (result.storage_index + + ((memexport_index & 3) << 3)))); } break; case InstructionStorageTarget::kColor: assert_not_zero(used_write_mask); assert_true(current_shader().writes_color_target(result.storage_index)); - dest = DxbcDest::R(system_temps_color_[result.storage_index]); + dest = dxbc::Dest::R(system_temps_color_[result.storage_index]); if (edram_rov_used_) { // For ROV output, mark that the color has been written to. // According to: @@ -1556,9 +1481,9 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result, // components of a written target are undefined, not sure if this // behavior is respected on the real GPU, but the ROV code currently // doesn't preserve unmodified components). 
- DxbcOpOr(DxbcDest::R(system_temp_rov_params_, 0b0001), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(uint32_t(1) << (8 + result.storage_index))); + a_.OpOr(dxbc::Dest::R(system_temp_rov_params_, 0b0001), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LU(uint32_t(1) << (8 + result.storage_index))); } break; case InstructionStorageTarget::kDepth: @@ -1567,9 +1492,9 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result, assert_true(used_write_mask == 0b0001); assert_true(current_shader().writes_depth()); if (IsDepthStencilSystemTempUsed()) { - dest = DxbcDest::R(system_temp_depth_stencil_); + dest = dxbc::Dest::R(system_temp_depth_stencil_); } else { - dest = DxbcDest::ODepth(); + dest = dxbc::Dest::ODepth(); } // Depth outside [0, 1] is not safe for use with the ROV code and with // 20e4-as-32 conversion. Though 20e4 float depth can store values between @@ -1579,7 +1504,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result, is_clamped = true; break; } - if (dest.type_ == DxbcOperandType::kNull) { + if (dest.type_ == dxbc::OperandType::kNull) { return; } @@ -1602,15 +1527,15 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result, } } if (used_write_mask != constant_mask) { - DxbcOpMov(dest.Mask(used_write_mask & ~constant_mask), - src.SwizzleSwizzled(src_additional_swizzle), is_clamped); + a_.OpMov(dest.Mask(used_write_mask & ~constant_mask), + src.SwizzleSwizzled(src_additional_swizzle), is_clamped); } if (constant_mask) { - DxbcOpMov(dest.Mask(constant_mask), - DxbcSrc::LF(float(constant_1_mask & 1), - float((constant_1_mask >> 1) & 1), - float((constant_1_mask >> 2) & 1), - float((constant_1_mask >> 3) & 1))); + a_.OpMov(dest.Mask(constant_mask), + dxbc::Src::LF(float(constant_1_mask & 1), + float((constant_1_mask >> 1) & 1), + float((constant_1_mask >> 2) & 1), + float((constant_1_mask >> 3) & 1))); } } @@ -1660,20 +1585,21 @@ void 
DxbcShaderTranslator::UpdateExecConditionalsAndEmitDisassembly( if (cbuffer_index_bool_loop_constants_ == kBindingIndexUnallocated) { cbuffer_index_bool_loop_constants_ = cbuffer_count_++; } - DxbcOpAnd(DxbcDest::R(bool_constant_test_temp, 0b0001), - DxbcSrc::CB(cbuffer_index_bool_loop_constants_, - uint32_t(CbufferRegister::kBoolLoopConstants), - bool_constant_index >> 7) - .Select((bool_constant_index >> 5) & 3), - DxbcSrc::LU(uint32_t(1) << (bool_constant_index & 31))); + a_.OpAnd(dxbc::Dest::R(bool_constant_test_temp, 0b0001), + dxbc::Src::CB(cbuffer_index_bool_loop_constants_, + uint32_t(CbufferRegister::kBoolLoopConstants), + bool_constant_index >> 7) + .Select((bool_constant_index >> 5) & 3), + dxbc::Src::LU(uint32_t(1) << (bool_constant_index & 31))); // Open the new `if`. - DxbcOpIf(condition, DxbcSrc::R(bool_constant_test_temp, DxbcSrc::kXXXX)); + a_.OpIf(condition, dxbc::Src::R(bool_constant_test_temp, dxbc::Src::kXXXX)); // Release bool_constant_test_temp. PopSystemTemp(); cf_exec_bool_constant_ = bool_constant_index; cf_exec_bool_constant_condition_ = condition; } else if (type == ParsedExecInstruction::Type::kPredicated) { - DxbcOpIf(condition, DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kZZZZ)); + a_.OpIf(condition, + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kZZZZ)); cf_exec_predicated_ = true; cf_exec_predicate_condition_ = condition; } @@ -1685,7 +1611,7 @@ void DxbcShaderTranslator::CloseExecConditionals() { // Exec level. 
if (cf_exec_bool_constant_ != kCfExecBoolConstantNone || cf_exec_predicated_) { - DxbcOpEndIf(); + a_.OpEndIf(); cf_exec_bool_constant_ = kCfExecBoolConstantNone; cf_exec_predicated_ = false; } @@ -1722,22 +1648,22 @@ void DxbcShaderTranslator::UpdateInstructionPredicationAndEmitDisassembly( return; } - DxbcOpIf(condition, DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kZZZZ)); + a_.OpIf(condition, dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kZZZZ)); cf_instruction_predicate_if_open_ = true; cf_instruction_predicate_condition_ = condition; } void DxbcShaderTranslator::CloseInstructionPredication() { if (cf_instruction_predicate_if_open_) { - DxbcOpEndIf(); + a_.OpEndIf(); cf_instruction_predicate_if_open_ = false; } } void DxbcShaderTranslator::JumpToLabel(uint32_t address) { - DxbcOpMov(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b0010), - DxbcSrc::LU(address)); - DxbcOpContinue(); + a_.OpMov(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b0010), + dxbc::Src::LU(address)); + a_.OpContinue(); } void DxbcShaderTranslator::ProcessLabel(uint32_t cf_index) { @@ -1754,17 +1680,17 @@ void DxbcShaderTranslator::ProcessLabel(uint32_t cf_index) { // non-empty switch case must end with a break. JumpToLabel(cf_index); // Close the previous label. - DxbcOpBreak(); + a_.OpBreak(); // Go to the next label. - DxbcOpCase(DxbcSrc::LU(cf_index)); + a_.OpCase(dxbc::Src::LU(cf_index)); } else { // Close the previous label. - DxbcOpEndIf(); + a_.OpEndIf(); // if (pc <= cf_index) uint32_t test_temp = PushSystemTemp(); - DxbcOpUGE(DxbcDest::R(test_temp, 0b0001), DxbcSrc::LU(cf_index), - DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kYYYY)); - DxbcOpIf(true, DxbcSrc::R(test_temp, DxbcSrc::kXXXX)); + a_.OpUGE(dxbc::Dest::R(test_temp, 0b0001), dxbc::Src::LU(cf_index), + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kYYYY)); + a_.OpIf(true, dxbc::Src::R(test_temp, dxbc::Src::kXXXX)); // Release test_temp. 
PopSystemTemp(); } @@ -1787,12 +1713,12 @@ void DxbcShaderTranslator::ProcessExecInstructionEnd( CloseInstructionPredication(); if (UseSwitchForControlFlow()) { // Write an invalid value to pc. - DxbcOpMov(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b0010), - DxbcSrc::LU(UINT32_MAX)); + a_.OpMov(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b0010), + dxbc::Src::LU(UINT32_MAX)); // Go to the next iteration, where switch cases won't be reached. - DxbcOpContinue(); + a_.OpContinue(); } else { - DxbcOpBreak(); + a_.OpBreak(); } } } @@ -1815,32 +1741,32 @@ void DxbcShaderTranslator::ProcessLoopStartInstruction( if (cbuffer_index_bool_loop_constants_ == kBindingIndexUnallocated) { cbuffer_index_bool_loop_constants_ = cbuffer_count_++; } - DxbcSrc loop_constant_src( - DxbcSrc::CB(cbuffer_index_bool_loop_constants_, - uint32_t(CbufferRegister::kBoolLoopConstants), - 2 + (instr.loop_constant_index >> 2)) + dxbc::Src loop_constant_src( + dxbc::Src::CB(cbuffer_index_bool_loop_constants_, + uint32_t(CbufferRegister::kBoolLoopConstants), + 2 + (instr.loop_constant_index >> 2)) .Select(instr.loop_constant_index & 3)); // Push the count to the loop count stack - move XYZ to YZW and set X to this // loop count. - DxbcOpMov(DxbcDest::R(system_temp_loop_count_, 0b1110), - DxbcSrc::R(system_temp_loop_count_, 0b10010000)); - DxbcOpAnd(DxbcDest::R(system_temp_loop_count_, 0b0001), loop_constant_src, - DxbcSrc::LU(UINT8_MAX)); + a_.OpMov(dxbc::Dest::R(system_temp_loop_count_, 0b1110), + dxbc::Src::R(system_temp_loop_count_, 0b10010000)); + a_.OpAnd(dxbc::Dest::R(system_temp_loop_count_, 0b0001), loop_constant_src, + dxbc::Src::LU(UINT8_MAX)); // Push aL - keep the same value as in the previous loop if repeating, or the // new one otherwise. - DxbcOpMov(DxbcDest::R(system_temp_aL_, instr.is_repeat ? 0b1111 : 0b1110), - DxbcSrc::R(system_temp_aL_, 0b10010000)); + a_.OpMov(dxbc::Dest::R(system_temp_aL_, instr.is_repeat ? 
0b1111 : 0b1110), + dxbc::Src::R(system_temp_aL_, 0b10010000)); if (!instr.is_repeat) { - DxbcOpUBFE(DxbcDest::R(system_temp_aL_, 0b0001), DxbcSrc::LU(8), - DxbcSrc::LU(8), loop_constant_src); + a_.OpUBFE(dxbc::Dest::R(system_temp_aL_, 0b0001), dxbc::Src::LU(8), + dxbc::Src::LU(8), loop_constant_src); } // Break if the loop counter is 0 (since the condition is checked in the end). - DxbcOpIf(false, DxbcSrc::R(system_temp_loop_count_, DxbcSrc::kXXXX)); + a_.OpIf(false, dxbc::Src::R(system_temp_loop_count_, dxbc::Src::kXXXX)); JumpToLabel(instr.loop_skip_address); - DxbcOpEndIf(); + a_.OpEndIf(); } void DxbcShaderTranslator::ProcessLoopEndInstruction( @@ -1857,9 +1783,9 @@ void DxbcShaderTranslator::ProcessLoopEndInstruction( } // Subtract 1 from the loop counter. - DxbcOpIAdd(DxbcDest::R(system_temp_loop_count_, 0b0001), - DxbcSrc::R(system_temp_loop_count_, DxbcSrc::kXXXX), - DxbcSrc::LI(-1)); + a_.OpIAdd(dxbc::Dest::R(system_temp_loop_count_, 0b0001), + dxbc::Src::R(system_temp_loop_count_, dxbc::Src::kXXXX), + dxbc::Src::LI(-1)); if (instr.is_predicated_break) { // if (loop_count.x == 0 || [!]p0) @@ -1867,38 +1793,38 @@ void DxbcShaderTranslator::ProcessLoopEndInstruction( if (instr.predicate_condition) { // If p0 is non-zero, set the test value to 0 (since if_z is used, // otherwise check if the loop counter is zero). - DxbcOpMovC(DxbcDest::R(break_case_temp, 0b0001), - DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kZZZZ), - DxbcSrc::LU(0), - DxbcSrc::R(system_temp_loop_count_, DxbcSrc::kXXXX)); + a_.OpMovC(dxbc::Dest::R(break_case_temp, 0b0001), + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kZZZZ), + dxbc::Src::LU(0), + dxbc::Src::R(system_temp_loop_count_, dxbc::Src::kXXXX)); } else { // If p0 is zero, set the test value to 0 (since if_z is used, otherwise // check if the loop counter is zero). 
- DxbcOpMovC(DxbcDest::R(break_case_temp, 0b0001), - DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kZZZZ), - DxbcSrc::R(system_temp_loop_count_, DxbcSrc::kXXXX), - DxbcSrc::LU(0)); + a_.OpMovC(dxbc::Dest::R(break_case_temp, 0b0001), + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kZZZZ), + dxbc::Src::R(system_temp_loop_count_, dxbc::Src::kXXXX), + dxbc::Src::LU(0)); } - DxbcOpIf(false, DxbcSrc::R(break_case_temp, DxbcSrc::kXXXX)); + a_.OpIf(false, dxbc::Src::R(break_case_temp, dxbc::Src::kXXXX)); // Release break_case_temp. PopSystemTemp(); } else { // if (loop_count.x == 0) - DxbcOpIf(false, DxbcSrc::R(system_temp_loop_count_, DxbcSrc::kXXXX)); + a_.OpIf(false, dxbc::Src::R(system_temp_loop_count_, dxbc::Src::kXXXX)); } { // Break case. // Pop the current loop off the loop counter and the relative address // stacks - move YZW to XYZ and set W to 0. - DxbcOpMov(DxbcDest::R(system_temp_loop_count_, 0b0111), - DxbcSrc::R(system_temp_loop_count_, 0b111001)); - DxbcOpMov(DxbcDest::R(system_temp_loop_count_, 0b1000), DxbcSrc::LU(0)); - DxbcOpMov(DxbcDest::R(system_temp_aL_, 0b0111), - DxbcSrc::R(system_temp_aL_, 0b111001)); - DxbcOpMov(DxbcDest::R(system_temp_aL_, 0b1000), DxbcSrc::LI(0)); + a_.OpMov(dxbc::Dest::R(system_temp_loop_count_, 0b0111), + dxbc::Src::R(system_temp_loop_count_, 0b111001)); + a_.OpMov(dxbc::Dest::R(system_temp_loop_count_, 0b1000), dxbc::Src::LU(0)); + a_.OpMov(dxbc::Dest::R(system_temp_aL_, 0b0111), + dxbc::Src::R(system_temp_aL_, 0b111001)); + a_.OpMov(dxbc::Dest::R(system_temp_aL_, 0b1000), dxbc::Src::LI(0)); // Now going to fall through to the next exec (no need to jump). } - DxbcOpElse(); + a_.OpElse(); { // Continue case. 
uint32_t aL_add_temp = PushSystemTemp(); @@ -1907,22 +1833,22 @@ void DxbcShaderTranslator::ProcessLoopEndInstruction( if (cbuffer_index_bool_loop_constants_ == kBindingIndexUnallocated) { cbuffer_index_bool_loop_constants_ = cbuffer_count_++; } - DxbcOpIBFE(DxbcDest::R(aL_add_temp, 0b0001), DxbcSrc::LU(8), - DxbcSrc::LU(16), - DxbcSrc::CB(cbuffer_index_bool_loop_constants_, - uint32_t(CbufferRegister::kBoolLoopConstants), - 2 + (instr.loop_constant_index >> 2)) - .Select(instr.loop_constant_index & 3)); + a_.OpIBFE(dxbc::Dest::R(aL_add_temp, 0b0001), dxbc::Src::LU(8), + dxbc::Src::LU(16), + dxbc::Src::CB(cbuffer_index_bool_loop_constants_, + uint32_t(CbufferRegister::kBoolLoopConstants), + 2 + (instr.loop_constant_index >> 2)) + .Select(instr.loop_constant_index & 3)); // Add the needed value to aL. - DxbcOpIAdd(DxbcDest::R(system_temp_aL_, 0b0001), - DxbcSrc::R(system_temp_aL_, DxbcSrc::kXXXX), - DxbcSrc::R(aL_add_temp, DxbcSrc::kXXXX)); + a_.OpIAdd(dxbc::Dest::R(system_temp_aL_, 0b0001), + dxbc::Src::R(system_temp_aL_, dxbc::Src::kXXXX), + dxbc::Src::R(aL_add_temp, dxbc::Src::kXXXX)); // Release aL_add_temp. PopSystemTemp(); // Jump back to the beginning of the loop body. JumpToLabel(instr.loop_body_address); } - DxbcOpEndIf(); + a_.OpEndIf(); } void DxbcShaderTranslator::ProcessJumpInstruction( @@ -1975,59 +1901,59 @@ uint32_t DxbcShaderTranslator::AppendString(std::vector& dest, std::memcpy(&dest[dest_position], source, size); // Don't leave uninitialized data, and make sure multiple invocations of the // translator for the same Xenos shader give the same DXBC. 
- std::memset(reinterpret_cast(&dest[dest_position]) + size, 0xAB, - size_aligned - size); + std::memset(reinterpret_cast(&dest[dest_position]) + size, + dxbc::kAlignmentPadding, size_aligned - size); return uint32_t(size_aligned); } const DxbcShaderTranslator::RdefType DxbcShaderTranslator::rdef_types_[size_t( DxbcShaderTranslator::RdefTypeIndex::kCount)] = { // kFloat - {"float", DxbcRdefVariableClass::kScalar, DxbcRdefVariableType::kFloat, 1, - 1, 0, 0, RdefTypeIndex::kUnknown, nullptr}, + {"float", dxbc::RdefVariableClass::kScalar, dxbc::RdefVariableType::kFloat, + 1, 1, 0, 0, RdefTypeIndex::kUnknown, nullptr}, // kFloat2 - {"float2", DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kFloat, 1, - 2, 0, 0, RdefTypeIndex::kUnknown, nullptr}, + {"float2", dxbc::RdefVariableClass::kVector, dxbc::RdefVariableType::kFloat, + 1, 2, 0, 0, RdefTypeIndex::kUnknown, nullptr}, // kFloat3 - {"float3", DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kFloat, 1, - 3, 0, 0, RdefTypeIndex::kUnknown, nullptr}, + {"float3", dxbc::RdefVariableClass::kVector, dxbc::RdefVariableType::kFloat, + 1, 3, 0, 0, RdefTypeIndex::kUnknown, nullptr}, // kFloat4 - {"float4", DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kFloat, 1, - 4, 0, 0, RdefTypeIndex::kUnknown, nullptr}, + {"float4", dxbc::RdefVariableClass::kVector, dxbc::RdefVariableType::kFloat, + 1, 4, 0, 0, RdefTypeIndex::kUnknown, nullptr}, // kInt - {"int", DxbcRdefVariableClass::kScalar, DxbcRdefVariableType::kInt, 1, 1, 0, - 0, RdefTypeIndex::kUnknown, nullptr}, + {"int", dxbc::RdefVariableClass::kScalar, dxbc::RdefVariableType::kInt, 1, + 1, 0, 0, RdefTypeIndex::kUnknown, nullptr}, // kUint - {"uint", DxbcRdefVariableClass::kScalar, DxbcRdefVariableType::kUInt, 1, 1, - 0, 0, RdefTypeIndex::kUnknown, nullptr}, + {"uint", dxbc::RdefVariableClass::kScalar, dxbc::RdefVariableType::kUInt, 1, + 1, 0, 0, RdefTypeIndex::kUnknown, nullptr}, // kUint2 - {"uint2", DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kUInt, 
1, 2, - 0, 0, RdefTypeIndex::kUnknown, nullptr}, + {"uint2", dxbc::RdefVariableClass::kVector, dxbc::RdefVariableType::kUInt, + 1, 2, 0, 0, RdefTypeIndex::kUnknown, nullptr}, // kUint4 - {"uint4", DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kUInt, 1, 4, - 0, 0, RdefTypeIndex::kUnknown, nullptr}, + {"uint4", dxbc::RdefVariableClass::kVector, dxbc::RdefVariableType::kUInt, + 1, 4, 0, 0, RdefTypeIndex::kUnknown, nullptr}, // kFloat4Array4 - {nullptr, DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kFloat, 1, - 4, 4, 0, RdefTypeIndex::kFloat4, nullptr}, + {nullptr, dxbc::RdefVariableClass::kVector, dxbc::RdefVariableType::kFloat, + 1, 4, 4, 0, RdefTypeIndex::kFloat4, nullptr}, // kFloat4Array6 - {nullptr, DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kFloat, 1, - 4, 6, 0, RdefTypeIndex::kFloat4, nullptr}, + {nullptr, dxbc::RdefVariableClass::kVector, dxbc::RdefVariableType::kFloat, + 1, 4, 6, 0, RdefTypeIndex::kFloat4, nullptr}, // kFloat4ConstantArray - float constants - size written dynamically. 
- {nullptr, DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kFloat, 1, - 4, 0, 0, RdefTypeIndex::kFloat4, nullptr}, + {nullptr, dxbc::RdefVariableClass::kVector, dxbc::RdefVariableType::kFloat, + 1, 4, 0, 0, RdefTypeIndex::kFloat4, nullptr}, // kUint4Array2 - {nullptr, DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kUInt, 1, 4, - 2, 0, RdefTypeIndex::kUint4, nullptr}, + {nullptr, dxbc::RdefVariableClass::kVector, dxbc::RdefVariableType::kUInt, + 1, 4, 2, 0, RdefTypeIndex::kUint4, nullptr}, // kUint4Array8 - {nullptr, DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kUInt, 1, 4, - 8, 0, RdefTypeIndex::kUint4, nullptr}, + {nullptr, dxbc::RdefVariableClass::kVector, dxbc::RdefVariableType::kUInt, + 1, 4, 8, 0, RdefTypeIndex::kUint4, nullptr}, // kUint4Array48 - {nullptr, DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kUInt, 1, 4, - 48, 0, RdefTypeIndex::kUint4, nullptr}, + {nullptr, dxbc::RdefVariableClass::kVector, dxbc::RdefVariableType::kUInt, + 1, 4, 48, 0, RdefTypeIndex::kUint4, nullptr}, // kUint4DescriptorIndexArray - bindless descriptor indices - size written // dynamically. - {nullptr, DxbcRdefVariableClass::kVector, DxbcRdefVariableType::kUInt, 1, 4, - 0, 0, RdefTypeIndex::kUint4, nullptr}, + {nullptr, dxbc::RdefVariableClass::kVector, dxbc::RdefVariableType::kUInt, + 1, 4, 0, 0, RdefTypeIndex::kUint4, nullptr}, }; const DxbcShaderTranslator::SystemConstantRdef DxbcShaderTranslator:: @@ -2288,7 +2214,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { shader_object_.push_back(system_cbuffer_constant_offset); shader_object_.push_back(constant.size); shader_object_.push_back((system_constants_used_ & (1ull << i)) - ? kDxbcRdefVariableFlagUsed + ? 
dxbc::kRdefVariableFlagUsed : 0); shader_object_.push_back(types_offset + uint32_t(constant.type) * type_size); @@ -2312,7 +2238,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { shader_object_.push_back(0); shader_object_.push_back(constant_register_map.float_count * 4 * sizeof(float)); - shader_object_.push_back(kDxbcRdefVariableFlagUsed); + shader_object_.push_back(dxbc::kRdefVariableFlagUsed); shader_object_.push_back(types_offset + uint32_t(RdefTypeIndex::kFloat4ConstantArray) * type_size); @@ -2330,7 +2256,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { shader_object_.push_back(constant_name_offset_bool); shader_object_.push_back(0); shader_object_.push_back(2 * 4 * sizeof(uint32_t)); - shader_object_.push_back(kDxbcRdefVariableFlagUsed); + shader_object_.push_back(dxbc::kRdefVariableFlagUsed); shader_object_.push_back(types_offset + uint32_t(RdefTypeIndex::kUint4Array2) * type_size); shader_object_.push_back(0); @@ -2343,7 +2269,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { shader_object_.push_back(constant_name_offset_loop); shader_object_.push_back(2 * 4 * sizeof(uint32_t)); shader_object_.push_back(8 * 4 * sizeof(uint32_t)); - shader_object_.push_back(kDxbcRdefVariableFlagUsed); + shader_object_.push_back(dxbc::kRdefVariableFlagUsed); shader_object_.push_back(types_offset + uint32_t(RdefTypeIndex::kUint4Array8) * type_size); shader_object_.push_back(0); @@ -2360,7 +2286,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { shader_object_.push_back(constant_name_offset_fetch); shader_object_.push_back(0); shader_object_.push_back(32 * 6 * sizeof(uint32_t)); - shader_object_.push_back(kDxbcRdefVariableFlagUsed); + shader_object_.push_back(dxbc::kRdefVariableFlagUsed); shader_object_.push_back( types_offset + uint32_t(RdefTypeIndex::kUint4Array48) * type_size); shader_object_.push_back(0); @@ -2379,7 +2305,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { shader_object_.push_back(0); 
shader_object_.push_back( xe::align(GetBindlessResourceCount(), uint32_t(4)) * sizeof(uint32_t)); - shader_object_.push_back(kDxbcRdefVariableFlagUsed); + shader_object_.push_back(dxbc::kRdefVariableFlagUsed); shader_object_.push_back( types_offset + uint32_t(RdefTypeIndex::kUint4DescriptorIndexArray) * type_size); @@ -2430,7 +2356,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { shader_object_.push_back(constant_offset_system); shader_object_.push_back( uint32_t(xe::align(sizeof(SystemConstants), 4 * sizeof(uint32_t)))); - shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer)); + shader_object_.push_back(uint32_t(dxbc::RdefCbufferType::kCbuffer)); // No D3D_SHADER_CBUFFER_FLAGS. shader_object_.push_back(0); } else if (i == cbuffer_index_float_constants_) { @@ -2440,7 +2366,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { shader_object_.push_back(constant_offset_float); shader_object_.push_back(constant_register_map.float_count * 4 * sizeof(float)); - shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer)); + shader_object_.push_back(uint32_t(dxbc::RdefCbufferType::kCbuffer)); shader_object_.push_back(0); } else if (i == cbuffer_index_bool_loop_constants_) { shader_object_.push_back(cbuffer_name_offset_bool_loop); @@ -2448,14 +2374,14 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { shader_object_.push_back(2); shader_object_.push_back(constant_offset_bool_loop); shader_object_.push_back((2 + 8) * 4 * sizeof(uint32_t)); - shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer)); + shader_object_.push_back(uint32_t(dxbc::RdefCbufferType::kCbuffer)); shader_object_.push_back(0); } else if (i == cbuffer_index_fetch_constants_) { shader_object_.push_back(cbuffer_name_offset_fetch); shader_object_.push_back(1); shader_object_.push_back(constant_offset_fetch); shader_object_.push_back(32 * 6 * sizeof(uint32_t)); - shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer)); + 
shader_object_.push_back(uint32_t(dxbc::RdefCbufferType::kCbuffer)); shader_object_.push_back(0); } else if (i == cbuffer_index_descriptor_indices_) { assert_not_zero(GetBindlessResourceCount()); @@ -2465,7 +2391,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { shader_object_.push_back( xe::align(GetBindlessResourceCount(), uint32_t(4)) * sizeof(uint32_t)); - shader_object_.push_back(uint32_t(DxbcRdefCbufferType::kCbuffer)); + shader_object_.push_back(uint32_t(dxbc::RdefCbufferType::kCbuffer)); shader_object_.push_back(0); } else { assert_unhandled_case(i); @@ -2534,16 +2460,16 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { if (bindless_resources_used_) { // Bindless sampler heap. shader_object_.push_back(sampler_name_offset); - shader_object_.push_back(uint32_t(DxbcRdefInputType::kSampler)); - shader_object_.push_back(uint32_t(DxbcRdefReturnType::kVoid)); - shader_object_.push_back(uint32_t(DxbcRdefDimension::kUnknown)); + shader_object_.push_back(uint32_t(dxbc::RdefInputType::kSampler)); + shader_object_.push_back(uint32_t(dxbc::RdefReturnType::kVoid)); + shader_object_.push_back(uint32_t(dxbc::RdefDimension::kUnknown)); // Multisampling not applicable. shader_object_.push_back(0); // Registers s0:*. shader_object_.push_back(0); // Unbounded number of bindings. shader_object_.push_back(0); - // No DxbcRdefInputFlags. + // No dxbc::RdefInputFlags. shader_object_.push_back(0); // Register space 0. 
shader_object_.push_back(0); @@ -2555,16 +2481,16 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { for (uint32_t i = 0; i < uint32_t(sampler_bindings_.size()); ++i) { const SamplerBinding& sampler_binding = sampler_bindings_[i]; shader_object_.push_back(sampler_current_name_offset); - shader_object_.push_back(uint32_t(DxbcRdefInputType::kSampler)); - shader_object_.push_back(uint32_t(DxbcRdefReturnType::kVoid)); - shader_object_.push_back(uint32_t(DxbcRdefDimension::kUnknown)); + shader_object_.push_back(uint32_t(dxbc::RdefInputType::kSampler)); + shader_object_.push_back(uint32_t(dxbc::RdefReturnType::kVoid)); + shader_object_.push_back(uint32_t(dxbc::RdefDimension::kUnknown)); // Multisampling not applicable. shader_object_.push_back(0); // Register s[i]. shader_object_.push_back(i); // One binding. shader_object_.push_back(1); - // No DxbcRdefInputFlags. + // No dxbc::RdefInputFlags. shader_object_.push_back(0); // Register space 0. shader_object_.push_back(0); @@ -2581,20 +2507,20 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { if (i == srv_index_shared_memory_) { // Shared memory (when memexport isn't used in the pipeline). shader_object_.push_back(shared_memory_srv_name_offset); - shader_object_.push_back(uint32_t(DxbcRdefInputType::kByteAddress)); - shader_object_.push_back(uint32_t(DxbcRdefReturnType::kMixed)); - shader_object_.push_back(uint32_t(DxbcRdefDimension::kSRVBuffer)); + shader_object_.push_back(uint32_t(dxbc::RdefInputType::kByteAddress)); + shader_object_.push_back(uint32_t(dxbc::RdefReturnType::kMixed)); + shader_object_.push_back(uint32_t(dxbc::RdefDimension::kSRVBuffer)); // Multisampling not applicable. shader_object_.push_back(0); shader_object_.push_back(uint32_t(SRVMainRegister::kSharedMemory)); // One binding. shader_object_.push_back(1); - // No DxbcRdefInputFlags. + // No dxbc::RdefInputFlags. 
shader_object_.push_back(0); shader_object_.push_back(uint32_t(SRVSpace::kMain)); } else { uint32_t texture_name_offset; - DxbcRdefDimension texture_dimension; + dxbc::RdefDimension texture_dimension; uint32_t texture_register; uint32_t texture_register_count; SRVSpace texture_register_space; @@ -2602,16 +2528,16 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { // Bindless texture heap. if (i == srv_index_bindless_textures_3d_) { texture_name_offset = bindless_textures_3d_name_offset; - texture_dimension = DxbcRdefDimension::kSRVTexture3D; + texture_dimension = dxbc::RdefDimension::kSRVTexture3D; texture_register_space = SRVSpace::kBindlessTextures3D; } else if (i == srv_index_bindless_textures_cube_) { texture_name_offset = bindless_textures_cube_name_offset; - texture_dimension = DxbcRdefDimension::kSRVTextureCube; + texture_dimension = dxbc::RdefDimension::kSRVTextureCube; texture_register_space = SRVSpace::kBindlessTexturesCube; } else { assert_true(i == srv_index_bindless_textures_2d_); texture_name_offset = bindless_textures_2d_name_offset; - texture_dimension = DxbcRdefDimension::kSRVTexture2DArray; + texture_dimension = dxbc::RdefDimension::kSRVTexture2DArray; texture_register_space = SRVSpace::kBindlessTextures2DArray; } texture_register = 0; @@ -2626,15 +2552,15 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { texture_name_offset = texture_binding.bindful_srv_rdef_name_offset; switch (texture_binding.dimension) { case xenos::FetchOpDimension::k3DOrStacked: - texture_dimension = DxbcRdefDimension::kSRVTexture3D; + texture_dimension = dxbc::RdefDimension::kSRVTexture3D; break; case xenos::FetchOpDimension::kCube: - texture_dimension = DxbcRdefDimension::kSRVTextureCube; + texture_dimension = dxbc::RdefDimension::kSRVTextureCube; break; default: assert_true(texture_binding.dimension == xenos::FetchOpDimension::k2D); - texture_dimension = DxbcRdefDimension::kSRVTexture2DArray; + texture_dimension = dxbc::RdefDimension::kSRVTexture2DArray; 
} texture_register = uint32_t(SRVMainRegister::kBindfulTexturesStart) + texture_binding_index; @@ -2642,15 +2568,15 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { texture_register_space = SRVSpace::kMain; } shader_object_.push_back(texture_name_offset); - shader_object_.push_back(uint32_t(DxbcRdefInputType::kTexture)); - shader_object_.push_back(uint32_t(DxbcRdefReturnType::kFloat)); + shader_object_.push_back(uint32_t(dxbc::RdefInputType::kTexture)); + shader_object_.push_back(uint32_t(dxbc::RdefReturnType::kFloat)); shader_object_.push_back(uint32_t(texture_dimension)); // Not multisampled. shader_object_.push_back(0xFFFFFFFFu); shader_object_.push_back(texture_register); shader_object_.push_back(texture_register_count); // 4-component. - shader_object_.push_back(DxbcRdefInputFlagsComponents); + shader_object_.push_back(dxbc::kRdefInputFlagsComponents); shader_object_.push_back(uint32_t(texture_register_space)); } // SRV ID T[i]. @@ -2662,30 +2588,31 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { if (i == uav_index_shared_memory_) { // Shared memory (when memexport is used in the pipeline). shader_object_.push_back(shared_memory_uav_name_offset); - shader_object_.push_back(uint32_t(DxbcRdefInputType::kUAVRWByteAddress)); - shader_object_.push_back(uint32_t(DxbcRdefReturnType::kMixed)); - shader_object_.push_back(uint32_t(DxbcRdefDimension::kUAVBuffer)); + shader_object_.push_back( + uint32_t(dxbc::RdefInputType::kUAVRWByteAddress)); + shader_object_.push_back(uint32_t(dxbc::RdefReturnType::kMixed)); + shader_object_.push_back(uint32_t(dxbc::RdefDimension::kUAVBuffer)); // Multisampling not applicable. shader_object_.push_back(0); shader_object_.push_back(uint32_t(UAVRegister::kSharedMemory)); // One binding. shader_object_.push_back(1); - // No DxbcRdefInputFlags. + // No dxbc::RdefInputFlags. shader_object_.push_back(0); // Register space 0. shader_object_.push_back(0); } else if (i == uav_index_edram_) { // EDRAM R32_UINT buffer. 
shader_object_.push_back(edram_name_offset); - shader_object_.push_back(uint32_t(DxbcRdefInputType::kUAVRWTyped)); - shader_object_.push_back(uint32_t(DxbcRdefReturnType::kUInt)); - shader_object_.push_back(uint32_t(DxbcRdefDimension::kUAVBuffer)); + shader_object_.push_back(uint32_t(dxbc::RdefInputType::kUAVRWTyped)); + shader_object_.push_back(uint32_t(dxbc::RdefReturnType::kUInt)); + shader_object_.push_back(uint32_t(dxbc::RdefDimension::kUAVBuffer)); // Not multisampled. shader_object_.push_back(0xFFFFFFFFu); shader_object_.push_back(uint32_t(UAVRegister::kEdram)); // One binding. shader_object_.push_back(1); - // No DxbcRdefInputFlags. + // No dxbc::RdefInputFlags. shader_object_.push_back(0); // Register space 0. shader_object_.push_back(0); @@ -2717,16 +2644,16 @@ void DxbcShaderTranslator::WriteResourceDefinitions() { } else { assert_unhandled_case(i); } - shader_object_.push_back(uint32_t(DxbcRdefInputType::kCbuffer)); - shader_object_.push_back(uint32_t(DxbcRdefReturnType::kVoid)); - shader_object_.push_back(uint32_t(DxbcRdefDimension::kUnknown)); + shader_object_.push_back(uint32_t(dxbc::RdefInputType::kCbuffer)); + shader_object_.push_back(uint32_t(dxbc::RdefReturnType::kVoid)); + shader_object_.push_back(uint32_t(dxbc::RdefDimension::kUnknown)); // Multisampling not applicable. shader_object_.push_back(0); shader_object_.push_back(register_index); // One binding. shader_object_.push_back(1); // Like `cbuffer`, don't need `ConstantBuffer` properties. - shader_object_.push_back(DxbcRdefInputFlagUserPacked); + shader_object_.push_back(dxbc::kRdefInputFlagUserPacked); // Register space 0. shader_object_.push_back(0); // CBV ID CB[i]. @@ -2740,10 +2667,10 @@ void DxbcShaderTranslator::WriteInputSignature() { uint32_t chunk_position = uint32_t(shader_object_.size()); // Reserve space for the header. 
shader_object_.resize(shader_object_.size() + - sizeof(DxbcSignature) / sizeof(uint32_t)); + sizeof(dxbc::Signature) / sizeof(uint32_t)); uint32_t parameter_count = 0; constexpr size_t kParameterDwords = - sizeof(DxbcSignatureParameter) / sizeof(uint32_t); + sizeof(dxbc::SignatureParameter) / sizeof(uint32_t); if (IsDxbcVertexShader()) { // Unswapped vertex index (SV_VertexID). @@ -2751,11 +2678,10 @@ void DxbcShaderTranslator::WriteInputSignature() { shader_object_.resize(shader_object_.size() + kParameterDwords); ++parameter_count; { - DxbcSignatureParameter& vertex_id = - *reinterpret_cast(shader_object_.data() + - vertex_id_position); - vertex_id.system_value = DxbcName::kVertexID; - vertex_id.component_type = DxbcSignatureRegisterComponentType::kUInt32; + auto& vertex_id = *reinterpret_cast( + shader_object_.data() + vertex_id_position); + vertex_id.system_value = dxbc::Name::kVertexID; + vertex_id.component_type = dxbc::SignatureRegisterComponentType::kUInt32; vertex_id.register_index = uint32_t(InOutRegister::kVSInVertexIndex); vertex_id.mask = 0b0001; vertex_id.always_reads_mask = (register_count() >= 1) ? 
0b0001 : 0b0000; @@ -2765,9 +2691,8 @@ void DxbcShaderTranslator::WriteInputSignature() { uint32_t semantic_offset = uint32_t((shader_object_.size() - chunk_position) * sizeof(uint32_t)); { - DxbcSignatureParameter& vertex_id = - *reinterpret_cast(shader_object_.data() + - vertex_id_position); + auto& vertex_id = *reinterpret_cast( + shader_object_.data() + vertex_id_position); vertex_id.semantic_name = semantic_offset; } semantic_offset += AppendString(shader_object_, "SV_VertexID"); @@ -2782,11 +2707,10 @@ void DxbcShaderTranslator::WriteInputSignature() { shader_object_.resize(shader_object_.size() + kParameterDwords); ++parameter_count; { - DxbcSignatureParameter& control_point_index = - *reinterpret_cast( - shader_object_.data() + control_point_index_position); + auto& control_point_index = *reinterpret_cast( + shader_object_.data() + control_point_index_position); control_point_index.component_type = - DxbcSignatureRegisterComponentType::kFloat32; + dxbc::SignatureRegisterComponentType::kFloat32; control_point_index.register_index = uint32_t(InOutRegister::kDSInControlPointIndex); control_point_index.mask = 0b0001; @@ -2798,9 +2722,8 @@ void DxbcShaderTranslator::WriteInputSignature() { uint32_t semantic_offset = uint32_t((shader_object_.size() - chunk_position) * sizeof(uint32_t)); { - DxbcSignatureParameter& control_point_index = - *reinterpret_cast( - shader_object_.data() + control_point_index_position); + auto& control_point_index = *reinterpret_cast( + shader_object_.data() + control_point_index_position); control_point_index.semantic_name = semantic_offset; } semantic_offset += AppendString(shader_object_, "XEVERTEXID"); @@ -2815,14 +2738,13 @@ void DxbcShaderTranslator::WriteInputSignature() { xenos::kMaxInterpolators * kParameterDwords); parameter_count += xenos::kMaxInterpolators; { - DxbcSignatureParameter* interpolators = - reinterpret_cast(shader_object_.data() + - interpolator_position); + auto interpolators = reinterpret_cast( + 
shader_object_.data() + interpolator_position); for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { - DxbcSignatureParameter& interpolator = interpolators[i]; + dxbc::SignatureParameter& interpolator = interpolators[i]; interpolator.semantic_index = i; interpolator.component_type = - DxbcSignatureRegisterComponentType::kFloat32; + dxbc::SignatureRegisterComponentType::kFloat32; interpolator.register_index = uint32_t(InOutRegister::kPSInInterpolators) + i; interpolator.mask = 0b1111; @@ -2840,12 +2762,11 @@ void DxbcShaderTranslator::WriteInputSignature() { shader_object_.resize(shader_object_.size() + kParameterDwords); ++parameter_count; { - DxbcSignatureParameter& point_parameters = - *reinterpret_cast(shader_object_.data() + - point_parameters_position); + auto& point_parameters = *reinterpret_cast( + shader_object_.data() + point_parameters_position); point_parameters.semantic_index = kPointParametersTexCoord; point_parameters.component_type = - DxbcSignatureRegisterComponentType::kFloat32; + dxbc::SignatureRegisterComponentType::kFloat32; point_parameters.register_index = uint32_t(InOutRegister::kPSInPointParameters); point_parameters.mask = 0b0111; @@ -2857,12 +2778,11 @@ void DxbcShaderTranslator::WriteInputSignature() { shader_object_.resize(shader_object_.size() + kParameterDwords); ++parameter_count; { - DxbcSignatureParameter& clip_space_zw = - *reinterpret_cast(shader_object_.data() + - clip_space_zw_position); + auto& clip_space_zw = *reinterpret_cast( + shader_object_.data() + clip_space_zw_position); clip_space_zw.semantic_index = kClipSpaceZWTexCoord; clip_space_zw.component_type = - DxbcSignatureRegisterComponentType::kFloat32; + dxbc::SignatureRegisterComponentType::kFloat32; clip_space_zw.register_index = uint32_t(InOutRegister::kPSInClipSpaceZW); clip_space_zw.mask = 0b0011; clip_space_zw.always_reads_mask = edram_rov_used_ ? 
0b0011 : 0b0000; @@ -2875,11 +2795,10 @@ void DxbcShaderTranslator::WriteInputSignature() { shader_object_.resize(shader_object_.size() + kParameterDwords); ++parameter_count; { - DxbcSignatureParameter& position = - *reinterpret_cast(shader_object_.data() + - position_position); - position.system_value = DxbcName::kPosition; - position.component_type = DxbcSignatureRegisterComponentType::kFloat32; + auto& position = *reinterpret_cast( + shader_object_.data() + position_position); + position.system_value = dxbc::Name::kPosition; + position.component_type = dxbc::SignatureRegisterComponentType::kFloat32; position.register_index = uint32_t(InOutRegister::kPSInPosition); position.mask = 0b1111; position.always_reads_mask = in_position_used_; @@ -2890,12 +2809,11 @@ void DxbcShaderTranslator::WriteInputSignature() { shader_object_.resize(shader_object_.size() + kParameterDwords); ++parameter_count; { - DxbcSignatureParameter& is_front_face = - *reinterpret_cast(shader_object_.data() + - is_front_face_position); - is_front_face.system_value = DxbcName::kIsFrontFace; + auto& is_front_face = *reinterpret_cast( + shader_object_.data() + is_front_face_position); + is_front_face.system_value = dxbc::Name::kIsFrontFace; is_front_face.component_type = - DxbcSignatureRegisterComponentType::kUInt32; + dxbc::SignatureRegisterComponentType::kUInt32; is_front_face.register_index = uint32_t(InOutRegister::kPSInFrontFace); is_front_face.mask = 0b0001; is_front_face.always_reads_mask = in_front_face_used_ ? 
0b0001 : 0b0000; @@ -2905,33 +2823,28 @@ void DxbcShaderTranslator::WriteInputSignature() { uint32_t semantic_offset = uint32_t((shader_object_.size() - chunk_position) * sizeof(uint32_t)); { - DxbcSignatureParameter* interpolators = - reinterpret_cast(shader_object_.data() + - interpolator_position); + auto interpolators = reinterpret_cast( + shader_object_.data() + interpolator_position); for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { interpolators[i].semantic_name = semantic_offset; } - DxbcSignatureParameter& point_parameters = - *reinterpret_cast(shader_object_.data() + - point_parameters_position); + auto& point_parameters = *reinterpret_cast( + shader_object_.data() + point_parameters_position); point_parameters.semantic_name = semantic_offset; - DxbcSignatureParameter& clip_space_zw = - *reinterpret_cast(shader_object_.data() + - clip_space_zw_position); + auto& clip_space_zw = *reinterpret_cast( + shader_object_.data() + clip_space_zw_position); clip_space_zw.semantic_name = semantic_offset; } semantic_offset += AppendString(shader_object_, "TEXCOORD"); { - DxbcSignatureParameter& position = - *reinterpret_cast(shader_object_.data() + - position_position); + auto& position = *reinterpret_cast( + shader_object_.data() + position_position); position.semantic_name = semantic_offset; } semantic_offset += AppendString(shader_object_, "SV_Position"); { - DxbcSignatureParameter& is_front_face = - *reinterpret_cast(shader_object_.data() + - is_front_face_position); + auto& is_front_face = *reinterpret_cast( + shader_object_.data() + is_front_face_position); is_front_face.semantic_name = semantic_offset; } semantic_offset += AppendString(shader_object_, "SV_IsFrontFace"); @@ -2939,10 +2852,10 @@ void DxbcShaderTranslator::WriteInputSignature() { // Header. 
{ - DxbcSignature& header = *reinterpret_cast( - shader_object_.data() + chunk_position); + auto& header = *reinterpret_cast(shader_object_.data() + + chunk_position); header.parameter_count = parameter_count; - header.parameter_info_offset = sizeof(DxbcSignature); + header.parameter_info_offset = sizeof(dxbc::Signature); } } @@ -2953,35 +2866,35 @@ void DxbcShaderTranslator::WritePatchConstantSignature() { uint32_t chunk_position = uint32_t(shader_object_.size()); // Reserve space for the header. shader_object_.resize(shader_object_.size() + - sizeof(DxbcSignature) / sizeof(uint32_t)); + sizeof(dxbc::Signature) / sizeof(uint32_t)); uint32_t parameter_count = 0; constexpr size_t kParameterDwords = - sizeof(DxbcSignatureParameter) / sizeof(uint32_t); + sizeof(dxbc::SignatureParameter) / sizeof(uint32_t); // FXC always compiles with SV_TessFactor and SV_InsideTessFactor input, so // this is required even if not referenced (HS and DS have very strict // linkage, by the way, everything that HS outputs must be listed in DS // inputs). 
uint32_t tess_factor_edge_count = 0; - DxbcName tess_factor_edge_system_value = DxbcName::kUndefined; + dxbc::Name tess_factor_edge_system_value = dxbc::Name::kUndefined; uint32_t tess_factor_inside_count = 0; - DxbcName tess_factor_inside_system_value = DxbcName::kUndefined; + dxbc::Name tess_factor_inside_system_value = dxbc::Name::kUndefined; Shader::HostVertexShaderType host_vertex_shader_type = GetDxbcShaderModification().host_vertex_shader_type; switch (host_vertex_shader_type) { case Shader::HostVertexShaderType::kTriangleDomainCPIndexed: case Shader::HostVertexShaderType::kTriangleDomainPatchIndexed: tess_factor_edge_count = 3; - tess_factor_edge_system_value = DxbcName::kFinalTriEdgeTessFactor; + tess_factor_edge_system_value = dxbc::Name::kFinalTriEdgeTessFactor; tess_factor_inside_count = 1; - tess_factor_inside_system_value = DxbcName::kFinalTriInsideTessFactor; + tess_factor_inside_system_value = dxbc::Name::kFinalTriInsideTessFactor; break; case Shader::HostVertexShaderType::kQuadDomainCPIndexed: case Shader::HostVertexShaderType::kQuadDomainPatchIndexed: tess_factor_edge_count = 4; - tess_factor_edge_system_value = DxbcName::kFinalQuadEdgeTessFactor; + tess_factor_edge_system_value = dxbc::Name::kFinalQuadEdgeTessFactor; tess_factor_inside_count = 2; - tess_factor_inside_system_value = DxbcName::kFinalQuadInsideTessFactor; + tess_factor_inside_system_value = dxbc::Name::kFinalQuadInsideTessFactor; break; default: // TODO(Triang3l): Support line patches. 
@@ -2996,15 +2909,14 @@ void DxbcShaderTranslator::WritePatchConstantSignature() { tess_factor_edge_count * kParameterDwords); parameter_count += tess_factor_edge_count; { - DxbcSignatureParameter* tess_factors_edge = - reinterpret_cast(shader_object_.data() + - tess_factor_edge_position); + auto tess_factors_edge = reinterpret_cast( + shader_object_.data() + tess_factor_edge_position); for (uint32_t i = 0; i < tess_factor_edge_count; ++i) { - DxbcSignatureParameter& tess_factor_edge = tess_factors_edge[i]; + dxbc::SignatureParameter& tess_factor_edge = tess_factors_edge[i]; tess_factor_edge.semantic_index = i; tess_factor_edge.system_value = tess_factor_edge_system_value; tess_factor_edge.component_type = - DxbcSignatureRegisterComponentType::kFloat32; + dxbc::SignatureRegisterComponentType::kFloat32; // Not using any of these, just assigning consecutive registers. tess_factor_edge.register_index = i; tess_factor_edge.mask = 0b0001; @@ -3017,15 +2929,14 @@ void DxbcShaderTranslator::WritePatchConstantSignature() { tess_factor_inside_count * kParameterDwords); parameter_count += tess_factor_inside_count; { - DxbcSignatureParameter* tess_factors_inside = - reinterpret_cast(shader_object_.data() + - tess_factor_inside_position); + auto tess_factors_inside = reinterpret_cast( + shader_object_.data() + tess_factor_inside_position); for (uint32_t i = 0; i < tess_factor_inside_count; ++i) { - DxbcSignatureParameter& tess_factor_inside = tess_factors_inside[i]; + dxbc::SignatureParameter& tess_factor_inside = tess_factors_inside[i]; tess_factor_inside.semantic_index = i; tess_factor_inside.system_value = tess_factor_inside_system_value; tess_factor_inside.component_type = - DxbcSignatureRegisterComponentType::kFloat32; + dxbc::SignatureRegisterComponentType::kFloat32; // Not using any of these, just assigning consecutive registers. 
tess_factor_inside.register_index = tess_factor_edge_count + i; tess_factor_inside.mask = 0b0001; @@ -3036,18 +2947,16 @@ void DxbcShaderTranslator::WritePatchConstantSignature() { uint32_t semantic_offset = uint32_t((shader_object_.size() - chunk_position) * sizeof(uint32_t)); { - DxbcSignatureParameter* tess_factors_edge = - reinterpret_cast(shader_object_.data() + - tess_factor_edge_position); + auto tess_factors_edge = reinterpret_cast( + shader_object_.data() + tess_factor_edge_position); for (uint32_t i = 0; i < tess_factor_edge_count; ++i) { tess_factors_edge[i].semantic_name = semantic_offset; } } semantic_offset += AppendString(shader_object_, "SV_TessFactor"); { - DxbcSignatureParameter* tess_factors_inside = - reinterpret_cast(shader_object_.data() + - tess_factor_inside_position); + auto tess_factors_inside = reinterpret_cast( + shader_object_.data() + tess_factor_inside_position); for (uint32_t i = 0; i < tess_factor_inside_count; ++i) { tess_factors_inside[i].semantic_name = semantic_offset; } @@ -3056,10 +2965,10 @@ void DxbcShaderTranslator::WritePatchConstantSignature() { // Header. { - DxbcSignature& header = *reinterpret_cast( - shader_object_.data() + chunk_position); + auto& header = *reinterpret_cast(shader_object_.data() + + chunk_position); header.parameter_count = parameter_count; - header.parameter_info_offset = sizeof(DxbcSignature); + header.parameter_info_offset = sizeof(dxbc::Signature); } } @@ -3069,10 +2978,10 @@ void DxbcShaderTranslator::WriteOutputSignature() { uint32_t chunk_position = uint32_t(shader_object_.size()); // Reserve space for the header. shader_object_.resize(shader_object_.size() + - sizeof(DxbcSignature) / sizeof(uint32_t)); + sizeof(dxbc::Signature) / sizeof(uint32_t)); uint32_t parameter_count = 0; constexpr size_t kParameterDwords = - sizeof(DxbcSignatureParameter) / sizeof(uint32_t); + sizeof(dxbc::SignatureParameter) / sizeof(uint32_t); if (is_vertex_shader()) { // Intepolators (TEXCOORD#). 
@@ -3081,14 +2990,13 @@ void DxbcShaderTranslator::WriteOutputSignature() { xenos::kMaxInterpolators * kParameterDwords); parameter_count += xenos::kMaxInterpolators; { - DxbcSignatureParameter* interpolators = - reinterpret_cast(shader_object_.data() + - interpolator_position); + auto interpolators = reinterpret_cast( + shader_object_.data() + interpolator_position); for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { - DxbcSignatureParameter& interpolator = interpolators[i]; + dxbc::SignatureParameter& interpolator = interpolators[i]; interpolator.semantic_index = i; interpolator.component_type = - DxbcSignatureRegisterComponentType::kFloat32; + dxbc::SignatureRegisterComponentType::kFloat32; interpolator.register_index = uint32_t(InOutRegister::kVSDSOutInterpolators) + i; interpolator.mask = 0b1111; @@ -3101,12 +3009,11 @@ void DxbcShaderTranslator::WriteOutputSignature() { shader_object_.resize(shader_object_.size() + kParameterDwords); ++parameter_count; { - DxbcSignatureParameter& point_parameters = - *reinterpret_cast(shader_object_.data() + - point_parameters_position); + auto& point_parameters = *reinterpret_cast( + shader_object_.data() + point_parameters_position); point_parameters.semantic_index = kPointParametersTexCoord; point_parameters.component_type = - DxbcSignatureRegisterComponentType::kFloat32; + dxbc::SignatureRegisterComponentType::kFloat32; point_parameters.register_index = uint32_t(InOutRegister::kVSDSOutPointParameters); point_parameters.mask = 0b0111; @@ -3118,12 +3025,11 @@ void DxbcShaderTranslator::WriteOutputSignature() { shader_object_.resize(shader_object_.size() + kParameterDwords); ++parameter_count; { - DxbcSignatureParameter& clip_space_zw = - *reinterpret_cast(shader_object_.data() + - clip_space_zw_position); + auto& clip_space_zw = *reinterpret_cast( + shader_object_.data() + clip_space_zw_position); clip_space_zw.semantic_index = kClipSpaceZWTexCoord; clip_space_zw.component_type = - 
DxbcSignatureRegisterComponentType::kFloat32; + dxbc::SignatureRegisterComponentType::kFloat32; clip_space_zw.register_index = uint32_t(InOutRegister::kVSDSOutClipSpaceZW); clip_space_zw.mask = 0b0011; @@ -3135,11 +3041,10 @@ void DxbcShaderTranslator::WriteOutputSignature() { shader_object_.resize(shader_object_.size() + kParameterDwords); ++parameter_count; { - DxbcSignatureParameter& position = - *reinterpret_cast(shader_object_.data() + - position_position); - position.system_value = DxbcName::kPosition; - position.component_type = DxbcSignatureRegisterComponentType::kFloat32; + auto& position = *reinterpret_cast( + shader_object_.data() + position_position); + position.system_value = dxbc::Name::kPosition; + position.component_type = dxbc::SignatureRegisterComponentType::kFloat32; position.register_index = uint32_t(InOutRegister::kVSDSOutPosition); position.mask = 0b1111; } @@ -3149,12 +3054,11 @@ void DxbcShaderTranslator::WriteOutputSignature() { shader_object_.resize(shader_object_.size() + kParameterDwords); ++parameter_count; { - DxbcSignatureParameter& clip_distance_0123 = - *reinterpret_cast( - shader_object_.data() + clip_distance_0123_position); - clip_distance_0123.system_value = DxbcName::kClipDistance; + auto& clip_distance_0123 = *reinterpret_cast( + shader_object_.data() + clip_distance_0123_position); + clip_distance_0123.system_value = dxbc::Name::kClipDistance; clip_distance_0123.component_type = - DxbcSignatureRegisterComponentType::kFloat32; + dxbc::SignatureRegisterComponentType::kFloat32; clip_distance_0123.register_index = uint32_t(InOutRegister::kVSDSOutClipDistance0123); clip_distance_0123.mask = 0b1111; @@ -3163,13 +3067,12 @@ void DxbcShaderTranslator::WriteOutputSignature() { shader_object_.resize(shader_object_.size() + kParameterDwords); ++parameter_count; { - DxbcSignatureParameter& clip_distance_45 = - *reinterpret_cast(shader_object_.data() + - clip_distance_45_position); + auto& clip_distance_45 = *reinterpret_cast( + 
shader_object_.data() + clip_distance_45_position); clip_distance_45.semantic_index = 1; - clip_distance_45.system_value = DxbcName::kClipDistance; + clip_distance_45.system_value = dxbc::Name::kClipDistance; clip_distance_45.component_type = - DxbcSignatureRegisterComponentType::kFloat32; + dxbc::SignatureRegisterComponentType::kFloat32; clip_distance_45.register_index = uint32_t(InOutRegister::kVSDSOutClipDistance45AndCullDistance); clip_distance_45.mask = 0b0011; @@ -3179,12 +3082,11 @@ void DxbcShaderTranslator::WriteOutputSignature() { shader_object_.resize(shader_object_.size() + kParameterDwords); ++parameter_count; { - DxbcSignatureParameter& cull_distance = - *reinterpret_cast(shader_object_.data() + - cull_distance_position); - cull_distance.system_value = DxbcName::kCullDistance; + auto& cull_distance = *reinterpret_cast( + shader_object_.data() + cull_distance_position); + cull_distance.system_value = dxbc::Name::kCullDistance; cull_distance.component_type = - DxbcSignatureRegisterComponentType::kFloat32; + dxbc::SignatureRegisterComponentType::kFloat32; cull_distance.register_index = uint32_t(InOutRegister::kVSDSOutClipDistance45AndCullDistance); cull_distance.mask = 0b0100; @@ -3195,44 +3097,37 @@ void DxbcShaderTranslator::WriteOutputSignature() { uint32_t semantic_offset = uint32_t((shader_object_.size() - chunk_position) * sizeof(uint32_t)); { - DxbcSignatureParameter* interpolators = - reinterpret_cast(shader_object_.data() + - interpolator_position); + auto interpolators = reinterpret_cast( + shader_object_.data() + interpolator_position); for (uint32_t i = 0; i < xenos::kMaxInterpolators; ++i) { interpolators[i].semantic_name = semantic_offset; } - DxbcSignatureParameter& point_parameters = - *reinterpret_cast(shader_object_.data() + - point_parameters_position); + auto& point_parameters = *reinterpret_cast( + shader_object_.data() + point_parameters_position); point_parameters.semantic_name = semantic_offset; - DxbcSignatureParameter& 
clip_space_zw = - *reinterpret_cast(shader_object_.data() + - clip_space_zw_position); + auto& clip_space_zw = *reinterpret_cast( + shader_object_.data() + clip_space_zw_position); clip_space_zw.semantic_name = semantic_offset; } semantic_offset += AppendString(shader_object_, "TEXCOORD"); { - DxbcSignatureParameter& position = - *reinterpret_cast(shader_object_.data() + - position_position); + auto& position = *reinterpret_cast( + shader_object_.data() + position_position); position.semantic_name = semantic_offset; } semantic_offset += AppendString(shader_object_, "SV_Position"); { - DxbcSignatureParameter& clip_distance_0123 = - *reinterpret_cast( - shader_object_.data() + clip_distance_0123_position); + auto& clip_distance_0123 = *reinterpret_cast( + shader_object_.data() + clip_distance_0123_position); clip_distance_0123.semantic_name = semantic_offset; - DxbcSignatureParameter& clip_distance_45 = - *reinterpret_cast(shader_object_.data() + - clip_distance_45_position); + auto& clip_distance_45 = *reinterpret_cast( + shader_object_.data() + clip_distance_45_position); clip_distance_45.semantic_name = semantic_offset; } semantic_offset += AppendString(shader_object_, "SV_ClipDistance"); { - DxbcSignatureParameter& cull_distance = - *reinterpret_cast(shader_object_.data() + - cull_distance_position); + auto& cull_distance = *reinterpret_cast( + shader_object_.data() + cull_distance_position); cull_distance.semantic_name = semantic_offset; } semantic_offset += AppendString(shader_object_, "SV_CullDistance"); @@ -3244,13 +3139,13 @@ void DxbcShaderTranslator::WriteOutputSignature() { target_position = shader_object_.size(); shader_object_.resize(shader_object_.size() + 4 * kParameterDwords); parameter_count += 4; - DxbcSignatureParameter* targets = - reinterpret_cast(shader_object_.data() + - target_position); + auto targets = reinterpret_cast( + shader_object_.data() + target_position); for (uint32_t i = 0; i < 4; ++i) { - DxbcSignatureParameter& target = 
targets[i]; + dxbc::SignatureParameter& target = targets[i]; target.semantic_index = i; - target.component_type = DxbcSignatureRegisterComponentType::kFloat32; + target.component_type = + dxbc::SignatureRegisterComponentType::kFloat32; target.register_index = i; target.mask = 0b1111; // All are always written because X360 RTs are dynamically remapped to @@ -3266,10 +3161,9 @@ void DxbcShaderTranslator::WriteOutputSignature() { depth_position = shader_object_.size(); shader_object_.resize(shader_object_.size() + kParameterDwords); ++parameter_count; - DxbcSignatureParameter& depth = - *reinterpret_cast(shader_object_.data() + - depth_position); - depth.component_type = DxbcSignatureRegisterComponentType::kFloat32; + auto& depth = *reinterpret_cast( + shader_object_.data() + depth_position); + depth.component_type = dxbc::SignatureRegisterComponentType::kFloat32; depth.register_index = UINT32_MAX; depth.mask = 0b0001; depth.never_writes_mask = 0b1110; @@ -3280,9 +3174,8 @@ void DxbcShaderTranslator::WriteOutputSignature() { uint32_t((shader_object_.size() - chunk_position) * sizeof(uint32_t)); if (target_position != SIZE_MAX) { { - DxbcSignatureParameter* targets = - reinterpret_cast(shader_object_.data() + - target_position); + auto targets = reinterpret_cast( + shader_object_.data() + target_position); for (uint32_t i = 0; i < 4; ++i) { targets[i].semantic_name = semantic_offset; } @@ -3291,9 +3184,8 @@ void DxbcShaderTranslator::WriteOutputSignature() { } if (depth_position != SIZE_MAX) { { - DxbcSignatureParameter& depth = - *reinterpret_cast(shader_object_.data() + - depth_position); + auto& depth = *reinterpret_cast( + shader_object_.data() + depth_position); depth.semantic_name = semantic_offset; } const char* depth_semantic_name; @@ -3311,10 +3203,10 @@ void DxbcShaderTranslator::WriteOutputSignature() { // Header. 
{ - DxbcSignature& header = *reinterpret_cast( - shader_object_.data() + chunk_position); + auto& header = *reinterpret_cast(shader_object_.data() + + chunk_position); header.parameter_count = parameter_count; - header.parameter_info_offset = sizeof(DxbcSignature); + header.parameter_info_offset = sizeof(dxbc::Signature); } } @@ -3353,17 +3245,17 @@ void DxbcShaderTranslator::WriteShaderCode() { // Not using control point data since Xenos only has a vertex shader acting // as both vertex shader and domain shader. stat_.c_control_points = 3; - stat_.tessellator_domain = DxbcTessellatorDomain::kTriangle; + stat_.tessellator_domain = dxbc::TessellatorDomain::kTriangle; switch (shader_modification.host_vertex_shader_type) { case Shader::HostVertexShaderType::kTriangleDomainCPIndexed: case Shader::HostVertexShaderType::kTriangleDomainPatchIndexed: stat_.c_control_points = 3; - stat_.tessellator_domain = DxbcTessellatorDomain::kTriangle; + stat_.tessellator_domain = dxbc::TessellatorDomain::kTriangle; break; case Shader::HostVertexShaderType::kQuadDomainCPIndexed: case Shader::HostVertexShaderType::kQuadDomainPatchIndexed: stat_.c_control_points = 4; - stat_.tessellator_domain = DxbcTessellatorDomain::kQuad; + stat_.tessellator_domain = dxbc::TessellatorDomain::kQuad; break; default: // TODO(Triang3l): Support line patches. 
diff --git a/src/xenia/gpu/dxbc_shader_translator.h b/src/xenia/gpu/dxbc_shader_translator.h index 808b311fa..11fc98fc5 100644 --- a/src/xenia/gpu/dxbc_shader_translator.h +++ b/src/xenia/gpu/dxbc_shader_translator.h @@ -10,14 +10,13 @@ #ifndef XENIA_GPU_DXBC_SHADER_TRANSLATOR_H_ #define XENIA_GPU_DXBC_SHADER_TRANSLATOR_H_ -#include #include #include #include -#include "xenia/base/assert.h" #include "xenia/base/math.h" #include "xenia/base/string_buffer.h" +#include "xenia/gpu/dxbc.h" #include "xenia/gpu/shader_translator.h" namespace xe { @@ -41,61 +40,7 @@ namespace gpu { // equivalent code in HLSL and running it through FXC, try with /Od, try with // full optimization, but if you see that FXC follows a different pattern than // what you are expecting, do what FXC does!!! -// Most important limitations: -// - Absolute, negate and saturate are only supported by instructions that -// explicitly support them. See MSDN pages of the specific instructions you -// want to use with modifiers: -// https://docs.microsoft.com/en-us/windows/win32/direct3dhlsl/dx9-graphics-reference-asm -// - Component selection in the general case (ALU instructions - things like -// resource access and flow control mostly explicitly need a specific -// component selection mode defined in the specification of the instruction): -// - 0-component - for operand types with no data (samplers, labels). -// - 1-component - for scalar destination operand types, and for scalar source -// operand types when the destination vector has 1 component masked -// (including scalar immediates). -// - Mask - for vector destination operand types. -// - Swizzle - for both vector and scalar (replicated in this case) source -// operand types, when the destination vector has 2 or more components -// masked. Immediates in this case have XYZW swizzle. -// - Select 1 - for vector source operand types, when the destination has 1 -// component masked or is of a scalar type. 
-// - Input operands (v#) can be used only as sources, output operands (o#) can -// be used only as destinations. -// - Indexable temporaries (x#) can only be used as a destination or a source -// operand (but not both at once) of a mov instruction - a load/store pattern -// here. Also, movs involving x# are counted as ArrayInstructions rather than -// MovInstructions in STAT. The other operand can be anything that most other -// instructions accept, but it still must be a mov with x# on one side. -// TODO(Triang3l): Fix all places in the translator currently violating these -// rules. -// !NOTE!: The D3D11.3 Functional Specification on Microsoft's GitHub profile, -// as of March 27th, 2020, is NOT a reliable reference, even though it contains -// many DXBC details! There are multiple places where it clearly contradicts -// what FXC does, even when targeting old shader models like 4_0: -// - The limit of 1 immediate or constant buffer source operand per instruction -// is totally ignored by FXC - in simple tests, it can emit an instruction -// with two constant buffer sources, or one constant buffer source and one -// immediate, or a multiply-add with two immediate operands. -// - It says x# can be used wherever r# can be used - in synthetic tests, FXC -// always accesses x# in a load/store way via mov. -// - It says x# can be used for indexing, including nested indexing of x# (one -// level deep), however, FXC moves the inner index operand to r# first in this -// case. -// -// For bytecode structure, see d3d12TokenizedProgramFormat.hpp from the Windows -// Driver Kit, and DXILConv from DirectX Shader Compiler. -// -// Avoid using uninitialized register components - such as registers written to -// in "if" and not in "else", but then used outside unconditionally or with a -// different condition (or even with the same condition, but in a different "if" -// block). 
This will cause crashes on AMD drivers, and will also limit -// optimization possibilities as this may result in false dependencies. Always -// mov l(0, 0, 0, 0) to such components before potential branching - -// PushSystemTemp accepts a zero mask for this purpose. -// -// Clamping of non-negative values must be done first to the lower bound (using -// max), then to the upper bound (using min), to match the saturate modifier -// behavior, which results in 0 for NaN. +// SEE THE NOTES DXBC.H BEFORE WRITING ANYTHING RELATED TO DXBC! class DxbcShaderTranslator : public ShaderTranslator { public: DxbcShaderTranslator(uint32_t vendor_id, bool bindless_resources_used, @@ -505,1455 +450,6 @@ class DxbcShaderTranslator : public ShaderTranslator { void ProcessAluInstruction(const ParsedAluInstruction& instr) override; private: - // D3D_SHADER_VARIABLE_CLASS - enum class DxbcRdefVariableClass : uint32_t { - kScalar, - kVector, - kMatrixRows, - kMatrixColumns, - kObject, - kStruct, - kInterfaceClass, - kInterfacePointer, - }; - - // D3D_SHADER_VARIABLE_TYPE subset - enum class DxbcRdefVariableType : uint32_t { - kInt = 2, - kFloat = 3, - kUInt = 19, - }; - - // D3D_SHADER_VARIABLE_FLAGS - enum DxbcRdefVariableFlags : uint32_t { - kDxbcRdefVariableFlagUserPacked = 1 << 0, - kDxbcRdefVariableFlagUsed = 1 << 1, - kDxbcRdefVariableFlagInterfacePointer = 1 << 2, - kDxbcRdefVariableFlagInterfaceParameter = 1 << 3, - }; - - // D3D_CBUFFER_TYPE - enum class DxbcRdefCbufferType : uint32_t { - kCbuffer, - kTbuffer, - kInterfacePointers, - kResourceBindInfo, - }; - - // D3D_SHADER_INPUT_TYPE - enum class DxbcRdefInputType : uint32_t { - kCbuffer, - kTbuffer, - kTexture, - kSampler, - kUAVRWTyped, - kStructured, - kUAVRWStructured, - kByteAddress, - kUAVRWByteAddress, - kUAVAppendStructured, - kUAVConsumeStructured, - kUAVRWStructuredWithCounter, - }; - - // D3D_RESOURCE_RETURN_TYPE - enum class DxbcRdefReturnType : uint32_t { - kVoid, - kUNorm, - kSNorm, - kSInt, - kUInt, - kFloat, - 
kMixed, - kDouble, - kContinued, - }; - - // D3D12_SRV_DIMENSION/D3D12_UAV_DIMENSION - enum class DxbcRdefDimension : uint32_t { - kUnknown = 0, - - kSRVBuffer = 1, - kSRVTexture1D, - kSRVTexture1DArray, - kSRVTexture2D, - kSRVTexture2DArray, - kSRVTexture2DMS, - kSRVTexture2DMSArray, - kSRVTexture3D, - kSRVTextureCube, - kSRVTextureCubeArray, - - kUAVBuffer = 1, - kUAVTexture1D, - kUAVTexture1DArray, - kUAVTexture2D, - kUAVTexture2DArray, - kUAVTexture3D, - }; - - // D3D_SHADER_INPUT_FLAGS - enum DxbcRdefInputFlags : uint32_t { - // For constant buffers, UserPacked is set if it was declared as `cbuffer` - // rather than `ConstantBuffer` (not dynamically indexable; though - // non-uniform dynamic indexing of constant buffers also didn't work on AMD - // drivers in 2018). - DxbcRdefInputFlagUserPacked = 1 << 0, - DxbcRdefInputFlagComparisonSampler = 1 << 1, - DxbcRdefInputFlagComponent0 = 1 << 2, - DxbcRdefInputFlagComponent1 = 1 << 3, - DxbcRdefInputFlagsComponents = - DxbcRdefInputFlagComponent0 | DxbcRdefInputFlagComponent1, - DxbcRdefInputFlagUnused = 1 << 4, - }; - - // D3D_NAME subset - enum class DxbcName : uint32_t { - kUndefined = 0, - kPosition = 1, - kClipDistance = 2, - kCullDistance = 3, - kVertexID = 6, - kIsFrontFace = 9, - kFinalQuadEdgeTessFactor = 11, - kFinalQuadInsideTessFactor = 12, - kFinalTriEdgeTessFactor = 13, - kFinalTriInsideTessFactor = 14, - }; - - // D3D_REGISTER_COMPONENT_TYPE - enum class DxbcSignatureRegisterComponentType : uint32_t { - kUnknown, - kUInt32, - kSInt32, - kFloat32, - }; - - // D3D10_INTERNALSHADER_PARAMETER - struct DxbcSignatureParameter { - // Offset in bytes from the start of the chunk. - uint32_t semantic_name; - uint32_t semantic_index; - // kUndefined for pixel shader outputs - inferred from the component type - // and what is used in the shader. - DxbcName system_value; - DxbcSignatureRegisterComponentType component_type; - // o#/v# when there's linkage, SV_Target index or -1 in pixel shader output. 
- uint32_t register_index; - uint8_t mask; - union { - // For an output signature. - uint8_t never_writes_mask; - // For an input signature. - uint8_t always_reads_mask; - }; - }; - static_assert(alignof(DxbcSignatureParameter) <= sizeof(uint32_t)); - - // D3D10_INTERNALSHADER_SIGNATURE - struct DxbcSignature { - uint32_t parameter_count; - // Offset in bytes from the start of the chunk. - uint32_t parameter_info_offset; - }; - static_assert(alignof(DxbcSignature) <= sizeof(uint32_t)); - - // D3D11_SB_TESSELLATOR_DOMAIN - enum class DxbcTessellatorDomain : uint32_t { - kUndefined, - kIsoline, - kTriangle, - kQuad, - }; - - // D3D10_SB_OPERAND_TYPE subset - enum class DxbcOperandType : uint32_t { - kTemp = 0, - kInput = 1, - kOutput = 2, - // Only usable as destination or source (but not both) in mov (and it - // becomes an array instruction this way). - kIndexableTemp = 3, - kImmediate32 = 4, - kSampler = 6, - kResource = 7, - kConstantBuffer = 8, - kLabel = 10, - kInputPrimitiveID = 11, - kOutputDepth = 12, - kNull = 13, - kInputControlPoint = 25, - kInputDomainPoint = 28, - kUnorderedAccessView = 30, - kInputCoverageMask = 35, - kOutputDepthLessEqual = 39, - }; - - // D3D10_SB_OPERAND_INDEX_DIMENSION - static constexpr uint32_t GetDxbcOperandIndexDimension(DxbcOperandType type) { - switch (type) { - case DxbcOperandType::kTemp: - case DxbcOperandType::kInput: - case DxbcOperandType::kOutput: - case DxbcOperandType::kLabel: - return 1; - case DxbcOperandType::kIndexableTemp: - case DxbcOperandType::kSampler: - case DxbcOperandType::kResource: - case DxbcOperandType::kInputControlPoint: - case DxbcOperandType::kUnorderedAccessView: - return 2; - case DxbcOperandType::kConstantBuffer: - return 3; - default: - return 0; - } - } - - // D3D10_SB_OPERAND_NUM_COMPONENTS - enum class DxbcOperandDimension : uint32_t { - kNoData, // D3D10_SB_OPERAND_0_COMPONENT - kScalar, // D3D10_SB_OPERAND_1_COMPONENT - kVector, // D3D10_SB_OPERAND_4_COMPONENT - }; - - static constexpr 
DxbcOperandDimension GetDxbcOperandDimension( - DxbcOperandType type, bool dest_in_dcl = false) { - switch (type) { - case DxbcOperandType::kSampler: - case DxbcOperandType::kLabel: - case DxbcOperandType::kNull: - return DxbcOperandDimension::kNoData; - case DxbcOperandType::kInputPrimitiveID: - case DxbcOperandType::kOutputDepth: - case DxbcOperandType::kOutputDepthLessEqual: - return DxbcOperandDimension::kScalar; - case DxbcOperandType::kInputCoverageMask: - return dest_in_dcl ? DxbcOperandDimension::kScalar - : DxbcOperandDimension::kVector; - default: - return DxbcOperandDimension::kVector; - } - } - - // D3D10_SB_OPERAND_4_COMPONENT_SELECTION_MODE - enum class DxbcComponentSelection { - kMask, - kSwizzle, - kSelect1, - }; - - struct DxbcIndex { - // D3D10_SB_OPERAND_INDEX_REPRESENTATION - enum class Representation : uint32_t { - kImmediate32, - kImmediate64, - kRelative, - kImmediate32PlusRelative, - kImmediate64PlusRelative, - }; - - uint32_t index_; - // UINT32_MAX if absolute. Lower 2 bits are the component index, upper bits - // are the temp register index. Applicable to indexable temps, inputs, - // outputs except for pixel shaders, constant buffers and bindings. - uint32_t relative_to_temp_; - - // Implicit constructor. - DxbcIndex(uint32_t index = 0) - : index_(index), relative_to_temp_(UINT32_MAX) {} - DxbcIndex(uint32_t temp, uint32_t temp_component, uint32_t offset = 0) - : index_(offset), relative_to_temp_((temp << 2) | temp_component) {} - - Representation GetRepresentation() const { - if (relative_to_temp_ != UINT32_MAX) { - return index_ != 0 ? Representation::kImmediate32PlusRelative - : Representation::kRelative; - } - return Representation::kImmediate32; - } - uint32_t GetLength() const { - return relative_to_temp_ != UINT32_MAX ? (index_ != 0 ? 
3 : 2) : 1; - } - void Write(std::vector& code) const { - if (relative_to_temp_ == UINT32_MAX || index_ != 0) { - code.push_back(index_); - } - if (relative_to_temp_ != UINT32_MAX) { - // Encode selecting one component from absolute-indexed r#. - code.push_back(uint32_t(DxbcOperandDimension::kVector) | - (uint32_t(DxbcComponentSelection::kSelect1) << 2) | - ((relative_to_temp_ & 3) << 4) | - (uint32_t(DxbcOperandType::kTemp) << 12) | (1 << 20) | - (uint32_t(Representation::kImmediate32) << 22)); - code.push_back(relative_to_temp_ >> 2); - } - } - }; - - struct DxbcOperandAddress { - DxbcOperandType type_; - DxbcIndex index_1d_, index_2d_, index_3d_; - - explicit DxbcOperandAddress(DxbcOperandType type, - DxbcIndex index_1d = DxbcIndex(), - DxbcIndex index_2d = DxbcIndex(), - DxbcIndex index_3d = DxbcIndex()) - : type_(type), - index_1d_(index_1d), - index_2d_(index_2d), - index_3d_(index_3d) {} - - DxbcOperandDimension GetDimension(bool dest_in_dcl = false) const { - return GetDxbcOperandDimension(type_, dest_in_dcl); - } - uint32_t GetIndexDimension() const { - return GetDxbcOperandIndexDimension(type_); - } - uint32_t GetOperandTokenTypeAndIndex() const { - uint32_t index_dimension = GetIndexDimension(); - uint32_t operand_token = - (uint32_t(type_) << 12) | (index_dimension << 20); - if (index_dimension > 0) { - operand_token |= uint32_t(index_1d_.GetRepresentation()) << 22; - if (index_dimension > 1) { - operand_token |= uint32_t(index_2d_.GetRepresentation()) << 25; - if (index_dimension > 2) { - operand_token |= uint32_t(index_3d_.GetRepresentation()) << 28; - } - } - } - return operand_token; - } - uint32_t GetLength() const { - uint32_t length = 0; - uint32_t index_dimension = GetIndexDimension(); - if (index_dimension > 0) { - length += index_1d_.GetLength(); - if (index_dimension > 1) { - length += index_2d_.GetLength(); - if (index_dimension > 2) { - length += index_3d_.GetLength(); - } - } - } - return length; - } - void Write(std::vector& code) const { 
- uint32_t index_dimension = GetIndexDimension(); - if (index_dimension > 0) { - index_1d_.Write(code); - if (index_dimension > 1) { - index_2d_.Write(code); - if (index_dimension > 2) { - index_3d_.Write(code); - } - } - } - } - }; - - // D3D10_SB_EXTENDED_OPERAND_TYPE - enum class DxbcExtendedOperandType : uint32_t { - kEmpty, - kModifier, - }; - - // D3D10_SB_OPERAND_MODIFIER - enum class DxbcOperandModifier : uint32_t { - kNone, - kNegate, - kAbsolute, - kAbsoluteNegate, - }; - - struct DxbcDest : DxbcOperandAddress { - // Ignored for 0-component and 1-component operand types. - uint32_t write_mask_; - - explicit DxbcDest(DxbcOperandType type, uint32_t write_mask = 0b1111, - DxbcIndex index_1d = DxbcIndex(), - DxbcIndex index_2d = DxbcIndex(), - DxbcIndex index_3d = DxbcIndex()) - : DxbcOperandAddress(type, index_1d, index_2d, index_3d), - write_mask_(write_mask) {} - - static DxbcDest R(uint32_t index, uint32_t write_mask = 0b1111) { - return DxbcDest(DxbcOperandType::kTemp, write_mask, index); - } - static DxbcDest O(DxbcIndex index, uint32_t write_mask = 0b1111) { - return DxbcDest(DxbcOperandType::kOutput, write_mask, index); - } - static DxbcDest X(uint32_t index_1d, DxbcIndex index_2d, - uint32_t write_mask = 0b1111) { - return DxbcDest(DxbcOperandType::kIndexableTemp, write_mask, index_1d, - index_2d); - } - static DxbcDest ODepth() { - return DxbcDest(DxbcOperandType::kOutputDepth, 0b0001); - } - static DxbcDest Null() { return DxbcDest(DxbcOperandType::kNull, 0b0000); } - static DxbcDest U(uint32_t index_1d, DxbcIndex index_2d, - uint32_t write_mask = 0b1111) { - return DxbcDest(DxbcOperandType::kUnorderedAccessView, write_mask, - index_1d, index_2d); - } - static DxbcDest ODepthLE() { - return DxbcDest(DxbcOperandType::kOutputDepthLessEqual, 0b0001); - } - - uint32_t GetMask() const { - switch (GetDimension()) { - case DxbcOperandDimension::kNoData: - return 0b0000; - case DxbcOperandDimension::kScalar: - return 0b0001; - case 
DxbcOperandDimension::kVector: - return write_mask_; - default: - assert_unhandled_case(GetDimension()); - return 0b0000; - } - } - [[nodiscard]] DxbcDest Mask(uint32_t write_mask) const { - return DxbcDest(type_, write_mask, index_1d_, index_2d_, index_3d_); - } - [[nodiscard]] DxbcDest MaskMasked(uint32_t write_mask) const { - return DxbcDest(type_, write_mask_ & write_mask, index_1d_, index_2d_, - index_3d_); - } - static uint32_t GetMaskSingleComponent(uint32_t write_mask) { - uint32_t component; - if (xe::bit_scan_forward(write_mask, &component)) { - if ((write_mask >> component) == 1) { - return component; - } - } - return UINT32_MAX; - } - uint32_t GetMaskSingleComponent() const { - return GetMaskSingleComponent(GetMask()); - } - - uint32_t GetLength() const { return 1 + DxbcOperandAddress::GetLength(); } - void Write(std::vector& code, bool in_dcl = false) const { - uint32_t operand_token = GetOperandTokenTypeAndIndex(); - DxbcOperandDimension dimension = GetDimension(in_dcl); - operand_token |= uint32_t(dimension); - if (dimension == DxbcOperandDimension::kVector) { - assert_true(write_mask_ > 0b0000 && write_mask_ <= 0b1111); - operand_token |= - (uint32_t(DxbcComponentSelection::kMask) << 2) | (write_mask_ << 4); - } - code.push_back(operand_token); - DxbcOperandAddress::Write(code); - } - }; - - struct DxbcSrc : DxbcOperandAddress { - enum : uint32_t { - kXYZW = 0b11100100, - kXXXX = 0b00000000, - kYYYY = 0b01010101, - kZZZZ = 0b10101010, - kWWWW = 0b11111111, - }; - - // Ignored for 0-component and 1-component operand types. - uint32_t swizzle_; - bool absolute_; - bool negate_; - // Only valid for DxbcOperandType::kImmediate32. 
- uint32_t immediate_[4]; - - explicit DxbcSrc(DxbcOperandType type, uint32_t swizzle = kXYZW, - DxbcIndex index_1d = DxbcIndex(), - DxbcIndex index_2d = DxbcIndex(), - DxbcIndex index_3d = DxbcIndex()) - : DxbcOperandAddress(type, index_1d, index_2d, index_3d), - swizzle_(swizzle), - absolute_(false), - negate_(false) {} - - static DxbcSrc R(uint32_t index, uint32_t swizzle = kXYZW) { - return DxbcSrc(DxbcOperandType::kTemp, swizzle, index); - } - static DxbcSrc V(DxbcIndex index, uint32_t swizzle = kXYZW) { - return DxbcSrc(DxbcOperandType::kInput, swizzle, index); - } - static DxbcSrc X(uint32_t index_1d, DxbcIndex index_2d, - uint32_t swizzle = kXYZW) { - return DxbcSrc(DxbcOperandType::kIndexableTemp, swizzle, index_1d, - index_2d); - } - static DxbcSrc LU(uint32_t x, uint32_t y, uint32_t z, uint32_t w) { - DxbcSrc src(DxbcOperandType::kImmediate32, kXYZW); - src.immediate_[0] = x; - src.immediate_[1] = y; - src.immediate_[2] = z; - src.immediate_[3] = w; - return src; - } - static DxbcSrc LU(uint32_t x) { return LU(x, x, x, x); } - static DxbcSrc LI(int32_t x, int32_t y, int32_t z, int32_t w) { - return LU(uint32_t(x), uint32_t(y), uint32_t(z), uint32_t(w)); - } - static DxbcSrc LI(int32_t x) { return LI(x, x, x, x); } - static DxbcSrc LF(float x, float y, float z, float w) { - return LU(*reinterpret_cast(&x), - *reinterpret_cast(&y), - *reinterpret_cast(&z), - *reinterpret_cast(&w)); - } - static DxbcSrc LF(float x) { return LF(x, x, x, x); } - static DxbcSrc LP(const uint32_t* xyzw) { - return LU(xyzw[0], xyzw[1], xyzw[2], xyzw[3]); - } - static DxbcSrc LP(const int32_t* xyzw) { - return LI(xyzw[0], xyzw[1], xyzw[2], xyzw[3]); - } - static DxbcSrc LP(const float* xyzw) { - return LF(xyzw[0], xyzw[1], xyzw[2], xyzw[3]); - } - static DxbcSrc S(uint32_t index_1d, DxbcIndex index_2d) { - return DxbcSrc(DxbcOperandType::kSampler, kXXXX, index_1d, index_2d); - } - static DxbcSrc T(uint32_t index_1d, DxbcIndex index_2d, - uint32_t swizzle = kXYZW) { - return 
DxbcSrc(DxbcOperandType::kResource, swizzle, index_1d, index_2d); - } - static DxbcSrc CB(uint32_t index_1d, DxbcIndex index_2d, DxbcIndex index_3d, - uint32_t swizzle = kXYZW) { - return DxbcSrc(DxbcOperandType::kConstantBuffer, swizzle, index_1d, - index_2d, index_3d); - } - static DxbcSrc Label(uint32_t index) { - return DxbcSrc(DxbcOperandType::kLabel, kXXXX, index); - } - static DxbcSrc VPrim() { - return DxbcSrc(DxbcOperandType::kInputPrimitiveID, kXXXX); - } - static DxbcSrc VICP(DxbcIndex index_1d, DxbcIndex index_2d, - uint32_t swizzle = kXYZW) { - return DxbcSrc(DxbcOperandType::kInputControlPoint, swizzle, index_1d, - index_2d); - } - static DxbcSrc VDomain(uint32_t swizzle = kXYZW) { - return DxbcSrc(DxbcOperandType::kInputDomainPoint, swizzle); - } - static DxbcSrc U(uint32_t index_1d, DxbcIndex index_2d, - uint32_t swizzle = kXYZW) { - return DxbcSrc(DxbcOperandType::kUnorderedAccessView, swizzle, index_1d, - index_2d); - } - static DxbcSrc VCoverage() { - return DxbcSrc(DxbcOperandType::kInputCoverageMask, kXXXX); - } - - [[nodiscard]] DxbcSrc WithModifiers(bool absolute, bool negate) const { - DxbcSrc new_src(*this); - new_src.absolute_ = absolute; - new_src.negate_ = negate; - return new_src; - } - [[nodiscard]] DxbcSrc WithAbs(bool absolute) const { - return WithModifiers(absolute, negate_); - } - [[nodiscard]] DxbcSrc WithNeg(bool negate) const { - return WithModifiers(absolute_, negate); - } - [[nodiscard]] DxbcSrc Abs() const { return WithModifiers(true, false); } - [[nodiscard]] DxbcSrc operator-() const { - return WithModifiers(absolute_, !negate_); - } - [[nodiscard]] DxbcSrc Swizzle(uint32_t swizzle) const { - DxbcSrc new_src(*this); - new_src.swizzle_ = swizzle; - return new_src; - } - [[nodiscard]] DxbcSrc SwizzleSwizzled(uint32_t swizzle) const { - DxbcSrc new_src(*this); - new_src.swizzle_ = 0; - for (uint32_t i = 0; i < 4; ++i) { - new_src.swizzle_ |= ((swizzle_ >> (((swizzle >> (i * 2)) & 3) * 2)) & 3) - << (i * 2); - } - return 
new_src; - } - [[nodiscard]] DxbcSrc Select(uint32_t component) const { - DxbcSrc new_src(*this); - new_src.swizzle_ = component * 0b01010101; - return new_src; - } - [[nodiscard]] DxbcSrc SelectFromSwizzled(uint32_t component) const { - DxbcSrc new_src(*this); - new_src.swizzle_ = ((swizzle_ >> (component * 2)) & 3) * 0b01010101; - return new_src; - } - - uint32_t GetLength(uint32_t mask, bool force_vector = false) const { - bool is_vector = force_vector || - (mask != 0b0000 && - DxbcDest::GetMaskSingleComponent(mask) == UINT32_MAX); - if (type_ == DxbcOperandType::kImmediate32) { - return is_vector ? 5 : 2; - } - return ((absolute_ || negate_) ? 2 : 1) + DxbcOperandAddress::GetLength(); - } - static constexpr uint32_t GetModifiedImmediate(uint32_t value, - bool is_integer, - bool absolute, bool negate) { - if (is_integer) { - if (absolute) { - *reinterpret_cast(&value) = - std::abs(*reinterpret_cast(&value)); - } - if (negate) { - *reinterpret_cast(&value) = - -*reinterpret_cast(&value); - } - } else { - if (absolute) { - value &= uint32_t(INT32_MAX); - } - if (negate) { - value ^= uint32_t(INT32_MAX) + 1; - } - } - return value; - } - uint32_t GetModifiedImmediate(uint32_t swizzle_index, - bool is_integer) const { - return GetModifiedImmediate( - immediate_[(swizzle_ >> (swizzle_index * 2)) & 3], is_integer, - absolute_, negate_); - } - void Write(std::vector& code, bool is_integer, uint32_t mask, - bool force_vector = false) const; - }; - - // D3D10_SB_OPCODE_TYPE subset - enum class DxbcOpcode : uint32_t { - kAdd = 0, - kAnd = 1, - kBreak = 2, - kCall = 4, - kCallC = 5, - kCase = 6, - kContinue = 7, - kDefault = 10, - kDiscard = 13, - kDiv = 14, - kDP2 = 15, - kDP3 = 16, - kDP4 = 17, - kElse = 18, - kEndIf = 21, - kEndLoop = 22, - kEndSwitch = 23, - kEq = 24, - kExp = 25, - kFrc = 26, - kFToI = 27, - kFToU = 28, - kGE = 29, - kIAdd = 30, - kIf = 31, - kIEq = 32, - kIGE = 33, - kILT = 34, - kIMAd = 35, - kIMax = 36, - kIMin = 37, - kIMul = 38, - kINE = 39, - 
kIShL = 41, - kIToF = 43, - kLabel = 44, - kLog = 47, - kLoop = 48, - kLT = 49, - kMAd = 50, - kMin = 51, - kMax = 52, - kMov = 54, - kMovC = 55, - kMul = 56, - kNE = 57, - kNot = 59, - kOr = 60, - kRet = 62, - kRetC = 63, - kRoundNE = 64, - kRoundNI = 65, - kRoundZ = 67, - kRSq = 68, - kSampleL = 72, - kSampleD = 73, - kSqRt = 75, - kSwitch = 76, - kSinCos = 77, - kULT = 79, - kUGE = 80, - kUMul = 81, - kUMAd = 82, - kUMax = 83, - kUMin = 84, - kUShR = 85, - kUToF = 86, - kXOr = 87, - kLOD = 108, - kDerivRTXCoarse = 122, - kDerivRTXFine = 123, - kDerivRTYCoarse = 124, - kDerivRTYFine = 125, - kRcp = 129, - kF32ToF16 = 130, - kF16ToF32 = 131, - kFirstBitHi = 135, - kUBFE = 138, - kIBFE = 139, - kBFI = 140, - kBFRev = 141, - kLdUAVTyped = 163, - kStoreUAVTyped = 164, - kLdRaw = 165, - kStoreRaw = 166, - kEvalSampleIndex = 204, - kEvalCentroid = 205, - }; - - // D3D10_SB_EXTENDED_OPCODE_TYPE - enum class DxbcExtendedOpcodeType : uint32_t { - kEmpty, - kSampleControls, - kResourceDim, - kResourceReturnType, - }; - - static constexpr uint32_t DxbcOpcodeToken( - DxbcOpcode opcode, uint32_t operands_length, bool saturate = false, - uint32_t extended_opcode_count = 0) { - return uint32_t(opcode) | (saturate ? (uint32_t(1) << 13) : 0) | - ((uint32_t(1) + extended_opcode_count + operands_length) << 24) | - (extended_opcode_count ? (uint32_t(1) << 31) : 0); - } - - static constexpr uint32_t DxbcSampleControlsExtendedOpcodeToken( - int32_t aoffimmi_u, int32_t aoffimmi_v, int32_t aoffimmi_w, - bool extended = false) { - return uint32_t(DxbcExtendedOpcodeType::kSampleControls) | - ((uint32_t(aoffimmi_u) & uint32_t(0b1111)) << 9) | - ((uint32_t(aoffimmi_v) & uint32_t(0b1111)) << 13) | - ((uint32_t(aoffimmi_w) & uint32_t(0b1111)) << 17) | - (extended ? 
(uint32_t(1) << 31) : 0); - } - - void DxbcEmitAluOp(DxbcOpcode opcode, uint32_t src_are_integer, - const DxbcDest& dest, const DxbcSrc& src, - bool saturate = false) { - uint32_t dest_write_mask = dest.GetMask(); - uint32_t operands_length = - dest.GetLength() + src.GetLength(dest_write_mask); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back(DxbcOpcodeToken(opcode, operands_length, saturate)); - dest.Write(shader_code_); - src.Write(shader_code_, (src_are_integer & 0b1) != 0, dest_write_mask); - ++stat_.instruction_count; - } - void DxbcEmitAluOp(DxbcOpcode opcode, uint32_t src_are_integer, - const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1, bool saturate = false) { - uint32_t dest_write_mask = dest.GetMask(); - uint32_t operands_length = dest.GetLength() + - src0.GetLength(dest_write_mask) + - src1.GetLength(dest_write_mask); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back(DxbcOpcodeToken(opcode, operands_length, saturate)); - dest.Write(shader_code_); - src0.Write(shader_code_, (src_are_integer & 0b1) != 0, dest_write_mask); - src1.Write(shader_code_, (src_are_integer & 0b10) != 0, dest_write_mask); - ++stat_.instruction_count; - } - void DxbcEmitAluOp(DxbcOpcode opcode, uint32_t src_are_integer, - const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1, const DxbcSrc& src2, - bool saturate = false) { - uint32_t dest_write_mask = dest.GetMask(); - uint32_t operands_length = - dest.GetLength() + src0.GetLength(dest_write_mask) + - src1.GetLength(dest_write_mask) + src2.GetLength(dest_write_mask); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back(DxbcOpcodeToken(opcode, operands_length, saturate)); - dest.Write(shader_code_); - src0.Write(shader_code_, (src_are_integer & 0b1) != 0, dest_write_mask); - src1.Write(shader_code_, (src_are_integer & 0b10) != 0, dest_write_mask); - src2.Write(shader_code_, 
(src_are_integer & 0b100) != 0, dest_write_mask); - ++stat_.instruction_count; - } - void DxbcEmitAluOp(DxbcOpcode opcode, uint32_t src_are_integer, - const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1, const DxbcSrc& src2, - const DxbcSrc& src3, bool saturate = false) { - uint32_t dest_write_mask = dest.GetMask(); - uint32_t operands_length = - dest.GetLength() + src0.GetLength(dest_write_mask) + - src1.GetLength(dest_write_mask) + src2.GetLength(dest_write_mask) + - src3.GetLength(dest_write_mask); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back(DxbcOpcodeToken(opcode, operands_length, saturate)); - dest.Write(shader_code_); - src0.Write(shader_code_, (src_are_integer & 0b1) != 0, dest_write_mask); - src1.Write(shader_code_, (src_are_integer & 0b10) != 0, dest_write_mask); - src2.Write(shader_code_, (src_are_integer & 0b100) != 0, dest_write_mask); - src3.Write(shader_code_, (src_are_integer & 0b1000) != 0, dest_write_mask); - ++stat_.instruction_count; - } - void DxbcEmitAluOp(DxbcOpcode opcode, uint32_t src_are_integer, - const DxbcDest& dest0, const DxbcDest& dest1, - const DxbcSrc& src, bool saturate = false) { - uint32_t dest_write_mask = dest0.GetMask() | dest1.GetMask(); - uint32_t operands_length = - dest0.GetLength() + dest1.GetLength() + src.GetLength(dest_write_mask); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back(DxbcOpcodeToken(opcode, operands_length, saturate)); - dest0.Write(shader_code_); - dest1.Write(shader_code_); - src.Write(shader_code_, (src_are_integer & 0b1) != 0, dest_write_mask); - ++stat_.instruction_count; - } - void DxbcEmitAluOp(DxbcOpcode opcode, uint32_t src_are_integer, - const DxbcDest& dest0, const DxbcDest& dest1, - const DxbcSrc& src0, const DxbcSrc& src1, - bool saturate = false) { - uint32_t dest_write_mask = dest0.GetMask() | dest1.GetMask(); - uint32_t operands_length = dest0.GetLength() + dest1.GetLength() + - 
src0.GetLength(dest_write_mask) + - src1.GetLength(dest_write_mask); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back(DxbcOpcodeToken(opcode, operands_length, saturate)); - dest0.Write(shader_code_); - dest1.Write(shader_code_); - src0.Write(shader_code_, (src_are_integer & 0b1) != 0, dest_write_mask); - src1.Write(shader_code_, (src_are_integer & 0b10) != 0, dest_write_mask); - ++stat_.instruction_count; - } - void DxbcEmitFlowOp(DxbcOpcode opcode, const DxbcSrc& src, - bool test = false) { - uint32_t operands_length = src.GetLength(0b0000); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back(DxbcOpcodeToken(opcode, operands_length) | - (test ? (1 << 18) : 0)); - src.Write(shader_code_, true, 0b0000); - ++stat_.instruction_count; - } - void DxbcEmitFlowOp(DxbcOpcode opcode, const DxbcSrc& src0, - const DxbcSrc& src1, bool test = false) { - uint32_t operands_length = src0.GetLength(0b0000) + src1.GetLength(0b0000); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back(DxbcOpcodeToken(opcode, operands_length) | - (test ? 
(1 << 18) : 0)); - src0.Write(shader_code_, true, 0b0000); - src1.Write(shader_code_, true, 0b0000); - ++stat_.instruction_count; - } - - void DxbcOpAdd(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kAdd, 0b00, dest, src0, src1, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpAnd(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kAnd, 0b11, dest, src0, src1); - ++stat_.uint_instruction_count; - } - void DxbcOpBreak() { - shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kBreak, 0)); - ++stat_.instruction_count; - } - void DxbcOpCall(const DxbcSrc& label) { - DxbcEmitFlowOp(DxbcOpcode::kCall, label); - ++stat_.static_flow_control_count; - } - void DxbcOpCallC(bool test, const DxbcSrc& src, const DxbcSrc& label) { - DxbcEmitFlowOp(DxbcOpcode::kCallC, src, label, test); - ++stat_.dynamic_flow_control_count; - } - void DxbcOpCase(const DxbcSrc& src) { - DxbcEmitFlowOp(DxbcOpcode::kCase, src); - ++stat_.static_flow_control_count; - } - void DxbcOpContinue() { - shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kContinue, 0)); - ++stat_.instruction_count; - } - void DxbcOpDefault() { - shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kDefault, 0)); - ++stat_.instruction_count; - ++stat_.static_flow_control_count; - } - void DxbcOpDiscard(bool test, const DxbcSrc& src) { - DxbcEmitFlowOp(DxbcOpcode::kDiscard, src, test); - } - void DxbcOpDiv(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kDiv, 0b00, dest, src0, src1, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpDP2(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1, - bool saturate = false) { - uint32_t operands_length = - dest.GetLength() + src0.GetLength(0b0011) + src1.GetLength(0b0011); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back( - 
DxbcOpcodeToken(DxbcOpcode::kDP2, operands_length, saturate)); - dest.Write(shader_code_); - src0.Write(shader_code_, false, 0b0011); - src1.Write(shader_code_, false, 0b0011); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - } - void DxbcOpDP3(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1, - bool saturate = false) { - uint32_t operands_length = - dest.GetLength() + src0.GetLength(0b0111) + src1.GetLength(0b0111); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back( - DxbcOpcodeToken(DxbcOpcode::kDP3, operands_length, saturate)); - dest.Write(shader_code_); - src0.Write(shader_code_, false, 0b0111); - src1.Write(shader_code_, false, 0b0111); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - } - void DxbcOpDP4(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1, - bool saturate = false) { - uint32_t operands_length = - dest.GetLength() + src0.GetLength(0b1111) + src1.GetLength(0b1111); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back( - DxbcOpcodeToken(DxbcOpcode::kDP4, operands_length, saturate)); - dest.Write(shader_code_); - src0.Write(shader_code_, false, 0b1111); - src1.Write(shader_code_, false, 0b1111); - ++stat_.instruction_count; - ++stat_.float_instruction_count; - } - void DxbcOpElse() { - shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kElse, 0)); - ++stat_.instruction_count; - } - void DxbcOpEndIf() { - shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kEndIf, 0)); - ++stat_.instruction_count; - } - void DxbcOpEndLoop() { - shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kEndLoop, 0)); - ++stat_.instruction_count; - } - void DxbcOpEndSwitch() { - shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kEndSwitch, 0)); - ++stat_.instruction_count; - } - void DxbcOpEq(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kEq, 0b00, dest, src0, src1); - 
++stat_.float_instruction_count; - } - void DxbcOpExp(const DxbcDest& dest, const DxbcSrc& src, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kExp, 0b0, dest, src, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpFrc(const DxbcDest& dest, const DxbcSrc& src, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kFrc, 0b0, dest, src, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpFToI(const DxbcDest& dest, const DxbcSrc& src) { - DxbcEmitAluOp(DxbcOpcode::kFToI, 0b0, dest, src); - ++stat_.conversion_instruction_count; - } - void DxbcOpFToU(const DxbcDest& dest, const DxbcSrc& src) { - DxbcEmitAluOp(DxbcOpcode::kFToU, 0b0, dest, src); - ++stat_.conversion_instruction_count; - } - void DxbcOpGE(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kGE, 0b00, dest, src0, src1); - ++stat_.float_instruction_count; - } - void DxbcOpIAdd(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kIAdd, 0b11, dest, src0, src1); - ++stat_.int_instruction_count; - } - void DxbcOpIf(bool test, const DxbcSrc& src) { - DxbcEmitFlowOp(DxbcOpcode::kIf, src, test); - ++stat_.dynamic_flow_control_count; - } - void DxbcOpIEq(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kIEq, 0b11, dest, src0, src1); - ++stat_.int_instruction_count; - } - void DxbcOpIGE(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kIGE, 0b11, dest, src0, src1); - ++stat_.int_instruction_count; - } - void DxbcOpILT(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kILT, 0b11, dest, src0, src1); - ++stat_.int_instruction_count; - } - void DxbcOpIMAd(const DxbcDest& dest, const DxbcSrc& mul0, - const DxbcSrc& mul1, const DxbcSrc& add) { - DxbcEmitAluOp(DxbcOpcode::kIMAd, 0b111, dest, mul0, mul1, add); - ++stat_.int_instruction_count; - } - void 
DxbcOpIMax(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kIMax, 0b11, dest, src0, src1); - ++stat_.int_instruction_count; - } - void DxbcOpIMin(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kIMin, 0b11, dest, src0, src1); - ++stat_.int_instruction_count; - } - void DxbcOpIMul(const DxbcDest& dest_hi, const DxbcDest& dest_lo, - const DxbcSrc& src0, const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kIMul, 0b11, dest_hi, dest_lo, src0, src1); - ++stat_.int_instruction_count; - } - void DxbcOpINE(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kINE, 0b11, dest, src0, src1); - ++stat_.int_instruction_count; - } - void DxbcOpIShL(const DxbcDest& dest, const DxbcSrc& value, - const DxbcSrc& shift) { - DxbcEmitAluOp(DxbcOpcode::kIShL, 0b11, dest, value, shift); - ++stat_.int_instruction_count; - } - void DxbcOpIToF(const DxbcDest& dest, const DxbcSrc& src) { - DxbcEmitAluOp(DxbcOpcode::kIToF, 0b1, dest, src); - ++stat_.conversion_instruction_count; - } - void DxbcOpLabel(const DxbcSrc& label) { - // The label is source, not destination, for simplicity, to unify it will - // call/callc (in DXBC it's just a zero-component label operand). - uint32_t operands_length = label.GetLength(0b0000); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back( - DxbcOpcodeToken(DxbcOpcode::kLabel, operands_length)); - label.Write(shader_code_, true, 0b0000); - // Doesn't count towards stat_.instruction_count. 
- } - void DxbcOpLog(const DxbcDest& dest, const DxbcSrc& src, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kLog, 0b0, dest, src, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpLoop() { - shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kLoop, 0)); - ++stat_.instruction_count; - ++stat_.dynamic_flow_control_count; - } - void DxbcOpLT(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kLT, 0b00, dest, src0, src1); - ++stat_.float_instruction_count; - } - void DxbcOpMAd(const DxbcDest& dest, const DxbcSrc& mul0, const DxbcSrc& mul1, - const DxbcSrc& add, bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kMAd, 0b000, dest, mul0, mul1, add, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpMin(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kMin, 0b00, dest, src0, src1, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpMax(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kMax, 0b00, dest, src0, src1, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpMov(const DxbcDest& dest, const DxbcSrc& src, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kMov, 0b0, dest, src, saturate); - if (dest.type_ == DxbcOperandType::kIndexableTemp || - src.type_ == DxbcOperandType::kIndexableTemp) { - ++stat_.array_instruction_count; - } else { - ++stat_.mov_instruction_count; - } - } - void DxbcOpMovC(const DxbcDest& dest, const DxbcSrc& test, - const DxbcSrc& src_nz, const DxbcSrc& src_z, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kMovC, 0b001, dest, test, src_nz, src_z, - saturate); - ++stat_.movc_instruction_count; - } - void DxbcOpMul(const DxbcDest& dest, const DxbcSrc& src0, const DxbcSrc& src1, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kMul, 0b00, dest, src0, src1, saturate); - 
++stat_.float_instruction_count; - } - void DxbcOpNE(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kNE, 0b00, dest, src0, src1); - ++stat_.float_instruction_count; - } - void DxbcOpNot(const DxbcDest& dest, const DxbcSrc& src) { - DxbcEmitAluOp(DxbcOpcode::kNot, 0b1, dest, src); - ++stat_.uint_instruction_count; - } - void DxbcOpOr(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kOr, 0b11, dest, src0, src1); - ++stat_.uint_instruction_count; - } - void DxbcOpRet() { - shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kRet, 0)); - ++stat_.instruction_count; - ++stat_.static_flow_control_count; - } - void DxbcOpRetC(bool test, const DxbcSrc& src) { - DxbcEmitFlowOp(DxbcOpcode::kRetC, src, test); - ++stat_.dynamic_flow_control_count; - } - void DxbcOpRoundNE(const DxbcDest& dest, const DxbcSrc& src, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kRoundNE, 0b0, dest, src, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpRoundNI(const DxbcDest& dest, const DxbcSrc& src, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kRoundNI, 0b0, dest, src, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpRoundZ(const DxbcDest& dest, const DxbcSrc& src, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kRoundZ, 0b0, dest, src, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpRSq(const DxbcDest& dest, const DxbcSrc& src, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kRSq, 0b0, dest, src, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpSampleL(const DxbcDest& dest, const DxbcSrc& address, - uint32_t address_components, const DxbcSrc& resource, - const DxbcSrc& sampler, const DxbcSrc& lod, - int32_t aoffimmi_u = 0, int32_t aoffimmi_v = 0, - int32_t aoffimmi_w = 0) { - uint32_t dest_write_mask = dest.GetMask(); - uint32_t sample_controls = 0; - if (aoffimmi_u || aoffimmi_v || aoffimmi_w) { - sample_controls 
= DxbcSampleControlsExtendedOpcodeToken( - aoffimmi_u, aoffimmi_v, aoffimmi_w); - } - uint32_t address_mask = (1 << address_components) - 1; - uint32_t operands_length = - dest.GetLength() + address.GetLength(address_mask) + - resource.GetLength(dest_write_mask, true) + sampler.GetLength(0b0000) + - lod.GetLength(0b0000); - shader_code_.reserve(shader_code_.size() + 1 + (sample_controls ? 1 : 0) + - operands_length); - shader_code_.push_back(DxbcOpcodeToken( - DxbcOpcode::kSampleL, operands_length, false, sample_controls ? 1 : 0)); - if (sample_controls) { - shader_code_.push_back(sample_controls); - } - dest.Write(shader_code_); - address.Write(shader_code_, false, address_mask); - resource.Write(shader_code_, false, dest_write_mask, true); - sampler.Write(shader_code_, false, 0b0000); - lod.Write(shader_code_, false, 0b0000); - ++stat_.instruction_count; - ++stat_.texture_normal_instructions; - } - void DxbcOpSampleD(const DxbcDest& dest, const DxbcSrc& address, - uint32_t address_components, const DxbcSrc& resource, - const DxbcSrc& sampler, const DxbcSrc& x_derivatives, - const DxbcSrc& y_derivatives, - uint32_t derivatives_components, int32_t aoffimmi_u = 0, - int32_t aoffimmi_v = 0, int32_t aoffimmi_w = 0) { - // If the address is 1-component, the derivatives are 1-component, if the - // address is 4-component, the derivatives are 4-component. 
- assert_true(derivatives_components <= address_components); - uint32_t dest_write_mask = dest.GetMask(); - uint32_t sample_controls = 0; - if (aoffimmi_u || aoffimmi_v || aoffimmi_w) { - sample_controls = DxbcSampleControlsExtendedOpcodeToken( - aoffimmi_u, aoffimmi_v, aoffimmi_w); - } - uint32_t address_mask = (1 << address_components) - 1; - uint32_t derivatives_mask = (1 << derivatives_components) - 1; - uint32_t operands_length = - dest.GetLength() + address.GetLength(address_mask) + - resource.GetLength(dest_write_mask, true) + sampler.GetLength(0b0000) + - x_derivatives.GetLength(derivatives_mask, address_components > 1) + - y_derivatives.GetLength(derivatives_mask, address_components > 1); - shader_code_.reserve(shader_code_.size() + 1 + (sample_controls ? 1 : 0) + - operands_length); - shader_code_.push_back(DxbcOpcodeToken( - DxbcOpcode::kSampleD, operands_length, false, sample_controls ? 1 : 0)); - if (sample_controls) { - shader_code_.push_back(sample_controls); - } - dest.Write(shader_code_); - address.Write(shader_code_, false, address_mask); - resource.Write(shader_code_, false, dest_write_mask, true); - sampler.Write(shader_code_, false, 0b0000); - x_derivatives.Write(shader_code_, false, derivatives_mask, - address_components > 1); - y_derivatives.Write(shader_code_, false, derivatives_mask, - address_components > 1); - ++stat_.instruction_count; - ++stat_.texture_gradient_instructions; - } - void DxbcOpSqRt(const DxbcDest& dest, const DxbcSrc& src, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kSqRt, 0b0, dest, src, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpSwitch(const DxbcSrc& src) { - DxbcEmitFlowOp(DxbcOpcode::kSwitch, src); - ++stat_.dynamic_flow_control_count; - } - void DxbcOpSinCos(const DxbcDest& dest_sin, const DxbcDest& dest_cos, - const DxbcSrc& src, bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kSinCos, 0b0, dest_sin, dest_cos, src, saturate); - ++stat_.float_instruction_count; - } - void 
DxbcOpULT(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kULT, 0b11, dest, src0, src1); - ++stat_.uint_instruction_count; - } - void DxbcOpUGE(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kUGE, 0b11, dest, src0, src1); - ++stat_.uint_instruction_count; - } - void DxbcOpUMul(const DxbcDest& dest_hi, const DxbcDest& dest_lo, - const DxbcSrc& src0, const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kUMul, 0b11, dest_hi, dest_lo, src0, src1); - ++stat_.uint_instruction_count; - } - void DxbcOpUMAd(const DxbcDest& dest, const DxbcSrc& mul0, - const DxbcSrc& mul1, const DxbcSrc& add) { - DxbcEmitAluOp(DxbcOpcode::kUMAd, 0b111, dest, mul0, mul1, add); - ++stat_.uint_instruction_count; - } - void DxbcOpUMax(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kUMax, 0b11, dest, src0, src1); - ++stat_.uint_instruction_count; - } - void DxbcOpUMin(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kUMin, 0b11, dest, src0, src1); - ++stat_.uint_instruction_count; - } - void DxbcOpUShR(const DxbcDest& dest, const DxbcSrc& value, - const DxbcSrc& shift) { - DxbcEmitAluOp(DxbcOpcode::kUShR, 0b11, dest, value, shift); - ++stat_.uint_instruction_count; - } - void DxbcOpUToF(const DxbcDest& dest, const DxbcSrc& src) { - DxbcEmitAluOp(DxbcOpcode::kUToF, 0b1, dest, src); - ++stat_.conversion_instruction_count; - } - void DxbcOpXOr(const DxbcDest& dest, const DxbcSrc& src0, - const DxbcSrc& src1) { - DxbcEmitAluOp(DxbcOpcode::kXOr, 0b11, dest, src0, src1); - ++stat_.uint_instruction_count; - } - void DxbcOpLOD(const DxbcDest& dest, const DxbcSrc& address, - uint32_t address_components, const DxbcSrc& resource, - const DxbcSrc& sampler) { - uint32_t dest_write_mask = dest.GetMask(); - uint32_t address_mask = (1 << address_components) - 1; - uint32_t operands_length = - dest.GetLength() + 
address.GetLength(address_mask) + - resource.GetLength(dest_write_mask) + sampler.GetLength(0b0000); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back(DxbcOpcodeToken(DxbcOpcode::kLOD, operands_length)); - dest.Write(shader_code_); - address.Write(shader_code_, false, address_mask); - resource.Write(shader_code_, false, dest_write_mask); - sampler.Write(shader_code_, false, 0b0000); - ++stat_.instruction_count; - ++stat_.lod_instructions; - } - void DxbcOpDerivRTXCoarse(const DxbcDest& dest, const DxbcSrc& src, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kDerivRTXCoarse, 0b0, dest, src, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpDerivRTXFine(const DxbcDest& dest, const DxbcSrc& src, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kDerivRTXFine, 0b0, dest, src, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpDerivRTYCoarse(const DxbcDest& dest, const DxbcSrc& src, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kDerivRTYCoarse, 0b0, dest, src, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpDerivRTYFine(const DxbcDest& dest, const DxbcSrc& src, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kDerivRTYFine, 0b0, dest, src, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpRcp(const DxbcDest& dest, const DxbcSrc& src, - bool saturate = false) { - DxbcEmitAluOp(DxbcOpcode::kRcp, 0b0, dest, src, saturate); - ++stat_.float_instruction_count; - } - void DxbcOpF32ToF16(const DxbcDest& dest, const DxbcSrc& src) { - DxbcEmitAluOp(DxbcOpcode::kF32ToF16, 0b0, dest, src); - ++stat_.conversion_instruction_count; - } - void DxbcOpF16ToF32(const DxbcDest& dest, const DxbcSrc& src) { - DxbcEmitAluOp(DxbcOpcode::kF16ToF32, 0b1, dest, src); - ++stat_.conversion_instruction_count; - } - void DxbcOpFirstBitHi(const DxbcDest& dest, const DxbcSrc& src) { - DxbcEmitAluOp(DxbcOpcode::kFirstBitHi, 0b1, dest, src); - ++stat_.uint_instruction_count; - } - 
void DxbcOpUBFE(const DxbcDest& dest, const DxbcSrc& width, - const DxbcSrc& offset, const DxbcSrc& src) { - DxbcEmitAluOp(DxbcOpcode::kUBFE, 0b111, dest, width, offset, src); - ++stat_.uint_instruction_count; - } - void DxbcOpIBFE(const DxbcDest& dest, const DxbcSrc& width, - const DxbcSrc& offset, const DxbcSrc& src) { - DxbcEmitAluOp(DxbcOpcode::kIBFE, 0b111, dest, width, offset, src); - ++stat_.int_instruction_count; - } - void DxbcOpBFI(const DxbcDest& dest, const DxbcSrc& width, - const DxbcSrc& offset, const DxbcSrc& from, - const DxbcSrc& to) { - DxbcEmitAluOp(DxbcOpcode::kBFI, 0b1111, dest, width, offset, from, to); - ++stat_.uint_instruction_count; - } - void DxbcOpBFRev(const DxbcDest& dest, const DxbcSrc& src) { - DxbcEmitAluOp(DxbcOpcode::kBFRev, 0b1, dest, src); - ++stat_.uint_instruction_count; - } - void DxbcOpLdUAVTyped(const DxbcDest& dest, const DxbcSrc& address, - uint32_t address_components, const DxbcSrc& uav) { - uint32_t dest_write_mask = dest.GetMask(); - uint32_t address_mask = (1 << address_components) - 1; - uint32_t operands_length = dest.GetLength() + - address.GetLength(address_mask, true) + - uav.GetLength(dest_write_mask, true); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back( - DxbcOpcodeToken(DxbcOpcode::kLdUAVTyped, operands_length)); - dest.Write(shader_code_); - address.Write(shader_code_, true, address_mask, true); - uav.Write(shader_code_, false, dest_write_mask, true); - ++stat_.instruction_count; - ++stat_.texture_load_instructions; - } - void DxbcOpStoreUAVTyped(const DxbcDest& dest, const DxbcSrc& address, - uint32_t address_components, const DxbcSrc& value) { - uint32_t dest_write_mask = dest.GetMask(); - // Typed UAV writes don't support write masking. 
- assert_true(dest_write_mask == 0b1111); - uint32_t address_mask = (1 << address_components) - 1; - uint32_t operands_length = dest.GetLength() + - address.GetLength(address_mask, true) + - value.GetLength(dest_write_mask); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back( - DxbcOpcodeToken(DxbcOpcode::kStoreUAVTyped, operands_length)); - dest.Write(shader_code_); - address.Write(shader_code_, true, address_mask, true); - value.Write(shader_code_, false, dest_write_mask); - ++stat_.instruction_count; - ++stat_.c_texture_store_instructions; - } - void DxbcOpLdRaw(const DxbcDest& dest, const DxbcSrc& byte_offset, - const DxbcSrc& src) { - // For Load, FXC emits code for writing to any component of the destination, - // with xxxx swizzle of the source SRV/UAV. - // For Load2/Load3/Load4, it's xy/xyz/xyzw write mask and xyxx/xyzx/xyzw - // swizzle. - uint32_t dest_write_mask = dest.GetMask(); - assert_true(dest_write_mask == 0b0001 || dest_write_mask == 0b0010 || - dest_write_mask == 0b0100 || dest_write_mask == 0b1000 || - dest_write_mask == 0b0011 || dest_write_mask == 0b0111 || - dest_write_mask == 0b1111); - uint32_t component_count = xe::bit_count(dest_write_mask); - assert_true((src.swizzle_ & ((1 << (component_count * 2)) - 1)) == - (DxbcSrc::kXYZW & ((1 << (component_count * 2)) - 1))); - uint32_t src_mask = (1 << component_count) - 1; - uint32_t operands_length = dest.GetLength() + - byte_offset.GetLength(0b0000) + - src.GetLength(src_mask, true); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back( - DxbcOpcodeToken(DxbcOpcode::kLdRaw, operands_length)); - dest.Write(shader_code_); - byte_offset.Write(shader_code_, true, 0b0000); - src.Write(shader_code_, true, src_mask, true); - ++stat_.instruction_count; - ++stat_.texture_load_instructions; - } - void DxbcOpStoreRaw(const DxbcDest& dest, const DxbcSrc& byte_offset, - const DxbcSrc& value) { - uint32_t dest_write_mask = 
dest.GetMask(); - assert_true(dest_write_mask == 0b0001 || dest_write_mask == 0b0011 || - dest_write_mask == 0b0111 || dest_write_mask == 0b1111); - uint32_t operands_length = dest.GetLength() + - byte_offset.GetLength(0b0000) + - value.GetLength(dest_write_mask); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back( - DxbcOpcodeToken(DxbcOpcode::kStoreRaw, operands_length)); - dest.Write(shader_code_); - byte_offset.Write(shader_code_, true, 0b0000); - value.Write(shader_code_, true, dest_write_mask); - ++stat_.instruction_count; - ++stat_.c_texture_store_instructions; - } - void DxbcOpEvalSampleIndex(const DxbcDest& dest, const DxbcSrc& value, - const DxbcSrc& sample_index) { - uint32_t dest_write_mask = dest.GetMask(); - uint32_t operands_length = dest.GetLength() + - value.GetLength(dest_write_mask) + - sample_index.GetLength(0b0000); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back( - DxbcOpcodeToken(DxbcOpcode::kEvalSampleIndex, operands_length)); - dest.Write(shader_code_); - value.Write(shader_code_, false, dest_write_mask); - sample_index.Write(shader_code_, true, 0b0000); - ++stat_.instruction_count; - } - void DxbcOpEvalCentroid(const DxbcDest& dest, const DxbcSrc& value) { - uint32_t dest_write_mask = dest.GetMask(); - uint32_t operands_length = - dest.GetLength() + value.GetLength(dest_write_mask); - shader_code_.reserve(shader_code_.size() + 1 + operands_length); - shader_code_.push_back( - DxbcOpcodeToken(DxbcOpcode::kEvalCentroid, operands_length)); - dest.Write(shader_code_); - value.Write(shader_code_, false, dest_write_mask); - ++stat_.instruction_count; - } - enum : uint32_t { kSysConst_Flags_Index = 0, kSysConst_Flags_Vec = 0, @@ -2307,8 +803,8 @@ class DxbcShaderTranslator : public ShaderTranslator { // multiple times. 
void ExportToMemory_PackFixed32(const uint32_t* eM_temps, uint32_t eM_count, const uint32_t bits[4], - const DxbcSrc& is_integer, - const DxbcSrc& is_signed); + const dxbc::Src& is_integer, + const dxbc::Src& is_signed); void ExportToMemory(); void CompleteVertexOrDomainShader(); // Discards the SSAA sample if it's masked out by alpha to coverage. @@ -2318,7 +814,7 @@ class DxbcShaderTranslator : public ShaderTranslator { // Masks the sample away from system_temp_rov_params_.x if it's not covered. // threshold_offset and temp.temp_component can be the same if needed. void CompletePixelShader_ROV_AlphaToMaskSample( - uint32_t sample_index, float threshold_base, DxbcSrc threshold_offset, + uint32_t sample_index, float threshold_base, dxbc::Src threshold_offset, float threshold_offset_scale, uint32_t temp, uint32_t temp_component); // Performs alpha to coverage if necessary, updating the low (coverage) bits // of system_temp_rov_params_.x. @@ -2338,13 +834,13 @@ class DxbcShaderTranslator : public ShaderTranslator { // used directly with most DXBC instructions (like, if it's an indexable GPR), // in this case, temp_pushed_out will be set to true, and PopSystemTemp must // be done when the operand is not needed anymore. - DxbcSrc LoadOperand(const InstructionOperand& operand, - uint32_t needed_components, bool& temp_pushed_out); + dxbc::Src LoadOperand(const InstructionOperand& operand, + uint32_t needed_components, bool& temp_pushed_out); // Writes the specified source (src must be usable as a vector `mov` source, // including to x#) to an instruction storage target. // can_store_memexport_address is for safety, to allow only proper MADs with a // stream constant to write to eA. 
- void StoreResult(const InstructionResult& result, const DxbcSrc& src, + void StoreResult(const InstructionResult& result, const dxbc::Src& src, bool can_store_memexport_address = false); // The nesting of `if` instructions is the following: @@ -2405,22 +901,22 @@ class DxbcShaderTranslator : public ShaderTranslator { uint32_t GetBindlessResourceCount() const { return uint32_t(texture_bindings_.size() + sampler_bindings_.size()); } - // Marks fetch constants as used by the DXBC shader and returns DxbcSrc + // Marks fetch constants as used by the DXBC shader and returns dxbc::Src // for the words 01 (pair 0), 23 (pair 1) or 45 (pair 2) of the texture fetch // constant. - DxbcSrc RequestTextureFetchConstantWordPair(uint32_t fetch_constant_index, - uint32_t pair_index) { + dxbc::Src RequestTextureFetchConstantWordPair(uint32_t fetch_constant_index, + uint32_t pair_index) { if (cbuffer_index_fetch_constants_ == kBindingIndexUnallocated) { cbuffer_index_fetch_constants_ = cbuffer_count_++; } uint32_t total_pair_index = fetch_constant_index * 3 + pair_index; - return DxbcSrc::CB(cbuffer_index_fetch_constants_, - uint32_t(CbufferRegister::kFetchConstants), - total_pair_index >> 1, - (total_pair_index & 1) ? 0b10101110 : 0b00000100); + return dxbc::Src::CB(cbuffer_index_fetch_constants_, + uint32_t(CbufferRegister::kFetchConstants), + total_pair_index >> 1, + (total_pair_index & 1) ? 0b10101110 : 0b00000100); } - DxbcSrc RequestTextureFetchConstantWord(uint32_t fetch_constant_index, - uint32_t word_index) { + dxbc::Src RequestTextureFetchConstantWord(uint32_t fetch_constant_index, + uint32_t word_index) { return RequestTextureFetchConstantWordPair(fetch_constant_index, word_index >> 1) .SelectFromSwizzled(word_index & 1); @@ -2452,6 +948,13 @@ class DxbcShaderTranslator : public ShaderTranslator { // generated in the end of translation. std::vector shader_object_; + // The statistics chunk. 
+ dxbc::Statistics stat_; + + // Assembler for shader_code_ and stat_ (must be placed after them for correct + // initialization order). + dxbc::Assembler a_; + // Buffer for instruction disassembly comments. StringBuffer instruction_disassembly_buffer_; @@ -2511,8 +1014,8 @@ class DxbcShaderTranslator : public ShaderTranslator { struct RdefType { // Name ignored for arrays. const char* name; - DxbcRdefVariableClass variable_class; - DxbcRdefVariableType variable_type; + dxbc::RdefVariableClass variable_class; + dxbc::RdefVariableType variable_type; uint32_t row_count; uint32_t column_count; // 0 for primitive types, 1 for structures, array size for arrays. @@ -2675,60 +1178,6 @@ class DxbcShaderTranslator : public ShaderTranslator { // Number of `alloc export`s encountered so far in the translation. The index // of the current eA/eM# temp register set is this minus 1, if it's not 0. uint32_t memexport_alloc_current_count_; - - // The STAT chunk (based on Wine d3dcompiler_parse_stat). - struct Statistics { - uint32_t instruction_count; - uint32_t temp_register_count; - // Unknown in Wine. - uint32_t def_count; - // Only inputs and outputs. - uint32_t dcl_count; - uint32_t float_instruction_count; - uint32_t int_instruction_count; - uint32_t uint_instruction_count; - // endif, ret. - uint32_t static_flow_control_count; - // if (but not else). - uint32_t dynamic_flow_control_count; - // Unknown in Wine. - uint32_t macro_instruction_count; - uint32_t temp_array_count; - uint32_t array_instruction_count; - uint32_t cut_instruction_count; - uint32_t emit_instruction_count; - uint32_t texture_normal_instructions; - uint32_t texture_load_instructions; - uint32_t texture_comp_instructions; - uint32_t texture_bias_instructions; - uint32_t texture_gradient_instructions; - // Not including indexable temp load/store. - uint32_t mov_instruction_count; - // Unknown in Wine. - uint32_t movc_instruction_count; - uint32_t conversion_instruction_count; - // Unknown in Wine. 
- uint32_t unknown_22; - uint32_t input_primitive; - uint32_t gs_output_topology; - uint32_t gs_max_output_vertex_count; - uint32_t unknown_26; - // Unknown in Wine, but confirmed by testing. - uint32_t lod_instructions; - uint32_t unknown_28; - uint32_t unknown_29; - uint32_t c_control_points; - uint32_t hs_output_primitive; - uint32_t hs_partitioning; - DxbcTessellatorDomain tessellator_domain; - // Unknown in Wine. - uint32_t c_barrier_instructions; - // Unknown in Wine. - uint32_t c_interlocked_instructions; - // Unknown in Wine, but confirmed by testing. - uint32_t c_texture_store_instructions; - }; - Statistics stat_; }; } // namespace gpu diff --git a/src/xenia/gpu/dxbc_shader_translator_alu.cc b/src/xenia/gpu/dxbc_shader_translator_alu.cc index b2d24f89b..7c7280338 100644 --- a/src/xenia/gpu/dxbc_shader_translator_alu.cc +++ b/src/xenia/gpu/dxbc_shader_translator_alu.cc @@ -22,7 +22,7 @@ using namespace ucode; void DxbcShaderTranslator::ProcessVectorAluOperation( const ParsedAluInstruction& instr, uint32_t& result_swizzle, bool& predicate_written) { - result_swizzle = DxbcSrc::kXYZW; + result_swizzle = dxbc::Src::kXYZW; predicate_written = false; uint32_t used_result_components = @@ -49,7 +49,8 @@ void DxbcShaderTranslator::ProcessVectorAluOperation( if (instr.vector_opcode == AluVectorOpcode::kCube) { operand_needed_components[0] &= 0b1101; } - DxbcSrc operands[3]{DxbcSrc::LF(0.0f), DxbcSrc::LF(0.0f), DxbcSrc::LF(0.0f)}; + dxbc::Src operands[3]{dxbc::Src::LF(0.0f), dxbc::Src::LF(0.0f), + dxbc::Src::LF(0.0f)}; uint32_t operand_temps = 0; for (uint32_t i = 0; i < operand_count; ++i) { bool operand_temp_pushed = false; @@ -60,18 +61,18 @@ void DxbcShaderTranslator::ProcessVectorAluOperation( } // Don't return without PopSystemTemp(operand_temps) from now on! 
- DxbcDest per_component_dest( - DxbcDest::R(system_temp_result_, used_result_components)); + dxbc::Dest per_component_dest( + dxbc::Dest::R(system_temp_result_, used_result_components)); switch (instr.vector_opcode) { case AluVectorOpcode::kAdd: - DxbcOpAdd(per_component_dest, operands[0], operands[1]); + a_.OpAdd(per_component_dest, operands[0], operands[1]); break; case AluVectorOpcode::kMul: case AluVectorOpcode::kMad: { // Not using DXBC mad to prevent fused multiply-add (mul followed by add // may be optimized into non-fused mad by the driver in the identical // operands case also). - DxbcOpMul(per_component_dest, operands[0], operands[1]); + a_.OpMul(per_component_dest, operands[0], operands[1]); uint32_t multiplicands_different = used_result_components & ~instr.vector_operands[0].GetIdenticalComponents( @@ -79,22 +80,22 @@ void DxbcShaderTranslator::ProcessVectorAluOperation( if (multiplicands_different) { // Shader Model 3: +-0 or denormal * anything = +0. uint32_t is_zero_temp = PushSystemTemp(); - DxbcOpMin(DxbcDest::R(is_zero_temp, multiplicands_different), - operands[0].Abs(), operands[1].Abs()); + a_.OpMin(dxbc::Dest::R(is_zero_temp, multiplicands_different), + operands[0].Abs(), operands[1].Abs()); // min isn't required to flush denormals, eq is. - DxbcOpEq(DxbcDest::R(is_zero_temp, multiplicands_different), - DxbcSrc::R(is_zero_temp), DxbcSrc::LF(0.0f)); + a_.OpEq(dxbc::Dest::R(is_zero_temp, multiplicands_different), + dxbc::Src::R(is_zero_temp), dxbc::Src::LF(0.0f)); // Not replacing true `0 + term` with movc of the term because +0 + -0 // should result in +0, not -0. - DxbcOpMovC(DxbcDest::R(system_temp_result_, multiplicands_different), - DxbcSrc::R(is_zero_temp), DxbcSrc::LF(0.0f), - DxbcSrc::R(system_temp_result_)); + a_.OpMovC(dxbc::Dest::R(system_temp_result_, multiplicands_different), + dxbc::Src::R(is_zero_temp), dxbc::Src::LF(0.0f), + dxbc::Src::R(system_temp_result_)); // Release is_zero_temp. 
PopSystemTemp(); } if (instr.vector_opcode == AluVectorOpcode::kMad) { - DxbcOpAdd(per_component_dest, DxbcSrc::R(system_temp_result_), - operands[2]); + a_.OpAdd(per_component_dest, dxbc::Src::R(system_temp_result_), + operands[2]); } } break; @@ -108,65 +109,65 @@ void DxbcShaderTranslator::ProcessVectorAluOperation( if (different) { // Shader Model 3 NaN behavior (a op b ? a : b, not fmax/fmin). if (instr.vector_opcode == AluVectorOpcode::kMin) { - DxbcOpLT(DxbcDest::R(system_temp_result_, different), operands[0], - operands[1]); + a_.OpLT(dxbc::Dest::R(system_temp_result_, different), operands[0], + operands[1]); } else { - DxbcOpGE(DxbcDest::R(system_temp_result_, different), operands[0], - operands[1]); + a_.OpGE(dxbc::Dest::R(system_temp_result_, different), operands[0], + operands[1]); } - DxbcOpMovC(DxbcDest::R(system_temp_result_, different), - DxbcSrc::R(system_temp_result_), operands[0], operands[1]); + a_.OpMovC(dxbc::Dest::R(system_temp_result_, different), + dxbc::Src::R(system_temp_result_), operands[0], operands[1]); } if (identical) { - DxbcOpMov(DxbcDest::R(system_temp_result_, identical), operands[0]); + a_.OpMov(dxbc::Dest::R(system_temp_result_, identical), operands[0]); } } break; case AluVectorOpcode::kSeq: - DxbcOpEq(per_component_dest, operands[0], operands[1]); - DxbcOpAnd(per_component_dest, DxbcSrc::R(system_temp_result_), - DxbcSrc::LF(1.0f)); + a_.OpEq(per_component_dest, operands[0], operands[1]); + a_.OpAnd(per_component_dest, dxbc::Src::R(system_temp_result_), + dxbc::Src::LF(1.0f)); break; case AluVectorOpcode::kSgt: - DxbcOpLT(per_component_dest, operands[1], operands[0]); - DxbcOpAnd(per_component_dest, DxbcSrc::R(system_temp_result_), - DxbcSrc::LF(1.0f)); + a_.OpLT(per_component_dest, operands[1], operands[0]); + a_.OpAnd(per_component_dest, dxbc::Src::R(system_temp_result_), + dxbc::Src::LF(1.0f)); break; case AluVectorOpcode::kSge: - DxbcOpGE(per_component_dest, operands[0], operands[1]); - DxbcOpAnd(per_component_dest, 
DxbcSrc::R(system_temp_result_), - DxbcSrc::LF(1.0f)); + a_.OpGE(per_component_dest, operands[0], operands[1]); + a_.OpAnd(per_component_dest, dxbc::Src::R(system_temp_result_), + dxbc::Src::LF(1.0f)); break; case AluVectorOpcode::kSne: - DxbcOpNE(per_component_dest, operands[0], operands[1]); - DxbcOpAnd(per_component_dest, DxbcSrc::R(system_temp_result_), - DxbcSrc::LF(1.0f)); + a_.OpNE(per_component_dest, operands[0], operands[1]); + a_.OpAnd(per_component_dest, dxbc::Src::R(system_temp_result_), + dxbc::Src::LF(1.0f)); break; case AluVectorOpcode::kFrc: - DxbcOpFrc(per_component_dest, operands[0]); + a_.OpFrc(per_component_dest, operands[0]); break; case AluVectorOpcode::kTrunc: - DxbcOpRoundZ(per_component_dest, operands[0]); + a_.OpRoundZ(per_component_dest, operands[0]); break; case AluVectorOpcode::kFloor: - DxbcOpRoundNI(per_component_dest, operands[0]); + a_.OpRoundNI(per_component_dest, operands[0]); break; case AluVectorOpcode::kCndEq: - DxbcOpEq(per_component_dest, operands[0], DxbcSrc::LF(0.0f)); - DxbcOpMovC(per_component_dest, DxbcSrc::R(system_temp_result_), - operands[1], operands[2]); + a_.OpEq(per_component_dest, operands[0], dxbc::Src::LF(0.0f)); + a_.OpMovC(per_component_dest, dxbc::Src::R(system_temp_result_), + operands[1], operands[2]); break; case AluVectorOpcode::kCndGe: - DxbcOpGE(per_component_dest, operands[0], DxbcSrc::LF(0.0f)); - DxbcOpMovC(per_component_dest, DxbcSrc::R(system_temp_result_), - operands[1], operands[2]); + a_.OpGE(per_component_dest, operands[0], dxbc::Src::LF(0.0f)); + a_.OpMovC(per_component_dest, dxbc::Src::R(system_temp_result_), + operands[1], operands[2]); break; case AluVectorOpcode::kCndGt: - DxbcOpLT(per_component_dest, DxbcSrc::LF(0.0f), operands[0]); - DxbcOpMovC(per_component_dest, DxbcSrc::R(system_temp_result_), - operands[1], operands[2]); + a_.OpLT(per_component_dest, dxbc::Src::LF(0.0f), operands[0]); + a_.OpMovC(per_component_dest, dxbc::Src::R(system_temp_result_), + operands[1], operands[2]); 
break; case AluVectorOpcode::kDp4: @@ -180,44 +181,44 @@ void DxbcShaderTranslator::ProcessVectorAluOperation( } else { component_count = 4; } - result_swizzle = DxbcSrc::kXXXX; + result_swizzle = dxbc::Src::kXXXX; uint32_t different = uint32_t((1 << component_count) - 1) & ~instr.vector_operands[0].GetIdenticalComponents( instr.vector_operands[1]); for (uint32_t i = 0; i < component_count; ++i) { - DxbcOpMul(DxbcDest::R(system_temp_result_, i ? 0b0010 : 0b0001), - operands[0].SelectFromSwizzled(i), - operands[1].SelectFromSwizzled(i)); + a_.OpMul(dxbc::Dest::R(system_temp_result_, i ? 0b0010 : 0b0001), + operands[0].SelectFromSwizzled(i), + operands[1].SelectFromSwizzled(i)); if ((different & (1 << i)) != 0) { // Shader Model 3: +-0 or denormal * anything = +0 (also not replacing // true `0 + term` with movc of the term because +0 + -0 should result // in +0, not -0). - DxbcOpMin(DxbcDest::R(system_temp_result_, 0b0100), - operands[0].SelectFromSwizzled(i).Abs(), - operands[1].SelectFromSwizzled(i).Abs()); - DxbcOpEq(DxbcDest::R(system_temp_result_, 0b0100), - DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ), - DxbcSrc::LF(0.0f)); - DxbcOpMovC(DxbcDest::R(system_temp_result_, i ? 0b0010 : 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ), - DxbcSrc::LF(0.0f), - DxbcSrc::R(system_temp_result_, - i ? DxbcSrc::kYYYY : DxbcSrc::kXXXX)); + a_.OpMin(dxbc::Dest::R(system_temp_result_, 0b0100), + operands[0].SelectFromSwizzled(i).Abs(), + operands[1].SelectFromSwizzled(i).Abs()); + a_.OpEq(dxbc::Dest::R(system_temp_result_, 0b0100), + dxbc::Src::R(system_temp_result_, dxbc::Src::kZZZZ), + dxbc::Src::LF(0.0f)); + a_.OpMovC(dxbc::Dest::R(system_temp_result_, i ? 0b0010 : 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kZZZZ), + dxbc::Src::LF(0.0f), + dxbc::Src::R(system_temp_result_, + i ? 
dxbc::Src::kYYYY : dxbc::Src::kXXXX)); } if (i) { // Not using DXBC dp# to avoid fused multiply-add, PC GPUs are scalar // as of 2020 anyway, and not using mad for the same reason (mul // followed by add may be optimized into non-fused mad by the driver // in the identical operands case also). - DxbcOpAdd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY)); + a_.OpAdd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY)); } } if (component_count == 2) { - DxbcOpAdd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - operands[2].SelectFromSwizzled(0)); + a_.OpAdd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + operands[2].SelectFromSwizzled(0)); } } break; @@ -225,110 +226,113 @@ void DxbcShaderTranslator::ProcessVectorAluOperation( // operands[0] is .z_xy. // Result is T coordinate, S coordinate, 2 * major axis, face ID. 
constexpr uint32_t kCubeX = 2, kCubeY = 3, kCubeZ = 0; - DxbcSrc cube_x_src(operands[0].SelectFromSwizzled(kCubeX)); - DxbcSrc cube_y_src(operands[0].SelectFromSwizzled(kCubeY)); - DxbcSrc cube_z_src(operands[0].SelectFromSwizzled(kCubeZ)); + dxbc::Src cube_x_src(operands[0].SelectFromSwizzled(kCubeX)); + dxbc::Src cube_y_src(operands[0].SelectFromSwizzled(kCubeY)); + dxbc::Src cube_z_src(operands[0].SelectFromSwizzled(kCubeZ)); // result.xy = bool2(abs(z) >= abs(x), abs(z) >= abs(y)) - DxbcOpGE(DxbcDest::R(system_temp_result_, 0b0011), cube_z_src.Abs(), - operands[0].SwizzleSwizzled(kCubeX | (kCubeY << 2)).Abs()); + a_.OpGE(dxbc::Dest::R(system_temp_result_, 0b0011), cube_z_src.Abs(), + operands[0].SwizzleSwizzled(kCubeX | (kCubeY << 2)).Abs()); // result.x = abs(z) >= abs(x) && abs(z) >= abs(y) - DxbcOpAnd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY)); - DxbcDest tc_dest(DxbcDest::R(system_temp_result_, 0b0001)); - DxbcDest sc_dest(DxbcDest::R(system_temp_result_, 0b0010)); - DxbcDest ma_dest(DxbcDest::R(system_temp_result_, 0b0100)); - DxbcDest id_dest(DxbcDest::R(system_temp_result_, 0b1000)); - DxbcOpIf(true, DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX)); + a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY)); + dxbc::Dest tc_dest(dxbc::Dest::R(system_temp_result_, 0b0001)); + dxbc::Dest sc_dest(dxbc::Dest::R(system_temp_result_, 0b0010)); + dxbc::Dest ma_dest(dxbc::Dest::R(system_temp_result_, 0b0100)); + dxbc::Dest id_dest(dxbc::Dest::R(system_temp_result_, 0b1000)); + a_.OpIf(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX)); { // Z is the major axis. // z < 0 needed for SC and ID, but the last to use is ID. uint32_t ma_neg_component = (used_result_components & 0b1000) ? 
3 : 1; if (used_result_components & 0b1010) { - DxbcOpLT(DxbcDest::R(system_temp_result_, 1 << ma_neg_component), - cube_z_src, DxbcSrc::LF(0.0f)); + a_.OpLT(dxbc::Dest::R(system_temp_result_, 1 << ma_neg_component), + cube_z_src, dxbc::Src::LF(0.0f)); } if (used_result_components & 0b0001) { - DxbcOpMov(tc_dest, -cube_y_src); + a_.OpMov(tc_dest, -cube_y_src); } if (used_result_components & 0b0010) { - DxbcOpMovC(sc_dest, - DxbcSrc::R(system_temp_result_).Select(ma_neg_component), - -cube_x_src, cube_x_src); + a_.OpMovC(sc_dest, + dxbc::Src::R(system_temp_result_).Select(ma_neg_component), + -cube_x_src, cube_x_src); } if (used_result_components & 0b0100) { - DxbcOpMul(ma_dest, DxbcSrc::LF(2.0f), cube_z_src); + a_.OpMul(ma_dest, dxbc::Src::LF(2.0f), cube_z_src); } if (used_result_components & 0b1000) { - DxbcOpMovC(id_dest, - DxbcSrc::R(system_temp_result_).Select(ma_neg_component), - DxbcSrc::LF(5.0f), DxbcSrc::LF(4.0f)); + a_.OpMovC(id_dest, + dxbc::Src::R(system_temp_result_).Select(ma_neg_component), + dxbc::Src::LF(5.0f), dxbc::Src::LF(4.0f)); } } - DxbcOpElse(); + a_.OpElse(); { // result.x = abs(y) >= abs(x) - DxbcOpGE(DxbcDest::R(system_temp_result_, 0b0001), cube_y_src.Abs(), - cube_x_src.Abs()); - DxbcOpIf(true, DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX)); + a_.OpGE(dxbc::Dest::R(system_temp_result_, 0b0001), cube_y_src.Abs(), + cube_x_src.Abs()); + a_.OpIf(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX)); { // Y is the major axis. // y < 0 needed for TC and ID, but the last to use is ID. uint32_t ma_neg_component = (used_result_components & 0b1000) ? 
3 : 0; if (used_result_components & 0b1001) { - DxbcOpLT(DxbcDest::R(system_temp_result_, 1 << ma_neg_component), - cube_y_src, DxbcSrc::LF(0.0f)); + a_.OpLT(dxbc::Dest::R(system_temp_result_, 1 << ma_neg_component), + cube_y_src, dxbc::Src::LF(0.0f)); } if (used_result_components & 0b0001) { - DxbcOpMovC(tc_dest, - DxbcSrc::R(system_temp_result_).Select(ma_neg_component), - -cube_z_src, cube_z_src); + a_.OpMovC( + tc_dest, + dxbc::Src::R(system_temp_result_).Select(ma_neg_component), + -cube_z_src, cube_z_src); } if (used_result_components & 0b0010) { - DxbcOpMov(sc_dest, cube_x_src); + a_.OpMov(sc_dest, cube_x_src); } if (used_result_components & 0b0100) { - DxbcOpMul(ma_dest, DxbcSrc::LF(2.0f), cube_y_src); + a_.OpMul(ma_dest, dxbc::Src::LF(2.0f), cube_y_src); } if (used_result_components & 0b1000) { - DxbcOpMovC(id_dest, - DxbcSrc::R(system_temp_result_).Select(ma_neg_component), - DxbcSrc::LF(3.0f), DxbcSrc::LF(2.0f)); + a_.OpMovC( + id_dest, + dxbc::Src::R(system_temp_result_).Select(ma_neg_component), + dxbc::Src::LF(3.0f), dxbc::Src::LF(2.0f)); } } - DxbcOpElse(); + a_.OpElse(); { // X is the major axis. // x < 0 needed for SC and ID, but the last to use is ID. uint32_t ma_neg_component = (used_result_components & 0b1000) ? 
3 : 1; if (used_result_components & 0b1010) { - DxbcOpLT(DxbcDest::R(system_temp_result_, 1 << ma_neg_component), - cube_x_src, DxbcSrc::LF(0.0f)); + a_.OpLT(dxbc::Dest::R(system_temp_result_, 1 << ma_neg_component), + cube_x_src, dxbc::Src::LF(0.0f)); } if (used_result_components & 0b0001) { - DxbcOpMov(tc_dest, -cube_y_src); + a_.OpMov(tc_dest, -cube_y_src); } if (used_result_components & 0b0010) { - DxbcOpMovC(sc_dest, - DxbcSrc::R(system_temp_result_).Select(ma_neg_component), - cube_z_src, -cube_z_src); + a_.OpMovC( + sc_dest, + dxbc::Src::R(system_temp_result_).Select(ma_neg_component), + cube_z_src, -cube_z_src); } if (used_result_components & 0b0100) { - DxbcOpMul(ma_dest, DxbcSrc::LF(2.0f), cube_x_src); + a_.OpMul(ma_dest, dxbc::Src::LF(2.0f), cube_x_src); } if (used_result_components & 0b1000) { - DxbcOpAnd(id_dest, - DxbcSrc::R(system_temp_result_).Select(ma_neg_component), - DxbcSrc::LF(1.0f)); + a_.OpAnd(id_dest, + dxbc::Src::R(system_temp_result_).Select(ma_neg_component), + dxbc::Src::LF(1.0f)); } } - DxbcOpEndIf(); + a_.OpEndIf(); } - DxbcOpEndIf(); + a_.OpEndIf(); } break; case AluVectorOpcode::kMax4: { - result_swizzle = DxbcSrc::kXXXX; + result_swizzle = dxbc::Src::kXXXX; // Find max of all different components of the first operand. 
// FIXME(Triang3l): Not caring about NaN because no info about the // correct order, just using SM4 max here, which replaces them with the @@ -345,269 +349,270 @@ void DxbcShaderTranslator::ProcessVectorAluOperation( uint32_t unique_component_1; xe::bit_scan_forward(remaining_components, &unique_component_1); remaining_components &= ~uint32_t(1 << unique_component_1); - DxbcOpMax(DxbcDest::R(system_temp_result_, 0b0001), - operands[0].Select(unique_component_0), - operands[0].Select(unique_component_1)); + a_.OpMax(dxbc::Dest::R(system_temp_result_, 0b0001), + operands[0].Select(unique_component_0), + operands[0].Select(unique_component_1)); while (remaining_components) { uint32_t unique_component; xe::bit_scan_forward(remaining_components, &unique_component); remaining_components &= ~uint32_t(1 << unique_component); - DxbcOpMax(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - operands[0].Select(unique_component)); + a_.OpMax(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + operands[0].Select(unique_component)); } } else { - DxbcOpMov(DxbcDest::R(system_temp_result_, 0b0001), - operands[0].Select(unique_component_0)); + a_.OpMov(dxbc::Dest::R(system_temp_result_, 0b0001), + operands[0].Select(unique_component_0)); } } break; case AluVectorOpcode::kSetpEqPush: predicate_written = true; - result_swizzle = DxbcSrc::kXXXX; + result_swizzle = dxbc::Src::kXXXX; // result.xy = src0.xw == 0.0 (x only if needed). - DxbcOpEq(DxbcDest::R(system_temp_result_, - used_result_components ? 0b0011 : 0b0010), - operands[0].SwizzleSwizzled(0b1100), DxbcSrc::LF(0.0f)); + a_.OpEq(dxbc::Dest::R(system_temp_result_, + used_result_components ? 0b0011 : 0b0010), + operands[0].SwizzleSwizzled(0b1100), dxbc::Src::LF(0.0f)); // result.zw = src1.xw == 0.0 (z only if needed). - DxbcOpEq(DxbcDest::R(system_temp_result_, - used_result_components ? 
0b1100 : 0b1000), - operands[1].SwizzleSwizzled(0b11000000), DxbcSrc::LF(0.0f)); + a_.OpEq(dxbc::Dest::R(system_temp_result_, + used_result_components ? 0b1100 : 0b1000), + operands[1].SwizzleSwizzled(0b11000000), dxbc::Src::LF(0.0f)); // p0 = src0.w == 0.0 && src1.w == 0.0 - DxbcOpAnd(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b0100), - DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY), - DxbcSrc::R(system_temp_result_, DxbcSrc::kWWWW)); + a_.OpAnd(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b0100), + dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY), + dxbc::Src::R(system_temp_result_, dxbc::Src::kWWWW)); if (used_result_components) { // result = (src0.x == 0.0 && src1.x == 0.0) ? 0.0 : src0.x + 1.0 - DxbcOpAnd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ)); + a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::R(system_temp_result_, dxbc::Src::kZZZZ)); // If the condition is true, 1 will be added to make it 0. - DxbcOpMovC(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::LF(-1.0f), operands[0].SelectFromSwizzled(0)); - DxbcOpAdd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::LF(1.0f)); + a_.OpMovC(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::LF(-1.0f), operands[0].SelectFromSwizzled(0)); + a_.OpAdd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::LF(1.0f)); } break; case AluVectorOpcode::kSetpNePush: predicate_written = true; - result_swizzle = DxbcSrc::kXXXX; + result_swizzle = dxbc::Src::kXXXX; // result.xy = src0.xw == 0.0 (x only if needed). - DxbcOpEq(DxbcDest::R(system_temp_result_, - used_result_components ? 
0b0011 : 0b0010), - operands[0].SwizzleSwizzled(0b1100), DxbcSrc::LF(0.0f)); + a_.OpEq(dxbc::Dest::R(system_temp_result_, + used_result_components ? 0b0011 : 0b0010), + operands[0].SwizzleSwizzled(0b1100), dxbc::Src::LF(0.0f)); // result.zw = src1.xw != 0.0 (z only if needed). - DxbcOpNE(DxbcDest::R(system_temp_result_, - used_result_components ? 0b1100 : 0b1000), - operands[1].SwizzleSwizzled(0b11000000), DxbcSrc::LF(0.0f)); + a_.OpNE(dxbc::Dest::R(system_temp_result_, + used_result_components ? 0b1100 : 0b1000), + operands[1].SwizzleSwizzled(0b11000000), dxbc::Src::LF(0.0f)); // p0 = src0.w == 0.0 && src1.w != 0.0 - DxbcOpAnd(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b0100), - DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY), - DxbcSrc::R(system_temp_result_, DxbcSrc::kWWWW)); + a_.OpAnd(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b0100), + dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY), + dxbc::Src::R(system_temp_result_, dxbc::Src::kWWWW)); if (used_result_components) { // result = (src0.x == 0.0 && src1.x != 0.0) ? 0.0 : src0.x + 1.0 - DxbcOpAnd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ)); + a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::R(system_temp_result_, dxbc::Src::kZZZZ)); // If the condition is true, 1 will be added to make it 0. 
- DxbcOpMovC(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::LF(-1.0f), operands[0].SelectFromSwizzled(0)); - DxbcOpAdd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::LF(1.0f)); + a_.OpMovC(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::LF(-1.0f), operands[0].SelectFromSwizzled(0)); + a_.OpAdd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::LF(1.0f)); } break; case AluVectorOpcode::kSetpGtPush: predicate_written = true; - result_swizzle = DxbcSrc::kXXXX; + result_swizzle = dxbc::Src::kXXXX; // result.xy = src0.xw == 0.0 (x only if needed). - DxbcOpEq(DxbcDest::R(system_temp_result_, - used_result_components ? 0b0011 : 0b0010), - operands[0].SwizzleSwizzled(0b1100), DxbcSrc::LF(0.0f)); + a_.OpEq(dxbc::Dest::R(system_temp_result_, + used_result_components ? 0b0011 : 0b0010), + operands[0].SwizzleSwizzled(0b1100), dxbc::Src::LF(0.0f)); // result.zw = src1.xw > 0.0 (z only if needed). - DxbcOpLT(DxbcDest::R(system_temp_result_, - used_result_components ? 0b1100 : 0b1000), - DxbcSrc::LF(0.0f), operands[1].SwizzleSwizzled(0b11000000)); + a_.OpLT(dxbc::Dest::R(system_temp_result_, + used_result_components ? 0b1100 : 0b1000), + dxbc::Src::LF(0.0f), operands[1].SwizzleSwizzled(0b11000000)); // p0 = src0.w == 0.0 && src1.w > 0.0 - DxbcOpAnd(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b0100), - DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY), - DxbcSrc::R(system_temp_result_, DxbcSrc::kWWWW)); + a_.OpAnd(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b0100), + dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY), + dxbc::Src::R(system_temp_result_, dxbc::Src::kWWWW)); if (used_result_components) { // result = (src0.x == 0.0 && src1.x > 0.0) ? 
0.0 : src0.x + 1.0 - DxbcOpAnd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ)); + a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::R(system_temp_result_, dxbc::Src::kZZZZ)); // If the condition is true, 1 will be added to make it 0. - DxbcOpMovC(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::LF(-1.0f), operands[0].SelectFromSwizzled(0)); - DxbcOpAdd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::LF(1.0f)); + a_.OpMovC(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::LF(-1.0f), operands[0].SelectFromSwizzled(0)); + a_.OpAdd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::LF(1.0f)); } break; case AluVectorOpcode::kSetpGePush: predicate_written = true; - result_swizzle = DxbcSrc::kXXXX; + result_swizzle = dxbc::Src::kXXXX; // result.xy = src0.xw == 0.0 (x only if needed). - DxbcOpEq(DxbcDest::R(system_temp_result_, - used_result_components ? 0b0011 : 0b0010), - operands[0].SwizzleSwizzled(0b1100), DxbcSrc::LF(0.0f)); + a_.OpEq(dxbc::Dest::R(system_temp_result_, + used_result_components ? 0b0011 : 0b0010), + operands[0].SwizzleSwizzled(0b1100), dxbc::Src::LF(0.0f)); // result.zw = src1.xw >= 0.0 (z only if needed). - DxbcOpGE(DxbcDest::R(system_temp_result_, - used_result_components ? 0b1100 : 0b1000), - operands[1].SwizzleSwizzled(0b11000000), DxbcSrc::LF(0.0f)); + a_.OpGE(dxbc::Dest::R(system_temp_result_, + used_result_components ? 
0b1100 : 0b1000), + operands[1].SwizzleSwizzled(0b11000000), dxbc::Src::LF(0.0f)); // p0 = src0.w == 0.0 && src1.w >= 0.0 - DxbcOpAnd(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b0100), - DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY), - DxbcSrc::R(system_temp_result_, DxbcSrc::kWWWW)); + a_.OpAnd(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b0100), + dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY), + dxbc::Src::R(system_temp_result_, dxbc::Src::kWWWW)); if (used_result_components) { // result = (src0.x == 0.0 && src1.x >= 0.0) ? 0.0 : src0.x + 1.0 - DxbcOpAnd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ)); + a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::R(system_temp_result_, dxbc::Src::kZZZZ)); // If the condition is true, 1 will be added to make it 0. - DxbcOpMovC(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::LF(-1.0f), operands[0].SelectFromSwizzled(0)); - DxbcOpAdd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::LF(1.0f)); + a_.OpMovC(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::LF(-1.0f), operands[0].SelectFromSwizzled(0)); + a_.OpAdd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::LF(1.0f)); } break; case AluVectorOpcode::kKillEq: - result_swizzle = DxbcSrc::kXXXX; - DxbcOpEq(DxbcDest::R(system_temp_result_), operands[0], operands[1]); - DxbcOpOr(DxbcDest::R(system_temp_result_, 0b0011), - DxbcSrc::R(system_temp_result_, 0b0100), - DxbcSrc::R(system_temp_result_, 0b1110)); - DxbcOpOr(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY)); + result_swizzle = 
dxbc::Src::kXXXX; + a_.OpEq(dxbc::Dest::R(system_temp_result_), operands[0], operands[1]); + a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0011), + dxbc::Src::R(system_temp_result_, 0b0100), + dxbc::Src::R(system_temp_result_, 0b1110)); + a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY)); if (edram_rov_used_) { - DxbcOpRetC(true, DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX)); + a_.OpRetC(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX)); } else { - DxbcOpDiscard(true, DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX)); + a_.OpDiscard(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX)); } if (used_result_components) { - DxbcOpAnd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::LF(1.0f)); + a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::LF(1.0f)); } break; case AluVectorOpcode::kKillGt: - result_swizzle = DxbcSrc::kXXXX; - DxbcOpLT(DxbcDest::R(system_temp_result_), operands[1], operands[0]); - DxbcOpOr(DxbcDest::R(system_temp_result_, 0b0011), - DxbcSrc::R(system_temp_result_, 0b0100), - DxbcSrc::R(system_temp_result_, 0b1110)); - DxbcOpOr(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY)); + result_swizzle = dxbc::Src::kXXXX; + a_.OpLT(dxbc::Dest::R(system_temp_result_), operands[1], operands[0]); + a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0011), + dxbc::Src::R(system_temp_result_, 0b0100), + dxbc::Src::R(system_temp_result_, 0b1110)); + a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY)); if (edram_rov_used_) { - DxbcOpRetC(true, DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX)); + a_.OpRetC(true, 
dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX)); } else { - DxbcOpDiscard(true, DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX)); + a_.OpDiscard(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX)); } if (used_result_components) { - DxbcOpAnd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::LF(1.0f)); + a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::LF(1.0f)); } break; case AluVectorOpcode::kKillGe: - result_swizzle = DxbcSrc::kXXXX; - DxbcOpGE(DxbcDest::R(system_temp_result_), operands[0], operands[1]); - DxbcOpOr(DxbcDest::R(system_temp_result_, 0b0011), - DxbcSrc::R(system_temp_result_, 0b0100), - DxbcSrc::R(system_temp_result_, 0b1110)); - DxbcOpOr(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY)); + result_swizzle = dxbc::Src::kXXXX; + a_.OpGE(dxbc::Dest::R(system_temp_result_), operands[0], operands[1]); + a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0011), + dxbc::Src::R(system_temp_result_, 0b0100), + dxbc::Src::R(system_temp_result_, 0b1110)); + a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY)); if (edram_rov_used_) { - DxbcOpRetC(true, DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX)); + a_.OpRetC(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX)); } else { - DxbcOpDiscard(true, DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX)); + a_.OpDiscard(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX)); } if (used_result_components) { - DxbcOpAnd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::LF(1.0f)); + a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::LF(1.0f)); } break; case 
AluVectorOpcode::kKillNe: - result_swizzle = DxbcSrc::kXXXX; - DxbcOpNE(DxbcDest::R(system_temp_result_), operands[0], operands[1]); - DxbcOpOr(DxbcDest::R(system_temp_result_, 0b0011), - DxbcSrc::R(system_temp_result_, 0b0100), - DxbcSrc::R(system_temp_result_, 0b1110)); - DxbcOpOr(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY)); + result_swizzle = dxbc::Src::kXXXX; + a_.OpNE(dxbc::Dest::R(system_temp_result_), operands[0], operands[1]); + a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0011), + dxbc::Src::R(system_temp_result_, 0b0100), + dxbc::Src::R(system_temp_result_, 0b1110)); + a_.OpOr(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY)); if (edram_rov_used_) { - DxbcOpRetC(true, DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX)); + a_.OpRetC(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX)); } else { - DxbcOpDiscard(true, DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX)); + a_.OpDiscard(true, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX)); } if (used_result_components) { - DxbcOpAnd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX), - DxbcSrc::LF(1.0f)); + a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX), + dxbc::Src::LF(1.0f)); } break; case AluVectorOpcode::kDst: if (used_result_components & 0b0001) { - DxbcOpMov(DxbcDest::R(system_temp_result_, 0b0001), DxbcSrc::LF(1.0f)); + a_.OpMov(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::LF(1.0f)); } if (used_result_components & 0b0010) { - DxbcOpMul(DxbcDest::R(system_temp_result_, 0b0010), - operands[0].SelectFromSwizzled(1), - operands[1].SelectFromSwizzled(1)); + a_.OpMul(dxbc::Dest::R(system_temp_result_, 0b0010), + operands[0].SelectFromSwizzled(1), + operands[1].SelectFromSwizzled(1)); if 
(!(instr.vector_operands[0].GetIdenticalComponents( instr.vector_operands[1]) & 0b0010)) { // Shader Model 3: +-0 or denormal * anything = +0. - DxbcOpMin(DxbcDest::R(system_temp_result_, 0b0100), - operands[0].SelectFromSwizzled(1).Abs(), - operands[1].SelectFromSwizzled(1).Abs()); + a_.OpMin(dxbc::Dest::R(system_temp_result_, 0b0100), + operands[0].SelectFromSwizzled(1).Abs(), + operands[1].SelectFromSwizzled(1).Abs()); // min isn't required to flush denormals, eq is. - DxbcOpEq(DxbcDest::R(system_temp_result_, 0b0100), - DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ), - DxbcSrc::LF(0.0f)); - DxbcOpMovC(DxbcDest::R(system_temp_result_, 0b0010), - DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ), - DxbcSrc::LF(0.0f), - DxbcSrc::R(system_temp_result_, DxbcSrc::kYYYY)); + a_.OpEq(dxbc::Dest::R(system_temp_result_, 0b0100), + dxbc::Src::R(system_temp_result_, dxbc::Src::kZZZZ), + dxbc::Src::LF(0.0f)); + a_.OpMovC(dxbc::Dest::R(system_temp_result_, 0b0010), + dxbc::Src::R(system_temp_result_, dxbc::Src::kZZZZ), + dxbc::Src::LF(0.0f), + dxbc::Src::R(system_temp_result_, dxbc::Src::kYYYY)); } } if (used_result_components & 0b0100) { - DxbcOpMov(DxbcDest::R(system_temp_result_, 0b0100), - operands[0].SelectFromSwizzled(2)); + a_.OpMov(dxbc::Dest::R(system_temp_result_, 0b0100), + operands[0].SelectFromSwizzled(2)); } if (used_result_components & 0b1000) { - DxbcOpMov(DxbcDest::R(system_temp_result_, 0b1000), - operands[1].SelectFromSwizzled(2)); + a_.OpMov(dxbc::Dest::R(system_temp_result_, 0b1000), + operands[1].SelectFromSwizzled(2)); } break; case AluVectorOpcode::kMaxA: - DxbcOpAdd(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b1000), - operands[0].SelectFromSwizzled(3), DxbcSrc::LF(0.5f)); - DxbcOpRoundNI(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b1000), - DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kWWWW)); - DxbcOpMax(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b1000), - DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kWWWW), - DxbcSrc::LF(-256.0f)); - 
DxbcOpMin(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b1000), - DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kWWWW), - DxbcSrc::LF(255.0f)); - DxbcOpFToI(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b1000), - DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kWWWW)); + a_.OpAdd(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b1000), + operands[0].SelectFromSwizzled(3), dxbc::Src::LF(0.5f)); + a_.OpRoundNI(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b1000), + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kWWWW)); + a_.OpMax(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b1000), + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kWWWW), + dxbc::Src::LF(-256.0f)); + a_.OpMin(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b1000), + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kWWWW), + dxbc::Src::LF(255.0f)); + a_.OpFToI(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b1000), + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kWWWW)); if (used_result_components) { uint32_t identical = instr.vector_operands[0].GetIdenticalComponents( instr.vector_operands[1]) & @@ -615,13 +620,14 @@ void DxbcShaderTranslator::ProcessVectorAluOperation( uint32_t different = used_result_components & ~identical; if (different) { // Shader Model 3 NaN behavior (a >= b ? a : b, not fmax). 
- DxbcOpGE(DxbcDest::R(system_temp_result_, different), operands[0], - operands[1]); - DxbcOpMovC(DxbcDest::R(system_temp_result_, different), - DxbcSrc::R(system_temp_result_), operands[0], operands[1]); + a_.OpGE(dxbc::Dest::R(system_temp_result_, different), operands[0], + operands[1]); + a_.OpMovC(dxbc::Dest::R(system_temp_result_, different), + dxbc::Src::R(system_temp_result_), operands[0], + operands[1]); } if (identical) { - DxbcOpMov(DxbcDest::R(system_temp_result_, identical), operands[0]); + a_.OpMov(dxbc::Dest::R(system_temp_result_, identical), operands[0]); } } break; @@ -629,7 +635,7 @@ void DxbcShaderTranslator::ProcessVectorAluOperation( default: assert_unhandled_case(instr.vector_opcode); EmitTranslationError("Unknown ALU vector operation"); - DxbcOpMov(DxbcDest::R(system_temp_result_), DxbcSrc::LF(0.0f)); + a_.OpMov(dxbc::Dest::R(system_temp_result_), dxbc::Src::LF(0.0f)); } PopSystemTemp(operand_temps); @@ -644,7 +650,7 @@ void DxbcShaderTranslator::ProcessScalarAluOperation( } // Load operands. - DxbcSrc operands_loaded[2]{DxbcSrc::LF(0.0f), DxbcSrc::LF(0.0f)}; + dxbc::Src operands_loaded[2]{dxbc::Src::LF(0.0f), dxbc::Src::LF(0.0f)}; uint32_t operand_temps = 0; for (uint32_t i = 0; i < instr.scalar_operand_count; ++i) { bool operand_temp_pushed = false; @@ -655,32 +661,33 @@ void DxbcShaderTranslator::ProcessScalarAluOperation( operand_temps += uint32_t(operand_temp_pushed); } // Don't return without PopSystemTemp(operand_temps) from now on! 
- DxbcSrc operand_0_a(operands_loaded[0].SelectFromSwizzled(0)); - DxbcSrc operand_0_b(operands_loaded[0].SelectFromSwizzled(1)); - DxbcSrc operand_1(operands_loaded[1].SelectFromSwizzled(0)); + dxbc::Src operand_0_a(operands_loaded[0].SelectFromSwizzled(0)); + dxbc::Src operand_0_b(operands_loaded[0].SelectFromSwizzled(1)); + dxbc::Src operand_1(operands_loaded[1].SelectFromSwizzled(0)); - DxbcDest ps_dest(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b0001)); - DxbcSrc ps_src(DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kXXXX)); + dxbc::Dest ps_dest(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b0001)); + dxbc::Src ps_src(dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kXXXX)); switch (instr.scalar_opcode) { case AluScalarOpcode::kAdds: - DxbcOpAdd(ps_dest, operand_0_a, operand_0_b); + a_.OpAdd(ps_dest, operand_0_a, operand_0_b); break; case AluScalarOpcode::kAddsPrev: - DxbcOpAdd(ps_dest, operand_0_a, ps_src); + a_.OpAdd(ps_dest, operand_0_a, ps_src); break; case AluScalarOpcode::kMuls: - DxbcOpMul(ps_dest, operand_0_a, operand_0_b); + a_.OpMul(ps_dest, operand_0_a, operand_0_b); if (instr.scalar_operands[0].components[0] != instr.scalar_operands[0].components[1]) { // Shader Model 3: +-0 or denormal * anything = +0. uint32_t is_zero_temp = PushSystemTemp(); - DxbcOpMin(DxbcDest::R(is_zero_temp, 0b0001), operand_0_a.Abs(), - operand_0_b.Abs()); + a_.OpMin(dxbc::Dest::R(is_zero_temp, 0b0001), operand_0_a.Abs(), + operand_0_b.Abs()); // min isn't required to flush denormals, eq is. - DxbcOpEq(DxbcDest::R(is_zero_temp, 0b0001), - DxbcSrc::R(is_zero_temp, DxbcSrc::kXXXX), DxbcSrc::LF(0.0f)); - DxbcOpMovC(ps_dest, DxbcSrc::R(is_zero_temp, DxbcSrc::kXXXX), - DxbcSrc::LF(0.0f), ps_src); + a_.OpEq(dxbc::Dest::R(is_zero_temp, 0b0001), + dxbc::Src::R(is_zero_temp, dxbc::Src::kXXXX), + dxbc::Src::LF(0.0f)); + a_.OpMovC(ps_dest, dxbc::Src::R(is_zero_temp, dxbc::Src::kXXXX), + dxbc::Src::LF(0.0f), ps_src); // Release is_zero_temp. 
PopSystemTemp(); } @@ -691,43 +698,44 @@ void DxbcShaderTranslator::ProcessScalarAluOperation( if (instr.scalar_opcode == AluScalarOpcode::kMulsPrev2) { // Check if need to select the src0.a * ps case. // ps != -FLT_MAX. - DxbcOpNE(DxbcDest::R(test_temp, 0b0001), ps_src, DxbcSrc::LF(-FLT_MAX)); + a_.OpNE(dxbc::Dest::R(test_temp, 0b0001), ps_src, + dxbc::Src::LF(-FLT_MAX)); // isfinite(ps), or |ps| <= FLT_MAX, or -|ps| >= -FLT_MAX, since // -FLT_MAX is already loaded to an SGPR, this is also false if it's // NaN. - DxbcOpGE(DxbcDest::R(test_temp, 0b0010), -ps_src.Abs(), - DxbcSrc::LF(-FLT_MAX)); - DxbcOpAnd(DxbcDest::R(test_temp, 0b0001), - DxbcSrc::R(test_temp, DxbcSrc::kXXXX), - DxbcSrc::R(test_temp, DxbcSrc::kYYYY)); + a_.OpGE(dxbc::Dest::R(test_temp, 0b0010), -ps_src.Abs(), + dxbc::Src::LF(-FLT_MAX)); + a_.OpAnd(dxbc::Dest::R(test_temp, 0b0001), + dxbc::Src::R(test_temp, dxbc::Src::kXXXX), + dxbc::Src::R(test_temp, dxbc::Src::kYYYY)); // isfinite(src0.b). - DxbcOpGE(DxbcDest::R(test_temp, 0b0010), -operand_0_b.Abs(), - DxbcSrc::LF(-FLT_MAX)); - DxbcOpAnd(DxbcDest::R(test_temp, 0b0001), - DxbcSrc::R(test_temp, DxbcSrc::kXXXX), - DxbcSrc::R(test_temp, DxbcSrc::kYYYY)); + a_.OpGE(dxbc::Dest::R(test_temp, 0b0010), -operand_0_b.Abs(), + dxbc::Src::LF(-FLT_MAX)); + a_.OpAnd(dxbc::Dest::R(test_temp, 0b0001), + dxbc::Src::R(test_temp, dxbc::Src::kXXXX), + dxbc::Src::R(test_temp, dxbc::Src::kYYYY)); // src0.b > 0 (need !(src0.b <= 0), but src0.b has already been checked // for NaN). 
- DxbcOpLT(DxbcDest::R(test_temp, 0b0010), DxbcSrc::LF(0.0f), - operand_0_b); - DxbcOpAnd(DxbcDest::R(test_temp, 0b0001), - DxbcSrc::R(test_temp, DxbcSrc::kXXXX), - DxbcSrc::R(test_temp, DxbcSrc::kYYYY)); - DxbcOpIf(true, DxbcSrc::R(test_temp, DxbcSrc::kXXXX)); + a_.OpLT(dxbc::Dest::R(test_temp, 0b0010), dxbc::Src::LF(0.0f), + operand_0_b); + a_.OpAnd(dxbc::Dest::R(test_temp, 0b0001), + dxbc::Src::R(test_temp, dxbc::Src::kXXXX), + dxbc::Src::R(test_temp, dxbc::Src::kYYYY)); + a_.OpIf(true, dxbc::Src::R(test_temp, dxbc::Src::kXXXX)); } // Shader Model 3: +-0 or denormal * anything = +0. - DxbcOpMin(DxbcDest::R(test_temp, 0b0001), operand_0_a.Abs(), - ps_src.Abs()); + a_.OpMin(dxbc::Dest::R(test_temp, 0b0001), operand_0_a.Abs(), + ps_src.Abs()); // min isn't required to flush denormals, eq is. - DxbcOpEq(DxbcDest::R(test_temp, 0b0001), - DxbcSrc::R(test_temp, DxbcSrc::kXXXX), DxbcSrc::LF(0.0f)); - DxbcOpMul(ps_dest, operand_0_a, ps_src); - DxbcOpMovC(ps_dest, DxbcSrc::R(test_temp, DxbcSrc::kXXXX), - DxbcSrc::LF(0.0f), ps_src); + a_.OpEq(dxbc::Dest::R(test_temp, 0b0001), + dxbc::Src::R(test_temp, dxbc::Src::kXXXX), dxbc::Src::LF(0.0f)); + a_.OpMul(ps_dest, operand_0_a, ps_src); + a_.OpMovC(ps_dest, dxbc::Src::R(test_temp, dxbc::Src::kXXXX), + dxbc::Src::LF(0.0f), ps_src); if (instr.scalar_opcode == AluScalarOpcode::kMulsPrev2) { - DxbcOpElse(); - DxbcOpMov(ps_dest, DxbcSrc::LF(-FLT_MAX)); - DxbcOpEndIf(); + a_.OpElse(); + a_.OpMov(ps_dest, dxbc::Src::LF(-FLT_MAX)); + a_.OpEndIf(); } // Release test_temp. PopSystemTemp(); @@ -738,290 +746,300 @@ void DxbcShaderTranslator::ProcessScalarAluOperation( // max is commonly used as mov. if (instr.scalar_operands[0].components[0] == instr.scalar_operands[0].components[1]) { - DxbcOpMov(ps_dest, operand_0_a); + a_.OpMov(ps_dest, operand_0_a); } else { // Shader Model 3 NaN behavior (a op b ? a : b, not fmax/fmin). 
if (instr.scalar_opcode == AluScalarOpcode::kMins) { - DxbcOpLT(ps_dest, operand_0_a, operand_0_b); + a_.OpLT(ps_dest, operand_0_a, operand_0_b); } else { - DxbcOpGE(ps_dest, operand_0_a, operand_0_b); + a_.OpGE(ps_dest, operand_0_a, operand_0_b); } - DxbcOpMovC(ps_dest, ps_src, operand_0_a, operand_0_b); + a_.OpMovC(ps_dest, ps_src, operand_0_a, operand_0_b); } break; case AluScalarOpcode::kSeqs: - DxbcOpEq(ps_dest, operand_0_a, DxbcSrc::LF(0.0f)); - DxbcOpAnd(ps_dest, ps_src, DxbcSrc::LF(1.0f)); + a_.OpEq(ps_dest, operand_0_a, dxbc::Src::LF(0.0f)); + a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f)); break; case AluScalarOpcode::kSgts: - DxbcOpLT(ps_dest, DxbcSrc::LF(0.0f), operand_0_a); - DxbcOpAnd(ps_dest, ps_src, DxbcSrc::LF(1.0f)); + a_.OpLT(ps_dest, dxbc::Src::LF(0.0f), operand_0_a); + a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f)); break; case AluScalarOpcode::kSges: - DxbcOpGE(ps_dest, operand_0_a, DxbcSrc::LF(0.0f)); - DxbcOpAnd(ps_dest, ps_src, DxbcSrc::LF(1.0f)); + a_.OpGE(ps_dest, operand_0_a, dxbc::Src::LF(0.0f)); + a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f)); break; case AluScalarOpcode::kSnes: - DxbcOpNE(ps_dest, operand_0_a, DxbcSrc::LF(0.0f)); - DxbcOpAnd(ps_dest, ps_src, DxbcSrc::LF(1.0f)); + a_.OpNE(ps_dest, operand_0_a, dxbc::Src::LF(0.0f)); + a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f)); break; case AluScalarOpcode::kFrcs: - DxbcOpFrc(ps_dest, operand_0_a); + a_.OpFrc(ps_dest, operand_0_a); break; case AluScalarOpcode::kTruncs: - DxbcOpRoundZ(ps_dest, operand_0_a); + a_.OpRoundZ(ps_dest, operand_0_a); break; case AluScalarOpcode::kFloors: - DxbcOpRoundNI(ps_dest, operand_0_a); + a_.OpRoundNI(ps_dest, operand_0_a); break; case AluScalarOpcode::kExp: - DxbcOpExp(ps_dest, operand_0_a); + a_.OpExp(ps_dest, operand_0_a); break; case AluScalarOpcode::kLogc: { - DxbcOpLog(ps_dest, operand_0_a); + a_.OpLog(ps_dest, operand_0_a); uint32_t is_neg_infinity_temp = PushSystemTemp(); - DxbcOpEq(DxbcDest::R(is_neg_infinity_temp, 0b0001), ps_src, - 
DxbcSrc::LF(-INFINITY)); - DxbcOpMovC(ps_dest, DxbcSrc::R(is_neg_infinity_temp, DxbcSrc::kXXXX), - DxbcSrc::LF(-FLT_MAX), ps_src); + a_.OpEq(dxbc::Dest::R(is_neg_infinity_temp, 0b0001), ps_src, + dxbc::Src::LF(-INFINITY)); + a_.OpMovC(ps_dest, dxbc::Src::R(is_neg_infinity_temp, dxbc::Src::kXXXX), + dxbc::Src::LF(-FLT_MAX), ps_src); // Release is_neg_infinity_temp. PopSystemTemp(); } break; case AluScalarOpcode::kLog: - DxbcOpLog(ps_dest, operand_0_a); + a_.OpLog(ps_dest, operand_0_a); break; case AluScalarOpcode::kRcpc: case AluScalarOpcode::kRsqc: { if (instr.scalar_opcode == AluScalarOpcode::kRsqc) { - DxbcOpRSq(ps_dest, operand_0_a); + a_.OpRSq(ps_dest, operand_0_a); } else { - DxbcOpRcp(ps_dest, operand_0_a); + a_.OpRcp(ps_dest, operand_0_a); } uint32_t is_infinity_temp = PushSystemTemp(); - DxbcOpEq(DxbcDest::R(is_infinity_temp, 0b0001), ps_src.Abs(), - DxbcSrc::LF(INFINITY)); + a_.OpEq(dxbc::Dest::R(is_infinity_temp, 0b0001), ps_src.Abs(), + dxbc::Src::LF(INFINITY)); // If +-Infinity (0x7F800000 or 0xFF800000), add -1 (0xFFFFFFFF) to turn // into +-FLT_MAX (0x7F7FFFFF or 0xFF7FFFFF). - DxbcOpIAdd(ps_dest, ps_src, DxbcSrc::R(is_infinity_temp, DxbcSrc::kXXXX)); + a_.OpIAdd(ps_dest, ps_src, + dxbc::Src::R(is_infinity_temp, dxbc::Src::kXXXX)); // Release is_infinity_temp. PopSystemTemp(); } break; case AluScalarOpcode::kRcpf: case AluScalarOpcode::kRsqf: { if (instr.scalar_opcode == AluScalarOpcode::kRsqf) { - DxbcOpRSq(ps_dest, operand_0_a); + a_.OpRSq(ps_dest, operand_0_a); } else { - DxbcOpRcp(ps_dest, operand_0_a); + a_.OpRcp(ps_dest, operand_0_a); } uint32_t is_not_infinity_temp = PushSystemTemp(); - DxbcOpNE(DxbcDest::R(is_not_infinity_temp, 0b0001), ps_src.Abs(), - DxbcSrc::LF(INFINITY)); + a_.OpNE(dxbc::Dest::R(is_not_infinity_temp, 0b0001), ps_src.Abs(), + dxbc::Src::LF(INFINITY)); // Keep the sign bit if infinity. 
- DxbcOpOr(DxbcDest::R(is_not_infinity_temp, 0b0001), - DxbcSrc::R(is_not_infinity_temp, DxbcSrc::kXXXX), - DxbcSrc::LU(uint32_t(1) << 31)); - DxbcOpAnd(ps_dest, ps_src, - DxbcSrc::R(is_not_infinity_temp, DxbcSrc::kXXXX)); + a_.OpOr(dxbc::Dest::R(is_not_infinity_temp, 0b0001), + dxbc::Src::R(is_not_infinity_temp, dxbc::Src::kXXXX), + dxbc::Src::LU(uint32_t(1) << 31)); + a_.OpAnd(ps_dest, ps_src, + dxbc::Src::R(is_not_infinity_temp, dxbc::Src::kXXXX)); // Release is_not_infinity_temp. PopSystemTemp(); } break; case AluScalarOpcode::kRcp: - DxbcOpRcp(ps_dest, operand_0_a); + a_.OpRcp(ps_dest, operand_0_a); break; case AluScalarOpcode::kRsq: - DxbcOpRSq(ps_dest, operand_0_a); + a_.OpRSq(ps_dest, operand_0_a); break; case AluScalarOpcode::kMaxAs: case AluScalarOpcode::kMaxAsf: if (instr.scalar_opcode == AluScalarOpcode::kMaxAsf) { - DxbcOpRoundNI(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b1000), - operand_0_a); + a_.OpRoundNI(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b1000), + operand_0_a); } else { - DxbcOpAdd(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b1000), operand_0_a, - DxbcSrc::LF(0.5f)); - DxbcOpRoundNI(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b1000), - DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kWWWW)); + a_.OpAdd(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b1000), operand_0_a, + dxbc::Src::LF(0.5f)); + a_.OpRoundNI(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b1000), + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kWWWW)); } - DxbcOpMax(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b1000), - DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kWWWW), - DxbcSrc::LF(-256.0f)); - DxbcOpMin(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b1000), - DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kWWWW), - DxbcSrc::LF(255.0f)); - DxbcOpFToI(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b1000), - DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kWWWW)); + a_.OpMax(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b1000), + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kWWWW), + dxbc::Src::LF(-256.0f)); + 
a_.OpMin(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b1000), + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kWWWW), + dxbc::Src::LF(255.0f)); + a_.OpFToI(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b1000), + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kWWWW)); if (instr.scalar_operands[0].components[0] == instr.scalar_operands[0].components[1]) { - DxbcOpMov(ps_dest, operand_0_a); + a_.OpMov(ps_dest, operand_0_a); } else { // Shader Model 3 NaN behavior (a >= b ? a : b, not fmax). - DxbcOpGE(ps_dest, operand_0_a, operand_0_b); - DxbcOpMovC(ps_dest, ps_src, operand_0_a, operand_0_b); + a_.OpGE(ps_dest, operand_0_a, operand_0_b); + a_.OpMovC(ps_dest, ps_src, operand_0_a, operand_0_b); } break; case AluScalarOpcode::kSubs: - DxbcOpAdd(ps_dest, operand_0_a, -operand_0_b); + a_.OpAdd(ps_dest, operand_0_a, -operand_0_b); break; case AluScalarOpcode::kSubsPrev: - DxbcOpAdd(ps_dest, operand_0_a, -ps_src); + a_.OpAdd(ps_dest, operand_0_a, -ps_src); break; case AluScalarOpcode::kSetpEq: predicate_written = true; - DxbcOpEq(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b0100), operand_0_a, - DxbcSrc::LF(0.0f)); - DxbcOpMovC(ps_dest, DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kZZZZ), - DxbcSrc::LF(0.0f), DxbcSrc::LF(1.0f)); + a_.OpEq(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b0100), operand_0_a, + dxbc::Src::LF(0.0f)); + a_.OpMovC(ps_dest, + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kZZZZ), + dxbc::Src::LF(0.0f), dxbc::Src::LF(1.0f)); break; case AluScalarOpcode::kSetpNe: predicate_written = true; - DxbcOpNE(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b0100), operand_0_a, - DxbcSrc::LF(0.0f)); - DxbcOpMovC(ps_dest, DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kZZZZ), - DxbcSrc::LF(0.0f), DxbcSrc::LF(1.0f)); + a_.OpNE(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b0100), operand_0_a, + dxbc::Src::LF(0.0f)); + a_.OpMovC(ps_dest, + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kZZZZ), + dxbc::Src::LF(0.0f), dxbc::Src::LF(1.0f)); break; case AluScalarOpcode::kSetpGt: 
predicate_written = true; - DxbcOpLT(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b0100), DxbcSrc::LF(0.0f), - operand_0_a); - DxbcOpMovC(ps_dest, DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kZZZZ), - DxbcSrc::LF(0.0f), DxbcSrc::LF(1.0f)); + a_.OpLT(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b0100), + dxbc::Src::LF(0.0f), operand_0_a); + a_.OpMovC(ps_dest, + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kZZZZ), + dxbc::Src::LF(0.0f), dxbc::Src::LF(1.0f)); break; case AluScalarOpcode::kSetpGe: predicate_written = true; - DxbcOpGE(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b0100), operand_0_a, - DxbcSrc::LF(0.0f)); - DxbcOpMovC(ps_dest, DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kZZZZ), - DxbcSrc::LF(0.0f), DxbcSrc::LF(1.0f)); + a_.OpGE(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b0100), operand_0_a, + dxbc::Src::LF(0.0f)); + a_.OpMovC(ps_dest, + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kZZZZ), + dxbc::Src::LF(0.0f), dxbc::Src::LF(1.0f)); break; case AluScalarOpcode::kSetpInv: predicate_written = true; // Calculate ps as if src0.a != 1.0 (the false predicate value case). - DxbcOpEq(ps_dest, operand_0_a, DxbcSrc::LF(0.0f)); - DxbcOpMovC(ps_dest, ps_src, DxbcSrc::LF(1.0f), operand_0_a); + a_.OpEq(ps_dest, operand_0_a, dxbc::Src::LF(0.0f)); + a_.OpMovC(ps_dest, ps_src, dxbc::Src::LF(1.0f), operand_0_a); // Set the predicate to src0.a == 1.0, and, if it's true, zero ps. 
- DxbcOpEq(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b0100), operand_0_a, - DxbcSrc::LF(1.0f)); - DxbcOpMovC(ps_dest, DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kZZZZ), - DxbcSrc::LF(0.0f), ps_src); + a_.OpEq(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b0100), operand_0_a, + dxbc::Src::LF(1.0f)); + a_.OpMovC(ps_dest, + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kZZZZ), + dxbc::Src::LF(0.0f), ps_src); break; case AluScalarOpcode::kSetpPop: predicate_written = true; - DxbcOpAdd(ps_dest, operand_0_a, DxbcSrc::LF(-1.0f)); - DxbcOpGE(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b0100), DxbcSrc::LF(0.0f), - ps_src); - DxbcOpMovC(ps_dest, DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kZZZZ), - DxbcSrc::LF(0.0f), ps_src); + a_.OpAdd(ps_dest, operand_0_a, dxbc::Src::LF(-1.0f)); + a_.OpGE(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b0100), + dxbc::Src::LF(0.0f), ps_src); + a_.OpMovC(ps_dest, + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kZZZZ), + dxbc::Src::LF(0.0f), ps_src); break; case AluScalarOpcode::kSetpClr: predicate_written = true; - DxbcOpMov(ps_dest, DxbcSrc::LF(FLT_MAX)); - DxbcOpMov(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b0100), DxbcSrc::LU(0)); + a_.OpMov(ps_dest, dxbc::Src::LF(FLT_MAX)); + a_.OpMov(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b0100), + dxbc::Src::LU(0)); break; case AluScalarOpcode::kSetpRstr: predicate_written = true; - DxbcOpEq(DxbcDest::R(system_temp_ps_pc_p0_a0_, 0b0100), operand_0_a, - DxbcSrc::LF(0.0f)); + a_.OpEq(dxbc::Dest::R(system_temp_ps_pc_p0_a0_, 0b0100), operand_0_a, + dxbc::Src::LF(0.0f)); // Just copying src0.a to ps (since it's set to 0 if it's 0) could work, // but flush denormals and zero sign just for safety. 
- DxbcOpMovC(ps_dest, DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kZZZZ), - DxbcSrc::LF(0.0f), operand_0_a); + a_.OpMovC(ps_dest, + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kZZZZ), + dxbc::Src::LF(0.0f), operand_0_a); break; case AluScalarOpcode::kKillsEq: - DxbcOpEq(ps_dest, operand_0_a, DxbcSrc::LF(0.0f)); + a_.OpEq(ps_dest, operand_0_a, dxbc::Src::LF(0.0f)); if (edram_rov_used_) { - DxbcOpRetC(true, ps_src); + a_.OpRetC(true, ps_src); } else { - DxbcOpDiscard(true, ps_src); + a_.OpDiscard(true, ps_src); } - DxbcOpAnd(ps_dest, ps_src, DxbcSrc::LF(1.0f)); + a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f)); break; case AluScalarOpcode::kKillsGt: - DxbcOpLT(ps_dest, DxbcSrc::LF(0.0f), operand_0_a); + a_.OpLT(ps_dest, dxbc::Src::LF(0.0f), operand_0_a); if (edram_rov_used_) { - DxbcOpRetC(true, ps_src); + a_.OpRetC(true, ps_src); } else { - DxbcOpDiscard(true, ps_src); + a_.OpDiscard(true, ps_src); } - DxbcOpAnd(ps_dest, ps_src, DxbcSrc::LF(1.0f)); + a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f)); break; case AluScalarOpcode::kKillsGe: - DxbcOpGE(ps_dest, operand_0_a, DxbcSrc::LF(0.0f)); + a_.OpGE(ps_dest, operand_0_a, dxbc::Src::LF(0.0f)); if (edram_rov_used_) { - DxbcOpRetC(true, ps_src); + a_.OpRetC(true, ps_src); } else { - DxbcOpDiscard(true, ps_src); + a_.OpDiscard(true, ps_src); } - DxbcOpAnd(ps_dest, ps_src, DxbcSrc::LF(1.0f)); + a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f)); break; case AluScalarOpcode::kKillsNe: - DxbcOpNE(ps_dest, operand_0_a, DxbcSrc::LF(0.0f)); + a_.OpNE(ps_dest, operand_0_a, dxbc::Src::LF(0.0f)); if (edram_rov_used_) { - DxbcOpRetC(true, ps_src); + a_.OpRetC(true, ps_src); } else { - DxbcOpDiscard(true, ps_src); + a_.OpDiscard(true, ps_src); } - DxbcOpAnd(ps_dest, ps_src, DxbcSrc::LF(1.0f)); + a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f)); break; case AluScalarOpcode::kKillsOne: - DxbcOpEq(ps_dest, operand_0_a, DxbcSrc::LF(1.0f)); + a_.OpEq(ps_dest, operand_0_a, dxbc::Src::LF(1.0f)); if (edram_rov_used_) { - DxbcOpRetC(true, 
ps_src); + a_.OpRetC(true, ps_src); } else { - DxbcOpDiscard(true, ps_src); + a_.OpDiscard(true, ps_src); } - DxbcOpAnd(ps_dest, ps_src, DxbcSrc::LF(1.0f)); + a_.OpAnd(ps_dest, ps_src, dxbc::Src::LF(1.0f)); break; case AluScalarOpcode::kSqrt: - DxbcOpSqRt(ps_dest, operand_0_a); + a_.OpSqRt(ps_dest, operand_0_a); break; case AluScalarOpcode::kMulsc0: case AluScalarOpcode::kMulsc1: - DxbcOpMul(ps_dest, operand_0_a, operand_1); + a_.OpMul(ps_dest, operand_0_a, operand_1); if (!(instr.scalar_operands[0].GetIdenticalComponents( instr.scalar_operands[1]) & 0b0001)) { // Shader Model 3: +-0 or denormal * anything = +0. uint32_t is_zero_temp = PushSystemTemp(); - DxbcOpMin(DxbcDest::R(is_zero_temp, 0b0001), operand_0_a.Abs(), - operand_1.Abs()); + a_.OpMin(dxbc::Dest::R(is_zero_temp, 0b0001), operand_0_a.Abs(), + operand_1.Abs()); // min isn't required to flush denormals, eq is. - DxbcOpEq(DxbcDest::R(is_zero_temp, 0b0001), - DxbcSrc::R(is_zero_temp, DxbcSrc::kXXXX), DxbcSrc::LF(0.0f)); - DxbcOpMovC(ps_dest, DxbcSrc::R(is_zero_temp, DxbcSrc::kXXXX), - DxbcSrc::LF(0.0f), ps_src); + a_.OpEq(dxbc::Dest::R(is_zero_temp, 0b0001), + dxbc::Src::R(is_zero_temp, dxbc::Src::kXXXX), + dxbc::Src::LF(0.0f)); + a_.OpMovC(ps_dest, dxbc::Src::R(is_zero_temp, dxbc::Src::kXXXX), + dxbc::Src::LF(0.0f), ps_src); // Release is_zero_temp. 
PopSystemTemp(); } break; case AluScalarOpcode::kAddsc0: case AluScalarOpcode::kAddsc1: - DxbcOpAdd(ps_dest, operand_0_a, operand_1); + a_.OpAdd(ps_dest, operand_0_a, operand_1); break; case AluScalarOpcode::kSubsc0: case AluScalarOpcode::kSubsc1: - DxbcOpAdd(ps_dest, operand_0_a, -operand_1); + a_.OpAdd(ps_dest, operand_0_a, -operand_1); break; case AluScalarOpcode::kSin: - DxbcOpSinCos(ps_dest, DxbcDest::Null(), operand_0_a); + a_.OpSinCos(ps_dest, dxbc::Dest::Null(), operand_0_a); break; case AluScalarOpcode::kCos: - DxbcOpSinCos(DxbcDest::Null(), ps_dest, operand_0_a); + a_.OpSinCos(dxbc::Dest::Null(), ps_dest, operand_0_a); break; default: assert_unhandled_case(instr.scalar_opcode); EmitTranslationError("Unknown ALU scalar operation"); - DxbcOpMov(ps_dest, DxbcSrc::LF(0.0f)); + a_.OpMov(ps_dest, dxbc::Src::LF(0.0f)); } PopSystemTemp(operand_temps); @@ -1044,17 +1062,17 @@ void DxbcShaderTranslator::ProcessAluInstruction( // Whether the instruction has changed the predicate, and it needs to be // checked again later. 
bool predicate_written_vector = false; - uint32_t vector_result_swizzle = DxbcSrc::kXYZW; + uint32_t vector_result_swizzle = dxbc::Src::kXYZW; ProcessVectorAluOperation(instr, vector_result_swizzle, predicate_written_vector); bool predicate_written_scalar = false; ProcessScalarAluOperation(instr, predicate_written_scalar); StoreResult(instr.vector_and_constant_result, - DxbcSrc::R(system_temp_result_, vector_result_swizzle), + dxbc::Src::R(system_temp_result_, vector_result_swizzle), instr.GetMemExportStreamConstant() != UINT32_MAX); StoreResult(instr.scalar_result, - DxbcSrc::R(system_temp_ps_pc_p0_a0_, DxbcSrc::kXXXX)); + dxbc::Src::R(system_temp_ps_pc_p0_a0_, dxbc::Src::kXXXX)); if (predicate_written_vector || predicate_written_scalar) { cf_exec_predicate_written_ = true; diff --git a/src/xenia/gpu/dxbc_shader_translator_fetch.cc b/src/xenia/gpu/dxbc_shader_translator_fetch.cc index 8e81d0831..92bf89f0a 100644 --- a/src/xenia/gpu/dxbc_shader_translator_fetch.cc +++ b/src/xenia/gpu/dxbc_shader_translator_fetch.cc @@ -39,16 +39,16 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( // Nothing to load - just constant 0/1 writes, or the swizzle includes only // components that don't exist in the format (writing zero instead of them). // Unpacking assumes at least some word is needed. - StoreResult(instr.result, DxbcSrc::LF(0.0f)); + StoreResult(instr.result, dxbc::Src::LF(0.0f)); return; } - // Create a 2-component DxbcSrc for the fetch constant (vf0 is in [0].xy of + // Create a 2-component dxbc::Src for the fetch constant (vf0 is in [0].xy of // the fetch constants array, vf1 is in [0].zw, vf2 is in [1].xy). 
if (cbuffer_index_fetch_constants_ == kBindingIndexUnallocated) { cbuffer_index_fetch_constants_ = cbuffer_count_++; } - DxbcSrc fetch_constant_src(DxbcSrc::CB( + dxbc::Src fetch_constant_src(dxbc::Src::CB( cbuffer_index_fetch_constants_, uint32_t(CbufferRegister::kFetchConstants), instr.operands[1].storage_index >> 1, @@ -62,8 +62,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( // - Load the byte address in physical memory to system_temp_result_.w (so // it's not overwritten by data loads until the last one). - DxbcDest address_dest(DxbcDest::R(system_temp_result_, 0b1000)); - DxbcSrc address_src(DxbcSrc::R(system_temp_result_, DxbcSrc::kWWWW)); + dxbc::Dest address_dest(dxbc::Dest::R(system_temp_result_, 0b1000)); + dxbc::Src address_src(dxbc::Src::R(system_temp_result_, dxbc::Src::kWWWW)); if (instr.attributes.stride) { // Convert the index to an integer by flooring or by rounding to the nearest // (as floor(index + 0.5) because rounding to the nearest even makes no @@ -71,34 +71,34 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( // http://web.archive.org/web/20100302145413/http://msdn.microsoft.com:80/en-us/library/bb313960.aspx { bool index_operand_temp_pushed = false; - DxbcSrc index_operand( + dxbc::Src index_operand( LoadOperand(instr.operands[0], 0b0001, index_operand_temp_pushed) .SelectFromSwizzled(0)); if (instr.attributes.is_index_rounded) { - DxbcOpAdd(address_dest, index_operand, DxbcSrc::LF(0.5f)); - DxbcOpRoundNI(address_dest, address_src); + a_.OpAdd(address_dest, index_operand, dxbc::Src::LF(0.5f)); + a_.OpRoundNI(address_dest, address_src); } else { - DxbcOpRoundNI(address_dest, index_operand); + a_.OpRoundNI(address_dest, index_operand); } if (index_operand_temp_pushed) { PopSystemTemp(); } } - DxbcOpFToI(address_dest, address_src); + a_.OpFToI(address_dest, address_src); // Extract the byte address from the fetch constant to // system_temp_result_.z. 
- DxbcOpAnd(DxbcDest::R(system_temp_result_, 0b0100), - fetch_constant_src.SelectFromSwizzled(0), - DxbcSrc::LU(~uint32_t(3))); + a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0100), + fetch_constant_src.SelectFromSwizzled(0), + dxbc::Src::LU(~uint32_t(3))); // Merge the index and the base address. - DxbcOpIMAd(address_dest, address_src, - DxbcSrc::LU(instr.attributes.stride * sizeof(uint32_t)), - DxbcSrc::R(system_temp_result_, DxbcSrc::kZZZZ)); + a_.OpIMAd(address_dest, address_src, + dxbc::Src::LU(instr.attributes.stride * sizeof(uint32_t)), + dxbc::Src::R(system_temp_result_, dxbc::Src::kZZZZ)); } else { // Fetching from the same location - extract the byte address of the // beginning of the buffer. - DxbcOpAnd(address_dest, fetch_constant_src.SelectFromSwizzled(0), - DxbcSrc::LU(~uint32_t(3))); + a_.OpAnd(address_dest, fetch_constant_src.SelectFromSwizzled(0), + dxbc::Src::LU(~uint32_t(3))); } // Add the word offset from the instruction (signed), plus the offset of the // first needed word within the element. @@ -108,8 +108,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( instr.attributes.offset + int32_t(first_word_index); if (first_word_buffer_offset) { // Add the constant word offset. - DxbcOpIAdd(address_dest, address_src, - DxbcSrc::LI(first_word_buffer_offset * sizeof(uint32_t))); + a_.OpIAdd(address_dest, address_src, + dxbc::Src::LI(first_word_buffer_offset * sizeof(uint32_t))); } // - Load needed words to system_temp_result_, words 0, 1, 2, 3 to X, Y, Z, W @@ -129,13 +129,13 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( // shared memory is a UAV to system_temp_result_.x and check. In the `if`, put // the more likely case (SRV), in the `else`, the less likely one (UAV). 
system_constants_used_ |= 1ull << kSysConst_Flags_Index; - DxbcOpAnd(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_Flags_Vec) - .Select(kSysConst_Flags_Comp), - DxbcSrc::LU(kSysFlag_SharedMemoryIsUAV)); - DxbcOpIf(false, DxbcSrc::R(system_temp_result_, DxbcSrc::kXXXX)); + a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp), + dxbc::Src::LU(kSysFlag_SharedMemoryIsUAV)); + a_.OpIf(false, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX)); if (srv_index_shared_memory_ == kBindingIndexUnallocated) { srv_index_shared_memory_ = srv_count_++; } @@ -144,13 +144,13 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( } for (uint32_t i = 0; i < 2; ++i) { if (i) { - DxbcOpElse(); + a_.OpElse(); } - DxbcSrc shared_memory_src( - i ? DxbcSrc::U(uav_index_shared_memory_, - uint32_t(UAVRegister::kSharedMemory)) - : DxbcSrc::T(srv_index_shared_memory_, - uint32_t(SRVMainRegister::kSharedMemory))); + dxbc::Src shared_memory_src( + i ? dxbc::Src::U(uav_index_shared_memory_, + uint32_t(UAVRegister::kSharedMemory)) + : dxbc::Src::T(srv_index_shared_memory_, + uint32_t(SRVMainRegister::kSharedMemory))); uint32_t needed_words_remaining = needed_words; uint32_t word_index_previous = first_word_index; while (needed_words_remaining) { @@ -163,36 +163,36 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( ~((uint32_t(1) << (word_index + word_count)) - uint32_t(1)); if (word_index != word_index_previous) { // Go to the word in the buffer. 
- DxbcOpIAdd( - address_dest, address_src, - DxbcSrc::LU((word_index - word_index_previous) * sizeof(uint32_t))); + a_.OpIAdd(address_dest, address_src, + dxbc::Src::LU((word_index - word_index_previous) * + sizeof(uint32_t))); word_index_previous = word_index; } // Can ld_raw either to the first multiple components, or to any scalar // component. - DxbcDest words_result_dest(DxbcDest::R( + dxbc::Dest words_result_dest(dxbc::Dest::R( system_temp_result_, ((1 << word_count) - 1) << word_index)); if (!word_index || word_count == 1) { // Read directly to system_temp_result_. - DxbcOpLdRaw(words_result_dest, address_src, shared_memory_src); + a_.OpLdRaw(words_result_dest, address_src, shared_memory_src); } else { // Read to the first components of a temporary register. uint32_t load_temp = PushSystemTemp(); - DxbcOpLdRaw(DxbcDest::R(load_temp, (1 << word_count) - 1), address_src, - shared_memory_src); + a_.OpLdRaw(dxbc::Dest::R(load_temp, (1 << word_count) - 1), address_src, + shared_memory_src); // Copy to system_temp_result_. - DxbcOpMov(words_result_dest, - DxbcSrc::R(load_temp, - (DxbcSrc::kXYZW & ((1 << (word_count * 2)) - 1)) - << (word_index * 2))); + a_.OpMov(words_result_dest, + dxbc::Src::R(load_temp, + (dxbc::Src::kXYZW & ((1 << (word_count * 2)) - 1)) + << (word_index * 2))); // Release load_temp. PopSystemTemp(); } } } - DxbcOpEndIf(); + a_.OpEndIf(); - DxbcSrc result_src(DxbcSrc::R(system_temp_result_)); + dxbc::Src result_src(dxbc::Src::R(system_temp_result_)); // - Endian swap the words. 
@@ -208,40 +208,42 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( endian_temp = swap_temp; xe::bit_scan_forward(~needed_words, &endian_temp_component); } - DxbcOpAnd(DxbcDest::R(endian_temp, 1 << endian_temp_component), - fetch_constant_src.SelectFromSwizzled(1), DxbcSrc::LU(0b11)); - DxbcSrc endian_src(DxbcSrc::R(endian_temp).Select(endian_temp_component)); + a_.OpAnd(dxbc::Dest::R(endian_temp, 1 << endian_temp_component), + fetch_constant_src.SelectFromSwizzled(1), dxbc::Src::LU(0b11)); + dxbc::Src endian_src( + dxbc::Src::R(endian_temp).Select(endian_temp_component)); - DxbcDest swap_temp_dest(DxbcDest::R(swap_temp, needed_words)); - DxbcSrc swap_temp_src(DxbcSrc::R(swap_temp)); - DxbcDest swap_result_dest(DxbcDest::R(system_temp_result_, needed_words)); + dxbc::Dest swap_temp_dest(dxbc::Dest::R(swap_temp, needed_words)); + dxbc::Src swap_temp_src(dxbc::Src::R(swap_temp)); + dxbc::Dest swap_result_dest( + dxbc::Dest::R(system_temp_result_, needed_words)); // 8-in-16 or one half of 8-in-32. - DxbcOpSwitch(endian_src); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k8in16))); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k8in32))); + a_.OpSwitch(endian_src); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k8in16))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k8in32))); // Temp = X0Z0. - DxbcOpAnd(swap_temp_dest, result_src, DxbcSrc::LU(0x00FF00FF)); + a_.OpAnd(swap_temp_dest, result_src, dxbc::Src::LU(0x00FF00FF)); // Result = YZW0. - DxbcOpUShR(swap_result_dest, result_src, DxbcSrc::LU(8)); + a_.OpUShR(swap_result_dest, result_src, dxbc::Src::LU(8)); // Result = Y0W0. - DxbcOpAnd(swap_result_dest, result_src, DxbcSrc::LU(0x00FF00FF)); + a_.OpAnd(swap_result_dest, result_src, dxbc::Src::LU(0x00FF00FF)); // Result = YXWZ. 
- DxbcOpUMAd(swap_result_dest, swap_temp_src, DxbcSrc::LU(256), result_src); - DxbcOpBreak(); - DxbcOpEndSwitch(); + a_.OpUMAd(swap_result_dest, swap_temp_src, dxbc::Src::LU(256), result_src); + a_.OpBreak(); + a_.OpEndSwitch(); // 16-in-32 or another half of 8-in-32. - DxbcOpSwitch(endian_src); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k8in32))); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k16in32))); + a_.OpSwitch(endian_src); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k8in32))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k16in32))); // Temp = ZW00. - DxbcOpUShR(swap_temp_dest, result_src, DxbcSrc::LU(16)); + a_.OpUShR(swap_temp_dest, result_src, dxbc::Src::LU(16)); // Result = ZWXY. - DxbcOpBFI(swap_result_dest, DxbcSrc::LU(16), DxbcSrc::LU(16), result_src, - swap_temp_src); - DxbcOpBreak(); - DxbcOpEndSwitch(); + a_.OpBFI(swap_result_dest, dxbc::Src::LU(16), dxbc::Src::LU(16), result_src, + swap_temp_src); + a_.OpBreak(); + a_.OpEndSwitch(); // Release endian_temp (if allocated) and swap_temp. PopSystemTemp((endian_temp != swap_temp) ? 2 : 1); @@ -253,14 +255,14 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( used_result_components & ((1 << xenos::GetVertexFormatComponentCount( instr.attributes.data_format)) - 1); - DxbcDest result_unpacked_dest( - DxbcDest::R(system_temp_result_, used_format_components)); + dxbc::Dest result_unpacked_dest( + dxbc::Dest::R(system_temp_result_, used_format_components)); // If needed_words is not zero (checked in the beginning), this must not be // zero too. For simplicity, it's assumed that something will be unpacked // here. 
assert_not_zero(used_format_components); uint32_t packed_widths[4] = {}, packed_offsets[4] = {}; - uint32_t packed_swizzle = DxbcSrc::kXXXX; + uint32_t packed_swizzle = dxbc::Src::kXXXX; switch (instr.attributes.data_format) { case xenos::VertexFormat::k_8_8_8_8: packed_widths[0] = packed_widths[1] = packed_widths[2] = @@ -305,10 +307,10 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( if (packed_widths[0]) { // Handle packed integer formats. if (instr.attributes.is_signed) { - DxbcOpIBFE(result_unpacked_dest, DxbcSrc::LP(packed_widths), - DxbcSrc::LP(packed_offsets), - DxbcSrc::R(system_temp_result_, packed_swizzle)); - DxbcOpIToF(result_unpacked_dest, result_src); + a_.OpIBFE(result_unpacked_dest, dxbc::Src::LP(packed_widths), + dxbc::Src::LP(packed_offsets), + dxbc::Src::R(system_temp_result_, packed_swizzle)); + a_.OpIToF(result_unpacked_dest, result_src); if (!instr.attributes.is_integer) { float packed_scales[4] = {}; switch (instr.attributes.signed_rf_mode) { @@ -325,11 +327,11 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( } } if (packed_scales_mask) { - DxbcOpMul(DxbcDest::R(system_temp_result_, packed_scales_mask), - result_src, DxbcSrc::LP(packed_scales)); + a_.OpMul(dxbc::Dest::R(system_temp_result_, packed_scales_mask), + result_src, dxbc::Src::LP(packed_scales)); } // Treat both -(2^(n-1)) and -(2^(n-1)-1) as -1. 
- DxbcOpMax(result_unpacked_dest, result_src, DxbcSrc::LF(-1.0f)); + a_.OpMax(result_unpacked_dest, result_src, dxbc::Src::LF(-1.0f)); } break; case xenos::SignedRepeatingFractionMode::kNoZero: { float packed_zeros[4] = {}; @@ -342,18 +344,18 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( 1.0f / float((uint32_t(1) << packed_widths[i]) - 1); packed_scales[i] = 2.0f * packed_zeros[i]; } - DxbcOpMAd(result_unpacked_dest, result_src, - DxbcSrc::LP(packed_scales), DxbcSrc::LP(packed_zeros)); + a_.OpMAd(result_unpacked_dest, result_src, + dxbc::Src::LP(packed_scales), dxbc::Src::LP(packed_zeros)); } break; default: assert_unhandled_case(instr.attributes.signed_rf_mode); } } } else { - DxbcOpUBFE(result_unpacked_dest, DxbcSrc::LP(packed_widths), - DxbcSrc::LP(packed_offsets), - DxbcSrc::R(system_temp_result_, packed_swizzle)); - DxbcOpUToF(result_unpacked_dest, result_src); + a_.OpUBFE(result_unpacked_dest, dxbc::Src::LP(packed_widths), + dxbc::Src::LP(packed_offsets), + dxbc::Src::R(system_temp_result_, packed_swizzle)); + a_.OpUToF(result_unpacked_dest, result_src); if (!instr.attributes.is_integer) { float packed_scales[4] = {}; uint32_t packed_scales_mask = 0b0000; @@ -368,8 +370,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( } } if (packed_scales_mask) { - DxbcOpMul(DxbcDest::R(system_temp_result_, packed_scales_mask), - result_src, DxbcSrc::LP(packed_scales)); + a_.OpMul(dxbc::Dest::R(system_temp_result_, packed_scales_mask), + result_src, dxbc::Src::LP(packed_scales)); } } } @@ -380,39 +382,39 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( // FIXME(Triang3l): This converts from D3D10+ float16 with NaNs instead // of Xbox 360 float16 with extended range. However, haven't encountered // games relying on that yet. 
- DxbcOpUBFE(result_unpacked_dest, DxbcSrc::LU(16), - DxbcSrc::LU(0, 16, 0, 16), - DxbcSrc::R(system_temp_result_, 0b01010000)); - DxbcOpF16ToF32(result_unpacked_dest, result_src); + a_.OpUBFE(result_unpacked_dest, dxbc::Src::LU(16), + dxbc::Src::LU(0, 16, 0, 16), + dxbc::Src::R(system_temp_result_, 0b01010000)); + a_.OpF16ToF32(result_unpacked_dest, result_src); break; case xenos::VertexFormat::k_32: case xenos::VertexFormat::k_32_32: case xenos::VertexFormat::k_32_32_32_32: if (instr.attributes.is_signed) { - DxbcOpIToF(result_unpacked_dest, result_src); + a_.OpIToF(result_unpacked_dest, result_src); } else { - DxbcOpUToF(result_unpacked_dest, result_src); + a_.OpUToF(result_unpacked_dest, result_src); } if (!instr.attributes.is_integer) { if (instr.attributes.is_signed) { switch (instr.attributes.signed_rf_mode) { case xenos::SignedRepeatingFractionMode::kZeroClampMinusOne: - DxbcOpMul(result_unpacked_dest, result_src, - DxbcSrc::LF(1.0f / 2147483647.0f)); + a_.OpMul(result_unpacked_dest, result_src, + dxbc::Src::LF(1.0f / 2147483647.0f)); // No need to clamp to -1 if signed - 1/(2^31-1) is rounded to // 1/(2^31) as float32. break; case xenos::SignedRepeatingFractionMode::kNoZero: - DxbcOpMAd(result_unpacked_dest, result_src, - DxbcSrc::LF(1.0f / 2147483647.5f), - DxbcSrc::LF(0.5f / 2147483647.5f)); + a_.OpMAd(result_unpacked_dest, result_src, + dxbc::Src::LF(1.0f / 2147483647.5f), + dxbc::Src::LF(0.5f / 2147483647.5f)); break; default: assert_unhandled_case(instr.attributes.signed_rf_mode); } } else { - DxbcOpMul(result_unpacked_dest, result_src, - DxbcSrc::LF(1.0f / 4294967295.0f)); + a_.OpMul(result_unpacked_dest, result_src, + dxbc::Src::LF(1.0f / 4294967295.0f)); } } break; @@ -432,8 +434,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( // - Apply the exponent bias. 
if (instr.attributes.exp_adjust) { - DxbcOpMul(result_unpacked_dest, result_src, - DxbcSrc::LF(std::ldexp(1.0f, instr.attributes.exp_adjust))); + a_.OpMul(result_unpacked_dest, result_src, + dxbc::Src::LF(std::ldexp(1.0f, instr.attributes.exp_adjust))); } // - Write zeros to components not present in the format. @@ -441,11 +443,11 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( uint32_t used_missing_components = used_result_components & ~used_format_components; if (used_missing_components) { - DxbcOpMov(DxbcDest::R(system_temp_result_, used_missing_components), - DxbcSrc::LF(0.0f)); + a_.OpMov(dxbc::Dest::R(system_temp_result_, used_missing_components), + dxbc::Src::LF(0.0f)); } - StoreResult(instr.result, DxbcSrc::R(system_temp_result_)); + StoreResult(instr.result, dxbc::Src::R(system_temp_result_)); } uint32_t DxbcShaderTranslator::FindOrAddTextureBinding( @@ -573,9 +575,9 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( switch (instr.opcode) { case FetchOpcode::kSetTextureLod: { bool lod_operand_temp_pushed = false; - DxbcOpMov(DxbcDest::R(system_temp_grad_h_lod_, 0b1000), - LoadOperand(instr.operands[0], 0b0001, lod_operand_temp_pushed) - .SelectFromSwizzled(0)); + a_.OpMov(dxbc::Dest::R(system_temp_grad_h_lod_, 0b1000), + LoadOperand(instr.operands[0], 0b0001, lod_operand_temp_pushed) + .SelectFromSwizzled(0)); if (lod_operand_temp_pushed) { PopSystemTemp(); } @@ -583,8 +585,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( } case FetchOpcode::kSetTextureGradientsHorz: { bool grad_operand_temp_pushed = false; - DxbcOpMov( - DxbcDest::R(system_temp_grad_h_lod_, 0b0111), + a_.OpMov( + dxbc::Dest::R(system_temp_grad_h_lod_, 0b0111), LoadOperand(instr.operands[0], 0b0111, grad_operand_temp_pushed)); if (grad_operand_temp_pushed) { PopSystemTemp(); @@ -593,8 +595,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( } case FetchOpcode::kSetTextureGradientsVert: { bool grad_operand_temp_pushed = false; - DxbcOpMov( 
- DxbcDest::R(system_temp_grad_v_, 0b0111), + a_.OpMov( + dxbc::Dest::R(system_temp_grad_v_, 0b0111), LoadOperand(instr.operands[0], 0b0111, grad_operand_temp_pushed)); if (grad_operand_temp_pushed) { PopSystemTemp(); @@ -624,27 +626,29 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( } if (!used_result_nonzero_components) { // Nothing to fetch, only constant 0/1 writes. - StoreResult(instr.result, DxbcSrc::LF(0.0f)); + StoreResult(instr.result, dxbc::Src::LF(0.0f)); return; } if (instr.opcode == FetchOpcode::kGetTextureGradients) { // Handle before doing anything that actually needs the texture. bool grad_operand_temp_pushed = false; - DxbcSrc grad_operand = LoadOperand( + dxbc::Src grad_operand = LoadOperand( instr.operands[0], ((used_result_nonzero_components & 0b0011) ? 0b0001 : 0) | ((used_result_nonzero_components & 0b1100) ? 0b0010 : 0), grad_operand_temp_pushed); if (used_result_nonzero_components & 0b0101) { - DxbcOpDerivRTXCoarse(DxbcDest::R(system_temp_result_, - used_result_nonzero_components & 0b0101), - grad_operand.SwizzleSwizzled(0b010000)); + a_.OpDerivRTXCoarse( + dxbc::Dest::R(system_temp_result_, + used_result_nonzero_components & 0b0101), + grad_operand.SwizzleSwizzled(0b010000)); } if (used_result_nonzero_components & 0b1010) { - DxbcOpDerivRTYCoarse(DxbcDest::R(system_temp_result_, - used_result_nonzero_components & 0b1010), - grad_operand.SwizzleSwizzled(0b01000000)); + a_.OpDerivRTYCoarse( + dxbc::Dest::R(system_temp_result_, + used_result_nonzero_components & 0b1010), + grad_operand.SwizzleSwizzled(0b01000000)); } if (grad_operand_temp_pushed) { PopSystemTemp(); @@ -653,23 +657,23 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // Scale the gradients to guest pixels with SSAA. 
uint32_t ssaa_scale_temp = PushSystemTemp(); system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - DxbcOpMovC(DxbcDest::R(ssaa_scale_temp, - (used_result_nonzero_components & 0b0011) | - (used_result_nonzero_components >> 2)), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_SampleCountLog2_Vec, - kSysConst_SampleCountLog2_Comp | - ((kSysConst_SampleCountLog2_Comp + 1) << 2)), - DxbcSrc::LF(2.0f), DxbcSrc::LF(1.0f)); - DxbcOpMul( - DxbcDest::R(system_temp_result_, used_result_nonzero_components), - DxbcSrc::R(system_temp_result_), - DxbcSrc::R(ssaa_scale_temp, 0b01000100)); + a_.OpMovC(dxbc::Dest::R(ssaa_scale_temp, + (used_result_nonzero_components & 0b0011) | + (used_result_nonzero_components >> 2)), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_SampleCountLog2_Vec, + kSysConst_SampleCountLog2_Comp | + ((kSysConst_SampleCountLog2_Comp + 1) << 2)), + dxbc::Src::LF(2.0f), dxbc::Src::LF(1.0f)); + a_.OpMul( + dxbc::Dest::R(system_temp_result_, used_result_nonzero_components), + dxbc::Src::R(system_temp_result_), + dxbc::Src::R(ssaa_scale_temp, 0b01000100)); // Release ssaa_scale_temp. PopSystemTemp(); } - StoreResult(instr.result, DxbcSrc::R(system_temp_result_)); + StoreResult(instr.result, dxbc::Src::R(system_temp_result_)); return; } @@ -682,7 +686,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // border color fraction (in the X component of the result). 
assert_always(); EmitTranslationError("getBCF is unimplemented", false); - StoreResult(instr.result, DxbcSrc::LF(0.0f)); + StoreResult(instr.result, dxbc::Src::LF(0.0f)); return; } @@ -692,7 +696,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( instr.opcode != FetchOpcode::kGetTextureWeights) { assert_unhandled_case(instr.opcode); EmitTranslationError("Unknown texture fetch operation"); - StoreResult(instr.result, DxbcSrc::LF(0.0f)); + StoreResult(instr.result, dxbc::Src::LF(0.0f)); return; } @@ -708,7 +712,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( EmitTranslationError( "getCompTexLOD used with explicit LOD or gradients - contradicts MSDN", false); - StoreResult(instr.result, DxbcSrc::LF(0.0f)); + StoreResult(instr.result, dxbc::Src::LF(0.0f)); return; } @@ -863,53 +867,53 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( if (size_needed_components) { switch (instr.dimension) { case xenos::FetchOpDimension::k1D: - DxbcOpUBFE(DxbcDest::R(size_and_is_3d_temp, 0b0001), DxbcSrc::LU(24), - DxbcSrc::LU(0), - RequestTextureFetchConstantWord(tfetch_index, 2)); + a_.OpUBFE(dxbc::Dest::R(size_and_is_3d_temp, 0b0001), dxbc::Src::LU(24), + dxbc::Src::LU(0), + RequestTextureFetchConstantWord(tfetch_index, 2)); break; case xenos::FetchOpDimension::k2D: case xenos::FetchOpDimension::kCube: - DxbcOpUBFE(DxbcDest::R(size_and_is_3d_temp, size_needed_components), - DxbcSrc::LU(13, 13, 0, 0), DxbcSrc::LU(0, 13, 0, 0), - RequestTextureFetchConstantWord(tfetch_index, 2)); + a_.OpUBFE(dxbc::Dest::R(size_and_is_3d_temp, size_needed_components), + dxbc::Src::LU(13, 13, 0, 0), dxbc::Src::LU(0, 13, 0, 0), + RequestTextureFetchConstantWord(tfetch_index, 2)); break; case xenos::FetchOpDimension::k3DOrStacked: // tfetch3D is used for both stacked and 3D - first, check if 3D. 
- DxbcOpUBFE(DxbcDest::R(size_and_is_3d_temp, 0b1000), DxbcSrc::LU(2), - DxbcSrc::LU(9), - RequestTextureFetchConstantWord(tfetch_index, 5)); - DxbcOpIEq(DxbcDest::R(size_and_is_3d_temp, 0b1000), - DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW), - DxbcSrc::LU(uint32_t(xenos::DataDimension::k3D))); + a_.OpUBFE(dxbc::Dest::R(size_and_is_3d_temp, 0b1000), dxbc::Src::LU(2), + dxbc::Src::LU(9), + RequestTextureFetchConstantWord(tfetch_index, 5)); + a_.OpIEq(dxbc::Dest::R(size_and_is_3d_temp, 0b1000), + dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kWWWW), + dxbc::Src::LU(uint32_t(xenos::DataDimension::k3D))); if (size_needed_components & 0b0111) { // Even if depth isn't needed specifically for stacked or specifically // for 3D later, load both cases anyway to make sure the register is // always initialized. - DxbcOpIf(true, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW)); + a_.OpIf(true, dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kWWWW)); // Load the 3D texture size. - DxbcOpUBFE( - DxbcDest::R(size_and_is_3d_temp, size_needed_components & 0b0111), - DxbcSrc::LU(11, 11, 10, 0), DxbcSrc::LU(0, 11, 22, 0), - RequestTextureFetchConstantWord(tfetch_index, 2)); - DxbcOpElse(); + a_.OpUBFE(dxbc::Dest::R(size_and_is_3d_temp, + size_needed_components & 0b0111), + dxbc::Src::LU(11, 11, 10, 0), dxbc::Src::LU(0, 11, 22, 0), + RequestTextureFetchConstantWord(tfetch_index, 2)); + a_.OpElse(); // Load the 2D stacked texture size. - DxbcOpUBFE( - DxbcDest::R(size_and_is_3d_temp, size_needed_components & 0b0111), - DxbcSrc::LU(13, 13, 6, 0), DxbcSrc::LU(0, 13, 26, 0), - RequestTextureFetchConstantWord(tfetch_index, 2)); - DxbcOpEndIf(); + a_.OpUBFE(dxbc::Dest::R(size_and_is_3d_temp, + size_needed_components & 0b0111), + dxbc::Src::LU(13, 13, 6, 0), dxbc::Src::LU(0, 13, 26, 0), + RequestTextureFetchConstantWord(tfetch_index, 2)); + a_.OpEndIf(); } break; } if (size_needed_components & 0b0111) { // Fetch constants store size minus 1 - add 1. 
- DxbcOpIAdd( - DxbcDest::R(size_and_is_3d_temp, size_needed_components & 0b0111), - DxbcSrc::R(size_and_is_3d_temp), DxbcSrc::LU(1)); + a_.OpIAdd( + dxbc::Dest::R(size_and_is_3d_temp, size_needed_components & 0b0111), + dxbc::Src::R(size_and_is_3d_temp), dxbc::Src::LU(1)); // Convert the size to float for multiplication/division. - DxbcOpUToF( - DxbcDest::R(size_and_is_3d_temp, size_needed_components & 0b0111), - DxbcSrc::R(size_and_is_3d_temp)); + a_.OpUToF( + dxbc::Dest::R(size_and_is_3d_temp, size_needed_components & 0b0111), + dxbc::Src::R(size_and_is_3d_temp)); } } @@ -929,56 +933,57 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // Need unnormalized coordinates. bool coord_operand_temp_pushed = false; - DxbcSrc coord_operand = + dxbc::Src coord_operand = LoadOperand(instr.operands[0], used_result_nonzero_components, coord_operand_temp_pushed); - DxbcSrc coord_src(coord_operand); + dxbc::Src coord_src(coord_operand); uint32_t offsets_needed = offsets_not_zero & used_result_nonzero_components; if (!instr.attributes.unnormalized_coordinates || offsets_needed) { // Using system_temp_result_ as a temporary for coordinate denormalization // and offsetting. 
- coord_src = DxbcSrc::R(system_temp_result_); - DxbcDest coord_dest( - DxbcDest::R(system_temp_result_, used_result_nonzero_components)); + coord_src = dxbc::Src::R(system_temp_result_); + dxbc::Dest coord_dest( + dxbc::Dest::R(system_temp_result_, used_result_nonzero_components)); if (instr.attributes.unnormalized_coordinates) { if (offsets_needed) { - DxbcOpAdd(coord_dest, coord_operand, DxbcSrc::LP(offsets)); + a_.OpAdd(coord_dest, coord_operand, dxbc::Src::LP(offsets)); } } else { assert_true((size_needed_components & used_result_nonzero_components) == used_result_nonzero_components); if (offsets_needed) { - DxbcOpMAd(coord_dest, coord_operand, DxbcSrc::R(size_and_is_3d_temp), - DxbcSrc::LP(offsets)); + a_.OpMAd(coord_dest, coord_operand, dxbc::Src::R(size_and_is_3d_temp), + dxbc::Src::LP(offsets)); } else { - DxbcOpMul(coord_dest, coord_operand, DxbcSrc::R(size_and_is_3d_temp)); + a_.OpMul(coord_dest, coord_operand, + dxbc::Src::R(size_and_is_3d_temp)); } } } // 0.5 has already been subtracted via offsets previously. - DxbcOpFrc(DxbcDest::R(system_temp_result_, used_result_nonzero_components), - coord_src); + a_.OpFrc(dxbc::Dest::R(system_temp_result_, used_result_nonzero_components), + coord_src); if (coord_operand_temp_pushed) { PopSystemTemp(); } } else { // - Component signedness, for selecting the SRV, and if data is needed. 
- DxbcSrc signs_uint_src( - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_TextureSwizzledSigns_Vec + (tfetch_index >> 4)) + dxbc::Src signs_uint_src( + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_TextureSwizzledSigns_Vec + (tfetch_index >> 4)) .Select((tfetch_index >> 2) & 3)); uint32_t signs_shift = (tfetch_index & 3) * 8; uint32_t signs_temp = UINT32_MAX; if (instr.opcode == FetchOpcode::kTextureFetch) { signs_temp = PushSystemTemp(); system_constants_used_ |= 1ull << kSysConst_TextureSwizzledSigns_Index; - DxbcOpUBFE(DxbcDest::R(signs_temp, used_result_nonzero_components), - DxbcSrc::LU(2), - DxbcSrc::LU(signs_shift, signs_shift + 2, signs_shift + 4, - signs_shift + 6), - signs_uint_src); + a_.OpUBFE(dxbc::Dest::R(signs_temp, used_result_nonzero_components), + dxbc::Src::LU(2), + dxbc::Src::LU(signs_shift, signs_shift + 2, signs_shift + 4, + signs_shift + 6), + signs_uint_src); } // - Coordinates. @@ -995,7 +1000,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // converted later according to whether the texture is 3D). For cube maps, // coordinates need to be transformed back into the cube space. bool coord_operand_temp_pushed = false; - DxbcSrc coord_operand = LoadOperand( + dxbc::Src coord_operand = LoadOperand( instr.operands[0], (1 << xenos::GetFetchOpDimensionComponentCount(instr.dimension)) - 1, coord_operand_temp_pushed); @@ -1019,32 +1024,32 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( normalized_components); if (offsets_not_zero & normalized_components) { // FIXME(Triang3l): Offsets need to be applied at the LOD being fetched. 
- DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, normalized_components), - coord_operand, DxbcSrc::LP(offsets)); + a_.OpAdd(dxbc::Dest::R(coord_and_sampler_temp, normalized_components), + coord_operand, dxbc::Src::LP(offsets)); assert_not_zero(normalized_components & 0b011); - DxbcOpDiv( - DxbcDest::R(coord_and_sampler_temp, normalized_components & 0b011), - DxbcSrc::R(coord_and_sampler_temp), - DxbcSrc::R(size_and_is_3d_temp)); + a_.OpDiv(dxbc::Dest::R(coord_and_sampler_temp, + normalized_components & 0b011), + dxbc::Src::R(coord_and_sampler_temp), + dxbc::Src::R(size_and_is_3d_temp)); if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { // Normalize if 3D. assert_true((size_needed_components & 0b1100) == 0b1100); - DxbcOpIf(true, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW)); - DxbcOpDiv(DxbcDest::R(coord_and_sampler_temp, 0b0100), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ), - DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kZZZZ)); - DxbcOpEndIf(); + a_.OpIf(true, dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kWWWW)); + a_.OpDiv(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kZZZZ), + dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kZZZZ)); + a_.OpEndIf(); } } else { - DxbcOpDiv(DxbcDest::R(coord_and_sampler_temp, normalized_components), - coord_operand, DxbcSrc::R(size_and_is_3d_temp)); + a_.OpDiv(dxbc::Dest::R(coord_and_sampler_temp, normalized_components), + coord_operand, dxbc::Src::R(size_and_is_3d_temp)); if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { // Don't normalize if stacked. 
assert_true((size_needed_components & 0b1000) == 0b1000); - DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100), - DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ), - coord_operand.SelectFromSwizzled(2)); + a_.OpMovC(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kWWWW), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kZZZZ), + coord_operand.SelectFromSwizzled(2)); } } } else { @@ -1055,133 +1060,133 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // FIXME(Triang3l): Offsets need to be applied at the LOD being fetched. assert_true((size_needed_components & coords_with_offset) == coords_with_offset); - DxbcOpDiv(DxbcDest::R(coord_and_sampler_temp, coords_with_offset), - DxbcSrc::LP(offsets), DxbcSrc::R(size_and_is_3d_temp)); - DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, coords_with_offset), - coord_operand, DxbcSrc::R(coord_and_sampler_temp)); + a_.OpDiv(dxbc::Dest::R(coord_and_sampler_temp, coords_with_offset), + dxbc::Src::LP(offsets), dxbc::Src::R(size_and_is_3d_temp)); + a_.OpAdd(dxbc::Dest::R(coord_and_sampler_temp, coords_with_offset), + coord_operand, dxbc::Src::R(coord_and_sampler_temp)); } uint32_t coords_without_offset = ~coords_with_offset & normalized_components; // 3D/stacked without offset is handled separately. if (coords_without_offset & 0b011) { - DxbcOpMov( - DxbcDest::R(coord_and_sampler_temp, coords_without_offset & 0b011), - coord_operand); + a_.OpMov(dxbc::Dest::R(coord_and_sampler_temp, + coords_without_offset & 0b011), + coord_operand); } if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { assert_true((size_needed_components & 0b1100) == 0b1100); if (coords_with_offset & 0b100) { // Denormalize and offset Z (re-apply the offset not to lose precision // as a result of division) if stacked. 
- DxbcOpIf(false, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW)); - DxbcOpMAd(DxbcDest::R(coord_and_sampler_temp, 0b0100), - coord_operand.SelectFromSwizzled(2), - DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kZZZZ), - DxbcSrc::LF(offsets[2])); - DxbcOpEndIf(); + a_.OpIf(false, dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kWWWW)); + a_.OpMAd(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + coord_operand.SelectFromSwizzled(2), + dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kZZZZ), + dxbc::Src::LF(offsets[2])); + a_.OpEndIf(); } else { // Denormalize Z if stacked, and revert to normalized if 3D. - DxbcOpMul(DxbcDest::R(coord_and_sampler_temp, 0b0100), + a_.OpMul(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + coord_operand.SelectFromSwizzled(2), + dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kZZZZ)); + a_.OpMovC(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kWWWW), coord_operand.SelectFromSwizzled(2), - DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kZZZZ)); - DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100), - DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW), - coord_operand.SelectFromSwizzled(2), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ)); + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kZZZZ)); } } } switch (instr.dimension) { case xenos::FetchOpDimension::k1D: // Pad to 2D array coordinates. - DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0110), - DxbcSrc::LF(0.0f)); + a_.OpMov(dxbc::Dest::R(coord_and_sampler_temp, 0b0110), + dxbc::Src::LF(0.0f)); break; case xenos::FetchOpDimension::k2D: // Pad to 2D array coordinates. - DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0100), - DxbcSrc::LF(0.0f)); + a_.OpMov(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + dxbc::Src::LF(0.0f)); break; case xenos::FetchOpDimension::kCube: { // Transform from the major axis SC/TC plus 1 into cube coordinates. // Move SC/TC from 1...2 to -1...1. 
- DxbcOpMAd(DxbcDest::R(coord_and_sampler_temp, 0b0011), - DxbcSrc::R(coord_and_sampler_temp), DxbcSrc::LF(2.0f), - DxbcSrc::LF(-3.0f)); + a_.OpMAd(dxbc::Dest::R(coord_and_sampler_temp, 0b0011), + dxbc::Src::R(coord_and_sampler_temp), dxbc::Src::LF(2.0f), + dxbc::Src::LF(-3.0f)); // Get the face index (floored, within 0...5) as an integer to // coord_and_sampler_temp.z. if (offsets[2]) { - DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100), - coord_operand.SelectFromSwizzled(2), - DxbcSrc::LF(offsets[2])); - DxbcOpFToU(DxbcDest::R(coord_and_sampler_temp, 0b0100), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ)); + a_.OpAdd(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + coord_operand.SelectFromSwizzled(2), + dxbc::Src::LF(offsets[2])); + a_.OpFToU(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kZZZZ)); } else { - DxbcOpFToU(DxbcDest::R(coord_and_sampler_temp, 0b0100), - coord_operand.SelectFromSwizzled(2)); + a_.OpFToU(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + coord_operand.SelectFromSwizzled(2)); } - DxbcOpUMin(DxbcDest::R(coord_and_sampler_temp, 0b0100), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ), - DxbcSrc::LU(5)); + a_.OpUMin(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kZZZZ), + dxbc::Src::LU(5)); // Split the face index into axis and sign (0 - positive, 1 - negative) // to coord_and_sampler_temp.zw (sign in W so it won't be overwritten). // Fine to overwrite W at this point, the sampler index hasn't been // loaded yet. - DxbcOpUBFE(DxbcDest::R(coord_and_sampler_temp, 0b1100), - DxbcSrc::LU(0, 0, 2, 1), DxbcSrc::LU(0, 0, 1, 0), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ)); + a_.OpUBFE(dxbc::Dest::R(coord_and_sampler_temp, 0b1100), + dxbc::Src::LU(0, 0, 2, 1), dxbc::Src::LU(0, 0, 1, 0), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kZZZZ)); // Remap the axes in a way opposite to the ALU cube instruction. 
- DxbcOpSwitch(DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ)); - DxbcOpCase(DxbcSrc::LU(0)); + a_.OpSwitch(dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kZZZZ)); + a_.OpCase(dxbc::Src::LU(0)); { // X is the major axis. // Y = -TC (TC overwritten). - DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0010), - -DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY)); + a_.OpMov(dxbc::Dest::R(coord_and_sampler_temp, 0b0010), + -dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kYYYY)); // Z = neg ? SC : -SC. - DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX), - -DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX)); + a_.OpMovC(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kWWWW), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kXXXX), + -dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kXXXX)); // X = neg ? -1 : 1 (SC overwritten). - DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0001), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW), - DxbcSrc::LF(-1.0f), DxbcSrc::LF(1.0f)); + a_.OpMovC(dxbc::Dest::R(coord_and_sampler_temp, 0b0001), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kWWWW), + dxbc::Src::LF(-1.0f), dxbc::Src::LF(1.0f)); } - DxbcOpBreak(); - DxbcOpCase(DxbcSrc::LU(1)); + a_.OpBreak(); + a_.OpCase(dxbc::Src::LU(1)); { // Y is the major axis. // X = SC (already there). // Z = neg ? -TC : TC. - DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW), - -DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY)); + a_.OpMovC(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kWWWW), + -dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kYYYY), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kYYYY)); // Y = neg ? -1 : 1 (TC overwritten). 
- DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0010), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW), - DxbcSrc::LF(-1.0f), DxbcSrc::LF(1.0f)); + a_.OpMovC(dxbc::Dest::R(coord_and_sampler_temp, 0b0010), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kWWWW), + dxbc::Src::LF(-1.0f), dxbc::Src::LF(1.0f)); } - DxbcOpBreak(); - DxbcOpDefault(); + a_.OpBreak(); + a_.OpDefault(); { // Z is the major axis. // X = neg ? -SC : SC (SC overwritten). - DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0001), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW), - -DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kXXXX)); + a_.OpMovC(dxbc::Dest::R(coord_and_sampler_temp, 0b0001), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kWWWW), + -dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kXXXX), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kXXXX)); // Y = -TC (TC overwritten). - DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b0010), - -DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kYYYY)); + a_.OpMov(dxbc::Dest::R(coord_and_sampler_temp, 0b0010), + -dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kYYYY)); // Z = neg ? -1 : 1. 
- DxbcOpMovC(DxbcDest::R(coord_and_sampler_temp, 0b0100), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kWWWW), - DxbcSrc::LF(-1.0f), DxbcSrc::LF(1.0f)); + a_.OpMovC(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kWWWW), + dxbc::Src::LF(-1.0f), dxbc::Src::LF(1.0f)); } - DxbcOpBreak(); - DxbcOpEndSwitch(); + a_.OpBreak(); + a_.OpEndSwitch(); } break; default: break; @@ -1199,7 +1204,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( tfetch_index, instr.attributes.mag_filter, instr.attributes.min_filter, xenos::TextureFilter::kLinear, instr.attributes.aniso_filter); - DxbcSrc sampler(DxbcSrc::S(sampler_binding_index, sampler_binding_index)); + dxbc::Src sampler( + dxbc::Src::S(sampler_binding_index, sampler_binding_index)); if (bindless_resources_used_) { // Load the sampler index to coord_and_sampler_temp.w and use relative // sampler indexing. @@ -1208,30 +1214,30 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( } uint32_t sampler_bindless_descriptor_index = sampler_bindings_[sampler_binding_index].bindless_descriptor_index; - DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b1000), - DxbcSrc::CB(cbuffer_index_descriptor_indices_, - uint32_t(CbufferRegister::kDescriptorIndices), - sampler_bindless_descriptor_index >> 2) - .Select(sampler_bindless_descriptor_index & 3)); - sampler = DxbcSrc::S(0, DxbcIndex(coord_and_sampler_temp, 3)); + a_.OpMov(dxbc::Dest::R(coord_and_sampler_temp, 0b1000), + dxbc::Src::CB(cbuffer_index_descriptor_indices_, + uint32_t(CbufferRegister::kDescriptorIndices), + sampler_bindless_descriptor_index >> 2) + .Select(sampler_bindless_descriptor_index & 3)); + sampler = dxbc::Src::S(0, dxbc::Index(coord_and_sampler_temp, 3)); } // Check which SRV needs to be accessed - signed or unsigned. If there is // at least one non-signed component, will be using the unsigned one. 
uint32_t is_unsigned_temp = PushSystemTemp(); system_constants_used_ |= 1ull << kSysConst_TextureSwizzledSigns_Index; - DxbcOpUBFE(DxbcDest::R(is_unsigned_temp, 0b0001), DxbcSrc::LU(8), - DxbcSrc::LU(signs_shift), signs_uint_src); - DxbcOpINE( - DxbcDest::R(is_unsigned_temp, 0b0001), - DxbcSrc::R(is_unsigned_temp, DxbcSrc::kXXXX), - DxbcSrc::LU(uint32_t(xenos::TextureSign::kSigned) * 0b01010101)); + a_.OpUBFE(dxbc::Dest::R(is_unsigned_temp, 0b0001), dxbc::Src::LU(8), + dxbc::Src::LU(signs_shift), signs_uint_src); + a_.OpINE( + dxbc::Dest::R(is_unsigned_temp, 0b0001), + dxbc::Src::R(is_unsigned_temp, dxbc::Src::kXXXX), + dxbc::Src::LU(uint32_t(xenos::TextureSign::kSigned) * 0b01010101)); if (bindless_resources_used_) { // Bindless path - select the SRV index between unsigned and signed to // query. if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { // Check if 3D. assert_true((size_needed_components & 0b1000) == 0b1000); - DxbcOpIf(true, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW)); + a_.OpIf(true, dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kWWWW)); } for (uint32_t is_stacked = 0; is_stacked < @@ -1241,7 +1247,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( xenos::FetchOpDimension srv_dimension = instr.dimension; if (is_stacked) { srv_dimension = xenos::FetchOpDimension::k2D; - DxbcOpElse(); + a_.OpElse(); } uint32_t texture_binding_index_unsigned = FindOrAddTextureBinding(tfetch_index, srv_dimension, false); @@ -1256,16 +1262,16 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( if (cbuffer_index_descriptor_indices_ == kBindingIndexUnallocated) { cbuffer_index_descriptor_indices_ = cbuffer_count_++; } - DxbcOpMovC( - DxbcDest::R(is_unsigned_temp, 0b0001), - DxbcSrc::R(is_unsigned_temp, DxbcSrc::kXXXX), - DxbcSrc::CB(cbuffer_index_descriptor_indices_, - uint32_t(CbufferRegister::kDescriptorIndices), - texture_bindless_descriptor_index_unsigned >> 2) + a_.OpMovC( + dxbc::Dest::R(is_unsigned_temp, 0b0001), + 
dxbc::Src::R(is_unsigned_temp, dxbc::Src::kXXXX), + dxbc::Src::CB(cbuffer_index_descriptor_indices_, + uint32_t(CbufferRegister::kDescriptorIndices), + texture_bindless_descriptor_index_unsigned >> 2) .Select(texture_bindless_descriptor_index_unsigned & 3), - DxbcSrc::CB(cbuffer_index_descriptor_indices_, - uint32_t(CbufferRegister::kDescriptorIndices), - texture_bindless_descriptor_index_signed >> 2) + dxbc::Src::CB(cbuffer_index_descriptor_indices_, + uint32_t(CbufferRegister::kDescriptorIndices), + texture_bindless_descriptor_index_signed >> 2) .Select(texture_bindless_descriptor_index_signed & 3)); // Always 3 coordinate components (1D and 2D are padded to 2D // arrays, 3D and cube have 3 coordinate dimensions). Not caring @@ -1297,27 +1303,28 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( if (*bindless_srv_index == kBindingIndexUnallocated) { *bindless_srv_index = srv_count_++; } - DxbcOpLOD(DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(coord_and_sampler_temp), 3, - DxbcSrc::T(*bindless_srv_index, - DxbcIndex(is_unsigned_temp, 0), DxbcSrc::kYYYY), - sampler); + a_.OpLOD( + dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(coord_and_sampler_temp), 3, + dxbc::Src::T(*bindless_srv_index, + dxbc::Index(is_unsigned_temp, 0), dxbc::Src::kYYYY), + sampler); } if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { // Close the 3D/stacked check. - DxbcOpEndIf(); + a_.OpEndIf(); } } else { // Bindful path - conditionally query one of the SRVs. - DxbcOpIf(true, DxbcSrc::R(is_unsigned_temp, DxbcSrc::kXXXX)); + a_.OpIf(true, dxbc::Src::R(is_unsigned_temp, dxbc::Src::kXXXX)); for (uint32_t is_signed = 0; is_signed < 2; ++is_signed) { if (is_signed) { - DxbcOpElse(); + a_.OpElse(); } if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { // Check if 3D. 
assert_true((size_needed_components & 0b1000) == 0b1000); - DxbcOpIf(true, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW)); + a_.OpIf(true, dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kWWWW)); } for (uint32_t is_stacked = 0; is_stacked < @@ -1325,37 +1332,37 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( : 1u); ++is_stacked) { if (is_stacked) { - DxbcOpElse(); + a_.OpElse(); } assert_true(used_result_nonzero_components == 0b0001); uint32_t texture_binding_index = FindOrAddTextureBinding( tfetch_index, is_stacked ? xenos::FetchOpDimension::k2D : instr.dimension, is_signed != 0); - DxbcOpLOD( - DxbcDest::R(system_temp_result_, 0b0001), - DxbcSrc::R(coord_and_sampler_temp), 3, - DxbcSrc::T( + a_.OpLOD( + dxbc::Dest::R(system_temp_result_, 0b0001), + dxbc::Src::R(coord_and_sampler_temp), 3, + dxbc::Src::T( texture_bindings_[texture_binding_index].bindful_srv_index, uint32_t(SRVMainRegister::kBindfulTexturesStart) + texture_binding_index, - DxbcSrc::kYYYY), + dxbc::Src::kYYYY), sampler); } if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { // Close the 3D/stacked check. - DxbcOpEndIf(); + a_.OpEndIf(); } } // Close the signedness check. - DxbcOpEndIf(); + a_.OpEndIf(); } // Release is_unsigned_temp. PopSystemTemp(); } else { // - Gradients or LOD to be passed to the sample_d/sample_l. - DxbcSrc lod_src(DxbcSrc::LF(0.0f)); + dxbc::Src lod_src(dxbc::Src::LF(0.0f)); uint32_t grad_component_count = 0; // Will be allocated for both explicit and computed LOD. 
uint32_t grad_h_lod_temp = UINT32_MAX; @@ -1364,31 +1371,31 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( uint32_t grad_v_temp = UINT32_MAX; if (instr.attributes.mip_filter != xenos::TextureFilter::kBaseMap) { grad_h_lod_temp = PushSystemTemp(); - lod_src = DxbcSrc::R(grad_h_lod_temp, DxbcSrc::kWWWW); + lod_src = dxbc::Src::R(grad_h_lod_temp, dxbc::Src::kWWWW); // Accumulate the explicit LOD sources (in D3D11.3 specification order: // specified LOD + sampler LOD bias + instruction LOD bias). - DxbcDest lod_dest(DxbcDest::R(grad_h_lod_temp, 0b1000)); + dxbc::Dest lod_dest(dxbc::Dest::R(grad_h_lod_temp, 0b1000)); // Fetch constant LOD bias * 32. - DxbcOpIBFE(lod_dest, DxbcSrc::LU(10), DxbcSrc::LU(12), - RequestTextureFetchConstantWord(tfetch_index, 4)); - DxbcOpIToF(lod_dest, lod_src); + a_.OpIBFE(lod_dest, dxbc::Src::LU(10), dxbc::Src::LU(12), + RequestTextureFetchConstantWord(tfetch_index, 4)); + a_.OpIToF(lod_dest, lod_src); if (instr.attributes.use_register_lod) { // Divide the fetch constant LOD bias by 32, and add the register LOD // and the instruction LOD bias. - DxbcOpMAd(lod_dest, lod_src, DxbcSrc::LF(1.0f / 32.0f), - DxbcSrc::R(system_temp_grad_h_lod_, DxbcSrc::kWWWW)); + a_.OpMAd(lod_dest, lod_src, dxbc::Src::LF(1.0f / 32.0f), + dxbc::Src::R(system_temp_grad_h_lod_, dxbc::Src::kWWWW)); if (instr.attributes.lod_bias) { - DxbcOpAdd(lod_dest, lod_src, - DxbcSrc::LF(instr.attributes.lod_bias)); + a_.OpAdd(lod_dest, lod_src, + dxbc::Src::LF(instr.attributes.lod_bias)); } } else { // Divide the fetch constant LOD by 32, and add the instruction LOD // bias. 
if (instr.attributes.lod_bias) { - DxbcOpMAd(lod_dest, lod_src, DxbcSrc::LF(1.0f / 32.0f), - DxbcSrc::LF(instr.attributes.lod_bias)); + a_.OpMAd(lod_dest, lod_src, dxbc::Src::LF(1.0f / 32.0f), + dxbc::Src::LF(instr.attributes.lod_bias)); } else { - DxbcOpMul(lod_dest, lod_src, DxbcSrc::LF(1.0f / 32.0f)); + a_.OpMul(lod_dest, lod_src, dxbc::Src::LF(1.0f / 32.0f)); } } if (use_computed_lod) { @@ -1408,7 +1415,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( assert_not_zero(grad_component_count); uint32_t grad_mask = (1 << grad_component_count) - 1; // Convert the bias to a gradient scale. - DxbcOpExp(lod_dest, lod_src); + a_.OpExp(lod_dest, lod_src); // FIXME(Triang3l): Gradient exponent adjustment is currently not done // in getCompTexLOD, so don't do it here too. bool ssaa_scale_gradients = @@ -1417,28 +1424,28 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( #if 0 // Extract gradient exponent biases from the fetch constant and merge // them with the LOD bias. - DxbcOpIBFE(DxbcDest::R(grad_h_lod_temp, 0b0011), DxbcSrc::LU(5), - DxbcSrc::LU(22, 27, 0, 0), + a_.OpIBFE(dxbc::Dest::R(grad_h_lod_temp, 0b0011), dxbc::Src::LU(5), + dxbc::Src::LU(22, 27, 0, 0), RequestTextureFetchConstantWord(tfetch_index, 4)); if (ssaa_scale_gradients) { // Adjust the gradient scales to include the SSAA scale. 
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - DxbcOpIAdd(DxbcDest::R(grad_h_lod_temp, 0b0011), - DxbcSrc::R(grad_h_lod_temp), - DxbcSrc::CB( + a_.OpIAdd(dxbc::Dest::R(grad_h_lod_temp, 0b0011), + dxbc::Src::R(grad_h_lod_temp), + dxbc::Src::CB( cbuffer_index_system_constants_, uint32_t(CbufferRegister::kSystemConstants), kSysConst_SampleCountLog2_Vec, kSysConst_SampleCountLog2_Comp | ((kSysConst_SampleCountLog2_Comp + 1) << 2))); } - DxbcOpIMAd(DxbcDest::R(grad_h_lod_temp, 0b0011), - DxbcSrc::R(grad_h_lod_temp), DxbcSrc::LI(int32_t(1) << 23), - DxbcSrc::LF(1.0f)); - DxbcOpMul(DxbcDest::R(grad_v_temp, 0b1000), lod_src, - DxbcSrc::R(grad_h_lod_temp, DxbcSrc::kYYYY)); - DxbcOpMul(lod_dest, lod_src, - DxbcSrc::R(grad_h_lod_temp, DxbcSrc::kXXXX)); + a_.OpIMAd(dxbc::Dest::R(grad_h_lod_temp, 0b0011), + dxbc::Src::R(grad_h_lod_temp), dxbc::Src::LI(int32_t(1) << 23), + dxbc::Src::LF(1.0f)); + a_.OpMul(dxbc::Dest::R(grad_v_temp, 0b1000), lod_src, + dxbc::Src::R(grad_h_lod_temp, dxbc::Src::kYYYY)); + a_.OpMul(lod_dest, lod_src, + dxbc::Src::R(grad_h_lod_temp, dxbc::Src::kXXXX)); #else if (ssaa_scale_gradients) { // Adjust the gradient scales in each direction to include the SSAA @@ -1446,40 +1453,40 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // be used. // ddy. 
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - DxbcOpMovC(DxbcDest::R(grad_v_temp, 0b1000), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_SampleCountLog2_Vec) - .Select(kSysConst_SampleCountLog2_Comp + 1), - DxbcSrc::LF(2.0f), DxbcSrc::LF(1.0f)); - DxbcOpMul(DxbcDest::R(grad_v_temp, 0b1000), lod_src, - DxbcSrc::R(grad_v_temp, DxbcSrc::kWWWW)); + a_.OpMovC(dxbc::Dest::R(grad_v_temp, 0b1000), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_SampleCountLog2_Vec) + .Select(kSysConst_SampleCountLog2_Comp + 1), + dxbc::Src::LF(2.0f), dxbc::Src::LF(1.0f)); + a_.OpMul(dxbc::Dest::R(grad_v_temp, 0b1000), lod_src, + dxbc::Src::R(grad_v_temp, dxbc::Src::kWWWW)); // ddx (after ddy handling, because the ddy code uses lod_src, and // it's being overwritten now). system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - DxbcOpIf(true, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_SampleCountLog2_Vec) - .Select(kSysConst_SampleCountLog2_Comp)); - DxbcOpMul(lod_dest, lod_src, DxbcSrc::LF(2.0f)); - DxbcOpEndIf(); + a_.OpIf(true, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_SampleCountLog2_Vec) + .Select(kSysConst_SampleCountLog2_Comp)); + a_.OpMul(lod_dest, lod_src, dxbc::Src::LF(2.0f)); + a_.OpEndIf(); } #endif // Obtain the gradients and apply biases to them. if (instr.attributes.use_register_gradients) { // Register gradients are already in the cube space for cube maps. - DxbcOpMul(DxbcDest::R(grad_h_lod_temp, grad_mask), - DxbcSrc::R(system_temp_grad_h_lod_), lod_src); + a_.OpMul(dxbc::Dest::R(grad_h_lod_temp, grad_mask), + dxbc::Src::R(system_temp_grad_h_lod_), lod_src); // FIXME(Triang3l): Gradient exponent adjustment is currently not // done in getCompTexLOD, so don't do it here too. 
#if 0 - DxbcOpMul(DxbcDest::R(grad_v_temp, grad_mask), - DxbcSrc::R(system_temp_grad_v_), - DxbcSrc::R(grad_v_temp, DxbcSrc::kWWWW)); + a_.OpMul(dxbc::Dest::R(grad_v_temp, grad_mask), + dxbc::Src::R(system_temp_grad_v_), + dxbc::Src::R(grad_v_temp, dxbc::Src::kWWWW)); #else - DxbcOpMul(DxbcDest::R(grad_v_temp, grad_mask), - DxbcSrc::R(system_temp_grad_v_), lod_src); + a_.OpMul(dxbc::Dest::R(grad_v_temp, grad_mask), + dxbc::Src::R(system_temp_grad_v_), lod_src); #endif // TODO(Triang3l): Are cube map register gradients unnormalized if // the coordinates themselves are unnormalized? @@ -1491,51 +1498,54 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( } assert_true((size_needed_components & grad_norm_mask) == grad_norm_mask); - DxbcOpDiv(DxbcDest::R(grad_h_lod_temp, grad_norm_mask), - DxbcSrc::R(grad_h_lod_temp), - DxbcSrc::R(size_and_is_3d_temp)); - DxbcOpDiv(DxbcDest::R(grad_v_temp, grad_norm_mask), - DxbcSrc::R(grad_v_temp), - DxbcSrc::R(size_and_is_3d_temp)); + a_.OpDiv(dxbc::Dest::R(grad_h_lod_temp, grad_norm_mask), + dxbc::Src::R(grad_h_lod_temp), + dxbc::Src::R(size_and_is_3d_temp)); + a_.OpDiv(dxbc::Dest::R(grad_v_temp, grad_norm_mask), + dxbc::Src::R(grad_v_temp), + dxbc::Src::R(size_and_is_3d_temp)); // Normalize Z of the gradients for fetching from the 3D texture. 
assert_true((size_needed_components & 0b1100) == 0b1100); - DxbcOpIf(true, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW)); - DxbcOpDiv(DxbcDest::R(grad_h_lod_temp, 0b0100), - DxbcSrc::R(grad_h_lod_temp, DxbcSrc::kZZZZ), - DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kZZZZ)); - DxbcOpDiv(DxbcDest::R(grad_v_temp, 0b0100), - DxbcSrc::R(grad_v_temp, DxbcSrc::kZZZZ), - DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kZZZZ)); - DxbcOpEndIf(); + a_.OpIf(true, + dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kWWWW)); + a_.OpDiv(dxbc::Dest::R(grad_h_lod_temp, 0b0100), + dxbc::Src::R(grad_h_lod_temp, dxbc::Src::kZZZZ), + dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kZZZZ)); + a_.OpDiv(dxbc::Dest::R(grad_v_temp, 0b0100), + dxbc::Src::R(grad_v_temp, dxbc::Src::kZZZZ), + dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kZZZZ)); + a_.OpEndIf(); } } else { // Coarse is according to the Direct3D 11.3 specification. - DxbcOpDerivRTXCoarse(DxbcDest::R(grad_h_lod_temp, grad_mask), - DxbcSrc::R(coord_and_sampler_temp)); - DxbcOpMul(DxbcDest::R(grad_h_lod_temp, grad_mask), - DxbcSrc::R(grad_h_lod_temp), lod_src); - DxbcOpDerivRTYCoarse(DxbcDest::R(grad_v_temp, grad_mask), - DxbcSrc::R(coord_and_sampler_temp)); + a_.OpDerivRTXCoarse(dxbc::Dest::R(grad_h_lod_temp, grad_mask), + dxbc::Src::R(coord_and_sampler_temp)); + a_.OpMul(dxbc::Dest::R(grad_h_lod_temp, grad_mask), + dxbc::Src::R(grad_h_lod_temp), lod_src); + a_.OpDerivRTYCoarse(dxbc::Dest::R(grad_v_temp, grad_mask), + dxbc::Src::R(coord_and_sampler_temp)); // FIXME(Triang3l): Gradient exponent adjustment is currently not // done in getCompTexLOD, so don't do it here too. #if 0 - DxbcOpMul(DxbcDest::R(grad_v_temp, grad_mask), - DxbcSrc::R(grad_v_temp), - DxbcSrc::R(grad_v_temp, DxbcSrc::kWWWW)); + a_.OpMul(dxbc::Dest::R(grad_v_temp, grad_mask), + dxbc::Src::R(grad_v_temp), + dxbc::Src::R(grad_v_temp, dxbc::Src::kWWWW)); #else // With SSAA gradient scaling, the scale is separate in each // direction. 
- DxbcOpMul( - DxbcDest::R(grad_v_temp, grad_mask), DxbcSrc::R(grad_v_temp), - ssaa_scale_gradients ? DxbcSrc::R(grad_v_temp, DxbcSrc::kWWWW) - : lod_src); + a_.OpMul(dxbc::Dest::R(grad_v_temp, grad_mask), + dxbc::Src::R(grad_v_temp), + ssaa_scale_gradients + ? dxbc::Src::R(grad_v_temp, dxbc::Src::kWWWW) + : lod_src); #endif } if (instr.dimension == xenos::FetchOpDimension::k1D) { // Pad the gradients to 2D because 1D textures are fetched as 2D // arrays. - DxbcOpMov(DxbcDest::R(grad_h_lod_temp, 0b0010), DxbcSrc::LF(0.0f)); - DxbcOpMov(DxbcDest::R(grad_v_temp, 0b0010), DxbcSrc::LF(0.0f)); + a_.OpMov(dxbc::Dest::R(grad_h_lod_temp, 0b0010), + dxbc::Src::LF(0.0f)); + a_.OpMov(dxbc::Dest::R(grad_v_temp, 0b0010), dxbc::Src::LF(0.0f)); grad_component_count = 2; } } @@ -1555,7 +1565,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( instr.attributes.min_filter, instr.attributes.mip_filter, use_computed_lod ? instr.attributes.aniso_filter : xenos::AnisoFilter::kDisabled); - DxbcSrc sampler(DxbcSrc::S(sampler_binding_index, sampler_binding_index)); + dxbc::Src sampler( + dxbc::Src::S(sampler_binding_index, sampler_binding_index)); if (bindless_resources_used_) { // Load the sampler index to coord_and_sampler_temp.w and use relative // sampler indexing. 
@@ -1564,25 +1575,25 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( } uint32_t sampler_bindless_descriptor_index = sampler_bindings_[sampler_binding_index].bindless_descriptor_index; - DxbcOpMov(DxbcDest::R(coord_and_sampler_temp, 0b1000), - DxbcSrc::CB(cbuffer_index_descriptor_indices_, - uint32_t(CbufferRegister::kDescriptorIndices), - sampler_bindless_descriptor_index >> 2) - .Select(sampler_bindless_descriptor_index & 3)); - sampler = DxbcSrc::S(0, DxbcIndex(coord_and_sampler_temp, 3)); + a_.OpMov(dxbc::Dest::R(coord_and_sampler_temp, 0b1000), + dxbc::Src::CB(cbuffer_index_descriptor_indices_, + uint32_t(CbufferRegister::kDescriptorIndices), + sampler_bindless_descriptor_index >> 2) + .Select(sampler_bindless_descriptor_index & 3)); + sampler = dxbc::Src::S(0, dxbc::Index(coord_and_sampler_temp, 3)); } // Break result register dependencies because textures will be sampled // conditionally, including the primary signs. - DxbcOpMov( - DxbcDest::R(system_temp_result_, used_result_nonzero_components), - DxbcSrc::LF(0.0f)); + a_.OpMov( + dxbc::Dest::R(system_temp_result_, used_result_nonzero_components), + dxbc::Src::LF(0.0f)); // Extract whether each component is signed. uint32_t is_signed_temp = PushSystemTemp(); - DxbcOpIEq(DxbcDest::R(is_signed_temp, used_result_nonzero_components), - DxbcSrc::R(signs_temp), - DxbcSrc::LU(uint32_t(xenos::TextureSign::kSigned))); + a_.OpIEq(dxbc::Dest::R(is_signed_temp, used_result_nonzero_components), + dxbc::Src::R(signs_temp), + dxbc::Src::LU(uint32_t(xenos::TextureSign::kSigned))); // Calculate the lerp factor between stacked texture layers if needed (or // 0 if point-sampled), and check which signedness SRVs need to be @@ -1600,7 +1611,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( // with l(0.0) fallback for the point sampling case. // - srv_selection_temp.w - first, scratch for calculations involving // these, then, unsigned or signed SRV description index. 
- DxbcSrc layer_lerp_factor_src(DxbcSrc::LF(0.0f)); + dxbc::Src layer_lerp_factor_src(dxbc::Src::LF(0.0f)); // W is always needed for bindless. uint32_t srv_selection_temp = bindless_resources_used_ ? PushSystemTemp() : UINT32_MAX; @@ -1622,76 +1633,78 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( srv_selection_temp = PushSystemTemp(); } layer_lerp_factor_src = - DxbcSrc::R(srv_selection_temp, DxbcSrc::kZZZZ); + dxbc::Src::R(srv_selection_temp, dxbc::Src::kZZZZ); // Initialize to point sampling, and break register dependency for 3D. - DxbcOpMov(DxbcDest::R(srv_selection_temp, 0b0100), DxbcSrc::LF(0.0f)); + a_.OpMov(dxbc::Dest::R(srv_selection_temp, 0b0100), + dxbc::Src::LF(0.0f)); assert_true((size_needed_components & 0b1000) == 0b1000); - DxbcOpIf(false, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW)); + a_.OpIf(false, dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kWWWW)); // Check if minifying along layers (derivative > 1 along any axis). - DxbcOpMax(DxbcDest::R(srv_selection_temp, 0b1000), - DxbcSrc::R(grad_h_lod_temp, DxbcSrc::kZZZZ), - DxbcSrc::R(grad_v_temp, DxbcSrc::kZZZZ)); + a_.OpMax(dxbc::Dest::R(srv_selection_temp, 0b1000), + dxbc::Src::R(grad_h_lod_temp, dxbc::Src::kZZZZ), + dxbc::Src::R(grad_v_temp, dxbc::Src::kZZZZ)); if (!instr.attributes.unnormalized_coordinates) { // Denormalize the gradient if provided as normalized. assert_true((size_needed_components & 0b0100) == 0b0100); - DxbcOpMul(DxbcDest::R(srv_selection_temp, 0b1000), - DxbcSrc::R(srv_selection_temp, DxbcSrc::kWWWW), - DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kZZZZ)); + a_.OpMul(dxbc::Dest::R(srv_selection_temp, 0b1000), + dxbc::Src::R(srv_selection_temp, dxbc::Src::kWWWW), + dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kZZZZ)); } // For NaN, considering that magnification is being done. Zero // srv_selection_temp.w means magnifying, non-zero means minifying. 
- DxbcOpLT(DxbcDest::R(srv_selection_temp, 0b1000), DxbcSrc::LF(1.0f), - DxbcSrc::R(srv_selection_temp, DxbcSrc::kWWWW)); + a_.OpLT(dxbc::Dest::R(srv_selection_temp, 0b1000), + dxbc::Src::LF(1.0f), + dxbc::Src::R(srv_selection_temp, dxbc::Src::kWWWW)); if (vol_mag_filter_is_fetch_const || vol_min_filter_is_fetch_const) { - DxbcOpIf(false, DxbcSrc::R(srv_selection_temp, DxbcSrc::kWWWW)); + a_.OpIf(false, dxbc::Src::R(srv_selection_temp, dxbc::Src::kWWWW)); // Write the magnification filter to srv_selection_temp.w. In the // "if" rather than "else" because this is more likely to happen if // the layer is constant. if (vol_mag_filter_is_fetch_const) { - DxbcOpAnd(DxbcDest::R(srv_selection_temp, 0b1000), - RequestTextureFetchConstantWord(tfetch_index, 4), - DxbcSrc::LU(1)); + a_.OpAnd(dxbc::Dest::R(srv_selection_temp, 0b1000), + RequestTextureFetchConstantWord(tfetch_index, 4), + dxbc::Src::LU(1)); } else { - DxbcOpMov(DxbcDest::R(srv_selection_temp, 0b1000), - DxbcSrc::LU(uint32_t(vol_mag_filter_is_linear))); + a_.OpMov(dxbc::Dest::R(srv_selection_temp, 0b1000), + dxbc::Src::LU(uint32_t(vol_mag_filter_is_linear))); } - DxbcOpElse(); + a_.OpElse(); // Write the minification filter to srv_selection_temp.w. if (vol_min_filter_is_fetch_const) { - DxbcOpUBFE(DxbcDest::R(srv_selection_temp, 0b1000), - DxbcSrc::LU(1), DxbcSrc::LU(1), - RequestTextureFetchConstantWord(tfetch_index, 4)); + a_.OpUBFE(dxbc::Dest::R(srv_selection_temp, 0b1000), + dxbc::Src::LU(1), dxbc::Src::LU(1), + RequestTextureFetchConstantWord(tfetch_index, 4)); } else { - DxbcOpMov(DxbcDest::R(srv_selection_temp, 0b1000), - DxbcSrc::LU(uint32_t(vol_min_filter_is_linear))); + a_.OpMov(dxbc::Dest::R(srv_selection_temp, 0b1000), + dxbc::Src::LU(uint32_t(vol_min_filter_is_linear))); } // Close the magnification check. - DxbcOpEndIf(); + a_.OpEndIf(); // Check if the filter is linear. 
- DxbcOpIf(true, DxbcSrc::R(srv_selection_temp, DxbcSrc::kWWWW)); + a_.OpIf(true, dxbc::Src::R(srv_selection_temp, dxbc::Src::kWWWW)); } else if (vol_mag_filter_is_linear) { assert_false(vol_min_filter_is_linear); // Both overridden, one (magnification) is linear, another // (minification) is not - handle linear filtering if magnifying. - DxbcOpIf(false, DxbcSrc::R(srv_selection_temp, DxbcSrc::kWWWW)); + a_.OpIf(false, dxbc::Src::R(srv_selection_temp, dxbc::Src::kWWWW)); } else { assert_true(vol_min_filter_is_linear); assert_false(vol_mag_filter_is_linear); // Both overridden, one (minification) is linear, another // (magnification) is not - handle linear filtering if minifying. - DxbcOpIf(true, DxbcSrc::R(srv_selection_temp, DxbcSrc::kWWWW)); + a_.OpIf(true, dxbc::Src::R(srv_selection_temp, dxbc::Src::kWWWW)); } // For linear filtering, subtract 0.5 from the coordinates and store // the lerp factor. Flooring will be done later. - DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ), - DxbcSrc::LF(-0.5f)); - DxbcOpFrc(DxbcDest::R(srv_selection_temp, 0b0100), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ)); + a_.OpAdd(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kZZZZ), + dxbc::Src::LF(-0.5f)); + a_.OpFrc(dxbc::Dest::R(srv_selection_temp, 0b0100), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kZZZZ)); // Close the linear check. - DxbcOpEndIf(); + a_.OpEndIf(); // Close the stacked check. - DxbcOpEndIf(); + a_.OpEndIf(); } else { // No gradients, or using the same filter overrides for magnifying and // minifying. 
Assume always magnifying if no gradients (LOD 0, always @@ -1702,35 +1715,35 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( srv_selection_temp = PushSystemTemp(); } layer_lerp_factor_src = - DxbcSrc::R(srv_selection_temp, DxbcSrc::kZZZZ); + dxbc::Src::R(srv_selection_temp, dxbc::Src::kZZZZ); // Initialize to point sampling, and break register dependency for // 3D. - DxbcOpMov(DxbcDest::R(srv_selection_temp, 0b0100), - DxbcSrc::LF(0.0f)); + a_.OpMov(dxbc::Dest::R(srv_selection_temp, 0b0100), + dxbc::Src::LF(0.0f)); assert_true((size_needed_components & 0b1000) == 0b1000); - DxbcOpIf(false, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW)); + a_.OpIf(false, dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kWWWW)); if (vol_mag_filter_is_fetch_const) { // Extract the magnification filtering mode from the fetch // constant. - DxbcOpAnd(DxbcDest::R(srv_selection_temp, 0b1000), - RequestTextureFetchConstantWord(tfetch_index, 4), - DxbcSrc::LU(1)); + a_.OpAnd(dxbc::Dest::R(srv_selection_temp, 0b1000), + RequestTextureFetchConstantWord(tfetch_index, 4), + dxbc::Src::LU(1)); // Check if it's linear. - DxbcOpIf(true, DxbcSrc::R(srv_selection_temp, DxbcSrc::kWWWW)); + a_.OpIf(true, dxbc::Src::R(srv_selection_temp, dxbc::Src::kWWWW)); } // For linear filtering, subtract 0.5 from the coordinates and store // the lerp factor. Flooring will be done later. - DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ), - DxbcSrc::LF(-0.5f)); - DxbcOpFrc(DxbcDest::R(srv_selection_temp, 0b0100), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ)); + a_.OpAdd(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kZZZZ), + dxbc::Src::LF(-0.5f)); + a_.OpFrc(dxbc::Dest::R(srv_selection_temp, 0b0100), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kZZZZ)); if (vol_mag_filter_is_fetch_const) { // Close the fetch constant linear filtering mode check. 
- DxbcOpEndIf(); + a_.OpEndIf(); } // Close the stacked check. - DxbcOpEndIf(); + a_.OpEndIf(); } } } @@ -1738,17 +1751,19 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( uint32_t result_first_component; xe::bit_scan_forward(used_result_nonzero_components, &result_first_component); - DxbcSrc is_all_signed_src( - DxbcSrc::R(is_signed_temp).Select(result_first_component)); - DxbcSrc is_any_signed_src( - DxbcSrc::R(is_signed_temp).Select(result_first_component)); + dxbc::Src is_all_signed_src( + dxbc::Src::R(is_signed_temp).Select(result_first_component)); + dxbc::Src is_any_signed_src( + dxbc::Src::R(is_signed_temp).Select(result_first_component)); if (used_result_nonzero_components != (1 << result_first_component)) { // Multiple components fetched - need to merge. if (srv_selection_temp == UINT32_MAX) { srv_selection_temp = PushSystemTemp(); } - DxbcDest is_all_signed_dest(DxbcDest::R(srv_selection_temp, 0b0001)); - DxbcDest is_any_signed_dest(DxbcDest::R(srv_selection_temp, 0b0010)); + dxbc::Dest is_all_signed_dest( + dxbc::Dest::R(srv_selection_temp, 0b0001)); + dxbc::Dest is_any_signed_dest( + dxbc::Dest::R(srv_selection_temp, 0b0010)); uint32_t result_remaining_components = used_result_nonzero_components & ~(uint32_t(1) << result_first_component); @@ -1756,14 +1771,16 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( while (xe::bit_scan_forward(result_remaining_components, &result_component)) { result_remaining_components &= ~(uint32_t(1) << result_component); - DxbcOpAnd(is_all_signed_dest, is_all_signed_src, - DxbcSrc::R(is_signed_temp).Select(result_component)); - DxbcOpOr(is_any_signed_dest, is_any_signed_src, - DxbcSrc::R(is_signed_temp).Select(result_component)); + a_.OpAnd(is_all_signed_dest, is_all_signed_src, + dxbc::Src::R(is_signed_temp).Select(result_component)); + a_.OpOr(is_any_signed_dest, is_any_signed_src, + dxbc::Src::R(is_signed_temp).Select(result_component)); // For the first component, both sources must both be two 
is_signed // components, to initialize. - is_all_signed_src = DxbcSrc::R(srv_selection_temp, DxbcSrc::kXXXX); - is_any_signed_src = DxbcSrc::R(srv_selection_temp, DxbcSrc::kYYYY); + is_all_signed_src = + dxbc::Src::R(srv_selection_temp, dxbc::Src::kXXXX); + is_any_signed_src = + dxbc::Src::R(srv_selection_temp, dxbc::Src::kYYYY); } } @@ -1773,7 +1790,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { assert_true((size_needed_components & 0b1000) == 0b1000); // The first fetch attempt will be for the 3D SRV. - DxbcOpIf(true, DxbcSrc::R(size_and_is_3d_temp, DxbcSrc::kWWWW)); + a_.OpIf(true, dxbc::Src::R(size_and_is_3d_temp, dxbc::Src::kWWWW)); } for (uint32_t is_stacked = 0; is_stacked < @@ -1788,14 +1805,14 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( srv_dimension = xenos::FetchOpDimension::k2D; srv_grad_component_count = 2; layer_lerp_needed = - layer_lerp_factor_src.type_ != DxbcOperandType::kImmediate32; - DxbcOpElse(); + layer_lerp_factor_src.type_ != dxbc::OperandType::kImmediate32; + a_.OpElse(); // Floor the array layer (Direct3D 12 does rounding to nearest even // for the layer index, but on the Xbox 360, addressing is similar to // that of 3D textures). This is needed for both point and linear // filtering (with linear, 0.5 was subtracted previously). 
- DxbcOpRoundNI(DxbcDest::R(coord_and_sampler_temp, 0b0100), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ)); + a_.OpRoundNI(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kZZZZ)); } uint32_t texture_binding_index_unsigned = FindOrAddTextureBinding(tfetch_index, srv_dimension, false); @@ -1805,7 +1822,8 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( texture_bindings_[texture_binding_index_unsigned]; const TextureBinding& texture_binding_signed = texture_bindings_[texture_binding_index_signed]; - DxbcSrc srv_unsigned(DxbcSrc::LF(0.0f)), srv_signed(DxbcSrc::LF(0.0f)); + dxbc::Src srv_unsigned(dxbc::Src::LF(0.0f)), + srv_signed(dxbc::Src::LF(0.0f)); if (bindless_resources_used_) { uint32_t* bindless_srv_index = nullptr; switch (srv_dimension) { @@ -1825,18 +1843,18 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( *bindless_srv_index = srv_count_++; } assert_true(srv_selection_temp != UINT32_MAX); - srv_unsigned = - DxbcSrc::T(*bindless_srv_index, DxbcIndex(srv_selection_temp, 3)); + srv_unsigned = dxbc::Src::T(*bindless_srv_index, + dxbc::Index(srv_selection_temp, 3)); srv_signed = srv_unsigned; } else { srv_unsigned = - DxbcSrc::T(texture_binding_unsigned.bindful_srv_index, - uint32_t(SRVMainRegister::kBindfulTexturesStart) + - texture_binding_index_unsigned); + dxbc::Src::T(texture_binding_unsigned.bindful_srv_index, + uint32_t(SRVMainRegister::kBindfulTexturesStart) + + texture_binding_index_unsigned); srv_signed = - DxbcSrc::T(texture_binding_signed.bindful_srv_index, - uint32_t(SRVMainRegister::kBindfulTexturesStart) + - texture_binding_index_signed); + dxbc::Src::T(texture_binding_signed.bindful_srv_index, + uint32_t(SRVMainRegister::kBindfulTexturesStart) + + texture_binding_index_signed); } for (uint32_t layer = 0; layer < (layer_lerp_needed ? 
2u : 1u); ++layer) { @@ -1844,18 +1862,18 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( if (layer) { layer_value_temp = PushSystemTemp(); // Check if the lerp factor is not zero (or NaN). - DxbcOpNE(DxbcDest::R(layer_value_temp, 0b0001), - layer_lerp_factor_src, DxbcSrc::LF(0.0f)); + a_.OpNE(dxbc::Dest::R(layer_value_temp, 0b0001), + layer_lerp_factor_src, dxbc::Src::LF(0.0f)); // If the lerp factor is not zero, sample the next layer. - DxbcOpIf(true, DxbcSrc::R(layer_value_temp, DxbcSrc::kXXXX)); + a_.OpIf(true, dxbc::Src::R(layer_value_temp, dxbc::Src::kXXXX)); // Go to the next layer. - DxbcOpAdd(DxbcDest::R(coord_and_sampler_temp, 0b0100), - DxbcSrc::R(coord_and_sampler_temp, DxbcSrc::kZZZZ), - DxbcSrc::LF(1.0f)); + a_.OpAdd(dxbc::Dest::R(coord_and_sampler_temp, 0b0100), + dxbc::Src::R(coord_and_sampler_temp, dxbc::Src::kZZZZ), + dxbc::Src::LF(1.0f)); } // Always 3 coordinate components (1D and 2D are padded to 2D arrays, // 3D and cube have 3 coordinate dimensions). - DxbcOpIf(false, is_all_signed_src); + a_.OpIf(false, is_all_signed_src); { // Sample the unsigned texture. 
if (bindless_resources_used_) { @@ -1867,29 +1885,29 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( } uint32_t texture_bindless_descriptor_index = texture_binding_unsigned.bindless_descriptor_index; - DxbcOpMov( - DxbcDest::R(srv_selection_temp, 0b1000), - DxbcSrc::CB(cbuffer_index_descriptor_indices_, - uint32_t(CbufferRegister::kDescriptorIndices), - texture_bindless_descriptor_index >> 2) + a_.OpMov( + dxbc::Dest::R(srv_selection_temp, 0b1000), + dxbc::Src::CB(cbuffer_index_descriptor_indices_, + uint32_t(CbufferRegister::kDescriptorIndices), + texture_bindless_descriptor_index >> 2) .Select(texture_bindless_descriptor_index & 3)); } if (grad_v_temp != UINT32_MAX) { assert_not_zero(grad_component_count); - DxbcOpSampleD( - DxbcDest::R(layer_value_temp, used_result_nonzero_components), - DxbcSrc::R(coord_and_sampler_temp), 3, srv_unsigned, sampler, - DxbcSrc::R(grad_h_lod_temp), DxbcSrc::R(grad_v_temp), - srv_grad_component_count); + a_.OpSampleD(dxbc::Dest::R(layer_value_temp, + used_result_nonzero_components), + dxbc::Src::R(coord_and_sampler_temp), 3, + srv_unsigned, sampler, dxbc::Src::R(grad_h_lod_temp), + dxbc::Src::R(grad_v_temp), srv_grad_component_count); } else { - DxbcOpSampleL( - DxbcDest::R(layer_value_temp, used_result_nonzero_components), - DxbcSrc::R(coord_and_sampler_temp), 3, srv_unsigned, sampler, - lod_src); + a_.OpSampleL(dxbc::Dest::R(layer_value_temp, + used_result_nonzero_components), + dxbc::Src::R(coord_and_sampler_temp), 3, + srv_unsigned, sampler, lod_src); } } - DxbcOpEndIf(); - DxbcOpIf(true, is_any_signed_src); + a_.OpEndIf(); + a_.OpIf(true, is_any_signed_src); { // Sample the signed texture. 
uint32_t signed_temp = PushSystemTemp(); @@ -1902,46 +1920,47 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( } uint32_t texture_bindless_descriptor_index = texture_binding_signed.bindless_descriptor_index; - DxbcOpMov( - DxbcDest::R(srv_selection_temp, 0b1000), - DxbcSrc::CB(cbuffer_index_descriptor_indices_, - uint32_t(CbufferRegister::kDescriptorIndices), - texture_bindless_descriptor_index >> 2) + a_.OpMov( + dxbc::Dest::R(srv_selection_temp, 0b1000), + dxbc::Src::CB(cbuffer_index_descriptor_indices_, + uint32_t(CbufferRegister::kDescriptorIndices), + texture_bindless_descriptor_index >> 2) .Select(texture_bindless_descriptor_index & 3)); } if (grad_v_temp != UINT32_MAX) { assert_not_zero(grad_component_count); - DxbcOpSampleD( - DxbcDest::R(signed_temp, used_result_nonzero_components), - DxbcSrc::R(coord_and_sampler_temp), 3, srv_signed, sampler, - DxbcSrc::R(grad_h_lod_temp), DxbcSrc::R(grad_v_temp), + a_.OpSampleD( + dxbc::Dest::R(signed_temp, used_result_nonzero_components), + dxbc::Src::R(coord_and_sampler_temp), 3, srv_signed, sampler, + dxbc::Src::R(grad_h_lod_temp), dxbc::Src::R(grad_v_temp), srv_grad_component_count); } else { - DxbcOpSampleL( - DxbcDest::R(signed_temp, used_result_nonzero_components), - DxbcSrc::R(coord_and_sampler_temp), 3, srv_signed, sampler, + a_.OpSampleL( + dxbc::Dest::R(signed_temp, used_result_nonzero_components), + dxbc::Src::R(coord_and_sampler_temp), 3, srv_signed, sampler, lod_src); } - DxbcOpMovC( - DxbcDest::R(layer_value_temp, used_result_nonzero_components), - DxbcSrc::R(is_signed_temp), DxbcSrc::R(signed_temp), - DxbcSrc::R(layer_value_temp)); + a_.OpMovC( + dxbc::Dest::R(layer_value_temp, used_result_nonzero_components), + dxbc::Src::R(is_signed_temp), dxbc::Src::R(signed_temp), + dxbc::Src::R(layer_value_temp)); // Release signed_temp. PopSystemTemp(); } - DxbcOpEndIf(); + a_.OpEndIf(); if (layer) { assert_true(layer_value_temp != system_temp_result_); // Interpolate between the two layers. 
- DxbcOpAdd( - DxbcDest::R(layer_value_temp, used_result_nonzero_components), - DxbcSrc::R(layer_value_temp), -DxbcSrc::R(system_temp_result_)); - DxbcOpMAd(DxbcDest::R(system_temp_result_, - used_result_nonzero_components), - DxbcSrc::R(layer_value_temp), layer_lerp_factor_src, - DxbcSrc::R(system_temp_result_)); + a_.OpAdd( + dxbc::Dest::R(layer_value_temp, used_result_nonzero_components), + dxbc::Src::R(layer_value_temp), + -dxbc::Src::R(system_temp_result_)); + a_.OpMAd(dxbc::Dest::R(system_temp_result_, + used_result_nonzero_components), + dxbc::Src::R(layer_value_temp), layer_lerp_factor_src, + dxbc::Src::R(system_temp_result_)); // Close the linear filtering check. - DxbcOpEndIf(); + a_.OpEndIf(); // Release the allocated layer_value_temp. PopSystemTemp(); } @@ -1949,7 +1968,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( } if (instr.dimension == xenos::FetchOpDimension::k3DOrStacked) { // Close the stacked/3D check. - DxbcOpEndIf(); + a_.OpEndIf(); } if (srv_selection_temp != UINT32_MAX) { @@ -1979,21 +1998,21 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( if (!(used_result_nonzero_components & (1 << i))) { continue; } - DxbcDest component_dest(DxbcDest::R(system_temp_result_, 1 << i)); - DxbcSrc component_src(DxbcSrc::R(system_temp_result_).Select(i)); - DxbcOpSwitch(DxbcSrc::R(signs_temp).Select(i)); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::TextureSign::kUnsignedBiased))); - DxbcOpMAd(component_dest, component_src, DxbcSrc::LF(2.0f), - DxbcSrc::LF(-1.0f)); - DxbcOpBreak(); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::TextureSign::kGamma))); + dxbc::Dest component_dest(dxbc::Dest::R(system_temp_result_, 1 << i)); + dxbc::Src component_src(dxbc::Src::R(system_temp_result_).Select(i)); + a_.OpSwitch(dxbc::Src::R(signs_temp).Select(i)); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::TextureSign::kUnsignedBiased))); + a_.OpMAd(component_dest, component_src, dxbc::Src::LF(2.0f), + dxbc::Src::LF(-1.0f)); + a_.OpBreak(); + 
a_.OpCase(dxbc::Src::LU(uint32_t(xenos::TextureSign::kGamma))); uint32_t gamma_temp = PushSystemTemp(); ConvertPWLGamma(false, system_temp_result_, i, system_temp_result_, i, gamma_temp, 0, gamma_temp, 1); // Release gamma_temp. PopSystemTemp(); - DxbcOpBreak(); - DxbcOpEndSwitch(); + a_.OpBreak(); + a_.OpEndSwitch(); } } if (signs_temp != UINT32_MAX) { @@ -2008,15 +2027,15 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( if (instr.opcode == FetchOpcode::kTextureFetch) { // Apply the result exponent bias. uint32_t exp_adjust_temp = PushSystemTemp(); - DxbcOpIBFE(DxbcDest::R(exp_adjust_temp, 0b0001), DxbcSrc::LU(6), - DxbcSrc::LU(13), - RequestTextureFetchConstantWord(tfetch_index, 3)); - DxbcOpIMAd(DxbcDest::R(exp_adjust_temp, 0b0001), - DxbcSrc::R(exp_adjust_temp, DxbcSrc::kXXXX), - DxbcSrc::LI(int32_t(1) << 23), DxbcSrc::LF(1.0f)); - DxbcOpMul(DxbcDest::R(system_temp_result_, used_result_nonzero_components), - DxbcSrc::R(system_temp_result_), - DxbcSrc::R(exp_adjust_temp, DxbcSrc::kXXXX)); + a_.OpIBFE(dxbc::Dest::R(exp_adjust_temp, 0b0001), dxbc::Src::LU(6), + dxbc::Src::LU(13), + RequestTextureFetchConstantWord(tfetch_index, 3)); + a_.OpIMAd(dxbc::Dest::R(exp_adjust_temp, 0b0001), + dxbc::Src::R(exp_adjust_temp, dxbc::Src::kXXXX), + dxbc::Src::LI(int32_t(1) << 23), dxbc::Src::LF(1.0f)); + a_.OpMul(dxbc::Dest::R(system_temp_result_, used_result_nonzero_components), + dxbc::Src::R(system_temp_result_), + dxbc::Src::R(exp_adjust_temp, dxbc::Src::kXXXX)); // Release exp_adjust_temp. 
PopSystemTemp(); } @@ -2024,10 +2043,10 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction( uint32_t used_result_zero_components = used_result_components & ~used_result_nonzero_components; if (used_result_zero_components) { - DxbcOpMov(DxbcDest::R(system_temp_result_, used_result_zero_components), - DxbcSrc::LF(0.0f)); + a_.OpMov(dxbc::Dest::R(system_temp_result_, used_result_zero_components), + dxbc::Src::LF(0.0f)); } - StoreResult(instr.result, DxbcSrc::R(system_temp_result_)); + StoreResult(instr.result, dxbc::Src::R(system_temp_result_)); } } // namespace gpu diff --git a/src/xenia/gpu/dxbc_shader_translator_memexport.cc b/src/xenia/gpu/dxbc_shader_translator_memexport.cc index 76bec3e60..ce39e21b1 100644 --- a/src/xenia/gpu/dxbc_shader_translator_memexport.cc +++ b/src/xenia/gpu/dxbc_shader_translator_memexport.cc @@ -15,7 +15,7 @@ using namespace ucode; void DxbcShaderTranslator::ExportToMemory_PackFixed32( const uint32_t* eM_temps, uint32_t eM_count, const uint32_t bits[4], - const DxbcSrc& is_integer, const DxbcSrc& is_signed) { + const dxbc::Src& is_integer, const dxbc::Src& is_signed) { // Will insert with BFI - sign extension of red will be overwritten, not // truncated. assert_not_zero(bits[0]); @@ -26,64 +26,64 @@ void DxbcShaderTranslator::ExportToMemory_PackFixed32( mask |= 1 << i; } } - DxbcOpIf(true, is_signed); + a_.OpIf(true, is_signed); { float range[4]; for (uint32_t i = 0; i < 4; ++i) { range[i] = bits[i] ? 
float((uint32_t(1) << (bits[i] - 1)) - 1) : 0.0f; } - DxbcSrc range_src(DxbcSrc::LP(range)); - DxbcOpIf(false, is_integer); + dxbc::Src range_src(dxbc::Src::LP(range)); + a_.OpIf(false, is_integer); for (uint32_t i = 0; i < eM_count; ++i) { uint32_t eM_temp = eM_temps[i]; - DxbcOpMul(DxbcDest::R(eM_temp, mask), DxbcSrc::R(eM_temp), range_src); + a_.OpMul(dxbc::Dest::R(eM_temp, mask), dxbc::Src::R(eM_temp), range_src); } - DxbcOpEndIf(); + a_.OpEndIf(); for (uint32_t i = 0; i < eM_count; ++i) { - DxbcDest eM_dest(DxbcDest::R(eM_temps[i], mask)); - DxbcSrc eM_src(DxbcSrc::R(eM_temps[i])); + dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[i], mask)); + dxbc::Src eM_src(dxbc::Src::R(eM_temps[i])); // TODO(Triang3l): NaN should become zero, not -range. - DxbcOpMax(eM_dest, eM_src, -range_src); - DxbcOpMin(eM_dest, eM_src, range_src); + a_.OpMax(eM_dest, eM_src, -range_src); + a_.OpMin(eM_dest, eM_src, range_src); } } - DxbcOpElse(); + a_.OpElse(); { float range[4]; for (uint32_t i = 0; i < 4; ++i) { range[i] = float((uint32_t(1) << bits[i]) - 1); } - DxbcSrc range_src(DxbcSrc::LP(range)); - DxbcOpIf(false, is_integer); + dxbc::Src range_src(dxbc::Src::LP(range)); + a_.OpIf(false, is_integer); for (uint32_t i = 0; i < eM_count; ++i) { uint32_t eM_temp = eM_temps[i]; - DxbcOpMul(DxbcDest::R(eM_temp, mask), DxbcSrc::R(eM_temp), range_src); + a_.OpMul(dxbc::Dest::R(eM_temp, mask), dxbc::Src::R(eM_temp), range_src); } - DxbcOpEndIf(); + a_.OpEndIf(); for (uint32_t i = 0; i < eM_count; ++i) { - DxbcDest eM_dest(DxbcDest::R(eM_temps[i], mask)); - DxbcSrc eM_src(DxbcSrc::R(eM_temps[i])); - DxbcOpMax(eM_dest, eM_src, DxbcSrc::LF(0.0f)); - DxbcOpMin(eM_dest, eM_src, range_src); + dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[i], mask)); + dxbc::Src eM_src(dxbc::Src::R(eM_temps[i])); + a_.OpMax(eM_dest, eM_src, dxbc::Src::LF(0.0f)); + a_.OpMin(eM_dest, eM_src, range_src); } } - DxbcOpEndIf(); + a_.OpEndIf(); for (uint32_t i = 0; i < eM_count; ++i) { uint32_t eM_temp = eM_temps[i]; // Round 
to the nearest integer, according to the rules of handling integer // formats in Direct3D. // TODO(Triang3l): Round by adding +-0.5, not with round_ne. - DxbcOpRoundNE(DxbcDest::R(eM_temp, mask), DxbcSrc::R(eM_temp)); - DxbcOpFToI(DxbcDest::R(eM_temp, mask), DxbcSrc::R(eM_temp)); - DxbcDest eM_packed_dest(DxbcDest::R(eM_temp, 0b0001)); - DxbcSrc eM_packed_src(DxbcSrc::R(eM_temp, DxbcSrc::kXXXX)); + a_.OpRoundNE(dxbc::Dest::R(eM_temp, mask), dxbc::Src::R(eM_temp)); + a_.OpFToI(dxbc::Dest::R(eM_temp, mask), dxbc::Src::R(eM_temp)); + dxbc::Dest eM_packed_dest(dxbc::Dest::R(eM_temp, 0b0001)); + dxbc::Src eM_packed_src(dxbc::Src::R(eM_temp, dxbc::Src::kXXXX)); uint32_t offset = bits[0]; for (uint32_t j = 1; j < 4; ++j) { if (!bits[j]) { continue; } - DxbcOpBFI(eM_packed_dest, DxbcSrc::LU(bits[j]), DxbcSrc::LU(offset), - DxbcSrc::R(eM_temp).Select(j), eM_packed_src); + a_.OpBFI(eM_packed_dest, dxbc::Src::LU(bits[j]), dxbc::Src::LU(offset), + dxbc::Src::R(eM_temp).Select(j), eM_packed_src); offset += bits[j]; } } @@ -100,40 +100,40 @@ void DxbcShaderTranslator::ExportToMemory() { // Safety check if the shared memory is bound as UAV. system_constants_used_ |= 1ull << kSysConst_Flags_Index; - DxbcOpAnd(DxbcDest::R(control_temp, 0b0001), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_Flags_Vec) - .Select(kSysConst_Flags_Comp), - DxbcSrc::LU(kSysFlag_SharedMemoryIsUAV)); + a_.OpAnd(dxbc::Dest::R(control_temp, 0b0001), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp), + dxbc::Src::LU(kSysFlag_SharedMemoryIsUAV)); if (is_pixel_shader()) { // Disable memexport in pixel shaders with supersampling since VPOS is // ambiguous. 
if (edram_rov_used_) { system_constants_used_ |= 1ull << kSysConst_EdramResolutionSquareScale_Index; - DxbcOpULT(DxbcDest::R(control_temp, 0b0010), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramResolutionSquareScale_Vec) - .Select(kSysConst_EdramResolutionSquareScale_Comp), - DxbcSrc::LU(2)); - DxbcOpAnd(DxbcDest::R(control_temp, 0b0001), - DxbcSrc::R(control_temp, DxbcSrc::kXXXX), - DxbcSrc::R(control_temp, DxbcSrc::kYYYY)); + a_.OpULT(dxbc::Dest::R(control_temp, 0b0010), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramResolutionSquareScale_Vec) + .Select(kSysConst_EdramResolutionSquareScale_Comp), + dxbc::Src::LU(2)); + a_.OpAnd(dxbc::Dest::R(control_temp, 0b0001), + dxbc::Src::R(control_temp, dxbc::Src::kXXXX), + dxbc::Src::R(control_temp, dxbc::Src::kYYYY)); } else { // Enough to check just Y because it's scaled for both 2x and 4x. system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - DxbcOpMovC(DxbcDest::R(control_temp, 0b0001), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_SampleCountLog2_Vec) - .Select(kSysConst_SampleCountLog2_Comp + 1), - DxbcSrc::LU(0), DxbcSrc::R(control_temp, DxbcSrc::kXXXX)); + a_.OpMovC(dxbc::Dest::R(control_temp, 0b0001), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_SampleCountLog2_Vec) + .Select(kSysConst_SampleCountLog2_Comp + 1), + dxbc::Src::LU(0), dxbc::Src::R(control_temp, dxbc::Src::kXXXX)); } } // Check if memexport can be done. - DxbcOpIf(true, DxbcSrc::R(control_temp, DxbcSrc::kXXXX)); + a_.OpIf(true, dxbc::Src::R(control_temp, dxbc::Src::kXXXX)); // control_temp.x is now free. for (uint32_t i = 0; i < Shader::kMaxMemExports; ++i) { @@ -160,21 +160,21 @@ void DxbcShaderTranslator::ExportToMemory() { } // Swap red and blue if needed. 
- DxbcOpAnd(DxbcDest::R(control_temp, 0b0001), - DxbcSrc::R(eA_temp, DxbcSrc::kZZZZ), - DxbcSrc::LU(uint32_t(1) << 19)); + a_.OpAnd(dxbc::Dest::R(control_temp, 0b0001), + dxbc::Src::R(eA_temp, dxbc::Src::kZZZZ), + dxbc::Src::LU(uint32_t(1) << 19)); for (uint32_t j = 0; j < eM_count; ++j) { uint32_t eM_temp = eM_temps[j]; - DxbcOpMovC(DxbcDest::R(eM_temp, 0b0101), - DxbcSrc::R(control_temp, DxbcSrc::kXXXX), - DxbcSrc::R(eM_temp, 0b000010), DxbcSrc::R(eM_temp)); + a_.OpMovC(dxbc::Dest::R(eM_temp, 0b0101), + dxbc::Src::R(control_temp, dxbc::Src::kXXXX), + dxbc::Src::R(eM_temp, 0b000010), dxbc::Src::R(eM_temp)); } // Initialize element size in control_temp.x to 4 bytes as this is the most // common size. - DxbcDest element_size_dest(DxbcDest::R(control_temp, 0b0001)); - DxbcSrc element_size_src(DxbcSrc::R(control_temp, DxbcSrc::kXXXX)); - DxbcOpMov(element_size_dest, DxbcSrc::LU(4)); + dxbc::Dest element_size_dest(dxbc::Dest::R(control_temp, 0b0001)); + dxbc::Src element_size_src(dxbc::Src::R(control_temp, dxbc::Src::kXXXX)); + a_.OpMov(element_size_dest, dxbc::Src::LU(4)); // Each eM should get a packed value in the destination format now. @@ -182,285 +182,288 @@ void DxbcShaderTranslator::ExportToMemory() { // Y - signedness if fixed-point. // Z - fractional/integer if fixed-point. // W - color format. - DxbcOpUBFE(DxbcDest::R(control_temp, 0b1110), DxbcSrc::LU(0, 1, 1, 6), - DxbcSrc::LU(0, 16, 17, 8), DxbcSrc::R(eA_temp, DxbcSrc::kZZZZ)); - DxbcSrc is_signed(DxbcSrc::R(control_temp, DxbcSrc::kYYYY)); - DxbcSrc is_integer(DxbcSrc::R(control_temp, DxbcSrc::kZZZZ)); + a_.OpUBFE(dxbc::Dest::R(control_temp, 0b1110), dxbc::Src::LU(0, 1, 1, 6), + dxbc::Src::LU(0, 16, 17, 8), + dxbc::Src::R(eA_temp, dxbc::Src::kZZZZ)); + dxbc::Src is_signed(dxbc::Src::R(control_temp, dxbc::Src::kYYYY)); + dxbc::Src is_integer(dxbc::Src::R(control_temp, dxbc::Src::kZZZZ)); // Convert and pack the format. 
- DxbcOpSwitch(DxbcSrc::R(control_temp, DxbcSrc::kWWWW)); + a_.OpSwitch(dxbc::Src::R(control_temp, dxbc::Src::kWWWW)); // control_temp.w is now free. { // k_8_8_8_8 // k_8_8_8_8_AS_16_16_16_16 - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_8_8_8_8))); - DxbcOpCase( - DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_8_8_8_8_AS_16_16_16_16))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_8_8_8_8))); + a_.OpCase(dxbc::Src::LU( + uint32_t(xenos::ColorFormat::k_8_8_8_8_AS_16_16_16_16))); { uint32_t bits[4] = {8, 8, 8, 8}; ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer, is_signed); } - DxbcOpBreak(); + a_.OpBreak(); // k_2_10_10_10 // k_2_10_10_10_AS_16_16_16_16 - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_2_10_10_10))); - DxbcOpCase(DxbcSrc::LU( + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_2_10_10_10))); + a_.OpCase(dxbc::Src::LU( uint32_t(xenos::ColorFormat::k_2_10_10_10_AS_16_16_16_16))); { uint32_t bits[4] = {10, 10, 10, 2}; ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer, is_signed); } - DxbcOpBreak(); + a_.OpBreak(); // k_10_11_11 // k_10_11_11_AS_16_16_16_16 - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_10_11_11))); - DxbcOpCase( - DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_10_11_11_AS_16_16_16_16))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_10_11_11))); + a_.OpCase(dxbc::Src::LU( + uint32_t(xenos::ColorFormat::k_10_11_11_AS_16_16_16_16))); { uint32_t bits[4] = {11, 11, 10}; ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer, is_signed); } - DxbcOpBreak(); + a_.OpBreak(); // k_11_11_10 // k_11_11_10_AS_16_16_16_16 - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_11_11_10))); - DxbcOpCase( - DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_11_11_10_AS_16_16_16_16))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_11_11_10))); + a_.OpCase(dxbc::Src::LU( + uint32_t(xenos::ColorFormat::k_11_11_10_AS_16_16_16_16))); { uint32_t bits[4] = {10, 11, 11}; 
ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer, is_signed); } - DxbcOpBreak(); + a_.OpBreak(); // k_16_16 - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_16_16))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_16_16))); { uint32_t bits[4] = {16, 16}; ExportToMemory_PackFixed32(eM_temps, eM_count, bits, is_integer, is_signed); } - DxbcOpBreak(); + a_.OpBreak(); // k_16_16_16_16 - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_16_16_16_16))); - DxbcOpMov(element_size_dest, DxbcSrc::LU(8)); - DxbcOpIf(true, is_signed); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_16_16_16_16))); + a_.OpMov(element_size_dest, dxbc::Src::LU(8)); + a_.OpIf(true, is_signed); { - DxbcOpIf(false, is_integer); + a_.OpIf(false, is_integer); for (uint32_t j = 0; j < eM_count; ++j) { uint32_t eM_temp = eM_temps[j]; - DxbcOpMul(DxbcDest::R(eM_temp), DxbcSrc::R(eM_temp), - DxbcSrc::LF(32767.0f)); + a_.OpMul(dxbc::Dest::R(eM_temp), dxbc::Src::R(eM_temp), + dxbc::Src::LF(32767.0f)); } - DxbcOpEndIf(); + a_.OpEndIf(); for (uint32_t j = 0; j < eM_count; ++j) { - DxbcDest eM_dest(DxbcDest::R(eM_temps[j])); - DxbcSrc eM_src(DxbcSrc::R(eM_temps[j])); + dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[j])); + dxbc::Src eM_src(dxbc::Src::R(eM_temps[j])); // TODO(Triang3l): NaN should become zero, not -range. 
- DxbcOpMax(eM_dest, eM_src, DxbcSrc::LF(-32767.0f)); - DxbcOpMin(eM_dest, eM_src, DxbcSrc::LF(32767.0f)); + a_.OpMax(eM_dest, eM_src, dxbc::Src::LF(-32767.0f)); + a_.OpMin(eM_dest, eM_src, dxbc::Src::LF(32767.0f)); } } - DxbcOpElse(); + a_.OpElse(); { - DxbcOpIf(false, is_integer); + a_.OpIf(false, is_integer); for (uint32_t j = 0; j < eM_count; ++j) { uint32_t eM_temp = eM_temps[j]; - DxbcOpMul(DxbcDest::R(eM_temp), DxbcSrc::R(eM_temp), - DxbcSrc::LF(65535.0f)); + a_.OpMul(dxbc::Dest::R(eM_temp), dxbc::Src::R(eM_temp), + dxbc::Src::LF(65535.0f)); } - DxbcOpEndIf(); + a_.OpEndIf(); for (uint32_t j = 0; j < eM_count; ++j) { - DxbcDest eM_dest(DxbcDest::R(eM_temps[j])); - DxbcSrc eM_src(DxbcSrc::R(eM_temps[j])); - DxbcOpMax(eM_dest, eM_src, DxbcSrc::LF(0.0f)); - DxbcOpMin(eM_dest, eM_src, DxbcSrc::LF(65535.0f)); + dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[j])); + dxbc::Src eM_src(dxbc::Src::R(eM_temps[j])); + a_.OpMax(eM_dest, eM_src, dxbc::Src::LF(0.0f)); + a_.OpMin(eM_dest, eM_src, dxbc::Src::LF(65535.0f)); } } - DxbcOpEndIf(); + a_.OpEndIf(); for (uint32_t j = 0; j < eM_count; ++j) { uint32_t eM_temp = eM_temps[j]; // Round to the nearest integer, according to the rules of handling // integer formats in Direct3D. // TODO(Triang3l): Round by adding +-0.5, not with round_ne. 
- DxbcOpRoundNE(DxbcDest::R(eM_temp), DxbcSrc::R(eM_temp)); - DxbcOpFToI(DxbcDest::R(eM_temp), DxbcSrc::R(eM_temp)); - DxbcOpBFI(DxbcDest::R(eM_temp, 0b0011), DxbcSrc::LU(16), - DxbcSrc::LU(16), DxbcSrc::R(eM_temp, 0b1101), - DxbcSrc::R(eM_temp, 0b1000)); + a_.OpRoundNE(dxbc::Dest::R(eM_temp), dxbc::Src::R(eM_temp)); + a_.OpFToI(dxbc::Dest::R(eM_temp), dxbc::Src::R(eM_temp)); + a_.OpBFI(dxbc::Dest::R(eM_temp, 0b0011), dxbc::Src::LU(16), + dxbc::Src::LU(16), dxbc::Src::R(eM_temp, 0b1101), + dxbc::Src::R(eM_temp, 0b1000)); } - DxbcOpBreak(); + a_.OpBreak(); // k_16_16_FLOAT - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_16_16_FLOAT))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_16_16_FLOAT))); for (uint32_t j = 0; j < eM_count; ++j) { uint32_t eM_temp = eM_temps[j]; - DxbcOpF32ToF16(DxbcDest::R(eM_temp, 0b0011), DxbcSrc::R(eM_temp)); - DxbcOpBFI(DxbcDest::R(eM_temp, 0b0001), DxbcSrc::LU(16), - DxbcSrc::LU(16), DxbcSrc::R(eM_temp, DxbcSrc::kYYYY), - DxbcSrc::R(eM_temp, DxbcSrc::kXXXX)); + a_.OpF32ToF16(dxbc::Dest::R(eM_temp, 0b0011), dxbc::Src::R(eM_temp)); + a_.OpBFI(dxbc::Dest::R(eM_temp, 0b0001), dxbc::Src::LU(16), + dxbc::Src::LU(16), dxbc::Src::R(eM_temp, dxbc::Src::kYYYY), + dxbc::Src::R(eM_temp, dxbc::Src::kXXXX)); } - DxbcOpBreak(); + a_.OpBreak(); // k_16_16_16_16_FLOAT - DxbcOpCase( - DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_16_16_16_16_FLOAT))); - DxbcOpMov(element_size_dest, DxbcSrc::LU(8)); + a_.OpCase( + dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_16_16_16_16_FLOAT))); + a_.OpMov(element_size_dest, dxbc::Src::LU(8)); for (uint32_t j = 0; j < eM_count; ++j) { uint32_t eM_temp = eM_temps[j]; - DxbcOpF32ToF16(DxbcDest::R(eM_temp), DxbcSrc::R(eM_temp)); - DxbcOpBFI(DxbcDest::R(eM_temp, 0b0011), DxbcSrc::LU(16), - DxbcSrc::LU(16), DxbcSrc::R(eM_temp, 0b1101), - DxbcSrc::R(eM_temp, 0b1000)); + a_.OpF32ToF16(dxbc::Dest::R(eM_temp), dxbc::Src::R(eM_temp)); + a_.OpBFI(dxbc::Dest::R(eM_temp, 0b0011), dxbc::Src::LU(16), + 
dxbc::Src::LU(16), dxbc::Src::R(eM_temp, 0b1101), + dxbc::Src::R(eM_temp, 0b1000)); } - DxbcOpBreak(); + a_.OpBreak(); // k_32_FLOAT // Already in the destination format, 4 bytes per element already // selected. // k_32_32_FLOAT - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_32_32_FLOAT))); - DxbcOpMov(element_size_dest, DxbcSrc::LU(8)); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_32_32_FLOAT))); + a_.OpMov(element_size_dest, dxbc::Src::LU(8)); // Already in the destination format. - DxbcOpBreak(); + a_.OpBreak(); // k_32_32_32_32_FLOAT - DxbcOpCase( - DxbcSrc::LU(uint32_t(xenos::ColorFormat::k_32_32_32_32_FLOAT))); - DxbcOpMov(element_size_dest, DxbcSrc::LU(16)); + a_.OpCase( + dxbc::Src::LU(uint32_t(xenos::ColorFormat::k_32_32_32_32_FLOAT))); + a_.OpMov(element_size_dest, dxbc::Src::LU(16)); // Already in the destination format. - DxbcOpBreak(); + a_.OpBreak(); } - DxbcOpEndSwitch(); + a_.OpEndSwitch(); // control_temp.yz are now free. // Do endian swap. { - DxbcDest endian_dest(DxbcDest::R(control_temp, 0b0010)); - DxbcSrc endian_src(DxbcSrc::R(control_temp, DxbcSrc::kYYYY)); + dxbc::Dest endian_dest(dxbc::Dest::R(control_temp, 0b0010)); + dxbc::Src endian_src(dxbc::Src::R(control_temp, dxbc::Src::kYYYY)); // Extract endianness into control_temp.y. - DxbcOpAnd(endian_dest, DxbcSrc::R(eA_temp, DxbcSrc::kZZZZ), - DxbcSrc::LU(0b111)); + a_.OpAnd(endian_dest, dxbc::Src::R(eA_temp, dxbc::Src::kZZZZ), + dxbc::Src::LU(0b111)); // Change 8-in-64 and 8-in-128 to 8-in-32. for (uint32_t j = 0; j < 2; ++j) { - DxbcOpIEq(DxbcDest::R(control_temp, 0b0100), endian_src, - DxbcSrc::LU(uint32_t(j ? xenos::Endian128::k8in128 - : xenos::Endian128::k8in64))); + a_.OpIEq(dxbc::Dest::R(control_temp, 0b0100), endian_src, + dxbc::Src::LU(uint32_t(j ? 
xenos::Endian128::k8in128 + : xenos::Endian128::k8in64))); for (uint32_t k = 0; k < eM_count; ++k) { uint32_t eM_temp = eM_temps[k]; - DxbcOpMovC(DxbcDest::R(eM_temp), - DxbcSrc::R(control_temp, DxbcSrc::kZZZZ), - DxbcSrc::R(eM_temp, j ? 0b00011011 : 0b10110001), - DxbcSrc::R(eM_temp)); + a_.OpMovC(dxbc::Dest::R(eM_temp), + dxbc::Src::R(control_temp, dxbc::Src::kZZZZ), + dxbc::Src::R(eM_temp, j ? 0b00011011 : 0b10110001), + dxbc::Src::R(eM_temp)); } - DxbcOpMovC(endian_dest, DxbcSrc::R(control_temp, DxbcSrc::kZZZZ), - DxbcSrc::LU(uint32_t(xenos::Endian128::k8in32)), endian_src); + a_.OpMovC(endian_dest, dxbc::Src::R(control_temp, dxbc::Src::kZZZZ), + dxbc::Src::LU(uint32_t(xenos::Endian128::k8in32)), + endian_src); } uint32_t swap_temp = PushSystemTemp(); - DxbcDest swap_temp_dest(DxbcDest::R(swap_temp)); - DxbcSrc swap_temp_src(DxbcSrc::R(swap_temp)); + dxbc::Dest swap_temp_dest(dxbc::Dest::R(swap_temp)); + dxbc::Src swap_temp_src(dxbc::Src::R(swap_temp)); // 8-in-16 or one half of 8-in-32. - DxbcOpSwitch(endian_src); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k8in16))); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k8in32))); + a_.OpSwitch(endian_src); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k8in16))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k8in32))); for (uint32_t j = 0; j < eM_count; ++j) { - DxbcDest eM_dest(DxbcDest::R(eM_temps[j])); - DxbcSrc eM_src(DxbcSrc::R(eM_temps[j])); + dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[j])); + dxbc::Src eM_src(dxbc::Src::R(eM_temps[j])); // Temp = X0Z0. - DxbcOpAnd(swap_temp_dest, eM_src, DxbcSrc::LU(0x00FF00FF)); + a_.OpAnd(swap_temp_dest, eM_src, dxbc::Src::LU(0x00FF00FF)); // eM = YZW0. - DxbcOpUShR(eM_dest, eM_src, DxbcSrc::LU(8)); + a_.OpUShR(eM_dest, eM_src, dxbc::Src::LU(8)); // eM = Y0W0. - DxbcOpAnd(eM_dest, eM_src, DxbcSrc::LU(0x00FF00FF)); + a_.OpAnd(eM_dest, eM_src, dxbc::Src::LU(0x00FF00FF)); // eM = YXWZ. 
- DxbcOpUMAd(eM_dest, swap_temp_src, DxbcSrc::LU(256), eM_src); + a_.OpUMAd(eM_dest, swap_temp_src, dxbc::Src::LU(256), eM_src); } - DxbcOpBreak(); - DxbcOpEndSwitch(); + a_.OpBreak(); + a_.OpEndSwitch(); // 16-in-32 or another half of 8-in-32. - DxbcOpSwitch(endian_src); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k8in32))); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::Endian128::k16in32))); + a_.OpSwitch(endian_src); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k8in32))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::Endian128::k16in32))); for (uint32_t j = 0; j < eM_count; ++j) { - DxbcDest eM_dest(DxbcDest::R(eM_temps[j])); - DxbcSrc eM_src(DxbcSrc::R(eM_temps[j])); + dxbc::Dest eM_dest(dxbc::Dest::R(eM_temps[j])); + dxbc::Src eM_src(dxbc::Src::R(eM_temps[j])); // Temp = ZW00. - DxbcOpUShR(swap_temp_dest, eM_src, DxbcSrc::LU(16)); + a_.OpUShR(swap_temp_dest, eM_src, dxbc::Src::LU(16)); // eM = ZWXY. - DxbcOpBFI(eM_dest, DxbcSrc::LU(16), DxbcSrc::LU(16), eM_src, - swap_temp_src); + a_.OpBFI(eM_dest, dxbc::Src::LU(16), dxbc::Src::LU(16), eM_src, + swap_temp_src); } - DxbcOpBreak(); - DxbcOpEndSwitch(); + a_.OpBreak(); + a_.OpEndSwitch(); // Release swap_temp. PopSystemTemp(); } // control_temp.yz are now free. - DxbcDest address_dest(DxbcDest::R(eA_temp, 0b0001)); - DxbcSrc address_src(DxbcSrc::R(eA_temp, DxbcSrc::kXXXX)); + dxbc::Dest address_dest(dxbc::Dest::R(eA_temp, 0b0001)); + dxbc::Src address_src(dxbc::Src::R(eA_temp, dxbc::Src::kXXXX)); // Multiply the base address by dword size, also dropping the 0x40000000 // bit. - DxbcOpIShL(address_dest, address_src, DxbcSrc::LU(2)); + a_.OpIShL(address_dest, address_src, dxbc::Src::LU(2)); // Drop the exponent in the element index. 
- DxbcOpAnd(DxbcDest::R(eA_temp, 0b0010), DxbcSrc::R(eA_temp, DxbcSrc::kYYYY), - DxbcSrc::LU((1 << 23) - 1)); + a_.OpAnd(dxbc::Dest::R(eA_temp, 0b0010), + dxbc::Src::R(eA_temp, dxbc::Src::kYYYY), + dxbc::Src::LU((1 << 23) - 1)); // Add the offset of the first written element to the base address. - DxbcOpUMAd(address_dest, DxbcSrc::R(eA_temp, DxbcSrc::kYYYY), - element_size_src, address_src); + a_.OpUMAd(address_dest, dxbc::Src::R(eA_temp, dxbc::Src::kYYYY), + element_size_src, address_src); // Do the writes. - DxbcSrc eM_written_src( - DxbcSrc::R(system_temp_memexport_written_).Select(i >> 2)); + dxbc::Src eM_written_src( + dxbc::Src::R(system_temp_memexport_written_).Select(i >> 2)); uint32_t eM_written_base = 1u << ((i & 3) << 3); for (uint32_t j = 0; j < eM_count; ++j) { // Go to the next eM#. uint32_t eM_relative_offset = eM_offsets[j] - (j ? eM_offsets[j - 1] : 0); if (eM_relative_offset) { if (eM_relative_offset == 1) { - DxbcOpIAdd(address_dest, element_size_src, address_src); + a_.OpIAdd(address_dest, element_size_src, address_src); } else { - DxbcOpUMAd(address_dest, DxbcSrc::LU(eM_relative_offset), - element_size_src, address_src); + a_.OpUMAd(address_dest, dxbc::Src::LU(eM_relative_offset), + element_size_src, address_src); } } // Check if the eM# was actually written to on the execution path. - DxbcOpAnd(DxbcDest::R(control_temp, 0b0010), eM_written_src, - DxbcSrc::LU(eM_written_base << eM_offsets[j])); - DxbcOpIf(true, DxbcSrc::R(control_temp, DxbcSrc::kYYYY)); + a_.OpAnd(dxbc::Dest::R(control_temp, 0b0010), eM_written_src, + dxbc::Src::LU(eM_written_base << eM_offsets[j])); + a_.OpIf(true, dxbc::Src::R(control_temp, dxbc::Src::kYYYY)); // Write the element of the needed size. 
- DxbcSrc eM_src(DxbcSrc::R(eM_temps[j])); - DxbcOpSwitch(element_size_src); + dxbc::Src eM_src(dxbc::Src::R(eM_temps[j])); + a_.OpSwitch(element_size_src); for (uint32_t k = 1; k <= 4; k <<= 1) { - DxbcOpCase(DxbcSrc::LU(k * 4)); + a_.OpCase(dxbc::Src::LU(k * 4)); if (uav_index_shared_memory_ == kBindingIndexUnallocated) { uav_index_shared_memory_ = uav_count_++; } - DxbcOpStoreRaw( - DxbcDest::U(uav_index_shared_memory_, - uint32_t(UAVRegister::kSharedMemory), (1 << k) - 1), + a_.OpStoreRaw( + dxbc::Dest::U(uav_index_shared_memory_, + uint32_t(UAVRegister::kSharedMemory), (1 << k) - 1), address_src, eM_src); - DxbcOpBreak(); + a_.OpBreak(); } - DxbcOpEndSwitch(); - DxbcOpEndIf(); + a_.OpEndSwitch(); + a_.OpEndIf(); } // control_temp.y is now free. } // Close the memexport possibility check. - DxbcOpEndIf(); + a_.OpEndIf(); // Release control_temp. PopSystemTemp(); diff --git a/src/xenia/gpu/dxbc_shader_translator_om.cc b/src/xenia/gpu/dxbc_shader_translator_om.cc index 8c01648f1..1f8944f58 100644 --- a/src/xenia/gpu/dxbc_shader_translator_om.cc +++ b/src/xenia/gpu/dxbc_shader_translator_om.cc @@ -157,97 +157,97 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() { // 4 (-> 1) to a temp SGPR. 
uint32_t resolution_scale_log2_temp = PushSystemTemp(); system_constants_used_ |= 1ull << kSysConst_EdramResolutionSquareScale_Index; - DxbcOpUShR(DxbcDest::R(resolution_scale_log2_temp, 0b0001), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramResolutionSquareScale_Vec) - .Select(kSysConst_EdramResolutionSquareScale_Comp), - DxbcSrc::LU(2)); + a_.OpUShR(dxbc::Dest::R(resolution_scale_log2_temp, 0b0001), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramResolutionSquareScale_Vec) + .Select(kSysConst_EdramResolutionSquareScale_Comp), + dxbc::Src::LU(2)); // Convert the pixel position (if resolution scale is 4, this will be 2x2 // bigger) to integer to system_temp_rov_params_.zw. // system_temp_rov_params_.z = X host pixel position as uint // system_temp_rov_params_.w = Y host pixel position as uint in_position_used_ |= 0b0011; - DxbcOpFToU(DxbcDest::R(system_temp_rov_params_, 0b1100), - DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition), 0b01000000)); + a_.OpFToU(dxbc::Dest::R(system_temp_rov_params_, 0b1100), + dxbc::Src::V(uint32_t(InOutRegister::kPSInPosition), 0b01000000)); // Revert the resolution scale to convert the position to guest pixels. // system_temp_rov_params_.z = X guest pixel position / sample width // system_temp_rov_params_.w = Y guest pixel position / sample height - DxbcOpUShR(DxbcDest::R(system_temp_rov_params_, 0b1100), - DxbcSrc::R(system_temp_rov_params_), - DxbcSrc::R(resolution_scale_log2_temp, DxbcSrc::kXXXX)); + a_.OpUShR(dxbc::Dest::R(system_temp_rov_params_, 0b1100), + dxbc::Src::R(system_temp_rov_params_), + dxbc::Src::R(resolution_scale_log2_temp, dxbc::Src::kXXXX)); // Convert the position from pixels to samples. 
// system_temp_rov_params_.z = X guest sample 0 position // system_temp_rov_params_.w = Y guest sample 0 position system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - DxbcOpIShL(DxbcDest::R(system_temp_rov_params_, 0b1100), - DxbcSrc::R(system_temp_rov_params_), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_SampleCountLog2_Vec, - (kSysConst_SampleCountLog2_Comp << 4) | - ((kSysConst_SampleCountLog2_Comp + 1) << 6))); + a_.OpIShL(dxbc::Dest::R(system_temp_rov_params_, 0b1100), + dxbc::Src::R(system_temp_rov_params_), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_SampleCountLog2_Vec, + (kSysConst_SampleCountLog2_Comp << 4) | + ((kSysConst_SampleCountLog2_Comp + 1) << 6))); // Get 80x16 samples tile index - start dividing X by 80 by getting the high // part of the result of multiplication of X by 0xCCCCCCCD into X. // system_temp_rov_params_.x = (X * 0xCCCCCCCD) >> 32, or X / 80 * 64 // system_temp_rov_params_.z = X guest sample 0 position // system_temp_rov_params_.w = Y guest sample 0 position - DxbcOpUMul(DxbcDest::R(system_temp_rov_params_, 0b0001), DxbcDest::Null(), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), - DxbcSrc::LU(0xCCCCCCCDu)); + a_.OpUMul(dxbc::Dest::R(system_temp_rov_params_, 0b0001), dxbc::Dest::Null(), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kZZZZ), + dxbc::Src::LU(0xCCCCCCCDu)); // Get 80x16 samples tile index - finish dividing X by 80 and divide Y by 16 // into system_temp_rov_params_.xy. 
// system_temp_rov_params_.x = X tile position // system_temp_rov_params_.y = Y tile position // system_temp_rov_params_.z = X guest sample 0 position // system_temp_rov_params_.w = Y guest sample 0 position - DxbcOpUShR(DxbcDest::R(system_temp_rov_params_, 0b0011), - DxbcSrc::R(system_temp_rov_params_, 0b00001100), - DxbcSrc::LU(6, 4, 0, 0)); + a_.OpUShR(dxbc::Dest::R(system_temp_rov_params_, 0b0011), + dxbc::Src::R(system_temp_rov_params_, 0b00001100), + dxbc::Src::LU(6, 4, 0, 0)); // Get the tile index to system_temp_rov_params_.y. // system_temp_rov_params_.x = X tile position // system_temp_rov_params_.y = tile index // system_temp_rov_params_.z = X guest sample 0 position // system_temp_rov_params_.w = Y guest sample 0 position system_constants_used_ |= 1ull << kSysConst_EdramPitchTiles_Index; - DxbcOpUMAd(DxbcDest::R(system_temp_rov_params_, 0b0010), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramPitchTiles_Vec) - .Select(kSysConst_EdramPitchTiles_Comp), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX)); + a_.OpUMAd(dxbc::Dest::R(system_temp_rov_params_, 0b0010), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramPitchTiles_Vec) + .Select(kSysConst_EdramPitchTiles_Comp), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX)); // Convert the tile index into a tile offset. 
// system_temp_rov_params_.x = X tile position // system_temp_rov_params_.y = tile offset // system_temp_rov_params_.z = X guest sample 0 position // system_temp_rov_params_.w = Y guest sample 0 position - DxbcOpUMul(DxbcDest::Null(), DxbcDest::R(system_temp_rov_params_, 0b0010), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), - DxbcSrc::LU(1280)); + a_.OpUMul(dxbc::Dest::Null(), dxbc::Dest::R(system_temp_rov_params_, 0b0010), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY), + dxbc::Src::LU(1280)); // Get tile-local X sample index into system_temp_rov_params_.z. // system_temp_rov_params_.y = tile offset // system_temp_rov_params_.z = X sample 0 position within the tile // system_temp_rov_params_.w = Y guest sample 0 position - DxbcOpIMAd(DxbcDest::R(system_temp_rov_params_, 0b0100), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LI(-80), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ)); + a_.OpIMAd(dxbc::Dest::R(system_temp_rov_params_, 0b0100), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LI(-80), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kZZZZ)); // Get tile-local Y sample index into system_temp_rov_params_.w. // system_temp_rov_params_.y = tile offset // system_temp_rov_params_.z = X sample 0 position within the tile // system_temp_rov_params_.w = Y sample 0 position within the tile - DxbcOpAnd(DxbcDest::R(system_temp_rov_params_, 0b1000), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW), - DxbcSrc::LU(15)); + a_.OpAnd(dxbc::Dest::R(system_temp_rov_params_, 0b1000), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kWWWW), + dxbc::Src::LU(15)); // Go to the target row within the tile in system_temp_rov_params_.y. 
// system_temp_rov_params_.y = row offset // system_temp_rov_params_.z = X sample 0 position within the tile - DxbcOpIMAd(DxbcDest::R(system_temp_rov_params_, 0b0010), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW), - DxbcSrc::LI(80), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY)); + a_.OpIMAd(dxbc::Dest::R(system_temp_rov_params_, 0b0010), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kWWWW), + dxbc::Src::LI(80), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY)); // Choose in which 40-sample half of the tile the pixel is, for swapping // 40-sample columns when accessing the depth buffer - games expect this // behavior when writing depth back to the EDRAM via color writing (GTA IV, @@ -255,79 +255,79 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() { // system_temp_rov_params_.x = tile-local sample 0 X >= 40 // system_temp_rov_params_.y = row offset // system_temp_rov_params_.z = X sample 0 position within the tile - DxbcOpUGE(DxbcDest::R(system_temp_rov_params_, 0b0001), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), - DxbcSrc::LU(40)); + a_.OpUGE(dxbc::Dest::R(system_temp_rov_params_, 0b0001), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kZZZZ), + dxbc::Src::LU(40)); // Choose what to add to the depth/stencil X position. // system_temp_rov_params_.x = 40 or -40 offset for the depth buffer // system_temp_rov_params_.y = row offset // system_temp_rov_params_.z = X sample 0 position within the tile - DxbcOpMovC(DxbcDest::R(system_temp_rov_params_, 0b0001), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LI(-40), DxbcSrc::LI(40)); + a_.OpMovC(dxbc::Dest::R(system_temp_rov_params_, 0b0001), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LI(-40), dxbc::Src::LI(40)); // Flip tile halves for the depth/stencil buffer. 
// system_temp_rov_params_.x = X sample 0 position within the depth tile // system_temp_rov_params_.y = row offset // system_temp_rov_params_.z = X sample 0 position within the tile - DxbcOpIAdd(DxbcDest::R(system_temp_rov_params_, 0b0001), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX)); + a_.OpIAdd(dxbc::Dest::R(system_temp_rov_params_, 0b0001), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kZZZZ), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX)); if (any_color_targets_written) { // Write 32bpp color offset to system_temp_rov_params_.z. // system_temp_rov_params_.x = X sample 0 position within the depth tile // system_temp_rov_params_.y = row offset // system_temp_rov_params_.z = unscaled 32bpp color offset - DxbcOpIAdd(DxbcDest::R(system_temp_rov_params_, 0b0100), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ)); + a_.OpIAdd(dxbc::Dest::R(system_temp_rov_params_, 0b0100), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kZZZZ)); } // Write depth/stencil offset to system_temp_rov_params_.y. // system_temp_rov_params_.y = unscaled 32bpp depth/stencil offset // system_temp_rov_params_.z = unscaled 32bpp color offset if needed - DxbcOpIAdd(DxbcDest::R(system_temp_rov_params_, 0b0010), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX)); + a_.OpIAdd(dxbc::Dest::R(system_temp_rov_params_, 0b0010), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX)); // Add the EDRAM base for depth/stencil. 
// system_temp_rov_params_.y = unscaled 32bpp depth/stencil address // system_temp_rov_params_.z = unscaled 32bpp color offset if needed system_constants_used_ |= 1ull << kSysConst_EdramDepthBaseDwords_Index; - DxbcOpIAdd(DxbcDest::R(system_temp_rov_params_, 0b0010), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramDepthBaseDwords_Vec) - .Select(kSysConst_EdramDepthBaseDwords_Comp)); + a_.OpIAdd(dxbc::Dest::R(system_temp_rov_params_, 0b0010), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramDepthBaseDwords_Vec) + .Select(kSysConst_EdramDepthBaseDwords_Comp)); // Apply the resolution scale. - DxbcOpIf(true, DxbcSrc::R(resolution_scale_log2_temp, DxbcSrc::kXXXX)); + a_.OpIf(true, dxbc::Src::R(resolution_scale_log2_temp, dxbc::Src::kXXXX)); // Release resolution_scale_log2_temp. PopSystemTemp(); { - DxbcDest offsets_dest(DxbcDest::R( + dxbc::Dest offsets_dest(dxbc::Dest::R( system_temp_rov_params_, any_color_targets_written ? 0b0110 : 0b0010)); // Scale the offsets by the resolution scale. // system_temp_rov_params_.y = scaled 32bpp depth/stencil first host pixel // address // system_temp_rov_params_.z = scaled 32bpp color first host pixel offset if // needed - DxbcOpIShL(offsets_dest, DxbcSrc::R(system_temp_rov_params_), - DxbcSrc::LU(2)); + a_.OpIShL(offsets_dest, dxbc::Src::R(system_temp_rov_params_), + dxbc::Src::LU(2)); // Add host pixel offsets. // system_temp_rov_params_.y = scaled 32bpp depth/stencil address // system_temp_rov_params_.z = scaled 32bpp color offset if needed in_position_used_ |= 0b0011; for (uint32_t i = 0; i < 2; ++i) { // Convert a position component to integer. 
- DxbcOpFToU(DxbcDest::R(system_temp_rov_params_, 0b0001), - DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition)).Select(i)); + a_.OpFToU(dxbc::Dest::R(system_temp_rov_params_, 0b0001), + dxbc::Src::V(uint32_t(InOutRegister::kPSInPosition)).Select(i)); // Insert the host pixel offset on each axis. - DxbcOpBFI(offsets_dest, DxbcSrc::LU(1), DxbcSrc::LU(i), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::R(system_temp_rov_params_)); + a_.OpBFI(offsets_dest, dxbc::Src::LU(1), dxbc::Src::LU(i), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::R(system_temp_rov_params_)); } } // Close the resolution scale conditional. - DxbcOpEndIf(); + a_.OpEndIf(); if (any_color_targets_written) { // Get the 64bpp color offset to system_temp_rov_params_.w. @@ -336,9 +336,9 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() { // system_temp_rov_params_.y = scaled 32bpp depth/stencil address // system_temp_rov_params_.z = scaled 32bpp color offset // system_temp_rov_params_.w = scaled 64bpp color offset - DxbcOpIShL(DxbcDest::R(system_temp_rov_params_, 0b1000), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), - DxbcSrc::LU(1)); + a_.OpIShL(dxbc::Dest::R(system_temp_rov_params_, 0b1000), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kZZZZ), + dxbc::Src::LU(1)); } // *************************************************************************** @@ -350,66 +350,66 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() { // Check if 4x MSAA is enabled. 
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - DxbcOpIf(true, DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_SampleCountLog2_Vec) - .Select(kSysConst_SampleCountLog2_Comp)); + a_.OpIf(true, dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_SampleCountLog2_Vec) + .Select(kSysConst_SampleCountLog2_Comp)); { // Copy the 4x AA coverage to system_temp_rov_params_.x, making top-right // the sample [2] and bottom-left the sample [1] (the opposite of Direct3D // 12), because on the Xbox 360, 2x MSAA doubles the storage width, 4x MSAA // doubles the storage height. // Flip samples in bits 0:1 to bits 29:30. - DxbcOpBFRev(DxbcDest::R(system_temp_rov_params_, 0b0001), - DxbcSrc::VCoverage()); - DxbcOpUShR(DxbcDest::R(system_temp_rov_params_, 0b0001), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(29)); - DxbcOpBFI(DxbcDest::R(system_temp_rov_params_, 0b0001), DxbcSrc::LU(2), - DxbcSrc::LU(1), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::VCoverage()); + a_.OpBFRev(dxbc::Dest::R(system_temp_rov_params_, 0b0001), + dxbc::Src::VCoverage()); + a_.OpUShR(dxbc::Dest::R(system_temp_rov_params_, 0b0001), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LU(29)); + a_.OpBFI(dxbc::Dest::R(system_temp_rov_params_, 0b0001), dxbc::Src::LU(2), + dxbc::Src::LU(1), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::VCoverage()); } // Handle 1 or 2 samples. - DxbcOpElse(); + a_.OpElse(); { // Extract sample 3 coverage, which will be used as sample 1. 
- DxbcOpUBFE(DxbcDest::R(system_temp_rov_params_, 0b0001), DxbcSrc::LU(1), - DxbcSrc::LU(3), DxbcSrc::VCoverage()); + a_.OpUBFE(dxbc::Dest::R(system_temp_rov_params_, 0b0001), dxbc::Src::LU(1), + dxbc::Src::LU(3), dxbc::Src::VCoverage()); // Combine coverage of samples 0 (in bit 0 of vCoverage) and 3 (in bit 0 of // system_temp_rov_params_.x). - DxbcOpBFI(DxbcDest::R(system_temp_rov_params_, 0b0001), DxbcSrc::LU(31), - DxbcSrc::LU(1), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::VCoverage()); + a_.OpBFI(dxbc::Dest::R(system_temp_rov_params_, 0b0001), dxbc::Src::LU(31), + dxbc::Src::LU(1), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::VCoverage()); } // Close the 4x MSAA conditional. - DxbcOpEndIf(); + a_.OpEndIf(); } void DxbcShaderTranslator::ROV_DepthStencilTest() { uint32_t temp = PushSystemTemp(); - DxbcDest temp_x_dest(DxbcDest::R(temp, 0b0001)); - DxbcSrc temp_x_src(DxbcSrc::R(temp, DxbcSrc::kXXXX)); - DxbcDest temp_y_dest(DxbcDest::R(temp, 0b0010)); - DxbcSrc temp_y_src(DxbcSrc::R(temp, DxbcSrc::kYYYY)); - DxbcDest temp_z_dest(DxbcDest::R(temp, 0b0100)); - DxbcSrc temp_z_src(DxbcSrc::R(temp, DxbcSrc::kZZZZ)); - DxbcDest temp_w_dest(DxbcDest::R(temp, 0b1000)); - DxbcSrc temp_w_src(DxbcSrc::R(temp, DxbcSrc::kWWWW)); + dxbc::Dest temp_x_dest(dxbc::Dest::R(temp, 0b0001)); + dxbc::Src temp_x_src(dxbc::Src::R(temp, dxbc::Src::kXXXX)); + dxbc::Dest temp_y_dest(dxbc::Dest::R(temp, 0b0010)); + dxbc::Src temp_y_src(dxbc::Src::R(temp, dxbc::Src::kYYYY)); + dxbc::Dest temp_z_dest(dxbc::Dest::R(temp, 0b0100)); + dxbc::Src temp_z_src(dxbc::Src::R(temp, dxbc::Src::kZZZZ)); + dxbc::Dest temp_w_dest(dxbc::Dest::R(temp, 0b1000)); + dxbc::Src temp_w_src(dxbc::Src::R(temp, dxbc::Src::kWWWW)); // Check whether depth/stencil is enabled. 
// temp.x = kSysFlag_ROVDepthStencil system_constants_used_ |= 1ull << kSysConst_Flags_Index; - DxbcOpAnd(temp_x_dest, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_Flags_Vec) - .Select(kSysConst_Flags_Comp), - DxbcSrc::LU(kSysFlag_ROVDepthStencil)); + a_.OpAnd(temp_x_dest, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp), + dxbc::Src::LU(kSysFlag_ROVDepthStencil)); // Open the depth/stencil enabled conditional. // temp.x = free - DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); bool depth_stencil_early = ROV_IsDepthStencilEarly(); bool shader_writes_depth = current_shader().writes_depth(); @@ -420,11 +420,11 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // which is dynamic GPU state) discards the pixel. So, write directly to the // persistent register, system_temp_depth_stencil_, instead of a local // temporary register. - DxbcDest sample_depth_stencil_dest( - depth_stencil_early ? DxbcDest::R(system_temp_depth_stencil_, 1 << i) + dxbc::Dest sample_depth_stencil_dest( + depth_stencil_early ? dxbc::Dest::R(system_temp_depth_stencil_, 1 << i) : temp_x_dest); - DxbcSrc sample_depth_stencil_src( - depth_stencil_early ? DxbcSrc::R(system_temp_depth_stencil_).Select(i) + dxbc::Src sample_depth_stencil_src( + depth_stencil_early ? dxbc::Src::R(system_temp_depth_stencil_).Select(i) : temp_x_src); if (!i) { @@ -433,31 +433,31 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // after the pixel shader in the pipeline, at least on Direct3D 11 and // Vulkan, thus applies to the shader's depth output too). 
system_constants_used_ |= 1ull << kSysConst_EdramDepthRange_Index; - DxbcOpMax(DxbcDest::R(system_temp_depth_stencil_, 0b0001), - DxbcSrc::R(system_temp_depth_stencil_, DxbcSrc::kXXXX), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramDepthRange_Vec) - .Select(kSysConst_EdramDepthRangeOffset_Comp)); + a_.OpMax(dxbc::Dest::R(system_temp_depth_stencil_, 0b0001), + dxbc::Src::R(system_temp_depth_stencil_, dxbc::Src::kXXXX), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramDepthRange_Vec) + .Select(kSysConst_EdramDepthRangeOffset_Comp)); // Calculate the upper Z range bound to temp.x for clamping after // biasing. // temp.x = viewport maximum depth system_constants_used_ |= 1ull << kSysConst_EdramDepthRange_Index; - DxbcOpAdd(temp_x_dest, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramDepthRange_Vec) - .Select(kSysConst_EdramDepthRangeOffset_Comp), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramDepthRange_Vec) - .Select(kSysConst_EdramDepthRangeScale_Comp)); + a_.OpAdd(temp_x_dest, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramDepthRange_Vec) + .Select(kSysConst_EdramDepthRangeOffset_Comp), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramDepthRange_Vec) + .Select(kSysConst_EdramDepthRangeScale_Comp)); // Clamp oDepth to the upper viewport depth bound (already not above 1, // but saturate for total safety). 
// temp.x = free - DxbcOpMin(DxbcDest::R(system_temp_depth_stencil_, 0b0001), - DxbcSrc::R(system_temp_depth_stencil_, DxbcSrc::kXXXX), - temp_x_src, true); + a_.OpMin(dxbc::Dest::R(system_temp_depth_stencil_, 0b0001), + dxbc::Src::R(system_temp_depth_stencil_, dxbc::Src::kXXXX), + temp_x_src, true); // Convert the shader-generated depth to 24-bit, using temp.x as // temporary. ROV_DepthTo24Bit(system_temp_depth_stencil_, 0, @@ -467,15 +467,15 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // of coverage for polygon offset. // temp.x = first sample's clip space Z*W // temp.y = first sample's clip space W - DxbcOpEvalSampleIndex( - DxbcDest::R(temp, 0b0011), - DxbcSrc::V(uint32_t(InOutRegister::kPSInClipSpaceZW)), - DxbcSrc::LU(0)); + a_.OpEvalSampleIndex( + dxbc::Dest::R(temp, 0b0011), + dxbc::Src::V(uint32_t(InOutRegister::kPSInClipSpaceZW)), + dxbc::Src::LU(0)); // Calculate the first sample's Z/W to temp.x for conversion to 24-bit // and depth test. // temp.x? = first sample's clip space Z // temp.y = free - DxbcOpDiv(sample_depth_stencil_dest, temp_x_src, temp_y_src, true); + a_.OpDiv(sample_depth_stencil_dest, temp_x_src, temp_y_src, true); // Apply viewport Z range to the first sample because this would affect // the slope-scaled depth bias (tested on PC on Direct3D 12, by // comparing the fraction of the polygon's area with depth clamped - @@ -484,16 +484,16 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // offset as well). // temp.x? 
= first sample's viewport space Z system_constants_used_ |= 1ull << kSysConst_EdramDepthRange_Index; - DxbcOpMAd(sample_depth_stencil_dest, sample_depth_stencil_src, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramDepthRange_Vec) - .Select(kSysConst_EdramDepthRangeScale_Comp), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramDepthRange_Vec) - .Select(kSysConst_EdramDepthRangeOffset_Comp), - true); + a_.OpMAd(sample_depth_stencil_dest, sample_depth_stencil_src, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramDepthRange_Vec) + .Select(kSysConst_EdramDepthRangeScale_Comp), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramDepthRange_Vec) + .Select(kSysConst_EdramDepthRangeOffset_Comp), + true); // Get the derivatives of a sample's depth, for the slope-scaled polygon // offset. Probably not very significant that it's for the sample 0 // rather than for the center, likely neither is accurate because Xenos @@ -502,14 +502,14 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // temp.x? = first sample's viewport space Z // temp.y = ddx(z) // temp.z = ddy(z) - DxbcOpDerivRTXCoarse(temp_y_dest, sample_depth_stencil_src); - DxbcOpDerivRTYCoarse(temp_z_dest, sample_depth_stencil_src); + a_.OpDerivRTXCoarse(temp_y_dest, sample_depth_stencil_src); + a_.OpDerivRTYCoarse(temp_z_dest, sample_depth_stencil_src); // Get the maximum depth slope for polygon offset to temp.y. // https://docs.microsoft.com/en-us/windows/desktop/direct3d9/depth-bias // temp.x? = first sample's viewport space Z // temp.y = max(|ddx(z)|, |ddy(z)|) // temp.z = free - DxbcOpMax(temp_y_dest, temp_y_src.Abs(), temp_z_src.Abs()); + a_.OpMax(temp_y_dest, temp_y_src.Abs(), temp_z_src.Abs()); // Copy the needed polygon offset values to temp.zw. // temp.x? 
= first sample's viewport space Z // temp.y = max(|ddx(z)|, |ddy(z)|) @@ -519,40 +519,41 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { system_constants_used_ |= (1ull << kSysConst_EdramPolyOffsetFront_Index) | (1ull << kSysConst_EdramPolyOffsetBack_Index); - DxbcOpMovC( - DxbcDest::R(temp, 0b1100), - DxbcSrc::V(uint32_t(InOutRegister::kPSInFrontFace), DxbcSrc::kXXXX), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramPolyOffsetFront_Vec, - (kSysConst_EdramPolyOffsetFrontScale_Comp << 4) | - (kSysConst_EdramPolyOffsetFrontOffset_Comp << 6)), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramPolyOffsetBack_Vec, - (kSysConst_EdramPolyOffsetBackScale_Comp << 4) | - (kSysConst_EdramPolyOffsetBackOffset_Comp << 6))); + a_.OpMovC( + dxbc::Dest::R(temp, 0b1100), + dxbc::Src::V(uint32_t(InOutRegister::kPSInFrontFace), + dxbc::Src::kXXXX), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramPolyOffsetFront_Vec, + (kSysConst_EdramPolyOffsetFrontScale_Comp << 4) | + (kSysConst_EdramPolyOffsetFrontOffset_Comp << 6)), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramPolyOffsetBack_Vec, + (kSysConst_EdramPolyOffsetBackScale_Comp << 4) | + (kSysConst_EdramPolyOffsetBackOffset_Comp << 6))); // Apply the slope scale and the constant bias to the offset. // temp.x? = first sample's viewport space Z // temp.y = polygon offset // temp.z = free // temp.w = free - DxbcOpMAd(temp_y_dest, temp_y_src, temp_z_src, temp_w_src); + a_.OpMAd(temp_y_dest, temp_y_src, temp_z_src, temp_w_src); // Calculate the upper Z range bound to temp.z for clamping after // biasing. // temp.x? 
= first sample's viewport space Z // temp.y = polygon offset // temp.z = viewport maximum depth system_constants_used_ |= 1ull << kSysConst_EdramDepthRange_Index; - DxbcOpAdd(temp_z_dest, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramDepthRange_Vec) - .Select(kSysConst_EdramDepthRangeOffset_Comp), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramDepthRange_Vec) - .Select(kSysConst_EdramDepthRangeScale_Comp)); + a_.OpAdd(temp_z_dest, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramDepthRange_Vec) + .Select(kSysConst_EdramDepthRangeOffset_Comp), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramDepthRange_Vec) + .Select(kSysConst_EdramDepthRangeScale_Comp)); } } @@ -561,20 +562,21 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // temp.y = polygon offset if not writing to oDepth // temp.z = viewport maximum depth if not writing to oDepth // temp.w = coverage of the current sample - DxbcOpAnd(temp_w_dest, DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(1 << i)); + a_.OpAnd(temp_w_dest, + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LU(1 << i)); // Check if the current sample is covered. Release 1 VGPR. // temp.x = first sample's viewport space Z if not writing to oDepth // temp.y = polygon offset if not writing to oDepth // temp.z = viewport maximum depth if not writing to oDepth // temp.w = free - DxbcOpIf(true, temp_w_src); + a_.OpIf(true, temp_w_src); if (shader_writes_depth) { // Copy the 24-bit depth common to all samples to sample_depth_stencil. 
// temp.x = shader-generated 24-bit depth - DxbcOpMov(sample_depth_stencil_dest, - DxbcSrc::R(system_temp_depth_stencil_, DxbcSrc::kXXXX)); + a_.OpMov(sample_depth_stencil_dest, + dxbc::Src::R(system_temp_depth_stencil_, dxbc::Src::kXXXX)); } else { if (i) { // Sample's depth precalculated for sample 0 (for slope-scaled depth @@ -599,66 +601,67 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // temp.w = sample's clip space W if (i == 1) { system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - DxbcOpMovC(sample_depth_stencil_dest, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_SampleCountLog2_Vec) - .Select(kSysConst_SampleCountLog2_Comp), - DxbcSrc::LU(3), DxbcSrc::LU(2)); - DxbcOpEvalSampleIndex( - DxbcDest::R(temp, 0b1001), - DxbcSrc::V(uint32_t(InOutRegister::kPSInClipSpaceZW), 0b01000000), + a_.OpMovC(sample_depth_stencil_dest, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_SampleCountLog2_Vec) + .Select(kSysConst_SampleCountLog2_Comp), + dxbc::Src::LU(3), dxbc::Src::LU(2)); + a_.OpEvalSampleIndex( + dxbc::Dest::R(temp, 0b1001), + dxbc::Src::V(uint32_t(InOutRegister::kPSInClipSpaceZW), + 0b01000000), sample_depth_stencil_src); } else { - DxbcOpEvalSampleIndex( - DxbcDest::R(temp, 0b1001), - DxbcSrc::V(uint32_t(InOutRegister::kPSInClipSpaceZW), 0b01000000), - DxbcSrc::LU(i == 2 ? 1 : i)); + a_.OpEvalSampleIndex( + dxbc::Dest::R(temp, 0b1001), + dxbc::Src::V(uint32_t(InOutRegister::kPSInClipSpaceZW), + 0b01000000), + dxbc::Src::LU(i == 2 ? 1 : i)); } // Calculate Z/W for the current sample from the evaluated Z*W and W. // temp.x? 
= sample's clip space Z // temp.y = polygon offset if not writing to oDepth // temp.z = viewport maximum depth if not writing to oDepth // temp.w = free - DxbcOpDiv(sample_depth_stencil_dest, temp_x_src, temp_w_src, true); + a_.OpDiv(sample_depth_stencil_dest, temp_x_src, temp_w_src, true); // Apply viewport Z range the same way as it was applied to sample 0. // temp.x? = sample's viewport space Z // temp.y = polygon offset if not writing to oDepth // temp.z = viewport maximum depth if not writing to oDepth system_constants_used_ |= 1ull << kSysConst_EdramDepthRange_Index; - DxbcOpMAd(sample_depth_stencil_dest, sample_depth_stencil_src, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramDepthRange_Vec) - .Select(kSysConst_EdramDepthRangeScale_Comp), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramDepthRange_Vec) - .Select(kSysConst_EdramDepthRangeOffset_Comp), - true); + a_.OpMAd(sample_depth_stencil_dest, sample_depth_stencil_src, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramDepthRange_Vec) + .Select(kSysConst_EdramDepthRangeScale_Comp), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramDepthRange_Vec) + .Select(kSysConst_EdramDepthRangeOffset_Comp), + true); } // Add the bias to the depth of the sample. // temp.x? = sample's unclamped biased Z // temp.y = polygon offset if not writing to oDepth // temp.z = viewport maximum depth if not writing to oDepth - DxbcOpAdd(sample_depth_stencil_dest, sample_depth_stencil_src, - temp_y_src); + a_.OpAdd(sample_depth_stencil_dest, sample_depth_stencil_src, temp_y_src); // Clamp the biased depth to the lower viewport depth bound. // temp.x? 
= sample's lower-clamped biased Z // temp.y = polygon offset if not writing to oDepth // temp.z = viewport maximum depth if not writing to oDepth system_constants_used_ |= 1ull << kSysConst_EdramDepthRange_Index; - DxbcOpMax(sample_depth_stencil_dest, sample_depth_stencil_src, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramDepthRange_Vec) - .Select(kSysConst_EdramDepthRangeOffset_Comp)); + a_.OpMax(sample_depth_stencil_dest, sample_depth_stencil_src, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramDepthRange_Vec) + .Select(kSysConst_EdramDepthRangeOffset_Comp)); // Clamp the biased depth to the upper viewport depth bound. // temp.x? = sample's biased Z // temp.y = polygon offset if not writing to oDepth // temp.z = viewport maximum depth if not writing to oDepth - DxbcOpMin(sample_depth_stencil_dest, sample_depth_stencil_src, temp_z_src, - true); + a_.OpMin(sample_depth_stencil_dest, sample_depth_stencil_src, temp_z_src, + true); // Convert the sample's depth to 24-bit, using temp.w as a temporary. // temp.x? 
= sample's 24-bit Z // temp.y = polygon offset if not writing to oDepth @@ -676,73 +679,72 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { if (uav_index_edram_ == kBindingIndexUnallocated) { uav_index_edram_ = uav_count_++; } - DxbcOpLdUAVTyped(temp_w_dest, - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1, - DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEdram), - DxbcSrc::kXXXX)); + a_.OpLdUAVTyped( + temp_w_dest, dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY), 1, + dxbc::Src::U(uav_index_edram_, uint32_t(UAVRegister::kEdram), + dxbc::Src::kXXXX)); uint32_t sample_temp = PushSystemTemp(); - DxbcDest sample_temp_x_dest(DxbcDest::R(sample_temp, 0b0001)); - DxbcSrc sample_temp_x_src(DxbcSrc::R(sample_temp, DxbcSrc::kXXXX)); - DxbcDest sample_temp_y_dest(DxbcDest::R(sample_temp, 0b0010)); - DxbcSrc sample_temp_y_src(DxbcSrc::R(sample_temp, DxbcSrc::kYYYY)); - DxbcDest sample_temp_z_dest(DxbcDest::R(sample_temp, 0b0100)); - DxbcSrc sample_temp_z_src(DxbcSrc::R(sample_temp, DxbcSrc::kZZZZ)); + dxbc::Dest sample_temp_x_dest(dxbc::Dest::R(sample_temp, 0b0001)); + dxbc::Src sample_temp_x_src(dxbc::Src::R(sample_temp, dxbc::Src::kXXXX)); + dxbc::Dest sample_temp_y_dest(dxbc::Dest::R(sample_temp, 0b0010)); + dxbc::Src sample_temp_y_src(dxbc::Src::R(sample_temp, dxbc::Src::kYYYY)); + dxbc::Dest sample_temp_z_dest(dxbc::Dest::R(sample_temp, 0b0100)); + dxbc::Src sample_temp_z_src(dxbc::Src::R(sample_temp, dxbc::Src::kZZZZ)); // Depth test. // Extract the old depth part to sample_depth_stencil. // sample_temp.x = old depth - DxbcOpUShR(sample_temp_x_dest, temp_w_src, DxbcSrc::LU(8)); + a_.OpUShR(sample_temp_x_dest, temp_w_src, dxbc::Src::LU(8)); // Get the difference between the new and the old depth, > 0 - greater, // == 0 - equal, < 0 - less. 
// sample_temp.x = old depth // sample_temp.y = depth difference - DxbcOpIAdd(sample_temp_y_dest, sample_depth_stencil_src, - -sample_temp_x_src); + a_.OpIAdd(sample_temp_y_dest, sample_depth_stencil_src, -sample_temp_x_src); // Check if the depth is "less" or "greater or equal". // sample_temp.x = old depth // sample_temp.y = depth difference // sample_temp.z = depth difference less than 0 - DxbcOpILT(sample_temp_z_dest, sample_temp_y_src, DxbcSrc::LI(0)); + a_.OpILT(sample_temp_z_dest, sample_temp_y_src, dxbc::Src::LI(0)); // Choose the passed depth function bits for "less" or for "greater". // sample_temp.x = old depth // sample_temp.y = depth difference // sample_temp.z = depth function passed bits for "less" or "greater" - DxbcOpMovC(sample_temp_z_dest, sample_temp_z_src, - DxbcSrc::LU(kSysFlag_ROVDepthPassIfLess), - DxbcSrc::LU(kSysFlag_ROVDepthPassIfGreater)); + a_.OpMovC(sample_temp_z_dest, sample_temp_z_src, + dxbc::Src::LU(kSysFlag_ROVDepthPassIfLess), + dxbc::Src::LU(kSysFlag_ROVDepthPassIfGreater)); // Do the "equal" testing. // sample_temp.x = old depth // sample_temp.y = depth function passed bits // sample_temp.z = free - DxbcOpMovC(sample_temp_y_dest, sample_temp_y_src, sample_temp_z_src, - DxbcSrc::LU(kSysFlag_ROVDepthPassIfEqual)); + a_.OpMovC(sample_temp_y_dest, sample_temp_y_src, sample_temp_z_src, + dxbc::Src::LU(kSysFlag_ROVDepthPassIfEqual)); // Mask the resulting bits with the ones that should pass. 
// sample_temp.x = old depth // sample_temp.y = masked depth function passed bits // sample_temp.z = free system_constants_used_ |= 1ull << kSysConst_Flags_Index; - DxbcOpAnd(sample_temp_y_dest, sample_temp_y_src, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_Flags_Vec) - .Select(kSysConst_Flags_Comp)); + a_.OpAnd(sample_temp_y_dest, sample_temp_y_src, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp)); // Check if depth test has passed. // sample_temp.x = old depth // sample_temp.y = free - DxbcOpIf(true, sample_temp_y_src); + a_.OpIf(true, sample_temp_y_src); { // Extract the depth write flag. // sample_temp.x = old depth // sample_temp.y = depth write mask system_constants_used_ |= 1ull << kSysConst_Flags_Index; - DxbcOpAnd(sample_temp_y_dest, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_Flags_Vec) - .Select(kSysConst_Flags_Comp), - DxbcSrc::LU(kSysFlag_ROVDepthWrite)); + a_.OpAnd(sample_temp_y_dest, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp), + dxbc::Src::LU(kSysFlag_ROVDepthWrite)); // If depth writing is disabled, don't change the depth. // temp.x? = resulting sample depth after the depth test // temp.y = polygon offset if not writing to oDepth @@ -750,354 +752,353 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // temp.w = old depth/stencil // sample_temp.x = free // sample_temp.y = free - DxbcOpMovC(sample_depth_stencil_dest, sample_temp_y_src, - sample_depth_stencil_src, sample_temp_x_src); + a_.OpMovC(sample_depth_stencil_dest, sample_temp_y_src, + sample_depth_stencil_src, sample_temp_x_src); } // Depth test has failed. - DxbcOpElse(); + a_.OpElse(); { // Exclude the bit from the covered sample mask. 
// sample_temp.x = old depth - DxbcOpAnd(DxbcDest::R(system_temp_rov_params_, 0b0001), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(~uint32_t(1 << i))); + a_.OpAnd(dxbc::Dest::R(system_temp_rov_params_, 0b0001), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LU(~uint32_t(1 << i))); } - DxbcOpEndIf(); + a_.OpEndIf(); // Create packed depth/stencil, with the stencil value unchanged at this // point. // temp.x? = resulting sample depth, current resulting stencil // temp.y = polygon offset if not writing to oDepth // temp.z = viewport maximum depth if not writing to oDepth // temp.w = old depth/stencil - DxbcOpBFI(sample_depth_stencil_dest, DxbcSrc::LU(24), DxbcSrc::LU(8), - sample_depth_stencil_src, temp_w_src); + a_.OpBFI(sample_depth_stencil_dest, dxbc::Src::LU(24), dxbc::Src::LU(8), + sample_depth_stencil_src, temp_w_src); // Stencil test. // Extract the stencil test bit. // sample_temp.x = stencil test enabled system_constants_used_ |= 1ull << kSysConst_Flags_Index; - DxbcOpAnd(sample_temp_x_dest, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_Flags_Vec) - .Select(kSysConst_Flags_Comp), - DxbcSrc::LU(kSysFlag_ROVStencilTest)); + a_.OpAnd(sample_temp_x_dest, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp), + dxbc::Src::LU(kSysFlag_ROVStencilTest)); // Check if stencil test is enabled. 
// sample_temp.x = free - DxbcOpIf(true, sample_temp_x_src); + a_.OpIf(true, sample_temp_x_src); { - DxbcSrc stencil_front_src( - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramStencil_Front_Vec)); - DxbcSrc stencil_back_src( - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramStencil_Back_Vec)); + dxbc::Src stencil_front_src( + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramStencil_Front_Vec)); + dxbc::Src stencil_back_src( + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramStencil_Back_Vec)); // Check the current face to get the reference and apply the read mask. in_front_face_used_ = true; - DxbcOpIf(true, DxbcSrc::V(uint32_t(InOutRegister::kPSInFrontFace), - DxbcSrc::kXXXX)); + a_.OpIf(true, dxbc::Src::V(uint32_t(InOutRegister::kPSInFrontFace), + dxbc::Src::kXXXX)); system_constants_used_ |= 1ull << kSysConst_EdramStencil_Index; for (uint32_t j = 0; j < 2; ++j) { if (j) { // Go to the back face. - DxbcOpElse(); + a_.OpElse(); } - DxbcSrc stencil_side_src(j ? stencil_back_src : stencil_front_src); + dxbc::Src stencil_side_src(j ? stencil_back_src : stencil_front_src); // Read-mask the stencil reference. // sample_temp.x = read-masked stencil reference - DxbcOpAnd( - sample_temp_x_dest, - stencil_side_src.Select(kSysConst_EdramStencil_Reference_Comp), - stencil_side_src.Select(kSysConst_EdramStencil_ReadMask_Comp)); + a_.OpAnd(sample_temp_x_dest, + stencil_side_src.Select(kSysConst_EdramStencil_Reference_Comp), + stencil_side_src.Select(kSysConst_EdramStencil_ReadMask_Comp)); // Read-mask the old stencil value (also dropping the depth bits). 
// sample_temp.x = read-masked stencil reference // sample_temp.y = read-masked old stencil - DxbcOpAnd( - sample_temp_y_dest, temp_w_src, - stencil_side_src.Select(kSysConst_EdramStencil_ReadMask_Comp)); + a_.OpAnd(sample_temp_y_dest, temp_w_src, + stencil_side_src.Select(kSysConst_EdramStencil_ReadMask_Comp)); } // Close the face check. - DxbcOpEndIf(); + a_.OpEndIf(); // Get the difference between the stencil reference and the old stencil, // > 0 - greater, == 0 - equal, < 0 - less. // sample_temp.x = stencil difference // sample_temp.y = free - DxbcOpIAdd(sample_temp_x_dest, sample_temp_x_src, -sample_temp_y_src); + a_.OpIAdd(sample_temp_x_dest, sample_temp_x_src, -sample_temp_y_src); // Check if the stencil is "less" or "greater or equal". // sample_temp.x = stencil difference // sample_temp.y = stencil difference less than 0 - DxbcOpILT(sample_temp_y_dest, sample_temp_x_src, DxbcSrc::LI(0)); + a_.OpILT(sample_temp_y_dest, sample_temp_x_src, dxbc::Src::LI(0)); // Choose the passed depth function bits for "less" or for "greater". // sample_temp.x = stencil difference // sample_temp.y = stencil function passed bits for "less" or "greater" - DxbcOpMovC(sample_temp_y_dest, sample_temp_y_src, - DxbcSrc::LU(uint32_t(xenos::CompareFunction::kLess)), - DxbcSrc::LU(uint32_t(xenos::CompareFunction::kGreater))); + a_.OpMovC(sample_temp_y_dest, sample_temp_y_src, + dxbc::Src::LU(uint32_t(xenos::CompareFunction::kLess)), + dxbc::Src::LU(uint32_t(xenos::CompareFunction::kGreater))); // Do the "equal" testing. // sample_temp.x = stencil function passed bits // sample_temp.y = free - DxbcOpMovC(sample_temp_x_dest, sample_temp_x_src, sample_temp_y_src, - DxbcSrc::LU(uint32_t(xenos::CompareFunction::kEqual))); + a_.OpMovC(sample_temp_x_dest, sample_temp_x_src, sample_temp_y_src, + dxbc::Src::LU(uint32_t(xenos::CompareFunction::kEqual))); // Get the comparison function and the operations for the current face. 
// sample_temp.x = stencil function passed bits // sample_temp.y = stencil function and operations in_front_face_used_ = true; system_constants_used_ |= 1ull << kSysConst_EdramStencil_Index; - DxbcOpMovC( - sample_temp_y_dest, - DxbcSrc::V(uint32_t(InOutRegister::kPSInFrontFace), DxbcSrc::kXXXX), - stencil_front_src.Select(kSysConst_EdramStencil_FuncOps_Comp), - stencil_back_src.Select(kSysConst_EdramStencil_FuncOps_Comp)); + a_.OpMovC(sample_temp_y_dest, + dxbc::Src::V(uint32_t(InOutRegister::kPSInFrontFace), + dxbc::Src::kXXXX), + stencil_front_src.Select(kSysConst_EdramStencil_FuncOps_Comp), + stencil_back_src.Select(kSysConst_EdramStencil_FuncOps_Comp)); // Mask the resulting bits with the ones that should pass (the comparison // function is in the low 3 bits of the constant, and only ANDing 3-bit // values with it, so safe not to UBFE the function). // sample_temp.x = stencil test result // sample_temp.y = stencil function and operations - DxbcOpAnd(sample_temp_x_dest, sample_temp_x_src, sample_temp_y_src); + a_.OpAnd(sample_temp_x_dest, sample_temp_x_src, sample_temp_y_src); // Handle passing and failure of the stencil test, to choose the operation // and to discard the sample. // sample_temp.x = free // sample_temp.y = stencil function and operations - DxbcOpIf(true, sample_temp_x_src); + a_.OpIf(true, sample_temp_x_src); { // Check if depth test has passed for this sample to sample_temp.y (the // sample will only be processed if it's covered, so the only thing that // could unset the bit at this point that matters is the depth test). // sample_temp.x = depth test result // sample_temp.y = stencil function and operations - DxbcOpAnd(sample_temp_x_dest, - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(1 << i)); + a_.OpAnd(sample_temp_x_dest, + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LU(1 << i)); // Choose the bit offset of the stencil operation. 
// sample_temp.x = sample operation offset // sample_temp.y = stencil function and operations - DxbcOpMovC(sample_temp_x_dest, sample_temp_x_src, DxbcSrc::LU(6), - DxbcSrc::LU(9)); + a_.OpMovC(sample_temp_x_dest, sample_temp_x_src, dxbc::Src::LU(6), + dxbc::Src::LU(9)); // Extract the stencil operation. // sample_temp.x = stencil operation // sample_temp.y = free - DxbcOpUBFE(sample_temp_x_dest, DxbcSrc::LU(3), sample_temp_x_src, - sample_temp_y_src); + a_.OpUBFE(sample_temp_x_dest, dxbc::Src::LU(3), sample_temp_x_src, + sample_temp_y_src); } // Stencil test has failed. - DxbcOpElse(); + a_.OpElse(); { // Extract the stencil fail operation. // sample_temp.x = stencil operation // sample_temp.y = free - DxbcOpUBFE(sample_temp_x_dest, DxbcSrc::LU(3), DxbcSrc::LU(3), - sample_temp_y_src); + a_.OpUBFE(sample_temp_x_dest, dxbc::Src::LU(3), dxbc::Src::LU(3), + sample_temp_y_src); // Exclude the bit from the covered sample mask. // sample_temp.x = stencil operation - DxbcOpAnd(DxbcDest::R(system_temp_rov_params_, 0b0001), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(~uint32_t(1 << i))); + a_.OpAnd(dxbc::Dest::R(system_temp_rov_params_, 0b0001), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LU(~uint32_t(1 << i))); } // Close the stencil pass check. - DxbcOpEndIf(); + a_.OpEndIf(); // Open the stencil operation switch for writing the new stencil (not // caring about bits 8:31). // sample_temp.x = will contain unmasked new stencil in 0:7 and junk above - DxbcOpSwitch(sample_temp_x_src); + a_.OpSwitch(sample_temp_x_src); { // Zero. - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::StencilOp::kZero))); - DxbcOpMov(sample_temp_x_dest, DxbcSrc::LU(0)); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::StencilOp::kZero))); + a_.OpMov(sample_temp_x_dest, dxbc::Src::LU(0)); + a_.OpBreak(); // Replace. 
- DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::StencilOp::kReplace))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::StencilOp::kReplace))); in_front_face_used_ = true; system_constants_used_ |= 1ull << kSysConst_EdramStencil_Index; - DxbcOpMovC( + a_.OpMovC( sample_temp_x_dest, - DxbcSrc::V(uint32_t(InOutRegister::kPSInFrontFace), DxbcSrc::kXXXX), + dxbc::Src::V(uint32_t(InOutRegister::kPSInFrontFace), + dxbc::Src::kXXXX), stencil_front_src.Select(kSysConst_EdramStencil_Reference_Comp), stencil_back_src.Select(kSysConst_EdramStencil_Reference_Comp)); - DxbcOpBreak(); + a_.OpBreak(); // Increment and clamp. - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::StencilOp::kIncrementClamp))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::StencilOp::kIncrementClamp))); { // Clear the upper bits for saturation. - DxbcOpAnd(sample_temp_x_dest, temp_w_src, DxbcSrc::LU(UINT8_MAX)); + a_.OpAnd(sample_temp_x_dest, temp_w_src, dxbc::Src::LU(UINT8_MAX)); // Increment. - DxbcOpIAdd(sample_temp_x_dest, sample_temp_x_src, DxbcSrc::LI(1)); + a_.OpIAdd(sample_temp_x_dest, sample_temp_x_src, dxbc::Src::LI(1)); // Clamp. - DxbcOpIMin(sample_temp_x_dest, sample_temp_x_src, - DxbcSrc::LI(UINT8_MAX)); + a_.OpIMin(sample_temp_x_dest, sample_temp_x_src, + dxbc::Src::LI(UINT8_MAX)); } - DxbcOpBreak(); + a_.OpBreak(); // Decrement and clamp. - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::StencilOp::kDecrementClamp))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::StencilOp::kDecrementClamp))); { // Clear the upper bits for saturation. - DxbcOpAnd(sample_temp_x_dest, temp_w_src, DxbcSrc::LU(UINT8_MAX)); + a_.OpAnd(sample_temp_x_dest, temp_w_src, dxbc::Src::LU(UINT8_MAX)); // Increment. - DxbcOpIAdd(sample_temp_x_dest, sample_temp_x_src, DxbcSrc::LI(-1)); + a_.OpIAdd(sample_temp_x_dest, sample_temp_x_src, dxbc::Src::LI(-1)); // Clamp. - DxbcOpIMax(sample_temp_x_dest, sample_temp_x_src, DxbcSrc::LI(0)); + a_.OpIMax(sample_temp_x_dest, sample_temp_x_src, dxbc::Src::LI(0)); } - DxbcOpBreak(); + a_.OpBreak(); // Invert. 
- DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::StencilOp::kInvert))); - DxbcOpNot(sample_temp_x_dest, temp_w_src); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::StencilOp::kInvert))); + a_.OpNot(sample_temp_x_dest, temp_w_src); + a_.OpBreak(); // Increment and wrap. - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::StencilOp::kIncrementWrap))); - DxbcOpIAdd(sample_temp_x_dest, temp_w_src, DxbcSrc::LI(1)); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::StencilOp::kIncrementWrap))); + a_.OpIAdd(sample_temp_x_dest, temp_w_src, dxbc::Src::LI(1)); + a_.OpBreak(); // Decrement and wrap. - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::StencilOp::kDecrementWrap))); - DxbcOpIAdd(sample_temp_x_dest, temp_w_src, DxbcSrc::LI(-1)); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::StencilOp::kDecrementWrap))); + a_.OpIAdd(sample_temp_x_dest, temp_w_src, dxbc::Src::LI(-1)); + a_.OpBreak(); // Keep. - DxbcOpDefault(); - DxbcOpMov(sample_temp_x_dest, temp_w_src); - DxbcOpBreak(); + a_.OpDefault(); + a_.OpMov(sample_temp_x_dest, temp_w_src); + a_.OpBreak(); } // Close the new stencil switch. - DxbcOpEndSwitch(); + a_.OpEndSwitch(); // Select the stencil write mask for the face. // sample_temp.x = unmasked new stencil in 0:7 and junk above // sample_temp.y = stencil write mask in_front_face_used_ = true; system_constants_used_ |= 1ull << kSysConst_EdramStencil_Index; - DxbcOpMovC( - sample_temp_y_dest, - DxbcSrc::V(uint32_t(InOutRegister::kPSInFrontFace), DxbcSrc::kXXXX), - stencil_front_src.Select(kSysConst_EdramStencil_WriteMask_Comp), - stencil_back_src.Select(kSysConst_EdramStencil_WriteMask_Comp)); + a_.OpMovC(sample_temp_y_dest, + dxbc::Src::V(uint32_t(InOutRegister::kPSInFrontFace), + dxbc::Src::kXXXX), + stencil_front_src.Select(kSysConst_EdramStencil_WriteMask_Comp), + stencil_back_src.Select(kSysConst_EdramStencil_WriteMask_Comp)); // Apply the write mask to the new stencil, also dropping the upper 24 // bits. 
// sample_temp.x = masked new stencil // sample_temp.y = stencil write mask - DxbcOpAnd(sample_temp_x_dest, sample_temp_x_src, sample_temp_y_src); + a_.OpAnd(sample_temp_x_dest, sample_temp_x_src, sample_temp_y_src); // Invert the write mask for keeping the old stencil and the depth bits. // sample_temp.x = masked new stencil // sample_temp.y = inverted stencil write mask - DxbcOpNot(sample_temp_y_dest, sample_temp_y_src); + a_.OpNot(sample_temp_y_dest, sample_temp_y_src); // Remove the bits that will be replaced from the new combined // depth/stencil. // sample_temp.x = masked new stencil // sample_temp.y = free - DxbcOpAnd(sample_depth_stencil_dest, sample_depth_stencil_src, - sample_temp_y_src); + a_.OpAnd(sample_depth_stencil_dest, sample_depth_stencil_src, + sample_temp_y_src); // Merge the old and the new stencil. // temp.x? = resulting sample depth/stencil // temp.y = polygon offset if not writing to oDepth // temp.z = viewport maximum depth if not writing to oDepth // temp.w = old depth/stencil // sample_temp.x = free - DxbcOpOr(sample_depth_stencil_dest, sample_depth_stencil_src, - sample_temp_x_src); + a_.OpOr(sample_depth_stencil_dest, sample_depth_stencil_src, + sample_temp_x_src); } // Close the stencil test check. - DxbcOpEndIf(); + a_.OpEndIf(); // Check if the depth/stencil has failed not to modify the depth if it has. // sample_temp.x = whether depth/stencil has passed for this sample - DxbcOpAnd(sample_temp_x_dest, - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(1 << i)); + a_.OpAnd(sample_temp_x_dest, + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LU(1 << i)); // If the depth/stencil test has failed, don't change the depth. // sample_temp.x = free - DxbcOpIf(false, sample_temp_x_src); + a_.OpIf(false, sample_temp_x_src); { // Copy the new stencil over the old depth. // temp.x? 
= resulting sample depth/stencil // temp.y = polygon offset if not writing to oDepth // temp.z = viewport maximum depth if not writing to oDepth // temp.w = old depth/stencil - DxbcOpBFI(sample_depth_stencil_dest, DxbcSrc::LU(8), DxbcSrc::LU(0), - sample_depth_stencil_src, temp_w_src); + a_.OpBFI(sample_depth_stencil_dest, dxbc::Src::LU(8), dxbc::Src::LU(0), + sample_depth_stencil_src, temp_w_src); } // Close the depth/stencil passing check. - DxbcOpEndIf(); + a_.OpEndIf(); // Check if the new depth/stencil is different, and thus needs to be // written, to temp.w. // temp.x? = resulting sample depth/stencil // temp.y = polygon offset if not writing to oDepth // temp.z = viewport maximum depth if not writing to oDepth // temp.w = whether depth/stencil has been modified - DxbcOpINE(temp_w_dest, sample_depth_stencil_src, temp_w_src); + a_.OpINE(temp_w_dest, sample_depth_stencil_src, temp_w_src); if (depth_stencil_early && !current_shader().implicit_early_z_write_allowed()) { // Set the sample bit in bits 4:7 of system_temp_rov_params_.x - always // need to write late in this shader, as it may do something like // explicitly killing pixels. - DxbcOpBFI(DxbcDest::R(system_temp_rov_params_, 0b0001), DxbcSrc::LU(1), - DxbcSrc::LU(4 + i), temp_w_src, - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX)); + a_.OpBFI(dxbc::Dest::R(system_temp_rov_params_, 0b0001), dxbc::Src::LU(1), + dxbc::Src::LU(4 + i), temp_w_src, + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX)); } else { // Check if need to write. // temp.x? = resulting sample depth/stencil // temp.y = polygon offset if not writing to oDepth // temp.z = viewport maximum depth if not writing to oDepth // temp.w = free - DxbcOpIf(true, temp_w_src); + a_.OpIf(true, temp_w_src); { if (depth_stencil_early) { // Get if early depth/stencil write is enabled to temp.w. 
// temp.w = whether early depth/stencil write is enabled system_constants_used_ |= 1ull << kSysConst_Flags_Index; - DxbcOpAnd(temp_w_dest, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_Flags_Vec) - .Select(kSysConst_Flags_Comp), - DxbcSrc::LU(kSysFlag_ROVDepthStencilEarlyWrite)); + a_.OpAnd(temp_w_dest, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp), + dxbc::Src::LU(kSysFlag_ROVDepthStencilEarlyWrite)); // Check if need to write early. // temp.w = free - DxbcOpIf(true, temp_w_src); + a_.OpIf(true, temp_w_src); } // Write the new depth/stencil. if (uav_index_edram_ == kBindingIndexUnallocated) { uav_index_edram_ = uav_count_++; } - DxbcOpStoreUAVTyped( - DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1, + a_.OpStoreUAVTyped( + dxbc::Dest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY), 1, sample_depth_stencil_src); if (depth_stencil_early) { // Need to still run the shader to know whether to write the // depth/stencil value. - DxbcOpElse(); + a_.OpElse(); // Set the sample bit in bits 4:7 of system_temp_rov_params_.x if need // to write later (after checking if the sample is not discarded by a // kill instruction, alphatest or alpha-to-coverage). - DxbcOpOr(DxbcDest::R(system_temp_rov_params_, 0b0001), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(1 << (4 + i))); + a_.OpOr(dxbc::Dest::R(system_temp_rov_params_, 0b0001), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LU(1 << (4 + i))); // Close the early depth/stencil check. - DxbcOpEndIf(); + a_.OpEndIf(); } } // Close the write check. - DxbcOpEndIf(); + a_.OpEndIf(); } // Release sample_temp. PopSystemTemp(); // Close the sample conditional. 
- DxbcOpEndIf(); + a_.OpEndIf(); // Go to the next sample (samples are at +0, +80, +1, +81, so need to do // +80, -79, +80 and -81 after each sample). system_constants_used_ |= 1ull << kSysConst_EdramResolutionSquareScale_Index; - DxbcOpIMAd(DxbcDest::R(system_temp_rov_params_, 0b0010), - DxbcSrc::LI((i & 1) ? -78 - i : 80), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramResolutionSquareScale_Vec) - .Select(kSysConst_EdramResolutionSquareScale_Comp), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY)); + a_.OpIMAd(dxbc::Dest::R(system_temp_rov_params_, 0b0010), + dxbc::Src::LI((i & 1) ? -78 - i : 80), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramResolutionSquareScale_Vec) + .Select(kSysConst_EdramResolutionSquareScale_Comp), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY)); } if (ROV_IsDepthStencilEarly()) { @@ -1108,38 +1109,38 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // reject at 2x2 quad granularity because texture fetches need derivatives. 
// temp.x = coverage | deferred depth/stencil write - DxbcOpAnd(DxbcDest::R(temp, 0b0001), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(0b11111111)); + a_.OpAnd(dxbc::Dest::R(temp, 0b0001), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LU(0b11111111)); // temp.x = 1.0 if any sample is covered or potentially needs stencil write // in the end of the shader in the current pixel - DxbcOpMovC(DxbcDest::R(temp, 0b0001), DxbcSrc::R(temp, DxbcSrc::kXXXX), - DxbcSrc::LF(1.0f), DxbcSrc::LF(0.0f)); + a_.OpMovC(dxbc::Dest::R(temp, 0b0001), dxbc::Src::R(temp, dxbc::Src::kXXXX), + dxbc::Src::LF(1.0f), dxbc::Src::LF(0.0f)); // temp.x = 1.0 if any sample is covered or potentially needs stencil write // in the end of the shader in the current pixel // temp.y = non-zero if anything is covered in the pixel across X - DxbcOpDerivRTXFine(DxbcDest::R(temp, 0b0010), - DxbcSrc::R(temp, DxbcSrc::kXXXX)); + a_.OpDerivRTXFine(dxbc::Dest::R(temp, 0b0010), + dxbc::Src::R(temp, dxbc::Src::kXXXX)); // temp.x = 1.0 if anything is covered in the current half of the quad // temp.y = free - DxbcOpMovC(DxbcDest::R(temp, 0b0001), DxbcSrc::R(temp, DxbcSrc::kYYYY), - DxbcSrc::LF(1.0f), DxbcSrc::R(temp, DxbcSrc::kXXXX)); + a_.OpMovC(dxbc::Dest::R(temp, 0b0001), dxbc::Src::R(temp, dxbc::Src::kYYYY), + dxbc::Src::LF(1.0f), dxbc::Src::R(temp, dxbc::Src::kXXXX)); // temp.x = 1.0 if anything is covered in the current half of the quad // temp.y = non-zero if anything is covered in the two pixels across Y - DxbcOpDerivRTYCoarse(DxbcDest::R(temp, 0b0010), - DxbcSrc::R(temp, DxbcSrc::kXXXX)); + a_.OpDerivRTYCoarse(dxbc::Dest::R(temp, 0b0010), + dxbc::Src::R(temp, dxbc::Src::kXXXX)); // temp.x = 1.0 if anything is covered in the current whole quad // temp.y = free - DxbcOpMovC(DxbcDest::R(temp, 0b0001), DxbcSrc::R(temp, DxbcSrc::kYYYY), - DxbcSrc::LF(1.0f), DxbcSrc::R(temp, DxbcSrc::kXXXX)); + a_.OpMovC(dxbc::Dest::R(temp, 0b0001), dxbc::Src::R(temp, 
dxbc::Src::kYYYY), + dxbc::Src::LF(1.0f), dxbc::Src::R(temp, dxbc::Src::kXXXX)); // End the shader if nothing is covered in the 2x2 quad after early // depth/stencil. // temp.x = free - DxbcOpRetC(false, DxbcSrc::R(temp, DxbcSrc::kXXXX)); + a_.OpRetC(false, dxbc::Src::R(temp, dxbc::Src::kXXXX)); } // Close the large depth/stencil conditional. - DxbcOpEndIf(); + a_.OpEndIf(); // Release temp. PopSystemTemp(); @@ -1151,171 +1152,172 @@ void DxbcShaderTranslator::ROV_UnpackColor( uint32_t temp2, uint32_t temp2_component) { assert_true(color_temp != packed_temp || packed_temp_components == 0); - DxbcSrc packed_temp_low( - DxbcSrc::R(packed_temp).Select(packed_temp_components)); - DxbcDest temp1_dest(DxbcDest::R(temp1, 1 << temp1_component)); - DxbcSrc temp1_src(DxbcSrc::R(temp1).Select(temp1_component)); - DxbcDest temp2_dest(DxbcDest::R(temp2, 1 << temp2_component)); - DxbcSrc temp2_src(DxbcSrc::R(temp2).Select(temp2_component)); + dxbc::Src packed_temp_low( + dxbc::Src::R(packed_temp).Select(packed_temp_components)); + dxbc::Dest temp1_dest(dxbc::Dest::R(temp1, 1 << temp1_component)); + dxbc::Src temp1_src(dxbc::Src::R(temp1).Select(temp1_component)); + dxbc::Dest temp2_dest(dxbc::Dest::R(temp2, 1 << temp2_component)); + dxbc::Src temp2_src(dxbc::Src::R(temp2).Select(temp2_component)); // Break register dependencies and initialize if there are not enough // components. The rest of the function will write at least RG (k_32_FLOAT and // k_32_32_FLOAT handled with the same default label), and if packed_temp is // the same as color_temp, the packed color won't be touched. - DxbcOpMov(DxbcDest::R(color_temp, 0b1100), - DxbcSrc::LF(0.0f, 0.0f, 0.0f, 1.0f)); + a_.OpMov(dxbc::Dest::R(color_temp, 0b1100), + dxbc::Src::LF(0.0f, 0.0f, 0.0f, 1.0f)); // Choose the packing based on the render target's format. 
system_constants_used_ |= 1ull << kSysConst_EdramRTFormatFlags_Index; - DxbcOpSwitch(DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramRTFormatFlags_Vec) - .Select(rt_index)); + a_.OpSwitch(dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramRTFormatFlags_Vec) + .Select(rt_index)); // *************************************************************************** // k_8_8_8_8 // k_8_8_8_8_GAMMA // *************************************************************************** for (uint32_t i = 0; i < 2; ++i) { - DxbcOpCase(DxbcSrc::LU(ROV_AddColorFormatFlags( + a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags( i ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA : xenos::ColorRenderTargetFormat::k_8_8_8_8))); // Unpack the components. - DxbcOpUBFE(DxbcDest::R(color_temp), DxbcSrc::LU(8), - DxbcSrc::LU(0, 8, 16, 24), packed_temp_low); + a_.OpUBFE(dxbc::Dest::R(color_temp), dxbc::Src::LU(8), + dxbc::Src::LU(0, 8, 16, 24), packed_temp_low); // Convert from fixed-point. - DxbcOpUToF(DxbcDest::R(color_temp), DxbcSrc::R(color_temp)); + a_.OpUToF(dxbc::Dest::R(color_temp), dxbc::Src::R(color_temp)); // Normalize. 
- DxbcOpMul(DxbcDest::R(color_temp), DxbcSrc::R(color_temp), - DxbcSrc::LF(1.0f / 255.0f)); + a_.OpMul(dxbc::Dest::R(color_temp), dxbc::Src::R(color_temp), + dxbc::Src::LF(1.0f / 255.0f)); if (i) { for (uint32_t j = 0; j < 3; ++j) { ConvertPWLGamma(false, color_temp, j, color_temp, j, temp1, temp1_component, temp2, temp2_component); } } - DxbcOpBreak(); + a_.OpBreak(); } // *************************************************************************** // k_2_10_10_10 // k_2_10_10_10_AS_10_10_10_10 // *************************************************************************** - DxbcOpCase(DxbcSrc::LU( + a_.OpCase(dxbc::Src::LU( ROV_AddColorFormatFlags(xenos::ColorRenderTargetFormat::k_2_10_10_10))); - DxbcOpCase(DxbcSrc::LU(ROV_AddColorFormatFlags( + a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags( xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10))); { // Unpack the components. - DxbcOpUBFE(DxbcDest::R(color_temp), DxbcSrc::LU(10, 10, 10, 2), - DxbcSrc::LU(0, 10, 20, 30), packed_temp_low); + a_.OpUBFE(dxbc::Dest::R(color_temp), dxbc::Src::LU(10, 10, 10, 2), + dxbc::Src::LU(0, 10, 20, 30), packed_temp_low); // Convert from fixed-point. - DxbcOpUToF(DxbcDest::R(color_temp), DxbcSrc::R(color_temp)); + a_.OpUToF(dxbc::Dest::R(color_temp), dxbc::Src::R(color_temp)); // Normalize. 
- DxbcOpMul(DxbcDest::R(color_temp), DxbcSrc::R(color_temp), - DxbcSrc::LF(1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, - 1.0f / 3.0f)); + a_.OpMul(dxbc::Dest::R(color_temp), dxbc::Src::R(color_temp), + dxbc::Src::LF(1.0f / 1023.0f, 1.0f / 1023.0f, 1.0f / 1023.0f, + 1.0f / 3.0f)); } - DxbcOpBreak(); + a_.OpBreak(); // *************************************************************************** // k_2_10_10_10_FLOAT // k_2_10_10_10_FLOAT_AS_16_16_16_16 // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp // *************************************************************************** - DxbcOpCase(DxbcSrc::LU(ROV_AddColorFormatFlags( + a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags( xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT))); - DxbcOpCase(DxbcSrc::LU(ROV_AddColorFormatFlags( + a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags( xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16))); { // Unpack the alpha. - DxbcOpUBFE(DxbcDest::R(color_temp, 0b1000), DxbcSrc::LU(2), DxbcSrc::LU(30), - packed_temp_low); + a_.OpUBFE(dxbc::Dest::R(color_temp, 0b1000), dxbc::Src::LU(2), + dxbc::Src::LU(30), packed_temp_low); // Convert the alpha from fixed-point. - DxbcOpUToF(DxbcDest::R(color_temp, 0b1000), - DxbcSrc::R(color_temp, DxbcSrc::kWWWW)); + a_.OpUToF(dxbc::Dest::R(color_temp, 0b1000), + dxbc::Src::R(color_temp, dxbc::Src::kWWWW)); // Normalize the alpha. - DxbcOpMul(DxbcDest::R(color_temp, 0b1000), - DxbcSrc::R(color_temp, DxbcSrc::kWWWW), DxbcSrc::LF(1.0f / 3.0f)); + a_.OpMul(dxbc::Dest::R(color_temp, 0b1000), + dxbc::Src::R(color_temp, dxbc::Src::kWWWW), + dxbc::Src::LF(1.0f / 3.0f)); // Process the components in reverse order because color_temp.r stores the // packed color which shouldn't be touched until G and B are converted if // packed_temp and color_temp are the same. 
for (int32_t i = 2; i >= 0; --i) { - DxbcDest color_component_dest(DxbcDest::R(color_temp, 1 << i)); - DxbcSrc color_component_src(DxbcSrc::R(color_temp).Select(i)); + dxbc::Dest color_component_dest(dxbc::Dest::R(color_temp, 1 << i)); + dxbc::Src color_component_src(dxbc::Src::R(color_temp).Select(i)); // Unpack the exponent to the temp. - DxbcOpUBFE(temp1_dest, DxbcSrc::LU(3), DxbcSrc::LU(i * 10 + 7), - packed_temp_low); + a_.OpUBFE(temp1_dest, dxbc::Src::LU(3), dxbc::Src::LU(i * 10 + 7), + packed_temp_low); // Unpack the mantissa to the result. - DxbcOpUBFE(color_component_dest, DxbcSrc::LU(7), DxbcSrc::LU(i * 10), - packed_temp_low); + a_.OpUBFE(color_component_dest, dxbc::Src::LU(7), dxbc::Src::LU(i * 10), + packed_temp_low); // Check if the number is denormalized. - DxbcOpIf(false, temp1_src); + a_.OpIf(false, temp1_src); { // Check if the number is non-zero (if the mantissa isn't zero - the // exponent is known to be zero at this point). - DxbcOpIf(true, color_component_src); + a_.OpIf(true, color_component_src); { // Normalize the mantissa. // Note that HLSL firstbithigh(x) is compiled to DXBC like: // `x ? 31 - firstbit_hi(x) : -1` // (returns the index from the LSB, not the MSB, but -1 for zero too). // temp = firstbit_hi(mantissa) - DxbcOpFirstBitHi(temp1_dest, color_component_src); + a_.OpFirstBitHi(temp1_dest, color_component_src); // temp = 7 - (31 - firstbit_hi(mantissa)) // Or, if expanded: // temp = firstbit_hi(mantissa) - 24 - DxbcOpIAdd(temp1_dest, temp1_src, DxbcSrc::LI(-24)); + a_.OpIAdd(temp1_dest, temp1_src, dxbc::Src::LI(-24)); // mantissa = mantissa << (7 - firstbithigh(mantissa)) // AND 0x7F not needed after this - BFI will do it. - DxbcOpIShL(color_component_dest, color_component_src, temp1_src); + a_.OpIShL(color_component_dest, color_component_src, temp1_src); // Get the normalized exponent. 
// exponent = 1 - (7 - firstbithigh(mantissa)) - DxbcOpIAdd(temp1_dest, DxbcSrc::LI(1), -temp1_src); + a_.OpIAdd(temp1_dest, dxbc::Src::LI(1), -temp1_src); } // The number is zero. - DxbcOpElse(); + a_.OpElse(); { // Set the unbiased exponent to -124 for zero - 124 will be added // later, resulting in zero float32. - DxbcOpMov(temp1_dest, DxbcSrc::LI(-124)); + a_.OpMov(temp1_dest, dxbc::Src::LI(-124)); } // Close the non-zero check. - DxbcOpEndIf(); + a_.OpEndIf(); } // Close the denormal check. - DxbcOpEndIf(); + a_.OpEndIf(); // Bias the exponent and move it to the correct location in f32. - DxbcOpIMAd(temp1_dest, temp1_src, DxbcSrc::LI(1 << 23), - DxbcSrc::LI(124 << 23)); + a_.OpIMAd(temp1_dest, temp1_src, dxbc::Src::LI(1 << 23), + dxbc::Src::LI(124 << 23)); // Combine the mantissa and the exponent. - DxbcOpBFI(color_component_dest, DxbcSrc::LU(7), DxbcSrc::LU(16), - color_component_src, temp1_src); + a_.OpBFI(color_component_dest, dxbc::Src::LU(7), dxbc::Src::LU(16), + color_component_src, temp1_src); } } - DxbcOpBreak(); + a_.OpBreak(); // *************************************************************************** // k_16_16 // k_16_16_16_16 (64bpp) // *************************************************************************** for (uint32_t i = 0; i < 2; ++i) { - DxbcOpCase(DxbcSrc::LU(ROV_AddColorFormatFlags( + a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags( i ? xenos::ColorRenderTargetFormat::k_16_16_16_16 : xenos::ColorRenderTargetFormat::k_16_16))); - DxbcDest color_components_dest( - DxbcDest::R(color_temp, i ? 0b1111 : 0b0011)); + dxbc::Dest color_components_dest( + dxbc::Dest::R(color_temp, i ? 0b1111 : 0b0011)); // Unpack the components. 
- DxbcOpIBFE(color_components_dest, DxbcSrc::LU(16), - DxbcSrc::LU(0, 16, 0, 16), - DxbcSrc::R(packed_temp, - 0b01010000 + packed_temp_components * 0b01010101)); + a_.OpIBFE(color_components_dest, dxbc::Src::LU(16), + dxbc::Src::LU(0, 16, 0, 16), + dxbc::Src::R(packed_temp, + 0b01010000 + packed_temp_components * 0b01010101)); // Convert from fixed-point. - DxbcOpIToF(color_components_dest, DxbcSrc::R(color_temp)); + a_.OpIToF(color_components_dest, dxbc::Src::R(color_temp)); // Normalize. - DxbcOpMul(color_components_dest, DxbcSrc::R(color_temp), - DxbcSrc::LF(32.0f / 32767.0f)); - DxbcOpBreak(); + a_.OpMul(color_components_dest, dxbc::Src::R(color_temp), + dxbc::Src::LF(32.0f / 32767.0f)); + a_.OpBreak(); } // *************************************************************************** @@ -1323,31 +1325,31 @@ void DxbcShaderTranslator::ROV_UnpackColor( // k_16_16_16_16_FLOAT (64bpp) // *************************************************************************** for (uint32_t i = 0; i < 2; ++i) { - DxbcOpCase(DxbcSrc::LU(ROV_AddColorFormatFlags( + a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags( i ? xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT : xenos::ColorRenderTargetFormat::k_16_16_FLOAT))); - DxbcDest color_components_dest( - DxbcDest::R(color_temp, i ? 0b1111 : 0b0011)); + dxbc::Dest color_components_dest( + dxbc::Dest::R(color_temp, i ? 0b1111 : 0b0011)); // Unpack the components. - DxbcOpUBFE(color_components_dest, DxbcSrc::LU(16), - DxbcSrc::LU(0, 16, 0, 16), - DxbcSrc::R(packed_temp, - 0b01010000 + packed_temp_components * 0b01010101)); + a_.OpUBFE(color_components_dest, dxbc::Src::LU(16), + dxbc::Src::LU(0, 16, 0, 16), + dxbc::Src::R(packed_temp, + 0b01010000 + packed_temp_components * 0b01010101)); // Convert from 16-bit float. 
- DxbcOpF16ToF32(color_components_dest, DxbcSrc::R(color_temp)); - DxbcOpBreak(); + a_.OpF16ToF32(color_components_dest, dxbc::Src::R(color_temp)); + a_.OpBreak(); } if (packed_temp != color_temp) { // Assume k_32_FLOAT or k_32_32_FLOAT for the rest. - DxbcOpDefault(); - DxbcOpMov( - DxbcDest::R(color_temp, 0b0011), - DxbcSrc::R(packed_temp, 0b0100 + packed_temp_components * 0b0101)); - DxbcOpBreak(); + a_.OpDefault(); + a_.OpMov( + dxbc::Dest::R(color_temp, 0b0011), + dxbc::Src::R(packed_temp, 0b0100 + packed_temp_components * 0b0101)); + a_.OpBreak(); } - DxbcOpEndSwitch(); + a_.OpEndSwitch(); } void DxbcShaderTranslator::ROV_PackPreClampedColor( @@ -1359,32 +1361,32 @@ void DxbcShaderTranslator::ROV_PackPreClampedColor( assert_true(color_temp != packed_temp || packed_temp_components == 0); - DxbcDest packed_dest_low( - DxbcDest::R(packed_temp, 1 << packed_temp_components)); - DxbcSrc packed_src_low( - DxbcSrc::R(packed_temp).Select(packed_temp_components)); - DxbcDest temp1_dest(DxbcDest::R(temp1, 1 << temp1_component)); - DxbcSrc temp1_src(DxbcSrc::R(temp1).Select(temp1_component)); - DxbcDest temp2_dest(DxbcDest::R(temp2, 1 << temp2_component)); - DxbcSrc temp2_src(DxbcSrc::R(temp2).Select(temp2_component)); + dxbc::Dest packed_dest_low( + dxbc::Dest::R(packed_temp, 1 << packed_temp_components)); + dxbc::Src packed_src_low( + dxbc::Src::R(packed_temp).Select(packed_temp_components)); + dxbc::Dest temp1_dest(dxbc::Dest::R(temp1, 1 << temp1_component)); + dxbc::Src temp1_src(dxbc::Src::R(temp1).Select(temp1_component)); + dxbc::Dest temp2_dest(dxbc::Dest::R(temp2, 1 << temp2_component)); + dxbc::Src temp2_src(dxbc::Src::R(temp2).Select(temp2_component)); // Break register dependency after 32bpp cases. - DxbcOpMov(DxbcDest::R(packed_temp, 1 << (packed_temp_components + 1)), - DxbcSrc::LU(0)); + a_.OpMov(dxbc::Dest::R(packed_temp, 1 << (packed_temp_components + 1)), + dxbc::Src::LU(0)); // Choose the packing based on the render target's format. 
system_constants_used_ |= 1ull << kSysConst_EdramRTFormatFlags_Index; - DxbcOpSwitch(DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramRTFormatFlags_Vec) - .Select(rt_index)); + a_.OpSwitch(dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramRTFormatFlags_Vec) + .Select(rt_index)); // *************************************************************************** // k_8_8_8_8 // k_8_8_8_8_GAMMA // *************************************************************************** for (uint32_t i = 0; i < 2; ++i) { - DxbcOpCase(DxbcSrc::LU(ROV_AddColorFormatFlags( + a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags( i ? xenos::ColorRenderTargetFormat::k_8_8_8_8_GAMMA : xenos::ColorRenderTargetFormat::k_8_8_8_8))); for (uint32_t j = 0; j < 4; ++j) { @@ -1392,141 +1394,144 @@ void DxbcShaderTranslator::ROV_PackPreClampedColor( ConvertPWLGamma(true, color_temp, j, temp1, temp1_component, temp1, temp1_component, temp2, temp2_component); // Denormalize and add 0.5 for rounding. - DxbcOpMAd(temp1_dest, temp1_src, DxbcSrc::LF(255.0f), - DxbcSrc::LF(0.5f)); + a_.OpMAd(temp1_dest, temp1_src, dxbc::Src::LF(255.0f), + dxbc::Src::LF(0.5f)); } else { // Denormalize and add 0.5 for rounding. - DxbcOpMAd(temp1_dest, DxbcSrc::R(color_temp).Select(j), - DxbcSrc::LF(255.0f), DxbcSrc::LF(0.5f)); + a_.OpMAd(temp1_dest, dxbc::Src::R(color_temp).Select(j), + dxbc::Src::LF(255.0f), dxbc::Src::LF(0.5f)); } // Convert to fixed-point. - DxbcOpFToU(j ? temp1_dest : packed_dest_low, temp1_src); + a_.OpFToU(j ? temp1_dest : packed_dest_low, temp1_src); // Pack the upper components. 
if (j) { - DxbcOpBFI(packed_dest_low, DxbcSrc::LU(8), DxbcSrc::LU(j * 8), - temp1_src, packed_src_low); + a_.OpBFI(packed_dest_low, dxbc::Src::LU(8), dxbc::Src::LU(j * 8), + temp1_src, packed_src_low); } } - DxbcOpBreak(); + a_.OpBreak(); } // *************************************************************************** // k_2_10_10_10 // k_2_10_10_10_AS_10_10_10_10 // *************************************************************************** - DxbcOpCase(DxbcSrc::LU( + a_.OpCase(dxbc::Src::LU( ROV_AddColorFormatFlags(xenos::ColorRenderTargetFormat::k_2_10_10_10))); - DxbcOpCase(DxbcSrc::LU(ROV_AddColorFormatFlags( + a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags( xenos::ColorRenderTargetFormat::k_2_10_10_10_AS_10_10_10_10))); for (uint32_t i = 0; i < 4; ++i) { // Denormalize and convert to fixed-point. - DxbcOpMAd(temp1_dest, DxbcSrc::R(color_temp).Select(i), - DxbcSrc::LF(i < 3 ? 1023.0f : 3.0f), DxbcSrc::LF(0.5f)); - DxbcOpFToU(i ? temp1_dest : packed_dest_low, temp1_src); + a_.OpMAd(temp1_dest, dxbc::Src::R(color_temp).Select(i), + dxbc::Src::LF(i < 3 ? 1023.0f : 3.0f), dxbc::Src::LF(0.5f)); + a_.OpFToU(i ? temp1_dest : packed_dest_low, temp1_src); // Pack the upper components. if (i) { - DxbcOpBFI(packed_dest_low, DxbcSrc::LU(i < 3 ? 10 : 2), - DxbcSrc::LU(i * 10), temp1_src, packed_src_low); + a_.OpBFI(packed_dest_low, dxbc::Src::LU(i < 3 ? 
10 : 2), + dxbc::Src::LU(i * 10), temp1_src, packed_src_low); } } - DxbcOpBreak(); + a_.OpBreak(); // *************************************************************************** // k_2_10_10_10_FLOAT // k_2_10_10_10_FLOAT_AS_16_16_16_16 // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp // *************************************************************************** - DxbcOpCase(DxbcSrc::LU(ROV_AddColorFormatFlags( + a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags( xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT))); - DxbcOpCase(DxbcSrc::LU(ROV_AddColorFormatFlags( + a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags( xenos::ColorRenderTargetFormat::k_2_10_10_10_FLOAT_AS_16_16_16_16))); { for (uint32_t i = 0; i < 3; ++i) { - DxbcSrc color_component_src(DxbcSrc::R(color_temp).Select(i)); + dxbc::Src color_component_src(dxbc::Src::R(color_temp).Select(i)); // Check if the number is too small to be represented as normalized 7e3. // temp2 = f32 < 2^-2 - DxbcOpULT(temp2_dest, color_component_src, DxbcSrc::LU(0x3E800000)); + a_.OpULT(temp2_dest, color_component_src, dxbc::Src::LU(0x3E800000)); // Handle denormalized numbers separately. - DxbcOpIf(true, temp2_src); + a_.OpIf(true, temp2_src); { // temp2 = f32 >> 23 - DxbcOpUShR(temp2_dest, color_component_src, DxbcSrc::LU(23)); + a_.OpUShR(temp2_dest, color_component_src, dxbc::Src::LU(23)); // temp2 = 125 - (f32 >> 23) - DxbcOpIAdd(temp2_dest, DxbcSrc::LI(125), -temp2_src); + a_.OpIAdd(temp2_dest, dxbc::Src::LI(125), -temp2_src); // Don't allow the shift to overflow, since in DXBC the lower 5 bits of // the shift amount are used. 
// temp2 = min(125 - (f32 >> 23), 24) - DxbcOpUMin(temp2_dest, temp2_src, DxbcSrc::LU(24)); + a_.OpUMin(temp2_dest, temp2_src, dxbc::Src::LU(24)); // biased_f32 = (f32 & 0x7FFFFF) | 0x800000 - DxbcOpBFI(temp1_dest, DxbcSrc::LU(9), DxbcSrc::LU(23), DxbcSrc::LU(1), - color_component_src); + a_.OpBFI(temp1_dest, dxbc::Src::LU(9), dxbc::Src::LU(23), + dxbc::Src::LU(1), color_component_src); // biased_f32 = // ((f32 & 0x7FFFFF) | 0x800000) >> min(125 - (f32 >> 23), 24) - DxbcOpUShR(temp1_dest, temp1_src, temp2_src); + a_.OpUShR(temp1_dest, temp1_src, temp2_src); } // Not denormalized? - DxbcOpElse(); + a_.OpElse(); { // Bias the exponent. // biased_f32 = f32 + (-124 << 23) // (left shift of a negative value is undefined behavior) - DxbcOpIAdd(temp1_dest, color_component_src, DxbcSrc::LU(0xC2000000u)); + a_.OpIAdd(temp1_dest, color_component_src, dxbc::Src::LU(0xC2000000u)); } // Close the denormal check. - DxbcOpEndIf(); + a_.OpEndIf(); // Build the 7e3 number. // temp2 = (biased_f32 >> 16) & 1 - DxbcOpUBFE(temp2_dest, DxbcSrc::LU(1), DxbcSrc::LU(16), temp1_src); + a_.OpUBFE(temp2_dest, dxbc::Src::LU(1), dxbc::Src::LU(16), temp1_src); // f10 = biased_f32 + 0x7FFF - DxbcOpIAdd(temp1_dest, temp1_src, DxbcSrc::LU(0x7FFF)); + a_.OpIAdd(temp1_dest, temp1_src, dxbc::Src::LU(0x7FFF)); // f10 = biased_f32 + 0x7FFF + ((biased_f32 >> 16) & 1) - DxbcOpIAdd(temp1_dest, temp1_src, temp2_src); + a_.OpIAdd(temp1_dest, temp1_src, temp2_src); // f10 = ((biased_f32 + 0x7FFF + ((biased_f32 >> 16) & 1)) >> 16) & 0x3FF - DxbcOpUBFE(i ? temp1_dest : packed_dest_low, DxbcSrc::LU(10), - DxbcSrc::LU(16), temp1_src); + a_.OpUBFE(i ? temp1_dest : packed_dest_low, dxbc::Src::LU(10), + dxbc::Src::LU(16), temp1_src); // Pack the upper components. 
if (i) { - DxbcOpBFI(packed_dest_low, DxbcSrc::LU(10), DxbcSrc::LU(i * 10), - temp1_src, packed_src_low); + a_.OpBFI(packed_dest_low, dxbc::Src::LU(10), dxbc::Src::LU(i * 10), + temp1_src, packed_src_low); } } // Denormalize the alpha and convert it to fixed-point. - DxbcOpMAd(temp1_dest, DxbcSrc::R(color_temp, DxbcSrc::kWWWW), - DxbcSrc::LF(3.0f), DxbcSrc::LF(0.5f)); - DxbcOpFToU(temp1_dest, temp1_src); + a_.OpMAd(temp1_dest, dxbc::Src::R(color_temp, dxbc::Src::kWWWW), + dxbc::Src::LF(3.0f), dxbc::Src::LF(0.5f)); + a_.OpFToU(temp1_dest, temp1_src); // Pack the alpha. - DxbcOpBFI(packed_dest_low, DxbcSrc::LU(2), DxbcSrc::LU(30), temp1_src, - packed_src_low); + a_.OpBFI(packed_dest_low, dxbc::Src::LU(2), dxbc::Src::LU(30), temp1_src, + packed_src_low); } - DxbcOpBreak(); + a_.OpBreak(); // *************************************************************************** // k_16_16 // k_16_16_16_16 (64bpp) // *************************************************************************** for (uint32_t i = 0; i < 2; ++i) { - DxbcOpCase(DxbcSrc::LU(ROV_AddColorFormatFlags( + a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags( i ? xenos::ColorRenderTargetFormat::k_16_16_16_16 : xenos::ColorRenderTargetFormat::k_16_16))); for (uint32_t j = 0; j < (uint32_t(2) << i); ++j) { // Denormalize and convert to fixed-point, making 0.5 with the proper sign // in temp2. 
- DxbcOpGE(temp2_dest, DxbcSrc::R(color_temp).Select(j), DxbcSrc::LF(0.0f)); - DxbcOpMovC(temp2_dest, temp2_src, DxbcSrc::LF(0.5f), DxbcSrc::LF(-0.5f)); - DxbcOpMAd(temp1_dest, DxbcSrc::R(color_temp).Select(j), - DxbcSrc::LF(32767.0f / 32.0f), temp2_src); - DxbcDest packed_dest_half( - DxbcDest::R(packed_temp, 1 << (packed_temp_components + (j >> 1)))); + a_.OpGE(temp2_dest, dxbc::Src::R(color_temp).Select(j), + dxbc::Src::LF(0.0f)); + a_.OpMovC(temp2_dest, temp2_src, dxbc::Src::LF(0.5f), + dxbc::Src::LF(-0.5f)); + a_.OpMAd(temp1_dest, dxbc::Src::R(color_temp).Select(j), + dxbc::Src::LF(32767.0f / 32.0f), temp2_src); + dxbc::Dest packed_dest_half( + dxbc::Dest::R(packed_temp, 1 << (packed_temp_components + (j >> 1)))); // Convert to fixed-point. - DxbcOpFToI((j & 1) ? temp1_dest : packed_dest_half, temp1_src); + a_.OpFToI((j & 1) ? temp1_dest : packed_dest_half, temp1_src); // Pack green or alpha. if (j & 1) { - DxbcOpBFI( - packed_dest_half, DxbcSrc::LU(16), DxbcSrc::LU(16), temp1_src, - DxbcSrc::R(packed_temp).Select(packed_temp_components + (j >> 1))); + a_.OpBFI(packed_dest_half, dxbc::Src::LU(16), dxbc::Src::LU(16), + temp1_src, + dxbc::Src::R(packed_temp) + .Select(packed_temp_components + (j >> 1))); } } - DxbcOpBreak(); + a_.OpBreak(); } // *************************************************************************** @@ -1534,204 +1539,209 @@ void DxbcShaderTranslator::ROV_PackPreClampedColor( // k_16_16_16_16_FLOAT (64bpp) // *************************************************************************** for (uint32_t i = 0; i < 2; ++i) { - DxbcOpCase(DxbcSrc::LU(ROV_AddColorFormatFlags( + a_.OpCase(dxbc::Src::LU(ROV_AddColorFormatFlags( i ? 
xenos::ColorRenderTargetFormat::k_16_16_16_16_FLOAT : xenos::ColorRenderTargetFormat::k_16_16_FLOAT))); for (uint32_t j = 0; j < (uint32_t(2) << i); ++j) { - DxbcDest packed_dest_half( - DxbcDest::R(packed_temp, 1 << (packed_temp_components + (j >> 1)))); + dxbc::Dest packed_dest_half( + dxbc::Dest::R(packed_temp, 1 << (packed_temp_components + (j >> 1)))); // Convert to 16-bit float. - DxbcOpF32ToF16((j & 1) ? temp1_dest : packed_dest_half, - DxbcSrc::R(color_temp).Select(j)); + a_.OpF32ToF16((j & 1) ? temp1_dest : packed_dest_half, + dxbc::Src::R(color_temp).Select(j)); // Pack green or alpha. if (j & 1) { - DxbcOpBFI( - packed_dest_half, DxbcSrc::LU(16), DxbcSrc::LU(16), temp1_src, - DxbcSrc::R(packed_temp).Select(packed_temp_components + (j >> 1))); + a_.OpBFI(packed_dest_half, dxbc::Src::LU(16), dxbc::Src::LU(16), + temp1_src, + dxbc::Src::R(packed_temp) + .Select(packed_temp_components + (j >> 1))); } } - DxbcOpBreak(); + a_.OpBreak(); } if (packed_temp != color_temp) { // Assume k_32_FLOAT or k_32_32_FLOAT for the rest. - DxbcOpDefault(); - DxbcOpMov(DxbcDest::R(packed_temp, 0b11 << packed_temp_components), - DxbcSrc::R(color_temp, 0b0100 << (packed_temp_components * 2))); - DxbcOpBreak(); + a_.OpDefault(); + a_.OpMov(dxbc::Dest::R(packed_temp, 0b11 << packed_temp_components), + dxbc::Src::R(color_temp, 0b0100 << (packed_temp_components * 2))); + a_.OpBreak(); } - DxbcOpEndSwitch(); + a_.OpEndSwitch(); } void DxbcShaderTranslator::ROV_HandleColorBlendFactorCases( uint32_t src_temp, uint32_t dst_temp, uint32_t factor_temp) { - DxbcDest factor_dest(DxbcDest::R(factor_temp, 0b0111)); - DxbcSrc one_src(DxbcSrc::LF(1.0f)); + dxbc::Dest factor_dest(dxbc::Dest::R(factor_temp, 0b0111)); + dxbc::Src one_src(dxbc::Src::LF(1.0f)); // kOne. 
- DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kOne))); - DxbcOpMov(factor_dest, one_src); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kOne))); + a_.OpMov(factor_dest, one_src); + a_.OpBreak(); // kSrcColor - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kSrcColor))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kSrcColor))); if (factor_temp != src_temp) { - DxbcOpMov(factor_dest, DxbcSrc::R(src_temp)); + a_.OpMov(factor_dest, dxbc::Src::R(src_temp)); } - DxbcOpBreak(); + a_.OpBreak(); // kOneMinusSrcColor - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kOneMinusSrcColor))); - DxbcOpAdd(factor_dest, one_src, -DxbcSrc::R(src_temp)); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kOneMinusSrcColor))); + a_.OpAdd(factor_dest, one_src, -dxbc::Src::R(src_temp)); + a_.OpBreak(); // kSrcAlpha - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kSrcAlpha))); - DxbcOpMov(factor_dest, DxbcSrc::R(src_temp, DxbcSrc::kWWWW)); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kSrcAlpha))); + a_.OpMov(factor_dest, dxbc::Src::R(src_temp, dxbc::Src::kWWWW)); + a_.OpBreak(); // kOneMinusSrcAlpha - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kOneMinusSrcAlpha))); - DxbcOpAdd(factor_dest, one_src, -DxbcSrc::R(src_temp, DxbcSrc::kWWWW)); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kOneMinusSrcAlpha))); + a_.OpAdd(factor_dest, one_src, -dxbc::Src::R(src_temp, dxbc::Src::kWWWW)); + a_.OpBreak(); // kDstColor - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kDstColor))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kDstColor))); if (factor_temp != dst_temp) { - DxbcOpMov(factor_dest, DxbcSrc::R(dst_temp)); + a_.OpMov(factor_dest, dxbc::Src::R(dst_temp)); } - DxbcOpBreak(); + a_.OpBreak(); // kOneMinusDstColor - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kOneMinusDstColor))); - DxbcOpAdd(factor_dest, one_src, 
-DxbcSrc::R(dst_temp)); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kOneMinusDstColor))); + a_.OpAdd(factor_dest, one_src, -dxbc::Src::R(dst_temp)); + a_.OpBreak(); // kDstAlpha - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kDstAlpha))); - DxbcOpMov(factor_dest, DxbcSrc::R(dst_temp, DxbcSrc::kWWWW)); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kDstAlpha))); + a_.OpMov(factor_dest, dxbc::Src::R(dst_temp, dxbc::Src::kWWWW)); + a_.OpBreak(); // kOneMinusDstAlpha - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kOneMinusDstAlpha))); - DxbcOpAdd(factor_dest, one_src, -DxbcSrc::R(dst_temp, DxbcSrc::kWWWW)); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kOneMinusDstAlpha))); + a_.OpAdd(factor_dest, one_src, -dxbc::Src::R(dst_temp, dxbc::Src::kWWWW)); + a_.OpBreak(); // Factors involving the constant. system_constants_used_ |= 1ull << kSysConst_EdramBlendConstant_Index; // kConstantColor - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kConstantColor))); - DxbcOpMov(factor_dest, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramBlendConstant_Vec)); - DxbcOpBreak(); - - // kOneMinusConstantColor - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kOneMinusConstantColor))); - DxbcOpAdd(factor_dest, one_src, - -DxbcSrc::CB(cbuffer_index_system_constants_, + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kConstantColor))); + a_.OpMov(factor_dest, + dxbc::Src::CB(cbuffer_index_system_constants_, uint32_t(CbufferRegister::kSystemConstants), kSysConst_EdramBlendConstant_Vec)); - DxbcOpBreak(); + a_.OpBreak(); + + // kOneMinusConstantColor + a_.OpCase( + dxbc::Src::LU(uint32_t(xenos::BlendFactor::kOneMinusConstantColor))); + a_.OpAdd(factor_dest, one_src, + -dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramBlendConstant_Vec)); + a_.OpBreak(); // 
kConstantAlpha - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kConstantAlpha))); - DxbcOpMov(factor_dest, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramBlendConstant_Vec, DxbcSrc::kWWWW)); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kConstantAlpha))); + a_.OpMov(factor_dest, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramBlendConstant_Vec, dxbc::Src::kWWWW)); + a_.OpBreak(); // kOneMinusConstantAlpha - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kOneMinusConstantAlpha))); - DxbcOpAdd(factor_dest, one_src, - -DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramBlendConstant_Vec, DxbcSrc::kWWWW)); - DxbcOpBreak(); + a_.OpCase( + dxbc::Src::LU(uint32_t(xenos::BlendFactor::kOneMinusConstantAlpha))); + a_.OpAdd(factor_dest, one_src, + -dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramBlendConstant_Vec, dxbc::Src::kWWWW)); + a_.OpBreak(); // kSrcAlphaSaturate - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kSrcAlphaSaturate))); - DxbcOpAdd(DxbcDest::R(factor_temp, 0b0001), one_src, - -DxbcSrc::R(dst_temp, DxbcSrc::kWWWW)); - DxbcOpMin(factor_dest, DxbcSrc::R(src_temp, DxbcSrc::kWWWW), - DxbcSrc::R(factor_temp, DxbcSrc::kXXXX)); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kSrcAlphaSaturate))); + a_.OpAdd(dxbc::Dest::R(factor_temp, 0b0001), one_src, + -dxbc::Src::R(dst_temp, dxbc::Src::kWWWW)); + a_.OpMin(factor_dest, dxbc::Src::R(src_temp, dxbc::Src::kWWWW), + dxbc::Src::R(factor_temp, dxbc::Src::kXXXX)); + a_.OpBreak(); // kZero default. 
- DxbcOpDefault(); - DxbcOpMov(factor_dest, DxbcSrc::LF(0.0f)); - DxbcOpBreak(); + a_.OpDefault(); + a_.OpMov(factor_dest, dxbc::Src::LF(0.0f)); + a_.OpBreak(); } void DxbcShaderTranslator::ROV_HandleAlphaBlendFactorCases( uint32_t src_temp, uint32_t dst_temp, uint32_t factor_temp, uint32_t factor_component) { - DxbcDest factor_dest(DxbcDest::R(factor_temp, 1 << factor_component)); - DxbcSrc one_src(DxbcSrc::LF(1.0f)); + dxbc::Dest factor_dest(dxbc::Dest::R(factor_temp, 1 << factor_component)); + dxbc::Src one_src(dxbc::Src::LF(1.0f)); // kOne, kSrcAlphaSaturate. - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kOne))); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kSrcAlphaSaturate))); - DxbcOpMov(factor_dest, one_src); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kOne))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kSrcAlphaSaturate))); + a_.OpMov(factor_dest, one_src); + a_.OpBreak(); // kSrcColor, kSrcAlpha. - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kSrcColor))); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kSrcAlpha))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kSrcColor))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kSrcAlpha))); if (factor_temp != src_temp || factor_component != 3) { - DxbcOpMov(factor_dest, DxbcSrc::R(src_temp, DxbcSrc::kWWWW)); + a_.OpMov(factor_dest, dxbc::Src::R(src_temp, dxbc::Src::kWWWW)); } - DxbcOpBreak(); + a_.OpBreak(); // kOneMinusSrcColor, kOneMinusSrcAlpha. 
- DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kOneMinusSrcColor))); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kOneMinusSrcAlpha))); - DxbcOpAdd(factor_dest, one_src, -DxbcSrc::R(src_temp, DxbcSrc::kWWWW)); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kOneMinusSrcColor))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kOneMinusSrcAlpha))); + a_.OpAdd(factor_dest, one_src, -dxbc::Src::R(src_temp, dxbc::Src::kWWWW)); + a_.OpBreak(); // kDstColor, kDstAlpha. - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kDstColor))); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kDstAlpha))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kDstColor))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kDstAlpha))); if (factor_temp != dst_temp || factor_component != 3) { - DxbcOpMov(factor_dest, DxbcSrc::R(dst_temp, DxbcSrc::kWWWW)); + a_.OpMov(factor_dest, dxbc::Src::R(dst_temp, dxbc::Src::kWWWW)); } - DxbcOpBreak(); + a_.OpBreak(); // kOneMinusDstColor, kOneMinusDstAlpha. - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kOneMinusDstColor))); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kOneMinusDstAlpha))); - DxbcOpAdd(factor_dest, one_src, -DxbcSrc::R(dst_temp, DxbcSrc::kWWWW)); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kOneMinusDstColor))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kOneMinusDstAlpha))); + a_.OpAdd(factor_dest, one_src, -dxbc::Src::R(dst_temp, dxbc::Src::kWWWW)); + a_.OpBreak(); // Factors involving the constant. system_constants_used_ |= 1ull << kSysConst_EdramBlendConstant_Index; // kConstantColor, kConstantAlpha. 
- DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kConstantColor))); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kConstantAlpha))); - DxbcOpMov(factor_dest, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramBlendConstant_Vec, DxbcSrc::kWWWW)); - DxbcOpBreak(); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kConstantColor))); + a_.OpCase(dxbc::Src::LU(uint32_t(xenos::BlendFactor::kConstantAlpha))); + a_.OpMov(factor_dest, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramBlendConstant_Vec, dxbc::Src::kWWWW)); + a_.OpBreak(); // kOneMinusConstantColor, kOneMinusConstantAlpha. - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kOneMinusConstantColor))); - DxbcOpCase(DxbcSrc::LU(uint32_t(xenos::BlendFactor::kOneMinusConstantAlpha))); - DxbcOpAdd(factor_dest, one_src, - -DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramBlendConstant_Vec, DxbcSrc::kWWWW)); - DxbcOpBreak(); + a_.OpCase( + dxbc::Src::LU(uint32_t(xenos::BlendFactor::kOneMinusConstantColor))); + a_.OpCase( + dxbc::Src::LU(uint32_t(xenos::BlendFactor::kOneMinusConstantAlpha))); + a_.OpAdd(factor_dest, one_src, + -dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramBlendConstant_Vec, dxbc::Src::kWWWW)); + a_.OpBreak(); // kZero default. - DxbcOpDefault(); - DxbcOpMov(factor_dest, DxbcSrc::LF(0.0f)); - DxbcOpBreak(); + a_.OpDefault(); + a_.OpMov(factor_dest, dxbc::Src::LF(0.0f)); + a_.OpBreak(); } void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs_AlphaToMask() { @@ -1742,47 +1752,47 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs_AlphaToMask() { // Check if alpha to coverage is enabled. 
system_constants_used_ |= 1ull << kSysConst_AlphaToMask_Index; - DxbcOpIf(true, DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_AlphaToMask_Vec) - .Select(kSysConst_AlphaToMask_Comp)); + a_.OpIf(true, dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_AlphaToMask_Vec) + .Select(kSysConst_AlphaToMask_Comp)); uint32_t temp = PushSystemTemp(); - DxbcDest temp_x_dest(DxbcDest::R(temp, 0b0001)); - DxbcSrc temp_x_src(DxbcSrc::R(temp, DxbcSrc::kXXXX)); - DxbcDest temp_y_dest(DxbcDest::R(temp, 0b0010)); - DxbcSrc temp_y_src(DxbcSrc::R(temp, DxbcSrc::kYYYY)); - DxbcDest temp_z_dest(DxbcDest::R(temp, 0b0100)); - DxbcSrc temp_z_src(DxbcSrc::R(temp, DxbcSrc::kZZZZ)); + dxbc::Dest temp_x_dest(dxbc::Dest::R(temp, 0b0001)); + dxbc::Src temp_x_src(dxbc::Src::R(temp, dxbc::Src::kXXXX)); + dxbc::Dest temp_y_dest(dxbc::Dest::R(temp, 0b0010)); + dxbc::Src temp_y_src(dxbc::Src::R(temp, dxbc::Src::kYYYY)); + dxbc::Dest temp_z_dest(dxbc::Dest::R(temp, 0b0100)); + dxbc::Src temp_z_src(dxbc::Src::R(temp, dxbc::Src::kZZZZ)); // Convert SSAA sample position to integer to temp.xy (not caring about the // resolution scale because it's not supported anywhere on the RTV output // path). in_position_used_ |= 0b0011; - DxbcOpFToU(DxbcDest::R(temp, 0b0011), - DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition))); + a_.OpFToU(dxbc::Dest::R(temp, 0b0011), + dxbc::Src::V(uint32_t(InOutRegister::kPSInPosition))); // Check if SSAA is enabled. 
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - DxbcOpIf(true, DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_SampleCountLog2_Vec) - .Select(kSysConst_SampleCountLog2_Comp + 1)); + a_.OpIf(true, dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_SampleCountLog2_Vec) + .Select(kSysConst_SampleCountLog2_Comp + 1)); { // Check if SSAA is 4x or 2x. system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - DxbcOpIf(true, DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_SampleCountLog2_Vec) - .Select(kSysConst_SampleCountLog2_Comp)); + a_.OpIf(true, dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_SampleCountLog2_Vec) + .Select(kSysConst_SampleCountLog2_Comp)); { // 4x SSAA. // Build the sample index in temp.z where X is the low bit and Y is the // high bit, for calculation of the dithering base according to the sample // position (left/right and top/bottom). - DxbcOpAnd(temp_z_dest, temp_y_src, DxbcSrc::LU(1)); - DxbcOpBFI(temp_z_dest, DxbcSrc::LU(31), DxbcSrc::LU(1), temp_z_src, - temp_x_src); + a_.OpAnd(temp_z_dest, temp_y_src, dxbc::Src::LU(1)); + a_.OpBFI(temp_z_dest, dxbc::Src::LU(31), dxbc::Src::LU(1), temp_z_src, + temp_x_src); // Top-left sample base: 0.75. // Top-right sample base: 0.5. // Bottom-left sample base: 0.25. @@ -1791,77 +1801,78 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs_AlphaToMask() { // Multiplication here will result in exactly 1 (power of 2 multiplied by // an integer). // Calculate the base. 
- DxbcOpUToF(temp_z_dest, temp_z_src); - DxbcOpMAd(temp_z_dest, temp_z_src, DxbcSrc::LF(0.25f), - DxbcSrc::LF(0.25f)); - DxbcOpFrc(temp_z_dest, temp_z_src); - DxbcOpAdd(temp_z_dest, DxbcSrc::LF(1.0f), -temp_z_src); + a_.OpUToF(temp_z_dest, temp_z_src); + a_.OpMAd(temp_z_dest, temp_z_src, dxbc::Src::LF(0.25f), + dxbc::Src::LF(0.25f)); + a_.OpFrc(temp_z_dest, temp_z_src); + a_.OpAdd(temp_z_dest, dxbc::Src::LF(1.0f), -temp_z_src); // Get the dithering threshold offset index for the guest pixel to temp.x, // Y - low bit of offset index, X - high bit. - DxbcOpUBFE(DxbcDest::R(temp, 0b0011), DxbcSrc::LU(1), DxbcSrc::LU(1), - DxbcSrc::R(temp)); - DxbcOpBFI(temp_x_dest, DxbcSrc::LU(1), DxbcSrc::LU(1), temp_x_src, - temp_y_src); + a_.OpUBFE(dxbc::Dest::R(temp, 0b0011), dxbc::Src::LU(1), dxbc::Src::LU(1), + dxbc::Src::R(temp)); + a_.OpBFI(temp_x_dest, dxbc::Src::LU(1), dxbc::Src::LU(1), temp_x_src, + temp_y_src); // Write the offset scale to temp.y. - DxbcOpMov(temp_y_dest, DxbcSrc::LF(-1.0f / 16.0f)); + a_.OpMov(temp_y_dest, dxbc::Src::LF(-1.0f / 16.0f)); } - DxbcOpElse(); + a_.OpElse(); { // 2x SSAA. // Check if the top (base 0.5) or the bottom (base 1.0) sample to temp.z, // and also extract the guest pixel Y parity to temp.y. - DxbcOpUBFE(DxbcDest::R(temp, 0b0110), DxbcSrc::LU(1), - DxbcSrc::LU(0, 1, 0, 0), temp_y_src); - DxbcOpMovC(temp_z_dest, temp_z_src, DxbcSrc::LF(1.0f), DxbcSrc::LF(0.5f)); + a_.OpUBFE(dxbc::Dest::R(temp, 0b0110), dxbc::Src::LU(1), + dxbc::Src::LU(0, 1, 0, 0), temp_y_src); + a_.OpMovC(temp_z_dest, temp_z_src, dxbc::Src::LF(1.0f), + dxbc::Src::LF(0.5f)); // Get the dithering threshold offset index for the guest pixel to temp.x, // Y - low bit of offset index, X - high bit. - DxbcOpBFI(temp_x_dest, DxbcSrc::LU(1), DxbcSrc::LU(1), temp_x_src, - temp_y_src); + a_.OpBFI(temp_x_dest, dxbc::Src::LU(1), dxbc::Src::LU(1), temp_x_src, + temp_y_src); // Write the offset scale to temp.y. 
- DxbcOpMov(temp_y_dest, DxbcSrc::LF(-1.0f / 8.0f)); + a_.OpMov(temp_y_dest, dxbc::Src::LF(-1.0f / 8.0f)); } // Close the 4x check. - DxbcOpEndIf(); + a_.OpEndIf(); } // SSAA is disabled. - DxbcOpElse(); + a_.OpElse(); { // Write the base 1.0 to temp.z. - DxbcOpMov(temp_z_dest, DxbcSrc::LF(1.0f)); + a_.OpMov(temp_z_dest, dxbc::Src::LF(1.0f)); // Get the dithering threshold offset index for the guest pixel to temp.x, // Y - low bit of offset index, X - high bit. - DxbcOpAnd(temp_y_dest, temp_y_src, DxbcSrc::LU(1)); - DxbcOpBFI(temp_x_dest, DxbcSrc::LU(1), DxbcSrc::LU(1), temp_x_src, - temp_y_src); + a_.OpAnd(temp_y_dest, temp_y_src, dxbc::Src::LU(1)); + a_.OpBFI(temp_x_dest, dxbc::Src::LU(1), dxbc::Src::LU(1), temp_x_src, + temp_y_src); // Write the offset scale to temp.y. - DxbcOpMov(temp_y_dest, DxbcSrc::LF(-1.0f / 4.0f)); + a_.OpMov(temp_y_dest, dxbc::Src::LF(-1.0f / 4.0f)); } // Close the 2x/4x check. - DxbcOpEndIf(); + a_.OpEndIf(); // Extract the dithering offset to temp.x for the quad pixel index. - DxbcOpIShL(temp_x_dest, temp_x_src, DxbcSrc::LU(1)); + a_.OpIShL(temp_x_dest, temp_x_src, dxbc::Src::LU(1)); system_constants_used_ |= 1ull << kSysConst_AlphaToMask_Index; - DxbcOpUBFE(temp_x_dest, DxbcSrc::LU(2), temp_x_src, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_AlphaToMask_Vec) - .Select(kSysConst_AlphaToMask_Comp)); - DxbcOpUToF(temp_x_dest, temp_x_src); + a_.OpUBFE(temp_x_dest, dxbc::Src::LU(2), temp_x_src, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_AlphaToMask_Vec) + .Select(kSysConst_AlphaToMask_Comp)); + a_.OpUToF(temp_x_dest, temp_x_src); // Combine the base and the offset to temp.x. 
- DxbcOpMAd(temp_x_dest, temp_x_src, temp_y_src, temp_z_src); + a_.OpMAd(temp_x_dest, temp_x_src, temp_y_src, temp_z_src); // Check if alpha of oC0 is at or greater than the threshold (handling NaN // according to the Direct3D 11.3 functional specification, as not covered). - DxbcOpGE(temp_x_dest, DxbcSrc::R(system_temps_color_[0], DxbcSrc::kWWWW), - temp_x_src); + a_.OpGE(temp_x_dest, dxbc::Src::R(system_temps_color_[0], dxbc::Src::kWWWW), + temp_x_src); // Discard the SSAA sample if it's not covered. - DxbcOpDiscard(false, temp_x_src); + a_.OpDiscard(false, temp_x_src); // Release temp. PopSystemTemp(); // Close the alpha to coverage check. - DxbcOpEndIf(); + a_.OpEndIf(); } void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() { @@ -1882,28 +1893,28 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() { // Apply the exponent bias after alpha to coverage because it needs the // unbiased alpha from the shader system_constants_used_ |= 1ull << kSysConst_ColorExpBias_Index; - DxbcOpMul(DxbcDest::R(system_temps_color_[i]), - DxbcSrc::R(system_temps_color_[i]), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_ColorExpBias_Vec) - .Select(i)); + a_.OpMul(dxbc::Dest::R(system_temps_color_[i]), + dxbc::Src::R(system_temps_color_[i]), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_ColorExpBias_Vec) + .Select(i)); // Convert to gamma space - this is incorrect, since it must be done after // blending on the Xbox 360, but this is just one of many blending issues in // the RTV path. 
system_constants_used_ |= 1ull << kSysConst_Flags_Index; - DxbcOpAnd(DxbcDest::R(gamma_temp, 0b0001), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_Flags_Vec) - .Select(kSysConst_Flags_Comp), - DxbcSrc::LU(kSysFlag_Color0Gamma << i)); - DxbcOpIf(true, DxbcSrc::R(gamma_temp, DxbcSrc::kXXXX)); + a_.OpAnd(dxbc::Dest::R(gamma_temp, 0b0001), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp), + dxbc::Src::LU(kSysFlag_Color0Gamma << i)); + a_.OpIf(true, dxbc::Src::R(gamma_temp, dxbc::Src::kXXXX)); for (uint32_t j = 0; j < 3; ++j) { ConvertPWLGamma(true, system_temps_color_[i], j, system_temps_color_[i], j, gamma_temp, 0, gamma_temp, 1); } - DxbcOpEndIf(); + a_.OpEndIf(); } // Release gamma_temp. PopSystemTemp(); @@ -1919,27 +1930,27 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToRTVs() { // Host RT i, guest RT j. for (uint32_t i = 0; i < 4; ++i) { // mask = map.iiii == (0, 1, 2, 3) - DxbcOpIEq(DxbcDest::R(remap_movc_mask_temp, shader_writes_color_targets), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_ColorOutputMap_Vec) - .Select(i), - DxbcSrc::LU(0, 1, 2, 3)); + a_.OpIEq(dxbc::Dest::R(remap_movc_mask_temp, shader_writes_color_targets), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_ColorOutputMap_Vec) + .Select(i), + dxbc::Src::LU(0, 1, 2, 3)); bool guest_rt_first = true; for (uint32_t j = 0; j < 4; ++j) { // If map.i == j, move guest color j to the temporary host color. if (!(shader_writes_color_targets & (1 << j))) { continue; } - DxbcOpMovC(DxbcDest::R(remap_movc_target_temp), - DxbcSrc::R(remap_movc_mask_temp).Select(j), - DxbcSrc::R(system_temps_color_[j]), - guest_rt_first ? 
DxbcSrc::LF(0.0f) - : DxbcSrc::R(remap_movc_target_temp)); + a_.OpMovC(dxbc::Dest::R(remap_movc_target_temp), + dxbc::Src::R(remap_movc_mask_temp).Select(j), + dxbc::Src::R(system_temps_color_[j]), + guest_rt_first ? dxbc::Src::LF(0.0f) + : dxbc::Src::R(remap_movc_target_temp)); guest_rt_first = false; } // Write the remapped color to host render target i. - DxbcOpMov(DxbcDest::O(i), DxbcSrc::R(remap_movc_target_temp)); + a_.OpMov(dxbc::Dest::O(i), dxbc::Src::R(remap_movc_target_temp)); } // Release remap_movc_mask_temp and remap_movc_target_temp. PopSystemTemp(2); @@ -1967,16 +1978,16 @@ void DxbcShaderTranslator::CompletePixelShader_DSV_DepthTo24Bit() { // representation. temp = PushSystemTemp(); in_position_used_ |= 0b0100; - DxbcOpMov( - DxbcDest::R(temp, 0b0001), - DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition), DxbcSrc::kZZZZ), + a_.OpMov( + dxbc::Dest::R(temp, 0b0001), + dxbc::Src::V(uint32_t(InOutRegister::kPSInPosition), dxbc::Src::kZZZZ), true); } - DxbcDest temp_x_dest(DxbcDest::R(temp, 0b0001)); - DxbcSrc temp_x_src(DxbcSrc::R(temp, DxbcSrc::kXXXX)); - DxbcDest temp_y_dest(DxbcDest::R(temp, 0b0010)); - DxbcSrc temp_y_src(DxbcSrc::R(temp, DxbcSrc::kYYYY)); + dxbc::Dest temp_x_dest(dxbc::Dest::R(temp, 0b0001)); + dxbc::Src temp_x_src(dxbc::Src::R(temp, dxbc::Src::kXXXX)); + dxbc::Dest temp_y_dest(dxbc::Dest::R(temp, 0b0010)); + dxbc::Src temp_y_src(dxbc::Src::R(temp, dxbc::Src::kYYYY)); if (GetDxbcShaderModification().depth_stencil_mode == Modification::DepthStencilMode::kFloat24Truncating) { @@ -1989,89 +2000,89 @@ void DxbcShaderTranslator::CompletePixelShader_DSV_DepthTo24Bit() { // The smallest denormalized 20e4 number is -34 - should drop 23 mantissa // bits at -34. // Anything smaller than 2^-34 becomes 0. - DxbcDest truncate_dest(shader_writes_depth ? DxbcDest::ODepth() - : DxbcDest::ODepthLE()); + dxbc::Dest truncate_dest(shader_writes_depth ? 
dxbc::Dest::ODepth() + : dxbc::Dest::ODepthLE()); // Check if the number is representable as a float24 after truncation - the // exponent is at least -34. - DxbcOpUGE(temp_y_dest, temp_x_src, DxbcSrc::LU(0x2E800000)); - DxbcOpIf(true, temp_y_src); + a_.OpUGE(temp_y_dest, temp_x_src, dxbc::Src::LU(0x2E800000)); + a_.OpIf(true, temp_y_src); { // Extract the biased float32 exponent to temp.y. // temp.y = 113+ at exponent -14+. // temp.y = 93 at exponent -34. - DxbcOpUBFE(temp_y_dest, DxbcSrc::LU(8), DxbcSrc::LU(23), temp_x_src); + a_.OpUBFE(temp_y_dest, dxbc::Src::LU(8), dxbc::Src::LU(23), temp_x_src); // Convert exponent to the unclamped number of bits to truncate. // 116 - 113 = 3. // 116 - 93 = 23. // temp.y = 3+ at exponent -14+. // temp.y = 23 at exponent -34. - DxbcOpIAdd(temp_y_dest, DxbcSrc::LI(116), -temp_y_src); + a_.OpIAdd(temp_y_dest, dxbc::Src::LI(116), -temp_y_src); // Clamp the truncated bit count to drop 3 bits of any normal number. // Exponents below -34 are handled separately. // temp.y = 3 at exponent -14. // temp.y = 23 at exponent -34. - DxbcOpIMax(temp_y_dest, temp_y_src, DxbcSrc::LI(3)); + a_.OpIMax(temp_y_dest, temp_y_src, dxbc::Src::LI(3)); // Truncate the mantissa - fill the low bits with zeros. - DxbcOpBFI(truncate_dest, temp_y_src, DxbcSrc::LU(0), DxbcSrc::LU(0), - temp_x_src); + a_.OpBFI(truncate_dest, temp_y_src, dxbc::Src::LU(0), dxbc::Src::LU(0), + temp_x_src); } // The number is not representable as float24 after truncation - zero. - DxbcOpElse(); - DxbcOpMov(truncate_dest, DxbcSrc::LF(0.0f)); + a_.OpElse(); + a_.OpMov(truncate_dest, dxbc::Src::LF(0.0f)); // Close the non-zero result check. - DxbcOpEndIf(); + a_.OpEndIf(); } else { // Properly convert to 20e4, with rounding to the nearest even. PreClampedDepthTo20e4(temp, 0, temp, 0, temp, 1); // Convert back to float32. // https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp // Unpack the exponent to temp.y. 
- DxbcOpUShR(temp_y_dest, temp_x_src, DxbcSrc::LU(20)); + a_.OpUShR(temp_y_dest, temp_x_src, dxbc::Src::LU(20)); // Unpack the mantissa to temp.x. - DxbcOpAnd(temp_x_dest, temp_x_src, DxbcSrc::LU(0xFFFFF)); + a_.OpAnd(temp_x_dest, temp_x_src, dxbc::Src::LU(0xFFFFF)); // Check if the number is denormalized. - DxbcOpIf(false, temp_y_src); + a_.OpIf(false, temp_y_src); { // Check if the number is non-zero (if the mantissa isn't zero - the // exponent is known to be zero at this point). - DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); { // Normalize the mantissa. // Note that HLSL firstbithigh(x) is compiled to DXBC like: // `x ? 31 - firstbit_hi(x) : -1` // (returns the index from the LSB, not the MSB, but -1 for zero too). // temp.y = firstbit_hi(mantissa) - DxbcOpFirstBitHi(temp_y_dest, temp_x_src); + a_.OpFirstBitHi(temp_y_dest, temp_x_src); // temp.y = 20 - firstbithigh(mantissa) // Or: // temp.y = 20 - (31 - firstbit_hi(mantissa)) - DxbcOpIAdd(temp_y_dest, temp_y_src, DxbcSrc::LI(20 - 31)); + a_.OpIAdd(temp_y_dest, temp_y_src, dxbc::Src::LI(20 - 31)); // mantissa = mantissa << (20 - firstbithigh(mantissa)) // AND 0xFFFFF not needed after this - BFI will do it. - DxbcOpIShL(temp_x_dest, temp_x_src, temp_y_src); + a_.OpIShL(temp_x_dest, temp_x_src, temp_y_src); // Get the normalized exponent. // exponent = 1 - (20 - firstbithigh(mantissa)) - DxbcOpIAdd(temp_y_dest, DxbcSrc::LI(1), -temp_y_src); + a_.OpIAdd(temp_y_dest, dxbc::Src::LI(1), -temp_y_src); } // The number is zero. - DxbcOpElse(); + a_.OpElse(); { // Set the unbiased exponent to -112 for zero - 112 will be added later, // resulting in zero float32. - DxbcOpMov(temp_y_dest, DxbcSrc::LI(-112)); + a_.OpMov(temp_y_dest, dxbc::Src::LI(-112)); } // Close the non-zero check. - DxbcOpEndIf(); + a_.OpEndIf(); } // Close the denormal check. - DxbcOpEndIf(); + a_.OpEndIf(); // Bias the exponent and move it to the correct location in float32 to // temp.y. 
- DxbcOpIMAd(temp_y_dest, temp_y_src, DxbcSrc::LI(1 << 23), - DxbcSrc::LI(112 << 23)); + a_.OpIMAd(temp_y_dest, temp_y_src, dxbc::Src::LI(1 << 23), + dxbc::Src::LI(112 << 23)); // Combine the mantissa and the exponent into the result. - DxbcOpBFI(DxbcDest::ODepth(), DxbcSrc::LU(20), DxbcSrc::LU(3), temp_x_src, - temp_y_src); + a_.OpBFI(dxbc::Dest::ODepth(), dxbc::Src::LU(20), dxbc::Src::LU(3), + temp_x_src, temp_y_src); } if (!shader_writes_depth) { @@ -2081,25 +2092,25 @@ void DxbcShaderTranslator::CompletePixelShader_DSV_DepthTo24Bit() { } void DxbcShaderTranslator::CompletePixelShader_ROV_AlphaToMaskSample( - uint32_t sample_index, float threshold_base, DxbcSrc threshold_offset, + uint32_t sample_index, float threshold_base, dxbc::Src threshold_offset, float threshold_offset_scale, uint32_t temp, uint32_t temp_component) { - DxbcDest temp_dest(DxbcDest::R(temp, 1 << temp_component)); - DxbcSrc temp_src(DxbcSrc::R(temp).Select(temp_component)); + dxbc::Dest temp_dest(dxbc::Dest::R(temp, 1 << temp_component)); + dxbc::Src temp_src(dxbc::Src::R(temp).Select(temp_component)); // Calculate the threshold. - DxbcOpMAd(temp_dest, threshold_offset, DxbcSrc::LF(-threshold_offset_scale), - DxbcSrc::LF(threshold_base)); + a_.OpMAd(temp_dest, threshold_offset, dxbc::Src::LF(-threshold_offset_scale), + dxbc::Src::LF(threshold_base)); // Check if alpha of oC0 is at or greater than the threshold (handling NaN // according to the Direct3D 11.3 functional specification, as not covered). - DxbcOpGE(temp_dest, DxbcSrc::R(system_temps_color_[0], DxbcSrc::kWWWW), - temp_src); + a_.OpGE(temp_dest, dxbc::Src::R(system_temps_color_[0], dxbc::Src::kWWWW), + temp_src); // Keep all bits in system_temp_rov_params_.x but the ones that need to be // removed in case of failure (coverage and deferred depth/stencil write are // removed). 
- DxbcOpOr(temp_dest, temp_src, - DxbcSrc::LU(~(uint32_t(0b00010001) << sample_index))); + a_.OpOr(temp_dest, temp_src, + dxbc::Src::LU(~(uint32_t(0b00010001) << sample_index))); // Clear the coverage for samples that have failed the test. - DxbcOpAnd(DxbcDest::R(system_temp_rov_params_, 0b0001), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), temp_src); + a_.OpAnd(dxbc::Dest::R(system_temp_rov_params_, 0b0001), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), temp_src); } void DxbcShaderTranslator::CompletePixelShader_ROV_AlphaToMask() { @@ -2110,14 +2121,14 @@ void DxbcShaderTranslator::CompletePixelShader_ROV_AlphaToMask() { // Check if alpha to coverage is enabled. system_constants_used_ |= 1ull << kSysConst_AlphaToMask_Index; - DxbcOpIf(true, DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_AlphaToMask_Vec) - .Select(kSysConst_AlphaToMask_Comp)); + a_.OpIf(true, dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_AlphaToMask_Vec) + .Select(kSysConst_AlphaToMask_Comp)); uint32_t temp = PushSystemTemp(); - DxbcDest temp_x_dest(DxbcDest::R(temp, 0b0001)); - DxbcSrc temp_x_src(DxbcSrc::R(temp, DxbcSrc::kXXXX)); + dxbc::Dest temp_x_dest(dxbc::Dest::R(temp, 0b0001)); + dxbc::Src temp_x_src(dxbc::Src::R(temp, dxbc::Src::kXXXX)); // Get the dithering threshold offset index for the pixel, Y - low bit of // offset index, X - high bit, and extract the offset and convert it to @@ -2125,20 +2136,20 @@ void DxbcShaderTranslator::CompletePixelShader_ROV_AlphaToMask() { // preserve the idea of dithering. // temp.x = alpha to coverage offset as float 0.0...3.0. 
in_position_used_ |= 0b0011; - DxbcOpFToU(DxbcDest::R(temp, 0b0011), - DxbcSrc::V(uint32_t(InOutRegister::kPSInPosition))); - DxbcOpAnd(DxbcDest::R(temp, 0b0010), DxbcSrc::R(temp, DxbcSrc::kYYYY), - DxbcSrc::LU(1)); - DxbcOpBFI(temp_x_dest, DxbcSrc::LU(1), DxbcSrc::LU(1), temp_x_src, - DxbcSrc::R(temp, DxbcSrc::kYYYY)); - DxbcOpIShL(temp_x_dest, temp_x_src, DxbcSrc::LU(1)); + a_.OpFToU(dxbc::Dest::R(temp, 0b0011), + dxbc::Src::V(uint32_t(InOutRegister::kPSInPosition))); + a_.OpAnd(dxbc::Dest::R(temp, 0b0010), dxbc::Src::R(temp, dxbc::Src::kYYYY), + dxbc::Src::LU(1)); + a_.OpBFI(temp_x_dest, dxbc::Src::LU(1), dxbc::Src::LU(1), temp_x_src, + dxbc::Src::R(temp, dxbc::Src::kYYYY)); + a_.OpIShL(temp_x_dest, temp_x_src, dxbc::Src::LU(1)); system_constants_used_ |= 1ull << kSysConst_AlphaToMask_Index; - DxbcOpUBFE(temp_x_dest, DxbcSrc::LU(2), temp_x_src, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_AlphaToMask_Vec) - .Select(kSysConst_AlphaToMask_Comp)); - DxbcOpUToF(temp_x_dest, temp_x_src); + a_.OpUBFE(temp_x_dest, dxbc::Src::LU(2), temp_x_src, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_AlphaToMask_Vec) + .Select(kSysConst_AlphaToMask_Comp)); + a_.OpUToF(temp_x_dest, temp_x_src); // The test must effect not only the coverage bits, but also the deferred // depth/stencil write bits since the coverage is zeroed for samples that have @@ -2148,17 +2159,17 @@ void DxbcShaderTranslator::CompletePixelShader_ROV_AlphaToMask() { // Check if MSAA is enabled. 
system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - DxbcOpIf(true, DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_SampleCountLog2_Vec) - .Select(kSysConst_SampleCountLog2_Comp + 1)); + a_.OpIf(true, dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_SampleCountLog2_Vec) + .Select(kSysConst_SampleCountLog2_Comp + 1)); { // Check if MSAA is 4x or 2x. system_constants_used_ |= 1ull << kSysConst_SampleCountLog2_Index; - DxbcOpIf(true, DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_SampleCountLog2_Vec) - .Select(kSysConst_SampleCountLog2_Comp)); + a_.OpIf(true, dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_SampleCountLog2_Vec) + .Select(kSysConst_SampleCountLog2_Comp)); // 4x MSAA. CompletePixelShader_ROV_AlphaToMaskSample(0, 0.75f, temp_x_src, 1.0f / 16.0f, temp, 1); @@ -2169,34 +2180,34 @@ void DxbcShaderTranslator::CompletePixelShader_ROV_AlphaToMask() { CompletePixelShader_ROV_AlphaToMaskSample(3, 1.0f, temp_x_src, 1.0f / 16.0f, temp, 1); // 2x MSAA. - DxbcOpElse(); + a_.OpElse(); CompletePixelShader_ROV_AlphaToMaskSample(0, 0.5f, temp_x_src, 1.0f / 8.0f, temp, 1); CompletePixelShader_ROV_AlphaToMaskSample(1, 1.0f, temp_x_src, 1.0f / 8.0f, temp, 1); // Close the 4x check. - DxbcOpEndIf(); + a_.OpEndIf(); } // MSAA is disabled. - DxbcOpElse(); + a_.OpElse(); CompletePixelShader_ROV_AlphaToMaskSample(0, 1.0f, temp_x_src, 1.0f / 4.0f, temp, 1); // Close the 2x/4x check. - DxbcOpEndIf(); + a_.OpEndIf(); // Check if any sample is still covered (the mask includes both 0:3 and 4:7 // parts because there may be samples which passed alpha to coverage, but not // stencil test, and the stencil buffer needs to be modified - in this case, // samples would be dropped in 0:3, but not in 4:7). 
- DxbcOpAnd(temp_x_dest, DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(0b11111111)); - DxbcOpRetC(false, temp_x_src); + a_.OpAnd(temp_x_dest, dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LU(0b11111111)); + a_.OpRetC(false, temp_x_src); // Release temp. PopSystemTemp(); // Close the alpha to coverage check. - DxbcOpEndIf(); + a_.OpEndIf(); } void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { @@ -2204,14 +2215,14 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { CompletePixelShader_ROV_AlphaToMask(); uint32_t temp = PushSystemTemp(); - DxbcDest temp_x_dest(DxbcDest::R(temp, 0b0001)); - DxbcSrc temp_x_src(DxbcSrc::R(temp, DxbcSrc::kXXXX)); - DxbcDest temp_y_dest(DxbcDest::R(temp, 0b0010)); - DxbcSrc temp_y_src(DxbcSrc::R(temp, DxbcSrc::kYYYY)); - DxbcDest temp_z_dest(DxbcDest::R(temp, 0b0100)); - DxbcSrc temp_z_src(DxbcSrc::R(temp, DxbcSrc::kZZZZ)); - DxbcDest temp_w_dest(DxbcDest::R(temp, 0b1000)); - DxbcSrc temp_w_src(DxbcSrc::R(temp, DxbcSrc::kWWWW)); + dxbc::Dest temp_x_dest(dxbc::Dest::R(temp, 0b0001)); + dxbc::Src temp_x_src(dxbc::Src::R(temp, dxbc::Src::kXXXX)); + dxbc::Dest temp_y_dest(dxbc::Dest::R(temp, 0b0010)); + dxbc::Src temp_y_src(dxbc::Src::R(temp, dxbc::Src::kYYYY)); + dxbc::Dest temp_z_dest(dxbc::Dest::R(temp, 0b0100)); + dxbc::Src temp_z_src(dxbc::Src::R(temp, dxbc::Src::kZZZZ)); + dxbc::Dest temp_w_dest(dxbc::Dest::R(temp, 0b1000)); + dxbc::Src temp_w_src(dxbc::Src::R(temp, dxbc::Src::kWWWW)); // Do late depth/stencil test (which includes writing) if needed or deferred // depth writing. @@ -2220,36 +2231,36 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { for (uint32_t i = 0; i < 4; ++i) { // Get if need to write to temp.x. // temp.x = whether the depth sample needs to be written. 
- DxbcOpAnd(temp_x_dest, - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(1 << (4 + i))); + a_.OpAnd(temp_x_dest, + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LU(1 << (4 + i))); // Check if need to write. // temp.x = free. - DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); { // Write the new depth/stencil. if (uav_index_edram_ == kBindingIndexUnallocated) { uav_index_edram_ = uav_count_++; } - DxbcOpStoreUAVTyped( - DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY), 1, - DxbcSrc::R(system_temp_depth_stencil_).Select(i)); + a_.OpStoreUAVTyped( + dxbc::Dest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY), 1, + dxbc::Src::R(system_temp_depth_stencil_).Select(i)); } // Close the write check. - DxbcOpEndIf(); + a_.OpEndIf(); // Go to the next sample (samples are at +0, +80, +1, +81, so need to do // +80, -79, +80 and -81 after each sample). if (i < 3) { system_constants_used_ |= 1ull << kSysConst_EdramResolutionSquareScale_Index; - DxbcOpIMAd(DxbcDest::R(system_temp_rov_params_, 0b0010), - DxbcSrc::LI((i & 1) ? -78 - i : 80), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramResolutionSquareScale_Vec) - .Select(kSysConst_EdramResolutionSquareScale_Comp), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kYYYY)); + a_.OpIMAd(dxbc::Dest::R(system_temp_rov_params_, 0b0010), + dxbc::Src::LI((i & 1) ? 
-78 - i : 80), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramResolutionSquareScale_Vec) + .Select(kSysConst_EdramResolutionSquareScale_Comp), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kYYYY)); } } } else { @@ -2260,10 +2271,11 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // Check if any sample is still covered after depth testing and writing, // skip color writing completely in this case. // temp.x = whether any sample is still covered. - DxbcOpAnd(temp_x_dest, DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(0b1111)); + a_.OpAnd(temp_x_dest, + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LU(0b1111)); // temp.x = free. - DxbcOpRetC(false, temp_x_src); + a_.OpRetC(false, temp_x_src); } // Write color values. @@ -2274,10 +2286,10 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { continue; } - DxbcSrc keep_mask_vec_src( - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramRTKeepMask_Vec + (i >> 1))); + dxbc::Src keep_mask_vec_src( + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramRTKeepMask_Vec + (i >> 1))); uint32_t keep_mask_component = (i & 1) * 2; uint32_t keep_mask_swizzle = keep_mask_component * 0b0101 + 0b0100; @@ -2288,150 +2300,150 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // Combine both parts of the keep mask to check if both are 0xFFFFFFFF. // temp.x = whether all bits need to be kept. 
system_constants_used_ |= 1ull << kSysConst_EdramRTKeepMask_Index; - DxbcOpAnd(temp_x_dest, keep_mask_vec_src.Select(keep_mask_component), - keep_mask_vec_src.Select(keep_mask_component + 1)); + a_.OpAnd(temp_x_dest, keep_mask_vec_src.Select(keep_mask_component), + keep_mask_vec_src.Select(keep_mask_component + 1)); // Flip the bits so both UINT32_MAX would result in 0 - not writing. // temp.x = whether any bits need to be written. - DxbcOpNot(temp_x_dest, temp_x_src); + a_.OpNot(temp_x_dest, temp_x_src); // Get the bits that will be used for checking wherther the render target // has been written to on the taken execution path - if the write mask is // empty, AND zero with the test bit to always get zero. // temp.x = bits for checking whether the render target has been written to. - DxbcOpMovC(temp_x_dest, temp_x_src, - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(0)); + a_.OpMovC(temp_x_dest, temp_x_src, + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LU(0)); // Check if the render target was written to on the execution path. // temp.x = whether anything was written and needs to be stored. - DxbcOpAnd(temp_x_dest, temp_x_src, DxbcSrc::LU(1 << (8 + i))); + a_.OpAnd(temp_x_dest, temp_x_src, dxbc::Src::LU(1 << (8 + i))); // Check if need to write anything to the render target. // temp.x = free. - DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); // Apply the exponent bias after alpha to coverage because it needs the // unbiased alpha from the shader. 
system_constants_used_ |= 1ull << kSysConst_ColorExpBias_Index; - DxbcOpMul(DxbcDest::R(system_temps_color_[i]), - DxbcSrc::R(system_temps_color_[i]), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_ColorExpBias_Vec) - .Select(i)); + a_.OpMul(dxbc::Dest::R(system_temps_color_[i]), + dxbc::Src::R(system_temps_color_[i]), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_ColorExpBias_Vec) + .Select(i)); // Add the EDRAM bases of the render target to system_temp_rov_params_.zw. system_constants_used_ |= 1ull << kSysConst_EdramRTBaseDwordsScaled_Index; - DxbcOpIAdd(DxbcDest::R(system_temp_rov_params_, 0b1100), - DxbcSrc::R(system_temp_rov_params_), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramRTBaseDwordsScaled_Vec) - .Select(i)); + a_.OpIAdd(dxbc::Dest::R(system_temp_rov_params_, 0b1100), + dxbc::Src::R(system_temp_rov_params_), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramRTBaseDwordsScaled_Vec) + .Select(i)); - DxbcSrc rt_blend_factors_ops_src( - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramRTBlendFactorsOps_Vec) + dxbc::Src rt_blend_factors_ops_src( + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramRTBlendFactorsOps_Vec) .Select(i)); - DxbcSrc rt_clamp_vec_src( - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramRTClamp_Vec + i)); + dxbc::Src rt_clamp_vec_src( + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramRTClamp_Vec + i)); // Get if not blending to pack the color once for all 4 samples. // temp.x = whether blending is disabled. 
system_constants_used_ |= 1ull << kSysConst_EdramRTBlendFactorsOps_Index; - DxbcOpIEq(temp_x_dest, rt_blend_factors_ops_src, DxbcSrc::LU(0x00010001)); + a_.OpIEq(temp_x_dest, rt_blend_factors_ops_src, dxbc::Src::LU(0x00010001)); // Check if not blending. // temp.x = free. - DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); { // Clamp the color to the render target's representable range - will be // packed. system_constants_used_ |= 1ull << kSysConst_EdramRTClamp_Index; - DxbcOpMax(DxbcDest::R(system_temps_color_[i]), - DxbcSrc::R(system_temps_color_[i]), - rt_clamp_vec_src.Swizzle(0b01000000)); - DxbcOpMin(DxbcDest::R(system_temps_color_[i]), - DxbcSrc::R(system_temps_color_[i]), - rt_clamp_vec_src.Swizzle(0b11101010)); + a_.OpMax(dxbc::Dest::R(system_temps_color_[i]), + dxbc::Src::R(system_temps_color_[i]), + rt_clamp_vec_src.Swizzle(0b01000000)); + a_.OpMin(dxbc::Dest::R(system_temps_color_[i]), + dxbc::Src::R(system_temps_color_[i]), + rt_clamp_vec_src.Swizzle(0b11101010)); // Pack the color once if blending. // temp.xy = packed color. ROV_PackPreClampedColor(i, system_temps_color_[i], temp, 0, temp, 2, temp, 3); } // Blending is enabled. - DxbcOpElse(); + a_.OpElse(); { // Get if the blending source color is fixed-point for clamping if it is. // temp.x = whether color is fixed-point. system_constants_used_ |= 1ull << kSysConst_EdramRTFormatFlags_Index; - DxbcOpAnd(temp_x_dest, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramRTFormatFlags_Vec) - .Select(i), - DxbcSrc::LU(kRTFormatFlag_FixedPointColor)); + a_.OpAnd(temp_x_dest, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramRTFormatFlags_Vec) + .Select(i), + dxbc::Src::LU(kRTFormatFlag_FixedPointColor)); // Check if the blending source color is fixed-point and needs clamping. // temp.x = free. 
- DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); { // Clamp the blending source color if needed. system_constants_used_ |= 1ull << kSysConst_EdramRTClamp_Index; - DxbcOpMax(DxbcDest::R(system_temps_color_[i], 0b0111), - DxbcSrc::R(system_temps_color_[i]), - rt_clamp_vec_src.Select(0)); - DxbcOpMin(DxbcDest::R(system_temps_color_[i], 0b0111), - DxbcSrc::R(system_temps_color_[i]), - rt_clamp_vec_src.Select(2)); + a_.OpMax(dxbc::Dest::R(system_temps_color_[i], 0b0111), + dxbc::Src::R(system_temps_color_[i]), + rt_clamp_vec_src.Select(0)); + a_.OpMin(dxbc::Dest::R(system_temps_color_[i], 0b0111), + dxbc::Src::R(system_temps_color_[i]), + rt_clamp_vec_src.Select(2)); } // Close the fixed-point color check. - DxbcOpEndIf(); + a_.OpEndIf(); // Get if the blending source alpha is fixed-point for clamping if it is. // temp.x = whether alpha is fixed-point. system_constants_used_ |= 1ull << kSysConst_EdramRTFormatFlags_Index; - DxbcOpAnd(temp_x_dest, - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramRTFormatFlags_Vec) - .Select(i), - DxbcSrc::LU(kRTFormatFlag_FixedPointAlpha)); + a_.OpAnd(temp_x_dest, + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramRTFormatFlags_Vec) + .Select(i), + dxbc::Src::LU(kRTFormatFlag_FixedPointAlpha)); // Check if the blending source alpha is fixed-point and needs clamping. // temp.x = free. - DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); { // Clamp the blending source alpha if needed. 
system_constants_used_ |= 1ull << kSysConst_EdramRTClamp_Index; - DxbcOpMax(DxbcDest::R(system_temps_color_[i], 0b1000), - DxbcSrc::R(system_temps_color_[i], DxbcSrc::kWWWW), - rt_clamp_vec_src.Select(1)); - DxbcOpMin(DxbcDest::R(system_temps_color_[i], 0b1000), - DxbcSrc::R(system_temps_color_[i], DxbcSrc::kWWWW), - rt_clamp_vec_src.Select(3)); + a_.OpMax(dxbc::Dest::R(system_temps_color_[i], 0b1000), + dxbc::Src::R(system_temps_color_[i], dxbc::Src::kWWWW), + rt_clamp_vec_src.Select(1)); + a_.OpMin(dxbc::Dest::R(system_temps_color_[i], 0b1000), + dxbc::Src::R(system_temps_color_[i], dxbc::Src::kWWWW), + rt_clamp_vec_src.Select(3)); } // Close the fixed-point alpha check. - DxbcOpEndIf(); + a_.OpEndIf(); // Break register dependency in the color sample raster operation. // temp.xy = 0 instead of packed color. - DxbcOpMov(DxbcDest::R(temp, 0b0011), DxbcSrc::LU(0)); + a_.OpMov(dxbc::Dest::R(temp, 0b0011), dxbc::Src::LU(0)); } - DxbcOpEndIf(); + a_.OpEndIf(); - DxbcSrc rt_format_flags_src( - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramRTFormatFlags_Vec) + dxbc::Src rt_format_flags_src( + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramRTFormatFlags_Vec) .Select(i)); // Blend, mask and write all samples. for (uint32_t j = 0; j < 4; ++j) { // Get if the sample is covered. // temp.z = whether the sample is covered. - DxbcOpAnd(temp_z_dest, - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kXXXX), - DxbcSrc::LU(1 << j)); + a_.OpAnd(temp_z_dest, + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kXXXX), + dxbc::Src::LU(1 << j)); // Check if the sample is covered. // temp.z = free. - DxbcOpIf(true, temp_z_src); + a_.OpIf(true, temp_z_src); // Only temp.xy are used at this point (containing the packed color from // the shader if not blending). 
@@ -2448,22 +2460,22 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // Get if need to keep any components to temp.z. // temp.z = whether any components must be kept (OR of keep masks). system_constants_used_ |= 1ull << kSysConst_EdramRTKeepMask_Index; - DxbcOpOr(temp_z_dest, keep_mask_vec_src.Select(keep_mask_component), - keep_mask_vec_src.Select(keep_mask_component + 1)); + a_.OpOr(temp_z_dest, keep_mask_vec_src.Select(keep_mask_component), + keep_mask_vec_src.Select(keep_mask_component + 1)); // Blending isn't done if it's 1 * source + 0 * destination. But since the // previous color also needs to be loaded if any original components need // to be kept, force the blend control to something with blending in this // case in temp.z. // temp.z = blending mode used to check if need to load. system_constants_used_ |= 1ull << kSysConst_EdramRTBlendFactorsOps_Index; - DxbcOpMovC(temp_z_dest, temp_z_src, DxbcSrc::LU(0), - rt_blend_factors_ops_src); + a_.OpMovC(temp_z_dest, temp_z_src, dxbc::Src::LU(0), + rt_blend_factors_ops_src); // Get if the blend control register requires loading the color to temp.z. // temp.z = whether need to load the color. - DxbcOpINE(temp_z_dest, temp_z_src, DxbcSrc::LU(0x00010001)); + a_.OpINE(temp_z_dest, temp_z_src, dxbc::Src::LU(0x00010001)); // Check if need to do something with the previous color. // temp.z = free. - DxbcOpIf(true, temp_z_src); + a_.OpIf(true, temp_z_src); { // ********************************************************************* // Loading the previous color to temp.zw. @@ -2472,72 +2484,72 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // Get if the format is 64bpp to temp.z. // temp.z = whether the render target is 64bpp. 
system_constants_used_ |= 1ull << kSysConst_EdramRTFormatFlags_Index; - DxbcOpAnd(temp_z_dest, rt_format_flags_src, - DxbcSrc::LU(kRTFormatFlag_64bpp)); + a_.OpAnd(temp_z_dest, rt_format_flags_src, + dxbc::Src::LU(kRTFormatFlag_64bpp)); // Check if the format is 64bpp. // temp.z = free. - DxbcOpIf(true, temp_z_src); + a_.OpIf(true, temp_z_src); { // Load the lower 32 bits of the 64bpp color to temp.z. // temp.z = lower 32 bits of the packed color. if (uav_index_edram_ == kBindingIndexUnallocated) { uav_index_edram_ = uav_count_++; } - DxbcOpLdUAVTyped( - temp_z_dest, DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW), - 1, - DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEdram), - DxbcSrc::kXXXX)); + a_.OpLdUAVTyped( + temp_z_dest, + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kWWWW), 1, + dxbc::Src::U(uav_index_edram_, uint32_t(UAVRegister::kEdram), + dxbc::Src::kXXXX)); // Get the address of the upper 32 bits of the color to temp.w. // temp.w = address of the upper 32 bits of the packed color. - DxbcOpIAdd(temp_w_dest, - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW), - DxbcSrc::LU(1)); + a_.OpIAdd(temp_w_dest, + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kWWWW), + dxbc::Src::LU(1)); // Load the upper 32 bits of the 64bpp color to temp.w. // temp.zw = packed destination color/alpha. if (uav_index_edram_ == kBindingIndexUnallocated) { uav_index_edram_ = uav_count_++; } - DxbcOpLdUAVTyped( + a_.OpLdUAVTyped( temp_w_dest, temp_w_src, 1, - DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEdram), - DxbcSrc::kXXXX)); + dxbc::Src::U(uav_index_edram_, uint32_t(UAVRegister::kEdram), + dxbc::Src::kXXXX)); } // The color is 32bpp. - DxbcOpElse(); + a_.OpElse(); { // Load the 32bpp color to temp.z. // temp.z = packed 32bpp destination color. 
if (uav_index_edram_ == kBindingIndexUnallocated) { uav_index_edram_ = uav_count_++; } - DxbcOpLdUAVTyped( - temp_z_dest, DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), - 1, - DxbcSrc::U(uav_index_edram_, uint32_t(UAVRegister::kEdram), - DxbcSrc::kXXXX)); + a_.OpLdUAVTyped( + temp_z_dest, + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kZZZZ), 1, + dxbc::Src::U(uav_index_edram_, uint32_t(UAVRegister::kEdram), + dxbc::Src::kXXXX)); // Break register dependency in temp.w if the color is 32bpp. // temp.zw = packed destination color/alpha. - DxbcOpMov(temp_w_dest, DxbcSrc::LU(0)); + a_.OpMov(temp_w_dest, dxbc::Src::LU(0)); } // Close the color format check. - DxbcOpEndIf(); + a_.OpEndIf(); uint32_t color_temp = PushSystemTemp(); - DxbcDest color_temp_rgb_dest(DxbcDest::R(color_temp, 0b0111)); - DxbcDest color_temp_a_dest(DxbcDest::R(color_temp, 0b1000)); - DxbcSrc color_temp_src(DxbcSrc::R(color_temp)); - DxbcSrc color_temp_a_src(DxbcSrc::R(color_temp, DxbcSrc::kWWWW)); + dxbc::Dest color_temp_rgb_dest(dxbc::Dest::R(color_temp, 0b0111)); + dxbc::Dest color_temp_a_dest(dxbc::Dest::R(color_temp, 0b1000)); + dxbc::Src color_temp_src(dxbc::Src::R(color_temp)); + dxbc::Src color_temp_a_src(dxbc::Src::R(color_temp, dxbc::Src::kWWWW)); // Get if blending is enabled to color_temp.x. // color_temp.x = whether blending is enabled. system_constants_used_ |= 1ull << kSysConst_EdramRTBlendFactorsOps_Index; - DxbcOpINE(DxbcDest::R(color_temp, 0b0001), rt_blend_factors_ops_src, - DxbcSrc::LU(0x00010001)); + a_.OpINE(dxbc::Dest::R(color_temp, 0b0001), rt_blend_factors_ops_src, + dxbc::Src::LU(0x00010001)); // Check if need to blend. // color_temp.x = free. - DxbcOpIf(true, DxbcSrc::R(color_temp, DxbcSrc::kXXXX)); + a_.OpIf(true, dxbc::Src::R(color_temp, dxbc::Src::kXXXX)); { // Now, when blending is enabled, temp.xy are used as scratch since // the color is packed after blending. 
@@ -2556,116 +2568,117 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // temp.x = whether min/max should be used for color. system_constants_used_ |= 1ull << kSysConst_EdramRTBlendFactorsOps_Index; - DxbcOpAnd(temp_x_dest, rt_blend_factors_ops_src, - DxbcSrc::LU(1 << (5 + 1))); + a_.OpAnd(temp_x_dest, rt_blend_factors_ops_src, + dxbc::Src::LU(1 << (5 + 1))); // Check if need to do blend the color with factors. // temp.x = free. - DxbcOpIf(false, temp_x_src); + a_.OpIf(false, temp_x_src); { uint32_t blend_src_temp = PushSystemTemp(); - DxbcDest blend_src_temp_rgb_dest( - DxbcDest::R(blend_src_temp, 0b0111)); - DxbcSrc blend_src_temp_src(DxbcSrc::R(blend_src_temp)); + dxbc::Dest blend_src_temp_rgb_dest( + dxbc::Dest::R(blend_src_temp, 0b0111)); + dxbc::Src blend_src_temp_src(dxbc::Src::R(blend_src_temp)); // Extract the source color factor to temp.x. // temp.x = source color factor index. system_constants_used_ |= 1ull << kSysConst_EdramRTBlendFactorsOps_Index; - DxbcOpAnd(temp_x_dest, rt_blend_factors_ops_src, - DxbcSrc::LU((1 << 5) - 1)); + a_.OpAnd(temp_x_dest, rt_blend_factors_ops_src, + dxbc::Src::LU((1 << 5) - 1)); // Check if the source color factor is not zero - if it is, the // source must be ignored completely, and Infinity and NaN in it // shouldn't affect blending. - DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); { // Open the switch for choosing the source color blend factor. // temp.x = free. - DxbcOpSwitch(temp_x_src); + a_.OpSwitch(temp_x_src); // Write the source color factor to blend_src_temp.xyz. // blend_src_temp.xyz = unclamped source color factor. ROV_HandleColorBlendFactorCases(system_temps_color_[i], color_temp, blend_src_temp); // Close the source color factor switch. - DxbcOpEndSwitch(); + a_.OpEndSwitch(); // Get if the render target color is fixed-point and the source // color factor needs clamping to temp.x. // temp.x = whether color is fixed-point. 
system_constants_used_ |= 1ull << kSysConst_EdramRTFormatFlags_Index; - DxbcOpAnd(temp_x_dest, rt_format_flags_src, - DxbcSrc::LU(kRTFormatFlag_FixedPointColor)); + a_.OpAnd(temp_x_dest, rt_format_flags_src, + dxbc::Src::LU(kRTFormatFlag_FixedPointColor)); // Check if the source color factor needs clamping. - DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); { // Clamp the source color factor in blend_src_temp.xyz. // blend_src_temp.xyz = source color factor. system_constants_used_ |= 1ull << kSysConst_EdramRTClamp_Index; - DxbcOpMax(blend_src_temp_rgb_dest, blend_src_temp_src, - rt_clamp_vec_src.Select(0)); - DxbcOpMin(blend_src_temp_rgb_dest, blend_src_temp_src, - rt_clamp_vec_src.Select(2)); + a_.OpMax(blend_src_temp_rgb_dest, blend_src_temp_src, + rt_clamp_vec_src.Select(0)); + a_.OpMin(blend_src_temp_rgb_dest, blend_src_temp_src, + rt_clamp_vec_src.Select(2)); } // Close the source color factor clamping check. - DxbcOpEndIf(); + a_.OpEndIf(); // Apply the factor to the source color. // blend_src_temp.xyz = unclamped source color part without // addition sign. - DxbcOpMul(blend_src_temp_rgb_dest, - DxbcSrc::R(system_temps_color_[i]), blend_src_temp_src); + a_.OpMul(blend_src_temp_rgb_dest, + dxbc::Src::R(system_temps_color_[i]), + blend_src_temp_src); // Check if the source color part needs clamping after the // multiplication. // temp.x = free. - DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); { // Clamp the source color part. // blend_src_temp.xyz = source color part without addition sign. system_constants_used_ |= 1ull << kSysConst_EdramRTClamp_Index; - DxbcOpMax(blend_src_temp_rgb_dest, blend_src_temp_src, - rt_clamp_vec_src.Select(0)); - DxbcOpMin(blend_src_temp_rgb_dest, blend_src_temp_src, - rt_clamp_vec_src.Select(2)); + a_.OpMax(blend_src_temp_rgb_dest, blend_src_temp_src, + rt_clamp_vec_src.Select(0)); + a_.OpMin(blend_src_temp_rgb_dest, blend_src_temp_src, + rt_clamp_vec_src.Select(2)); } // Close the source color part clamping check. 
- DxbcOpEndIf(); + a_.OpEndIf(); // Extract the source color sign to temp.x. // temp.x = source color sign as zero for 1 and non-zero for -1. system_constants_used_ |= 1ull << kSysConst_EdramRTBlendFactorsOps_Index; - DxbcOpAnd(temp_x_dest, rt_blend_factors_ops_src, - DxbcSrc::LU(1 << (5 + 2))); + a_.OpAnd(temp_x_dest, rt_blend_factors_ops_src, + dxbc::Src::LU(1 << (5 + 2))); // Apply the source color sign. // blend_src_temp.xyz = source color part. // temp.x = free. - DxbcOpMovC(blend_src_temp_rgb_dest, temp_x_src, - -blend_src_temp_src, blend_src_temp_src); + a_.OpMovC(blend_src_temp_rgb_dest, temp_x_src, + -blend_src_temp_src, blend_src_temp_src); } // The source color factor is zero. - DxbcOpElse(); + a_.OpElse(); { // Write zero to the source color part. // blend_src_temp.xyz = source color part. // temp.x = free. - DxbcOpMov(blend_src_temp_rgb_dest, DxbcSrc::LF(0.0f)); + a_.OpMov(blend_src_temp_rgb_dest, dxbc::Src::LF(0.0f)); } // Close the source color factor zero check. - DxbcOpEndIf(); + a_.OpEndIf(); // Extract the destination color factor to temp.x. // temp.x = destination color factor index. system_constants_used_ |= 1ull << kSysConst_EdramRTBlendFactorsOps_Index; - DxbcOpUBFE(temp_x_dest, DxbcSrc::LU(5), DxbcSrc::LU(8), - rt_blend_factors_ops_src); + a_.OpUBFE(temp_x_dest, dxbc::Src::LU(5), dxbc::Src::LU(8), + rt_blend_factors_ops_src); // Check if the destination color factor is not zero. - DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); { uint32_t blend_dest_factor_temp = PushSystemTemp(); - DxbcSrc blend_dest_factor_temp_src( - DxbcSrc::R(blend_dest_factor_temp)); + dxbc::Src blend_dest_factor_temp_src( + dxbc::Src::R(blend_dest_factor_temp)); // Open the switch for choosing the destination color blend // factor. // temp.x = free. - DxbcOpSwitch(temp_x_src); + a_.OpSwitch(temp_x_src); // Write the destination color factor to // blend_dest_factor_temp.xyz. 
// blend_dest_factor_temp.xyz = unclamped destination color @@ -2673,85 +2686,85 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { ROV_HandleColorBlendFactorCases( system_temps_color_[i], color_temp, blend_dest_factor_temp); // Close the destination color factor switch. - DxbcOpEndSwitch(); + a_.OpEndSwitch(); // Get if the render target color is fixed-point and the // destination color factor needs clamping to temp.x. // temp.x = whether color is fixed-point. system_constants_used_ |= 1ull << kSysConst_EdramRTFormatFlags_Index; - DxbcOpAnd(temp_x_dest, rt_format_flags_src, - DxbcSrc::LU(kRTFormatFlag_FixedPointColor)); + a_.OpAnd(temp_x_dest, rt_format_flags_src, + dxbc::Src::LU(kRTFormatFlag_FixedPointColor)); // Check if the destination color factor needs clamping. - DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); { // Clamp the destination color factor in // blend_dest_factor_temp.xyz. // blend_dest_factor_temp.xyz = destination color factor. system_constants_used_ |= 1ull << kSysConst_EdramRTClamp_Index; - DxbcOpMax(DxbcDest::R(blend_dest_factor_temp, 0b0111), - blend_dest_factor_temp_src, - rt_clamp_vec_src.Select(0)); - DxbcOpMin(DxbcDest::R(blend_dest_factor_temp, 0b0111), - blend_dest_factor_temp_src, - rt_clamp_vec_src.Select(2)); + a_.OpMax(dxbc::Dest::R(blend_dest_factor_temp, 0b0111), + blend_dest_factor_temp_src, + rt_clamp_vec_src.Select(0)); + a_.OpMin(dxbc::Dest::R(blend_dest_factor_temp, 0b0111), + blend_dest_factor_temp_src, + rt_clamp_vec_src.Select(2)); } // Close the destination color factor clamping check. - DxbcOpEndIf(); + a_.OpEndIf(); // Apply the factor to the destination color in color_temp.xyz. // color_temp.xyz = unclamped destination color part without // addition sign. // blend_dest_temp.xyz = free. - DxbcOpMul(color_temp_rgb_dest, color_temp_src, - blend_dest_factor_temp_src); + a_.OpMul(color_temp_rgb_dest, color_temp_src, + blend_dest_factor_temp_src); // Release blend_dest_factor_temp. 
PopSystemTemp(); // Check if the destination color part needs clamping after the // multiplication. // temp.x = free. - DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); { // Clamp the destination color part. // color_temp.xyz = destination color part without addition // sign. system_constants_used_ |= 1ull << kSysConst_EdramRTClamp_Index; - DxbcOpMax(color_temp_rgb_dest, color_temp_src, - rt_clamp_vec_src.Select(0)); - DxbcOpMin(color_temp_rgb_dest, color_temp_src, - rt_clamp_vec_src.Select(2)); + a_.OpMax(color_temp_rgb_dest, color_temp_src, + rt_clamp_vec_src.Select(0)); + a_.OpMin(color_temp_rgb_dest, color_temp_src, + rt_clamp_vec_src.Select(2)); } // Close the destination color part clamping check. - DxbcOpEndIf(); + a_.OpEndIf(); // Extract the destination color sign to temp.x. // temp.x = destination color sign as zero for 1 and non-zero for // -1. system_constants_used_ |= 1ull << kSysConst_EdramRTBlendFactorsOps_Index; - DxbcOpAnd(temp_x_dest, rt_blend_factors_ops_src, - DxbcSrc::LU(1 << 5)); + a_.OpAnd(temp_x_dest, rt_blend_factors_ops_src, + dxbc::Src::LU(1 << 5)); // Select the sign for destination multiply-add as 1.0 or -1.0 to // temp.x. // temp.x = destination color sign as float. - DxbcOpMovC(temp_x_dest, temp_x_src, DxbcSrc::LF(-1.0f), - DxbcSrc::LF(1.0f)); + a_.OpMovC(temp_x_dest, temp_x_src, dxbc::Src::LF(-1.0f), + dxbc::Src::LF(1.0f)); // Perform color blending to color_temp.xyz. // color_temp.xyz = unclamped blended color. // blend_src_temp.xyz = free. // temp.x = free. - DxbcOpMAd(color_temp_rgb_dest, color_temp_src, temp_x_src, - blend_src_temp_src); + a_.OpMAd(color_temp_rgb_dest, color_temp_src, temp_x_src, + blend_src_temp_src); } // The destination color factor is zero. - DxbcOpElse(); + a_.OpElse(); { // Write the source color part without applying the destination // color. // color_temp.xyz = unclamped blended color. // blend_src_temp.xyz = free. // temp.x = free. 
- DxbcOpMov(color_temp_rgb_dest, blend_src_temp_src); + a_.OpMov(color_temp_rgb_dest, blend_src_temp_src); } // Close the destination color factor zero check. - DxbcOpEndIf(); + a_.OpEndIf(); // Release blend_src_temp. PopSystemTemp(); @@ -2759,44 +2772,44 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // Clamp the color in color_temp.xyz before packing. // color_temp.xyz = blended color. system_constants_used_ |= 1ull << kSysConst_EdramRTClamp_Index; - DxbcOpMax(color_temp_rgb_dest, color_temp_src, - rt_clamp_vec_src.Select(0)); - DxbcOpMin(color_temp_rgb_dest, color_temp_src, - rt_clamp_vec_src.Select(2)); + a_.OpMax(color_temp_rgb_dest, color_temp_src, + rt_clamp_vec_src.Select(0)); + a_.OpMin(color_temp_rgb_dest, color_temp_src, + rt_clamp_vec_src.Select(2)); } // Need to do min/max for color. - DxbcOpElse(); + a_.OpElse(); { // Extract the color min (0) or max (1) bit to temp.x // temp.x = whether min or max should be used for color. system_constants_used_ |= 1ull << kSysConst_EdramRTBlendFactorsOps_Index; - DxbcOpAnd(temp_x_dest, rt_blend_factors_ops_src, - DxbcSrc::LU(1 << 5)); + a_.OpAnd(temp_x_dest, rt_blend_factors_ops_src, + dxbc::Src::LU(1 << 5)); // Check if need to do min or max for color. // temp.x = free. - DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); { // Choose max of the colors without applying the factors to // color_temp.xyz. // color_temp.xyz = blended color. - DxbcOpMax(color_temp_rgb_dest, DxbcSrc::R(system_temps_color_[i]), - color_temp_src); + a_.OpMax(color_temp_rgb_dest, + dxbc::Src::R(system_temps_color_[i]), color_temp_src); } // Need to do min. - DxbcOpElse(); + a_.OpElse(); { // Choose min of the colors without applying the factors to // color_temp.xyz. // color_temp.xyz = blended color. - DxbcOpMin(color_temp_rgb_dest, DxbcSrc::R(system_temps_color_[i]), - color_temp_src); + a_.OpMin(color_temp_rgb_dest, + dxbc::Src::R(system_temps_color_[i]), color_temp_src); } // Close the min or max check. 
- DxbcOpEndIf(); + a_.OpEndIf(); } // Close the color factor blending or min/max check. - DxbcOpEndIf(); + a_.OpEndIf(); // ******************************************************************* // Alpha blending. @@ -2806,219 +2819,221 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // temp.x = whether min/max should be used for alpha. system_constants_used_ |= 1ull << kSysConst_EdramRTBlendFactorsOps_Index; - DxbcOpAnd(temp_x_dest, rt_blend_factors_ops_src, - DxbcSrc::LU(1 << (21 + 1))); + a_.OpAnd(temp_x_dest, rt_blend_factors_ops_src, + dxbc::Src::LU(1 << (21 + 1))); // Check if need to do blend the color with factors. // temp.x = free. - DxbcOpIf(false, temp_x_src); + a_.OpIf(false, temp_x_src); { // Extract the source alpha factor to temp.x. // temp.x = source alpha factor index. system_constants_used_ |= 1ull << kSysConst_EdramRTBlendFactorsOps_Index; - DxbcOpUBFE(temp_x_dest, DxbcSrc::LU(5), DxbcSrc::LU(16), - rt_blend_factors_ops_src); + a_.OpUBFE(temp_x_dest, dxbc::Src::LU(5), dxbc::Src::LU(16), + rt_blend_factors_ops_src); // Check if the source alpha factor is not zero. - DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); { // Open the switch for choosing the source alpha blend factor. // temp.x = free. - DxbcOpSwitch(temp_x_src); + a_.OpSwitch(temp_x_src); // Write the source alpha factor to temp.x. // temp.x = unclamped source alpha factor. ROV_HandleAlphaBlendFactorCases(system_temps_color_[i], color_temp, temp, 0); // Close the source alpha factor switch. - DxbcOpEndSwitch(); + a_.OpEndSwitch(); // Get if the render target alpha is fixed-point and the source // alpha factor needs clamping to temp.y. // temp.y = whether alpha is fixed-point. 
system_constants_used_ |= 1ull << kSysConst_EdramRTFormatFlags_Index; - DxbcOpAnd(temp_y_dest, rt_format_flags_src, - DxbcSrc::LU(kRTFormatFlag_FixedPointAlpha)); + a_.OpAnd(temp_y_dest, rt_format_flags_src, + dxbc::Src::LU(kRTFormatFlag_FixedPointAlpha)); // Check if the source alpha factor needs clamping. - DxbcOpIf(true, temp_y_src); + a_.OpIf(true, temp_y_src); { // Clamp the source alpha factor in temp.x. // temp.x = source alpha factor. system_constants_used_ |= 1ull << kSysConst_EdramRTClamp_Index; - DxbcOpMax(temp_x_dest, temp_x_src, rt_clamp_vec_src.Select(1)); - DxbcOpMin(temp_x_dest, temp_x_src, rt_clamp_vec_src.Select(3)); + a_.OpMax(temp_x_dest, temp_x_src, rt_clamp_vec_src.Select(1)); + a_.OpMin(temp_x_dest, temp_x_src, rt_clamp_vec_src.Select(3)); } // Close the source alpha factor clamping check. - DxbcOpEndIf(); + a_.OpEndIf(); // Apply the factor to the source alpha. // temp.x = unclamped source alpha part without addition sign. - DxbcOpMul(temp_x_dest, - DxbcSrc::R(system_temps_color_[i], DxbcSrc::kWWWW), - temp_x_src); + a_.OpMul(temp_x_dest, + dxbc::Src::R(system_temps_color_[i], dxbc::Src::kWWWW), + temp_x_src); // Check if the source alpha part needs clamping after the // multiplication. // temp.y = free. - DxbcOpIf(true, temp_y_src); + a_.OpIf(true, temp_y_src); { // Clamp the source alpha part. // temp.x = source alpha part without addition sign. system_constants_used_ |= 1ull << kSysConst_EdramRTClamp_Index; - DxbcOpMax(temp_x_dest, temp_x_src, rt_clamp_vec_src.Select(1)); - DxbcOpMin(temp_x_dest, temp_x_src, rt_clamp_vec_src.Select(3)); + a_.OpMax(temp_x_dest, temp_x_src, rt_clamp_vec_src.Select(1)); + a_.OpMin(temp_x_dest, temp_x_src, rt_clamp_vec_src.Select(3)); } // Close the source alpha part clamping check. - DxbcOpEndIf(); + a_.OpEndIf(); // Extract the source alpha sign to temp.y. // temp.y = source alpha sign as zero for 1 and non-zero for -1. 
system_constants_used_ |= 1ull << kSysConst_EdramRTBlendFactorsOps_Index; - DxbcOpAnd(temp_y_dest, rt_blend_factors_ops_src, - DxbcSrc::LU(1 << (21 + 2))); + a_.OpAnd(temp_y_dest, rt_blend_factors_ops_src, + dxbc::Src::LU(1 << (21 + 2))); // Apply the source alpha sign. // temp.x = source alpha part. - DxbcOpMovC(temp_x_dest, temp_y_src, -temp_x_src, temp_x_src); + a_.OpMovC(temp_x_dest, temp_y_src, -temp_x_src, temp_x_src); } // The source alpha factor is zero. - DxbcOpElse(); + a_.OpElse(); { // Write zero to the source alpha part. // temp.x = source alpha part. - DxbcOpMov(temp_x_dest, DxbcSrc::LF(0.0f)); + a_.OpMov(temp_x_dest, dxbc::Src::LF(0.0f)); } // Close the source alpha factor zero check. - DxbcOpEndIf(); + a_.OpEndIf(); // Extract the destination alpha factor to temp.y. // temp.y = destination alpha factor index. system_constants_used_ |= 1ull << kSysConst_EdramRTBlendFactorsOps_Index; - DxbcOpUBFE(temp_y_dest, DxbcSrc::LU(5), DxbcSrc::LU(24), - rt_blend_factors_ops_src); + a_.OpUBFE(temp_y_dest, dxbc::Src::LU(5), dxbc::Src::LU(24), + rt_blend_factors_ops_src); // Check if the destination alpha factor is not zero. - DxbcOpIf(true, temp_y_src); + a_.OpIf(true, temp_y_src); { // Open the switch for choosing the destination alpha blend // factor. // temp.y = free. - DxbcOpSwitch(temp_y_src); + a_.OpSwitch(temp_y_src); // Write the destination alpha factor to temp.y. // temp.y = unclamped destination alpha factor. ROV_HandleAlphaBlendFactorCases(system_temps_color_[i], color_temp, temp, 1); // Close the destination alpha factor switch. - DxbcOpEndSwitch(); + a_.OpEndSwitch(); // Get if the render target alpha is fixed-point and the // destination alpha factor needs clamping. // alpha_is_fixed_temp.x = whether alpha is fixed-point. 
uint32_t alpha_is_fixed_temp = PushSystemTemp(); system_constants_used_ |= 1ull << kSysConst_EdramRTFormatFlags_Index; - DxbcOpAnd(DxbcDest::R(alpha_is_fixed_temp, 0b0001), - rt_format_flags_src, - DxbcSrc::LU(kRTFormatFlag_FixedPointAlpha)); + a_.OpAnd(dxbc::Dest::R(alpha_is_fixed_temp, 0b0001), + rt_format_flags_src, + dxbc::Src::LU(kRTFormatFlag_FixedPointAlpha)); // Check if the destination alpha factor needs clamping. - DxbcOpIf(true, DxbcSrc::R(alpha_is_fixed_temp, DxbcSrc::kXXXX)); + a_.OpIf(true, + dxbc::Src::R(alpha_is_fixed_temp, dxbc::Src::kXXXX)); { // Clamp the destination alpha factor in temp.y. // temp.y = destination alpha factor. system_constants_used_ |= 1ull << kSysConst_EdramRTClamp_Index; - DxbcOpMax(temp_y_dest, temp_y_src, rt_clamp_vec_src.Select(1)); - DxbcOpMin(temp_y_dest, temp_y_src, rt_clamp_vec_src.Select(3)); + a_.OpMax(temp_y_dest, temp_y_src, rt_clamp_vec_src.Select(1)); + a_.OpMin(temp_y_dest, temp_y_src, rt_clamp_vec_src.Select(3)); } // Close the destination alpha factor clamping check. - DxbcOpEndIf(); + a_.OpEndIf(); // Apply the factor to the destination alpha in color_temp.w. // color_temp.w = unclamped destination alpha part without // addition sign. - DxbcOpMul(color_temp_a_dest, color_temp_a_src, temp_y_src); + a_.OpMul(color_temp_a_dest, color_temp_a_src, temp_y_src); // Check if the destination alpha part needs clamping after the // multiplication. // alpha_is_fixed_temp.x = free. - DxbcOpIf(true, DxbcSrc::R(alpha_is_fixed_temp, DxbcSrc::kXXXX)); + a_.OpIf(true, + dxbc::Src::R(alpha_is_fixed_temp, dxbc::Src::kXXXX)); // Release alpha_is_fixed_temp. PopSystemTemp(); { // Clamp the destination alpha part. // color_temp.w = destination alpha part without addition sign. 
system_constants_used_ |= 1ull << kSysConst_EdramRTClamp_Index; - DxbcOpMax(color_temp_a_dest, color_temp_a_src, - rt_clamp_vec_src.Select(1)); - DxbcOpMin(color_temp_a_dest, color_temp_a_src, - rt_clamp_vec_src.Select(3)); + a_.OpMax(color_temp_a_dest, color_temp_a_src, + rt_clamp_vec_src.Select(1)); + a_.OpMin(color_temp_a_dest, color_temp_a_src, + rt_clamp_vec_src.Select(3)); } // Close the destination alpha factor clamping check. - DxbcOpEndIf(); + a_.OpEndIf(); // Extract the destination alpha sign to temp.y. // temp.y = destination alpha sign as zero for 1 and non-zero for // -1. system_constants_used_ |= 1ull << kSysConst_EdramRTBlendFactorsOps_Index; - DxbcOpAnd(temp_y_dest, rt_blend_factors_ops_src, - DxbcSrc::LU(1 << 21)); + a_.OpAnd(temp_y_dest, rt_blend_factors_ops_src, + dxbc::Src::LU(1 << 21)); // Select the sign for destination multiply-add as 1.0 or -1.0 to // temp.y. // temp.y = destination alpha sign as float. - DxbcOpMovC(temp_y_dest, temp_y_src, DxbcSrc::LF(-1.0f), - DxbcSrc::LF(1.0f)); + a_.OpMovC(temp_y_dest, temp_y_src, dxbc::Src::LF(-1.0f), + dxbc::Src::LF(1.0f)); // Perform alpha blending to color_temp.w. // color_temp.w = unclamped blended alpha. // temp.xy = free. - DxbcOpMAd(color_temp_a_dest, color_temp_a_src, temp_y_src, - temp_x_src); + a_.OpMAd(color_temp_a_dest, color_temp_a_src, temp_y_src, + temp_x_src); } // The destination alpha factor is zero. - DxbcOpElse(); + a_.OpElse(); { // Write the source alpha part without applying the destination // alpha. // color_temp.w = unclamped blended alpha. // temp.xy = free. - DxbcOpMov(color_temp_a_dest, temp_x_src); + a_.OpMov(color_temp_a_dest, temp_x_src); } // Close the destination alpha factor zero check. - DxbcOpEndIf(); + a_.OpEndIf(); // Clamp the alpha in color_temp.w before packing. // color_temp.w = blended alpha. 
system_constants_used_ |= 1ull << kSysConst_EdramRTClamp_Index; - DxbcOpMax(color_temp_a_dest, color_temp_a_src, - rt_clamp_vec_src.Select(1)); - DxbcOpMin(color_temp_a_dest, color_temp_a_src, - rt_clamp_vec_src.Select(3)); + a_.OpMax(color_temp_a_dest, color_temp_a_src, + rt_clamp_vec_src.Select(1)); + a_.OpMin(color_temp_a_dest, color_temp_a_src, + rt_clamp_vec_src.Select(3)); } // Need to do min/max for alpha. - DxbcOpElse(); + a_.OpElse(); { // Extract the alpha min (0) or max (1) bit to temp.x. // temp.x = whether min or max should be used for alpha. system_constants_used_ |= 1ull << kSysConst_EdramRTBlendFactorsOps_Index; - DxbcOpAnd(temp_x_dest, rt_blend_factors_ops_src, - DxbcSrc::LU(1 << 21)); + a_.OpAnd(temp_x_dest, rt_blend_factors_ops_src, + dxbc::Src::LU(1 << 21)); // Check if need to do min or max for alpha. // temp.x = free. - DxbcOpIf(true, temp_x_src); + a_.OpIf(true, temp_x_src); { // Choose max of the alphas without applying the factors to // color_temp.w. // color_temp.w = blended alpha. - DxbcOpMax(color_temp_a_dest, - DxbcSrc::R(system_temps_color_[i], DxbcSrc::kWWWW), - color_temp_a_src); + a_.OpMax(color_temp_a_dest, + dxbc::Src::R(system_temps_color_[i], dxbc::Src::kWWWW), + color_temp_a_src); } // Need to do min. - DxbcOpElse(); + a_.OpElse(); { // Choose min of the alphas without applying the factors to // color_temp.w. // color_temp.w = blended alpha. - DxbcOpMin(color_temp_a_dest, - DxbcSrc::R(system_temps_color_[i], DxbcSrc::kWWWW), - color_temp_a_src); + a_.OpMin(color_temp_a_dest, + dxbc::Src::R(system_temps_color_[i], dxbc::Src::kWWWW), + color_temp_a_src); } // Close the min or max check. - DxbcOpEndIf(); + a_.OpEndIf(); } // Close the alpha factor blending or min/max check. - DxbcOpEndIf(); + a_.OpEndIf(); // Pack the new color/alpha to temp.xy. // temp.xy = packed new color/alpha. @@ -3029,7 +3044,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { PopSystemTemp(); } // Close the blending check. 
- DxbcOpEndIf(); + a_.OpEndIf(); // ********************************************************************* // Write mask application @@ -3038,27 +3053,27 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // Apply the keep mask to the previous packed color/alpha in temp.zw. // temp.zw = masked packed old color/alpha. system_constants_used_ |= 1ull << kSysConst_EdramRTKeepMask_Index; - DxbcOpAnd(DxbcDest::R(temp, 0b1100), DxbcSrc::R(temp), - keep_mask_vec_src.Swizzle(keep_mask_swizzle << 4)); + a_.OpAnd(dxbc::Dest::R(temp, 0b1100), dxbc::Src::R(temp), + keep_mask_vec_src.Swizzle(keep_mask_swizzle << 4)); // Invert the keep mask into color_temp.xy. // color_temp.xy = inverted keep mask (write mask). system_constants_used_ |= 1ull << kSysConst_EdramRTKeepMask_Index; - DxbcOpNot(DxbcDest::R(color_temp, 0b0011), - keep_mask_vec_src.Swizzle(keep_mask_swizzle)); + a_.OpNot(dxbc::Dest::R(color_temp, 0b0011), + keep_mask_vec_src.Swizzle(keep_mask_swizzle)); // Release color_temp. PopSystemTemp(); // Apply the write mask to the new color/alpha in temp.xy. // temp.xy = masked packed new color/alpha. - DxbcOpAnd(DxbcDest::R(temp, 0b0011), DxbcSrc::R(temp), - DxbcSrc::R(color_temp)); + a_.OpAnd(dxbc::Dest::R(temp, 0b0011), dxbc::Src::R(temp), + dxbc::Src::R(color_temp)); // Combine the masked colors into temp.xy. // temp.xy = packed resulting color/alpha. // temp.zw = free. - DxbcOpOr(DxbcDest::R(temp, 0b0011), DxbcSrc::R(temp), - DxbcSrc::R(temp, 0b1110)); + a_.OpOr(dxbc::Dest::R(temp, 0b0011), dxbc::Src::R(temp), + dxbc::Src::R(temp, 0b1110)); } // Close the previous color load check. - DxbcOpEndIf(); + a_.OpEndIf(); // *********************************************************************** // Writing the color @@ -3067,79 +3082,81 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() { // Get if the format is 64bpp to temp.z. // temp.z = whether the render target is 64bpp. 
system_constants_used_ |= 1ull << kSysConst_EdramRTFormatFlags_Index; - DxbcOpAnd(temp_z_dest, rt_format_flags_src, - DxbcSrc::LU(kRTFormatFlag_64bpp)); + a_.OpAnd(temp_z_dest, rt_format_flags_src, + dxbc::Src::LU(kRTFormatFlag_64bpp)); // Check if the format is 64bpp. // temp.z = free. - DxbcOpIf(true, temp_z_src); + a_.OpIf(true, temp_z_src); { // Store the lower 32 bits of the 64bpp color. if (uav_index_edram_ == kBindingIndexUnallocated) { uav_index_edram_ = uav_count_++; } - DxbcOpStoreUAVTyped( - DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW), 1, temp_x_src); + a_.OpStoreUAVTyped( + dxbc::Dest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kWWWW), 1, + temp_x_src); // Get the address of the upper 32 bits of the color to temp.z (can't // use temp.x because components when not blending, packing is done once // for all samples, so it has to be preserved). - DxbcOpIAdd(temp_z_dest, - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kWWWW), - DxbcSrc::LU(1)); + a_.OpIAdd(temp_z_dest, + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kWWWW), + dxbc::Src::LU(1)); // Store the upper 32 bits of the 64bpp color. if (uav_index_edram_ == kBindingIndexUnallocated) { uav_index_edram_ = uav_count_++; } - DxbcOpStoreUAVTyped( - DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)), + a_.OpStoreUAVTyped( + dxbc::Dest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)), temp_z_src, 1, temp_y_src); } // The color is 32bpp. - DxbcOpElse(); + a_.OpElse(); { // Store the 32bpp color. 
if (uav_index_edram_ == kBindingIndexUnallocated) { uav_index_edram_ = uav_count_++; } - DxbcOpStoreUAVTyped( - DxbcDest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)), - DxbcSrc::R(system_temp_rov_params_, DxbcSrc::kZZZZ), 1, temp_x_src); + a_.OpStoreUAVTyped( + dxbc::Dest::U(uav_index_edram_, uint32_t(UAVRegister::kEdram)), + dxbc::Src::R(system_temp_rov_params_, dxbc::Src::kZZZZ), 1, + temp_x_src); } // Close the 64bpp/32bpp conditional. - DxbcOpEndIf(); + a_.OpEndIf(); // *********************************************************************** // End of color sample raster operation. // *********************************************************************** // Close the sample covered check. - DxbcOpEndIf(); + a_.OpEndIf(); // Go to the next sample (samples are at +0, +80, +1, +81, so need to do // +80, -79, +80 and -81 after each sample). system_constants_used_ |= 1ull << kSysConst_EdramResolutionSquareScale_Index; - DxbcOpIMAd(DxbcDest::R(system_temp_rov_params_, 0b1100), - DxbcSrc::LI(0, 0, (j & 1) ? -78 - j : 80, - ((j & 1) ? -78 - j : 80) * 2), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramResolutionSquareScale_Vec) - .Select(kSysConst_EdramResolutionSquareScale_Comp), - DxbcSrc::R(system_temp_rov_params_)); + a_.OpIMAd(dxbc::Dest::R(system_temp_rov_params_, 0b1100), + dxbc::Src::LI(0, 0, (j & 1) ? -78 - j : 80, + ((j & 1) ? -78 - j : 80) * 2), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramResolutionSquareScale_Vec) + .Select(kSysConst_EdramResolutionSquareScale_Comp), + dxbc::Src::R(system_temp_rov_params_)); } // Revert adding the EDRAM bases of the render target to // system_temp_rov_params_.zw. 
system_constants_used_ |= 1ull << kSysConst_EdramRTBaseDwordsScaled_Index; - DxbcOpIAdd(DxbcDest::R(system_temp_rov_params_, 0b1100), - DxbcSrc::R(system_temp_rov_params_), - -DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_EdramRTBaseDwordsScaled_Vec) - .Select(i)); + a_.OpIAdd(dxbc::Dest::R(system_temp_rov_params_, 0b1100), + dxbc::Src::R(system_temp_rov_params_), + -dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_EdramRTBaseDwordsScaled_Vec) + .Select(i)); // Close the render target write check. - DxbcOpEndIf(); + a_.OpEndIf(); } // Release temp. @@ -3162,61 +3179,64 @@ void DxbcShaderTranslator::CompletePixelShader() { // Y - operation result (SGPR for mask operations, VGPR for alpha // operations). uint32_t alpha_test_temp = PushSystemTemp(); - DxbcDest alpha_test_mask_dest(DxbcDest::R(alpha_test_temp, 0b0001)); - DxbcSrc alpha_test_mask_src(DxbcSrc::R(alpha_test_temp, DxbcSrc::kXXXX)); - DxbcDest alpha_test_op_dest(DxbcDest::R(alpha_test_temp, 0b0010)); - DxbcSrc alpha_test_op_src(DxbcSrc::R(alpha_test_temp, DxbcSrc::kYYYY)); + dxbc::Dest alpha_test_mask_dest(dxbc::Dest::R(alpha_test_temp, 0b0001)); + dxbc::Src alpha_test_mask_src( + dxbc::Src::R(alpha_test_temp, dxbc::Src::kXXXX)); + dxbc::Dest alpha_test_op_dest(dxbc::Dest::R(alpha_test_temp, 0b0010)); + dxbc::Src alpha_test_op_src( + dxbc::Src::R(alpha_test_temp, dxbc::Src::kYYYY)); // Extract the comparison mask to check if the test needs to be done at all. // Don't care about flow control being somewhat dynamic - early Z is forced // using a special version of the shader anyway. 
system_constants_used_ |= 1ull << kSysConst_Flags_Index; - DxbcOpUBFE(alpha_test_mask_dest, DxbcSrc::LU(3), - DxbcSrc::LU(kSysFlag_AlphaPassIfLess_Shift), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_Flags_Vec) - .Select(kSysConst_Flags_Comp)); + a_.OpUBFE(alpha_test_mask_dest, dxbc::Src::LU(3), + dxbc::Src::LU(kSysFlag_AlphaPassIfLess_Shift), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp)); // Compare the mask to ALWAYS to check if the test shouldn't be done (will // pass even for NaNs, though the expected behavior in this case hasn't been // checked, but let's assume this means "always", not "less, equal or // greater". // TODO(Triang3l): Check how alpha test works with NaN on Direct3D 9. - DxbcOpINE(alpha_test_op_dest, alpha_test_mask_src, DxbcSrc::LU(0b111)); + a_.OpINE(alpha_test_op_dest, alpha_test_mask_src, dxbc::Src::LU(0b111)); // Don't do the test if the mode is "always". - DxbcOpIf(true, alpha_test_op_src); + a_.OpIf(true, alpha_test_op_src); { // Do the test. Can't use subtraction and sign because of float specials. - DxbcSrc alpha_src(DxbcSrc::R(system_temps_color_[0], DxbcSrc::kWWWW)); + dxbc::Src alpha_src( + dxbc::Src::R(system_temps_color_[0], dxbc::Src::kWWWW)); system_constants_used_ |= 1ull << kSysConst_AlphaTestReference_Index; - DxbcSrc alpha_test_reference_src( - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_AlphaTestReference_Vec) + dxbc::Src alpha_test_reference_src( + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_AlphaTestReference_Vec) .Select(kSysConst_AlphaTestReference_Comp)); // Less than. 
- DxbcOpLT(alpha_test_op_dest, alpha_src, alpha_test_reference_src); - DxbcOpOr(alpha_test_op_dest, alpha_test_op_src, - DxbcSrc::LU(~uint32_t(1 << 0))); - DxbcOpAnd(alpha_test_mask_dest, alpha_test_mask_src, alpha_test_op_src); + a_.OpLT(alpha_test_op_dest, alpha_src, alpha_test_reference_src); + a_.OpOr(alpha_test_op_dest, alpha_test_op_src, + dxbc::Src::LU(~uint32_t(1 << 0))); + a_.OpAnd(alpha_test_mask_dest, alpha_test_mask_src, alpha_test_op_src); // Equals to. - DxbcOpEq(alpha_test_op_dest, alpha_src, alpha_test_reference_src); - DxbcOpOr(alpha_test_op_dest, alpha_test_op_src, - DxbcSrc::LU(~uint32_t(1 << 1))); - DxbcOpAnd(alpha_test_mask_dest, alpha_test_mask_src, alpha_test_op_src); + a_.OpEq(alpha_test_op_dest, alpha_src, alpha_test_reference_src); + a_.OpOr(alpha_test_op_dest, alpha_test_op_src, + dxbc::Src::LU(~uint32_t(1 << 1))); + a_.OpAnd(alpha_test_mask_dest, alpha_test_mask_src, alpha_test_op_src); // Greater than. - DxbcOpLT(alpha_test_op_dest, alpha_test_reference_src, alpha_src); - DxbcOpOr(alpha_test_op_dest, alpha_test_op_src, - DxbcSrc::LU(~uint32_t(1 << 2))); - DxbcOpAnd(alpha_test_mask_dest, alpha_test_mask_src, alpha_test_op_src); + a_.OpLT(alpha_test_op_dest, alpha_test_reference_src, alpha_src); + a_.OpOr(alpha_test_op_dest, alpha_test_op_src, + dxbc::Src::LU(~uint32_t(1 << 2))); + a_.OpAnd(alpha_test_mask_dest, alpha_test_mask_src, alpha_test_op_src); // Discard the pixel if it has failed the test. if (edram_rov_used_) { - DxbcOpRetC(false, alpha_test_mask_src); + a_.OpRetC(false, alpha_test_mask_src); } else { - DxbcOpDiscard(false, alpha_test_mask_src); + a_.OpDiscard(false, alpha_test_mask_src); } } // Close the "not always" check. - DxbcOpEndIf(); + a_.OpEndIf(); // Release alpha_test_temp. PopSystemTemp(); } @@ -3243,11 +3263,11 @@ void DxbcShaderTranslator::PreClampedDepthTo20e4(uint32_t d24_temp, assert_true(temp_temp != d32_temp || temp_temp_component != d32_temp_component); // Source and destination may be the same. 
- DxbcDest d24_dest(DxbcDest::R(d24_temp, 1 << d24_temp_component)); - DxbcSrc d24_src(DxbcSrc::R(d24_temp).Select(d24_temp_component)); - DxbcSrc d32_src(DxbcSrc::R(d32_temp).Select(d32_temp_component)); - DxbcDest temp_dest(DxbcDest::R(temp_temp, 1 << temp_temp_component)); - DxbcSrc temp_src(DxbcSrc::R(temp_temp).Select(temp_temp_component)); + dxbc::Dest d24_dest(dxbc::Dest::R(d24_temp, 1 << d24_temp_component)); + dxbc::Src d24_src(dxbc::Src::R(d24_temp).Select(d24_temp_component)); + dxbc::Src d32_src(dxbc::Src::R(d32_temp).Select(d32_temp_component)); + dxbc::Dest temp_dest(dxbc::Dest::R(temp_temp, 1 << temp_temp_component)); + dxbc::Src temp_src(dxbc::Src::R(temp_temp).Select(temp_temp_component)); // CFloat24 from d3dref9.dll. // Assuming the depth is already clamped to [0, 2) (in all places, the depth @@ -3255,43 +3275,43 @@ void DxbcShaderTranslator::PreClampedDepthTo20e4(uint32_t d24_temp, // Check if the number is too small to be represented as normalized 20e4. // temp = f32 < 2^-14 - DxbcOpULT(temp_dest, d32_src, DxbcSrc::LU(0x38800000)); + a_.OpULT(temp_dest, d32_src, dxbc::Src::LU(0x38800000)); // Handle denormalized numbers separately. - DxbcOpIf(true, temp_src); + a_.OpIf(true, temp_src); { // temp = f32 >> 23 - DxbcOpUShR(temp_dest, d32_src, DxbcSrc::LU(23)); + a_.OpUShR(temp_dest, d32_src, dxbc::Src::LU(23)); // temp = 113 - (f32 >> 23) - DxbcOpIAdd(temp_dest, DxbcSrc::LI(113), -temp_src); + a_.OpIAdd(temp_dest, dxbc::Src::LI(113), -temp_src); // Don't allow the shift to overflow, since in DXBC the lower 5 bits of the // shift amount are used (otherwise 0 becomes 8). 
// temp = min(113 - (f32 >> 23), 24) - DxbcOpUMin(temp_dest, temp_src, DxbcSrc::LU(24)); + a_.OpUMin(temp_dest, temp_src, dxbc::Src::LU(24)); // biased_f32 = (f32 & 0x7FFFFF) | 0x800000 - DxbcOpBFI(d24_dest, DxbcSrc::LU(9), DxbcSrc::LU(23), DxbcSrc::LU(1), - d32_src); + a_.OpBFI(d24_dest, dxbc::Src::LU(9), dxbc::Src::LU(23), dxbc::Src::LU(1), + d32_src); // biased_f32 = ((f32 & 0x7FFFFF) | 0x800000) >> min(113 - (f32 >> 23), 24) - DxbcOpUShR(d24_dest, d24_src, temp_src); + a_.OpUShR(d24_dest, d24_src, temp_src); } // Not denormalized? - DxbcOpElse(); + a_.OpElse(); { // Bias the exponent. // biased_f32 = f32 + (-112 << 23) // (left shift of a negative value is undefined behavior) - DxbcOpIAdd(d24_dest, d32_src, DxbcSrc::LU(0xC8000000u)); + a_.OpIAdd(d24_dest, d32_src, dxbc::Src::LU(0xC8000000u)); } // Close the denormal check. - DxbcOpEndIf(); + a_.OpEndIf(); // Build the 20e4 number. // temp = (biased_f32 >> 3) & 1 - DxbcOpUBFE(temp_dest, DxbcSrc::LU(1), DxbcSrc::LU(3), d24_src); + a_.OpUBFE(temp_dest, dxbc::Src::LU(1), dxbc::Src::LU(3), d24_src); // f24 = biased_f32 + 3 - DxbcOpIAdd(d24_dest, d24_src, DxbcSrc::LU(3)); + a_.OpIAdd(d24_dest, d24_src, dxbc::Src::LU(3)); // f24 = biased_f32 + 3 + ((biased_f32 >> 3) & 1) - DxbcOpIAdd(d24_dest, d24_src, temp_src); + a_.OpIAdd(d24_dest, d24_src, temp_src); // f24 = ((biased_f32 + 3 + ((biased_f32 >> 3) & 1)) >> 3) & 0xFFFFFF - DxbcOpUBFE(d24_dest, DxbcSrc::LU(24), DxbcSrc::LU(3), d24_src); + a_.OpUBFE(d24_dest, dxbc::Src::LU(24), dxbc::Src::LU(3), d24_src); } void DxbcShaderTranslator::ROV_DepthTo24Bit(uint32_t d24_temp, @@ -3305,36 +3325,36 @@ void DxbcShaderTranslator::ROV_DepthTo24Bit(uint32_t d24_temp, // Source and destination may be the same. 
system_constants_used_ |= 1ull << kSysConst_Flags_Index; - DxbcOpAnd(DxbcDest::R(temp_temp, 1 << temp_temp_component), - DxbcSrc::CB(cbuffer_index_system_constants_, - uint32_t(CbufferRegister::kSystemConstants), - kSysConst_Flags_Vec) - .Select(kSysConst_Flags_Comp), - DxbcSrc::LU(kSysFlag_ROVDepthFloat24)); + a_.OpAnd(dxbc::Dest::R(temp_temp, 1 << temp_temp_component), + dxbc::Src::CB(cbuffer_index_system_constants_, + uint32_t(CbufferRegister::kSystemConstants), + kSysConst_Flags_Vec) + .Select(kSysConst_Flags_Comp), + dxbc::Src::LU(kSysFlag_ROVDepthFloat24)); // Convert according to the format. - DxbcOpIf(true, DxbcSrc::R(temp_temp).Select(temp_temp_component)); + a_.OpIf(true, dxbc::Src::R(temp_temp).Select(temp_temp_component)); { // 20e4 conversion. PreClampedDepthTo20e4(d24_temp, d24_temp_component, d32_temp, d32_temp_component, temp_temp, temp_temp_component); } - DxbcOpElse(); + a_.OpElse(); { // Unorm24 conversion. - DxbcDest d24_dest(DxbcDest::R(d24_temp, 1 << d24_temp_component)); - DxbcSrc d24_src(DxbcSrc::R(d24_temp).Select(d24_temp_component)); + dxbc::Dest d24_dest(dxbc::Dest::R(d24_temp, 1 << d24_temp_component)); + dxbc::Src d24_src(dxbc::Src::R(d24_temp).Select(d24_temp_component)); // Multiply by float(0xFFFFFF). - DxbcOpMul(d24_dest, DxbcSrc::R(d32_temp).Select(d32_temp_component), - DxbcSrc::LF(16777215.0f)); + a_.OpMul(d24_dest, dxbc::Src::R(d32_temp).Select(d32_temp_component), + dxbc::Src::LF(16777215.0f)); // Round to the nearest even integer. This seems to be the correct way: // rounding towards zero gives 0xFF instead of 0x100 in clear shaders in, // for instance, Halo 3, but other clear shaders in it are also broken if // 0.5 is added before ftou instead of round_ne. - DxbcOpRoundNE(d24_dest, d24_src); + a_.OpRoundNE(d24_dest, d24_src); // Convert to fixed-point. - DxbcOpFToU(d24_dest, d24_src); + a_.OpFToU(d24_dest, d24_src); } - DxbcOpEndIf(); + a_.OpEndIf(); } } // namespace gpu