From e3425b242e191de2b3b9442567b94d14f18b4b6e Mon Sep 17 00:00:00 2001 From: Triang3l Date: Sat, 7 May 2022 16:17:17 +0300 Subject: [PATCH] [DXBC] Both v[#] and v[#][#] operands for HS and GS --- .../gpu/d3d12/d3d12_render_target_cache.cc | 12 +- src/xenia/gpu/dxbc.h | 170 +++++++++--------- src/xenia/gpu/dxbc_shader_translator.cc | 51 +++--- .../gpu/dxbc_shader_translator_memexport.cc | 6 +- src/xenia/gpu/dxbc_shader_translator_om.cc | 56 +++--- 5 files changed, 145 insertions(+), 150 deletions(-) diff --git a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc index 45454f4fb..510669dbc 100644 --- a/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc +++ b/src/xenia/gpu/d3d12/d3d12_render_target_cache.cc @@ -2943,10 +2943,10 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { kTransferSRVRegisterHostDepth)); } a.OpDclInputPSSIV(dxbc::InterpolationMode::kLinearNoPerspective, - dxbc::Dest::V(kInputRegisterPosition, 0b0011), + dxbc::Dest::V1D(kInputRegisterPosition, 0b0011), dxbc::Name::kPosition); if (key.dest_msaa_samples != xenos::MsaaSamples::k1X) { - a.OpDclInputPSSGV(dxbc::Dest::V(kInputRegisterSampleIndex, 0b0001), + a.OpDclInputPSSGV(dxbc::Dest::V1D(kInputRegisterSampleIndex, 0b0001), dxbc::Name::kSampleIndex); } if (osgn_parameter_index_sv_target != UINT32_MAX) { @@ -2971,7 +2971,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { // Split the destination pixel index into 32bpp tile in r0.z and // 32bpp-tile-relative pixel index in r0.xy. // r0.xy = pixel XY as uint - a.OpFToU(dxbc::Dest::R(0, 0b0011), dxbc::Src::V(kInputRegisterPosition)); + a.OpFToU(dxbc::Dest::R(0, 0b0011), dxbc::Src::V1D(kInputRegisterPosition)); uint32_t dest_sample_width_log2 = uint32_t(dest_is_64bpp) + uint32_t(key.dest_msaa_samples >= xenos::MsaaSamples::k4X); @@ -3057,7 +3057,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { // If 64bpp -> 32bpp, also the needed half in r0.w. dxbc::Src dest_sample( - dxbc::Src::V(kInputRegisterSampleIndex, dxbc::Src::kXXXX)); + dxbc::Src::V1D(kInputRegisterSampleIndex, dxbc::Src::kXXXX)); dxbc::Src source_sample(dest_sample); uint32_t source_tile_pixel_x_reg = 0; uint32_t source_tile_pixel_y_reg = 0; @@ -3086,7 +3086,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { source_sample = dxbc::Src::R(1, dxbc::Src::kZZZZ); a.OpBFI(dxbc::Dest::R(1, 0b0001), dxbc::Src::LU(31), dxbc::Src::LU(1), dxbc::Src::R(0, dxbc::Src::kXXXX), - dxbc::Src::V(kInputRegisterSampleIndex, dxbc::Src::kXXXX)); + dxbc::Src::V1D(kInputRegisterSampleIndex, dxbc::Src::kXXXX)); source_tile_pixel_x_reg = 1; } else if (key.dest_msaa_samples == xenos::MsaaSamples::k2X) { // 32bpp -> 64bpp, 4x -> 2x. @@ -3128,7 +3128,7 @@ D3D12RenderTargetCache::GetOrCreateTransferPipelines(TransferShaderKey key) { a.OpIShL(dxbc::Dest::R(1, 0b0001), dxbc::Src::R(0, dxbc::Src::kXXXX), dxbc::Src::LU(2)); a.OpBFI(dxbc::Dest::R(1, 0b0001), dxbc::Src::LU(1), dxbc::Src::LU(1), - dxbc::Src::V(kInputRegisterSampleIndex, dxbc::Src::kXXXX), + dxbc::Src::V1D(kInputRegisterSampleIndex, dxbc::Src::kXXXX), dxbc::Src::R(1, dxbc::Src::kXXXX)); source_tile_pixel_x_reg = 1; // Y is handled by common code. diff --git a/src/xenia/gpu/dxbc.h b/src/xenia/gpu/dxbc.h index e0c0efb85..34eb6e8b7 100644 --- a/src/xenia/gpu/dxbc.h +++ b/src/xenia/gpu/dxbc.h @@ -655,33 +655,6 @@ enum class OperandType : uint32_t { kOutputStencilRef = 41, }; -// D3D10_SB_OPERAND_INDEX_DIMENSION -constexpr uint32_t GetOperandIndexDimension(OperandType type, - bool in_dcl = false) { - switch (type) { - case OperandType::kTemp: - case OperandType::kInput: - // FIXME(Triang3l): kInput has a dimensionality of 2 in the control point - // phase of hull shaders, however, currently the translator isn't used to - // emit them - if code where this matters is emitted by Xenia, the actual - // dimensionality will need to be stored in OperandAddress itself. - case OperandType::kOutput: - case OperandType::kLabel: - return 1; - case OperandType::kIndexableTemp: - case OperandType::kInputControlPoint: - return 2; - case OperandType::kSampler: - case OperandType::kResource: - case OperandType::kUnorderedAccessView: - return in_dcl ? 3 : 2; - case OperandType::kConstantBuffer: - return 3; - default: - return 0; - } -} - // D3D10_SB_OPERAND_NUM_COMPONENTS enum class OperandDimension : uint32_t { kNoData, // D3D10_SB_OPERAND_0_COMPONENT @@ -766,11 +739,22 @@ struct Index { struct OperandAddress { OperandType type_; + uint32_t index_dimension_; Index index_1d_, index_2d_, index_3d_; - explicit OperandAddress(OperandType type, Index index_1d = Index(), - Index index_2d = Index(), Index index_3d = Index()) + explicit OperandAddress(OperandType type) + : type_(type), index_dimension_(0) {} + explicit OperandAddress(OperandType type, Index index_1d) + : type_(type), index_dimension_(1), index_1d_(index_1d) {} + explicit OperandAddress(OperandType type, Index index_1d, Index index_2d) : type_(type), + index_dimension_(2), + index_1d_(index_1d), + index_2d_(index_2d) {} + explicit OperandAddress(OperandType type, Index index_1d, Index index_2d, + Index index_3d) + : type_(type), + index_dimension_(3), index_1d_(index_1d), index_2d_(index_2d), index_3d_(index_3d) {} @@ -778,44 +762,38 @@ struct OperandAddress { OperandDimension GetDimension(bool in_dcl = false) const { return GetOperandDimension(type_, in_dcl); } - uint32_t GetIndexDimension(bool in_dcl = false) const { - return GetOperandIndexDimension(type_, in_dcl); - } - uint32_t GetOperandTokenTypeAndIndex(bool in_dcl = false) const { - uint32_t index_dimension = GetIndexDimension(in_dcl); - uint32_t operand_token = (uint32_t(type_) << 12) | (index_dimension << 20); - if (index_dimension > 0) { + uint32_t GetOperandTokenTypeAndIndex() const { + uint32_t operand_token = (uint32_t(type_) << 12) | (index_dimension_ << 20); + if (index_dimension_ > 0) { operand_token |= uint32_t(index_1d_.GetRepresentation()) << 22; - if (index_dimension > 1) { + if (index_dimension_ > 1) { operand_token |= uint32_t(index_2d_.GetRepresentation()) << 25; - if (index_dimension > 2) { + if (index_dimension_ > 2) { operand_token |= uint32_t(index_3d_.GetRepresentation()) << 28; } } } return operand_token; } - uint32_t GetLength(bool in_dcl = false) const { + uint32_t GetLength() const { uint32_t length = 0; - uint32_t index_dimension = GetIndexDimension(in_dcl); - if (index_dimension > 0) { + if (index_dimension_ > 0) { length += index_1d_.GetLength(); - if (index_dimension > 1) { + if (index_dimension_ > 1) { length += index_2d_.GetLength(); - if (index_dimension > 2) { + if (index_dimension_ > 2) { length += index_3d_.GetLength(); } } } return length; } - void Write(std::vector& code, bool in_dcl = false) const { - uint32_t index_dimension = GetIndexDimension(in_dcl); - if (index_dimension > 0) { + void Write(std::vector& code) const { + if (index_dimension_ > 0) { index_1d_.Write(code); - if (index_dimension > 1) { + if (index_dimension_ > 1) { index_2d_.Write(code); - if (index_dimension > 2) { + if (index_dimension_ > 2) { index_3d_.Write(code); } } @@ -845,18 +823,28 @@ struct Dest : OperandAddress { // declarations use read masks instead of swizzle (resource declarations still // use swizzle when they're vector, however). - explicit Dest(OperandType type, uint32_t write_mask = 0b1111, - Index index_1d = Index(), Index index_2d = Index(), - Index index_3d = Index()) + explicit Dest(OperandType type, uint32_t write_mask) + : OperandAddress(type), write_mask_(write_mask) {} + explicit Dest(OperandType type, uint32_t write_mask, Index index_1d) + : OperandAddress(type, index_1d), write_mask_(write_mask) {} + explicit Dest(OperandType type, uint32_t write_mask, Index index_1d, + Index index_2d) + : OperandAddress(type, index_1d, index_2d), write_mask_(write_mask) {} + explicit Dest(OperandType type, uint32_t write_mask, Index index_1d, + Index index_2d, Index index_3d) : OperandAddress(type, index_1d, index_2d, index_3d), write_mask_(write_mask) {} static Dest R(uint32_t index, uint32_t write_mask = 0b1111) { return Dest(OperandType::kTemp, write_mask, index); } - static Dest V(uint32_t index, uint32_t read_mask = 0b1111) { + static Dest V1D(uint32_t index, uint32_t read_mask = 0b1111) { return Dest(OperandType::kInput, read_mask, index); } + static Dest V2D(uint32_t index_1d, uint32_t index_2d, + uint32_t read_mask = 0b1111) { + return Dest(OperandType::kInput, read_mask, index_1d, index_2d); + } static Dest O(Index index, uint32_t write_mask = 0b1111) { return Dest(OperandType::kOutput, write_mask, index); } @@ -915,11 +903,14 @@ struct Dest : OperandAddress { } } [[nodiscard]] Dest Mask(uint32_t write_mask) const { - return Dest(type_, write_mask, index_1d_, index_2d_, index_3d_); + Dest new_dest(*this); + new_dest.write_mask_ = write_mask; + return new_dest; } [[nodiscard]] Dest MaskMasked(uint32_t write_mask) const { - return Dest(type_, write_mask_ & write_mask, index_1d_, index_2d_, - index_3d_); + Dest new_dest(*this); + new_dest.write_mask_ &= write_mask; + return new_dest; } static uint32_t GetMaskSingleComponent(uint32_t write_mask) { uint32_t component; @@ -934,11 +925,9 @@ struct Dest : OperandAddress { return GetMaskSingleComponent(GetMask(in_dcl)); } - uint32_t GetLength(bool in_dcl = false) const { - return 1 + OperandAddress::GetLength(in_dcl); - } + uint32_t GetLength() const { return 1 + OperandAddress::GetLength(); } void Write(std::vector& code, bool in_dcl = false) const { - uint32_t operand_token = GetOperandTokenTypeAndIndex(in_dcl); + uint32_t operand_token = GetOperandTokenTypeAndIndex(); OperandDimension dimension = GetDimension(in_dcl); operand_token |= uint32_t(dimension); if (dimension == OperandDimension::kVector) { @@ -947,7 +936,7 @@ struct Dest : OperandAddress { (uint32_t(ComponentSelection::kMask) << 2) | (write_mask_ << 4); } code.push_back(operand_token); - OperandAddress::Write(code, in_dcl); + OperandAddress::Write(code); } }; @@ -962,18 +951,21 @@ struct Src : OperandAddress { // Ignored for 0-component and 1-component operand types. uint32_t swizzle_; - bool absolute_; - bool negate_; + bool absolute_ = false; + bool negate_ = false; // Only valid for OperandType::kImmediate32. uint32_t immediate_[4]; - explicit Src(OperandType type, uint32_t swizzle = kXYZW, - Index index_1d = Index(), Index index_2d = Index(), - Index index_3d = Index()) - : OperandAddress(type, index_1d, index_2d, index_3d), - swizzle_(swizzle), - absolute_(false), - negate_(false) {} + explicit Src(OperandType type, uint32_t swizzle) + : OperandAddress(type), swizzle_(swizzle) {} + explicit Src(OperandType type, uint32_t swizzle, Index index_1d) + : OperandAddress(type, index_1d), swizzle_(swizzle) {} + explicit Src(OperandType type, uint32_t swizzle, Index index_1d, + Index index_2d) + : OperandAddress(type, index_1d, index_2d), swizzle_(swizzle) {} + explicit Src(OperandType type, uint32_t swizzle, Index index_1d, + Index index_2d, Index index_3d) + : OperandAddress(type, index_1d, index_2d, index_3d), swizzle_(swizzle) {} // For creating instances for use in declarations. struct DclT {}; @@ -982,9 +974,12 @@ struct Src : OperandAddress { static Src R(uint32_t index, uint32_t swizzle = kXYZW) { return Src(OperandType::kTemp, swizzle, index); } - static Src V(Index index, uint32_t swizzle = kXYZW) { + static Src V1D(Index index, uint32_t swizzle = kXYZW) { return Src(OperandType::kInput, swizzle, index); } + static Src V2D(Index index_1d, Index index_2d, uint32_t swizzle = kXYZW) { + return Src(OperandType::kInput, swizzle, index_1d, index_2d); + } static Src X(uint32_t index_1d, Index index_2d, uint32_t swizzle = kXYZW) { return Src(OperandType::kIndexableTemp, swizzle, index_1d, index_2d); } @@ -1108,15 +1103,14 @@ struct Src : OperandAddress { return new_src; } - uint32_t GetLength(uint32_t mask, bool force_vector = false, - bool in_dcl = false) const { + uint32_t GetLength(uint32_t mask, bool force_vector = false) const { bool is_vector = force_vector || (mask != 0b0000 && Dest::GetMaskSingleComponent(mask) == UINT32_MAX); if (type_ == OperandType::kImmediate32) { return is_vector ? 5 : 2; } - return ((absolute_ || negate_) ? 2 : 1) + OperandAddress::GetLength(in_dcl); + return ((absolute_ || negate_) ? 2 : 1) + OperandAddress::GetLength(); } static constexpr uint32_t GetModifiedImmediate(uint32_t value, bool is_integer, bool absolute, @@ -1147,7 +1141,7 @@ struct Src : OperandAddress { } void Write(std::vector& code, bool is_integer, uint32_t mask, bool force_vector = false, bool in_dcl = false) const { - uint32_t operand_token = GetOperandTokenTypeAndIndex(in_dcl); + uint32_t operand_token = GetOperandTokenTypeAndIndex(); uint32_t mask_single_component = Dest::GetMaskSingleComponent(mask); uint32_t select_component = mask_single_component != UINT32_MAX ? mask_single_component : 0; @@ -1220,7 +1214,7 @@ struct Src : OperandAddress { code.push_back(uint32_t(ExtendedOperandType::kModifier) | (uint32_t(modifier) << 6)); } - OperandAddress::Write(code, in_dcl); + OperandAddress::Write(code); } } }; @@ -1915,7 +1909,7 @@ class Assembler { } void OpDclResource(ResourceDimension dimension, uint32_t return_type_token, const Src& operand, uint32_t space = 0) { - uint32_t operands_length = operand.GetLength(0b1111, false, true); + uint32_t operands_length = operand.GetLength(0b1111, false); code_.reserve(code_.size() + 3 + operands_length); code_.push_back(OpcodeToken(Opcode::kDclResource, 2 + operands_length) | (uint32_t(dimension) << 11)); @@ -1929,7 +1923,7 @@ class Assembler { ConstantBufferAccessPattern access_pattern = ConstantBufferAccessPattern::kImmediateIndexed, uint32_t space = 0) { - uint32_t operands_length = operand.GetLength(0b1111, false, true); + uint32_t operands_length = operand.GetLength(0b1111, false); code_.reserve(code_.size() + 3 + operands_length); code_.push_back( OpcodeToken(Opcode::kDclConstantBuffer, 2 + operands_length) | @@ -1941,7 +1935,7 @@ class Assembler { void OpDclSampler(const Src& operand, SamplerMode mode = SamplerMode::kDefault, uint32_t space = 0) { - uint32_t operands_length = operand.GetLength(0b1111, false, true); + uint32_t operands_length = operand.GetLength(0b1111, false); code_.reserve(code_.size() + 2 + operands_length); code_.push_back(OpcodeToken(Opcode::kDclSampler, 1 + operands_length) | (uint32_t(mode) << 11)); @@ -1949,14 +1943,14 @@ class Assembler { code_.push_back(space); } void OpDclInput(const Dest& operand) { - uint32_t operands_length = operand.GetLength(true); + uint32_t operands_length = operand.GetLength(); code_.reserve(code_.size() + 1 + operands_length); code_.push_back(OpcodeToken(Opcode::kDclInput, operands_length)); operand.Write(code_, true); ++stat_.dcl_count; } void OpDclInputSGV(const Dest& operand, Name name) { - uint32_t operands_length = operand.GetLength(true); + uint32_t operands_length = operand.GetLength(); code_.reserve(code_.size() + 2 + operands_length); code_.push_back(OpcodeToken(Opcode::kDclInputSGV, 1 + operands_length)); operand.Write(code_, true); @@ -1964,7 +1958,7 @@ class Assembler { ++stat_.dcl_count; } void OpDclInputPS(InterpolationMode interpolation_mode, const Dest& operand) { - uint32_t operands_length = operand.GetLength(true); + uint32_t operands_length = operand.GetLength(); code_.reserve(code_.size() + 1 + operands_length); code_.push_back(OpcodeToken(Opcode::kDclInputPS, operands_length) | (uint32_t(interpolation_mode) << 11)); @@ -1972,7 +1966,7 @@ class Assembler { ++stat_.dcl_count; } void OpDclInputPSSGV(const Dest& operand, Name name) { - uint32_t operands_length = operand.GetLength(true); + uint32_t operands_length = operand.GetLength(); code_.reserve(code_.size() + 2 + operands_length); // Constant interpolation mode is set in FXC output at least for // SV_IsFrontFace, despite the comment in d3d12TokenizedProgramFormat.hpp @@ -1985,7 +1979,7 @@ class Assembler { } void OpDclInputPSSIV(InterpolationMode interpolation_mode, const Dest& operand, Name name) { - uint32_t operands_length = operand.GetLength(true); + uint32_t operands_length = operand.GetLength(); code_.reserve(code_.size() + 2 + operands_length); code_.push_back(OpcodeToken(Opcode::kDclInputPSSIV, 1 + operands_length) | (uint32_t(interpolation_mode) << 11)); @@ -1994,14 +1988,14 @@ class Assembler { ++stat_.dcl_count; } void OpDclOutput(const Dest& operand) { - uint32_t operands_length = operand.GetLength(true); + uint32_t operands_length = operand.GetLength(); code_.reserve(code_.size() + 1 + operands_length); code_.push_back(OpcodeToken(Opcode::kDclOutput, operands_length)); operand.Write(code_, true); ++stat_.dcl_count; } void OpDclOutputSIV(const Dest& operand, Name name) { - uint32_t operands_length = operand.GetLength(true); + uint32_t operands_length = operand.GetLength(); code_.reserve(code_.size() + 2 + operands_length); code_.push_back(OpcodeToken(Opcode::kDclOutputSIV, 1 + operands_length)); operand.Write(code_, true); @@ -2124,7 +2118,7 @@ class Assembler { void OpDclUnorderedAccessViewTyped(ResourceDimension dimension, uint32_t flags, uint32_t return_type_token, const Src& operand, uint32_t space = 0) { - uint32_t operands_length = operand.GetLength(0b1111, false, true); + uint32_t operands_length = operand.GetLength(0b1111, false); code_.reserve(code_.size() + 3 + operands_length); code_.push_back( OpcodeToken(Opcode::kDclUnorderedAccessViewTyped, 2 + operands_length) | @@ -2137,7 +2131,7 @@ class Assembler { // kUAVFlagRasterizerOrderedAccess. void OpDclUnorderedAccessViewRaw(uint32_t flags, const Src& operand, uint32_t space = 0) { - uint32_t operands_length = operand.GetLength(0b1111, false, true); + uint32_t operands_length = operand.GetLength(0b1111, false); code_.reserve(code_.size() + 2 + operands_length); code_.push_back( OpcodeToken(Opcode::kDclUnorderedAccessViewRaw, 1 + operands_length) | @@ -2146,7 +2140,7 @@ class Assembler { code_.push_back(space); } void OpDclResourceRaw(const Src& operand, uint32_t space = 0) { - uint32_t operands_length = operand.GetLength(0b1111, false, true); + uint32_t operands_length = operand.GetLength(0b1111, false); code_.reserve(code_.size() + 2 + operands_length); code_.push_back(OpcodeToken(Opcode::kDclResourceRaw, 1 + operands_length)); operand.Write(code_, true, 0b1111, false, true); diff --git a/src/xenia/gpu/dxbc_shader_translator.cc b/src/xenia/gpu/dxbc_shader_translator.cc index 885705476..b99e9cbe8 100644 --- a/src/xenia/gpu/dxbc_shader_translator.cc +++ b/src/xenia/gpu/dxbc_shader_translator.cc @@ -326,16 +326,17 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() { // Check if the closing vertex of a non-indexed line loop is being processed. a_.OpINE( index_dest, - dxbc::Src::V(uint32_t(InOutRegister::kVSInVertexIndex), dxbc::Src::kXXXX), + dxbc::Src::V1D(uint32_t(InOutRegister::kVSInVertexIndex), + dxbc::Src::kXXXX), LoadSystemConstant(SystemConstants::Index::kLineLoopClosingIndex, offsetof(SystemConstants, line_loop_closing_index), dxbc::Src::kXXXX)); // Zero the index if processing the closing vertex of a line loop, or do // nothing (replace 0 with 0) if not needed. - a_.OpAnd( - index_dest, - dxbc::Src::V(uint32_t(InOutRegister::kVSInVertexIndex), dxbc::Src::kXXXX), - index_src); + a_.OpAnd(index_dest, + dxbc::Src::V1D(uint32_t(InOutRegister::kVSInVertexIndex), + dxbc::Src::kXXXX), + index_src); { // Swap the vertex index's endianness. @@ -590,7 +591,7 @@ void DxbcShaderTranslator::StartPixelShader() { // system_temp_depth_stencil_ before any return statement is possibly // reached. assert_true(system_temp_depth_stencil_ != UINT32_MAX); - dxbc::Src in_position_z(dxbc::Src::V( + dxbc::Src in_position_z(dxbc::Src::V1D( uint32_t(InOutRegister::kPSInPosition), dxbc::Src::kZZZZ)); in_position_used_ |= 0b0100; a_.OpDerivRTXCoarse(dxbc::Dest::R(system_temp_depth_stencil_, 0b0001), @@ -633,14 +634,14 @@ void DxbcShaderTranslator::StartPixelShader() { // At center. a_.OpMov(uses_register_dynamic_addressing ? dxbc::Dest::X(0, i) : dxbc::Dest::R(i), - dxbc::Src::V(uint32_t(InOutRegister::kPSInInterpolators) + i)); + dxbc::Src::V1D(uint32_t(InOutRegister::kPSInInterpolators) + i)); a_.OpElse(); // At centroid. Not really important that 2x MSAA is emulated using // ForcedSampleCount 4 - what matters is that the sample position will // be within the primitive, and the value will not be extrapolated. a_.OpEvalCentroid( dxbc::Dest::R(centroid_register), - dxbc::Src::V(uint32_t(InOutRegister::kPSInInterpolators) + i)); + dxbc::Src::V1D(uint32_t(InOutRegister::kPSInInterpolators) + i)); if (uses_register_dynamic_addressing) { a_.OpMov(dxbc::Dest::X(0, i), dxbc::Src::R(centroid_register)); } @@ -677,7 +678,7 @@ void DxbcShaderTranslator::StartPixelShader() { // have correct derivative magnitude and LODs. in_position_used_ |= 0b0011; a_.OpRoundNI(dxbc::Dest::R(param_gen_temp, 0b0011), - dxbc::Src::V(uint32_t(InOutRegister::kPSInPosition))); + dxbc::Src::V1D(uint32_t(InOutRegister::kPSInPosition))); uint32_t resolution_scaled_axes = uint32_t(draw_resolution_scale_x_ > 1) | (uint32_t(draw_resolution_scale_y_ > 1) << 1); @@ -701,20 +702,20 @@ void DxbcShaderTranslator::StartPixelShader() { // Negate modifier flips the sign bit even for 0 - set it to minus for // backfaces. in_front_face_used_ = true; - a_.OpMovC( - dxbc::Dest::R(param_gen_temp, 0b0001), - dxbc::Src::V(uint32_t(InOutRegister::kPSInFrontFaceAndSampleIndex), - dxbc::Src::kXXXX), - dxbc::Src::R(param_gen_temp, dxbc::Src::kXXXX), - -dxbc::Src::R(param_gen_temp, dxbc::Src::kXXXX)); + a_.OpMovC(dxbc::Dest::R(param_gen_temp, 0b0001), + dxbc::Src::V1D( + uint32_t(InOutRegister::kPSInFrontFaceAndSampleIndex), + dxbc::Src::kXXXX), + dxbc::Src::R(param_gen_temp, dxbc::Src::kXXXX), + -dxbc::Src::R(param_gen_temp, dxbc::Src::kXXXX)); } a_.OpEndIf(); // Point sprite coordinates. // Saturate to avoid negative point coordinates if the center of the pixel // is not covered, and extrapolation is done. a_.OpMov(dxbc::Dest::R(param_gen_temp, 0b1100), - dxbc::Src::V(uint32_t(InOutRegister::kPSInPointParameters), - 0b0100 << 4), + dxbc::Src::V1D(uint32_t(InOutRegister::kPSInPointParameters), + 0b0100 << 4), true); // Primitive type. { @@ -3499,7 +3500,7 @@ void DxbcShaderTranslator::WriteShaderCode() { if (register_count()) { // Unswapped vertex index input (only X component). ao_.OpDclInputSGV( - dxbc::Dest::V(uint32_t(InOutRegister::kVSInVertexIndex), 0b0001), + dxbc::Dest::V1D(uint32_t(InOutRegister::kVSInVertexIndex), 0b0001), dxbc::Name::kVertexID); } } @@ -3537,14 +3538,14 @@ void DxbcShaderTranslator::WriteShaderCode() { for (uint32_t i = 0; i < interpolator_count; ++i) { ao_.OpDclInputPS( dxbc::InterpolationMode::kLinear, - dxbc::Dest::V(uint32_t(InOutRegister::kPSInInterpolators) + i)); + dxbc::Dest::V1D(uint32_t(InOutRegister::kPSInInterpolators) + i)); } if (register_count()) { // Point parameters input (only coordinates, not size, needed). ao_.OpDclInputPS( dxbc::InterpolationMode::kLinear, - dxbc::Dest::V(uint32_t(InOutRegister::kPSInPointParameters), - 0b0011)); + dxbc::Dest::V1D(uint32_t(InOutRegister::kPSInPointParameters), + 0b0011)); } } if (in_position_used_) { @@ -3560,8 +3561,8 @@ void DxbcShaderTranslator::WriteShaderCode() { (is_writing_float24_depth && !shader_writes_depth) ? dxbc::InterpolationMode::kLinearNoPerspectiveSample : dxbc::InterpolationMode::kLinearNoPerspective, - dxbc::Dest::V(uint32_t(InOutRegister::kPSInPosition), - in_position_used_), + dxbc::Dest::V1D(uint32_t(InOutRegister::kPSInPosition), + in_position_used_), dxbc::Name::kPosition); } bool sample_rate_memexport = @@ -3575,8 +3576,8 @@ void DxbcShaderTranslator::WriteShaderCode() { if (front_face_and_sample_index_mask) { // Is front face, sample index. ao_.OpDclInputPSSGV( - dxbc::Dest::V(uint32_t(InOutRegister::kPSInFrontFaceAndSampleIndex), - front_face_and_sample_index_mask), + dxbc::Dest::V1D(uint32_t(InOutRegister::kPSInFrontFaceAndSampleIndex), + front_face_and_sample_index_mask), dxbc::Name::kIsFrontFace); } if (edram_rov_used_) { diff --git a/src/xenia/gpu/dxbc_shader_translator_memexport.cc b/src/xenia/gpu/dxbc_shader_translator_memexport.cc index f9d09fc7d..e2f65e66f 100644 --- a/src/xenia/gpu/dxbc_shader_translator_memexport.cc +++ b/src/xenia/gpu/dxbc_shader_translator_memexport.cc @@ -139,7 +139,7 @@ void DxbcShaderTranslator::ExportToMemory() { in_position_used_ |= resolution_scaled_axes; a_.OpFToU( dxbc::Dest::R(control_temp, resolution_scaled_axes << 1), - dxbc::Src::V(uint32_t(InOutRegister::kPSInPosition), 0b0100 << 2)); + dxbc::Src::V1D(uint32_t(InOutRegister::kPSInPosition), 0b0100 << 2)); dxbc::Dest resolution_scaling_temp_dest( dxbc::Dest::R(control_temp, 0b1000)); dxbc::Src resolution_scaling_temp_src( @@ -201,8 +201,8 @@ void DxbcShaderTranslator::ExportToMemory() { a_.OpIEq( dxbc::Dest::R(control_temp, inner_condition_provided ? 0b0010 : 0b0001), - dxbc::Src::V(uint32_t(InOutRegister::kPSInFrontFaceAndSampleIndex), - dxbc::Src::kYYYY), + dxbc::Src::V1D(uint32_t(InOutRegister::kPSInFrontFaceAndSampleIndex), + dxbc::Src::kYYYY), dxbc::Src::R(control_temp, dxbc::Src::kYYYY)); if (inner_condition_provided) { // Merge with the previous condition in control_temp.x. diff --git a/src/xenia/gpu/dxbc_shader_translator_om.cc b/src/xenia/gpu/dxbc_shader_translator_om.cc index 6c90c42e8..164ac07fb 100644 --- a/src/xenia/gpu/dxbc_shader_translator_om.cc +++ b/src/xenia/gpu/dxbc_shader_translator_om.cc @@ -172,7 +172,7 @@ void DxbcShaderTranslator::StartPixelShader_LoadROVParameters() { // system_temp_rov_params_.y = Y host pixel position as uint in_position_used_ |= 0b0011; a_.OpFToU(dxbc::Dest::R(system_temp_rov_params_, 0b0011), - dxbc::Src::V(uint32_t(InOutRegister::kPSInPosition))); + dxbc::Src::V1D(uint32_t(InOutRegister::kPSInPosition))); // Convert the position from pixels to samples. // system_temp_rov_params_.x = X sample 0 position // system_temp_rov_params_.y = Y sample 0 position @@ -605,8 +605,8 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { ROV_DepthTo24Bit(system_temp_depth_stencil_, 0, system_temp_depth_stencil_, 0, temp, 0); } else { - dxbc::Src in_position_z( - dxbc::Src::V(uint32_t(InOutRegister::kPSInPosition), dxbc::Src::kZZZZ)); + dxbc::Src in_position_z(dxbc::Src::V1D( + uint32_t(InOutRegister::kPSInPosition), dxbc::Src::kZZZZ)); // Get the derivatives of the screen-space (but not clamped to the viewport // depth bounds yet - this happens after the pixel shader in Direct3D 11+; // also linear within the triangle - thus constant derivatives along the @@ -645,9 +645,9 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { a_.OpMax(temp_z_dest, z_ddx_src.Abs(), z_ddy_src.Abs()); // Calculate the depth bias for the needed faceness. in_front_face_used_ = true; - a_.OpIf(true, - dxbc::Src::V(uint32_t(InOutRegister::kPSInFrontFaceAndSampleIndex), - dxbc::Src::kXXXX)); + a_.OpIf(true, dxbc::Src::V1D( + uint32_t(InOutRegister::kPSInFrontFaceAndSampleIndex), + dxbc::Src::kXXXX)); // temp.x if early = ddx(z) // temp.y if early = ddy(z) // temp.z = front face polygon offset @@ -949,7 +949,7 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { { // Check the current face to get the reference and apply the read mask. in_front_face_used_ = true; - a_.OpIf(true, dxbc::Src::V( + a_.OpIf(true, dxbc::Src::V1D( uint32_t(InOutRegister::kPSInFrontFaceAndSampleIndex), dxbc::Src::kXXXX)); for (uint32_t j = 0; j < 2; ++j) { @@ -1012,8 +1012,8 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { in_front_face_used_ = true; a_.OpMovC( sample_temp_z_dest, - dxbc::Src::V(uint32_t(InOutRegister::kPSInFrontFaceAndSampleIndex), - dxbc::Src::kXXXX), + dxbc::Src::V1D(uint32_t(InOutRegister::kPSInFrontFaceAndSampleIndex), + dxbc::Src::kXXXX), LoadSystemConstant( SystemConstants::Index::kEdramStencil, offsetof(SystemConstants, edram_stencil_front_func_ops), @@ -1090,18 +1090,18 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { // Replace. a_.OpCase(dxbc::Src::LU(uint32_t(xenos::StencilOp::kReplace))); in_front_face_used_ = true; - a_.OpMovC( - sample_temp_y_dest, - dxbc::Src::V(uint32_t(InOutRegister::kPSInFrontFaceAndSampleIndex), - dxbc::Src::kXXXX), - LoadSystemConstant( - SystemConstants::Index::kEdramStencil, - offsetof(SystemConstants, edram_stencil_front_reference), - dxbc::Src::kXXXX), - LoadSystemConstant( - SystemConstants::Index::kEdramStencil, - offsetof(SystemConstants, edram_stencil_back_reference), - dxbc::Src::kXXXX)); + a_.OpMovC(sample_temp_y_dest, + dxbc::Src::V1D( + uint32_t(InOutRegister::kPSInFrontFaceAndSampleIndex), + dxbc::Src::kXXXX), + LoadSystemConstant( + SystemConstants::Index::kEdramStencil, + offsetof(SystemConstants, edram_stencil_front_reference), + dxbc::Src::kXXXX), + LoadSystemConstant( + SystemConstants::Index::kEdramStencil, + offsetof(SystemConstants, edram_stencil_back_reference), + dxbc::Src::kXXXX)); a_.OpBreak(); // Increment and clamp. a_.OpCase(dxbc::Src::LU(uint32_t(xenos::StencilOp::kIncrementClamp))); @@ -1155,8 +1155,8 @@ void DxbcShaderTranslator::ROV_DepthStencilTest() { in_front_face_used_ = true; a_.OpMovC( sample_temp_z_dest, - dxbc::Src::V(uint32_t(InOutRegister::kPSInFrontFaceAndSampleIndex), - dxbc::Src::kXXXX), + dxbc::Src::V1D(uint32_t(InOutRegister::kPSInFrontFaceAndSampleIndex), + dxbc::Src::kXXXX), LoadSystemConstant( SystemConstants::Index::kEdramStencil, offsetof(SystemConstants, edram_stencil_front_write_mask), @@ -1924,10 +1924,10 @@ void DxbcShaderTranslator::CompletePixelShader_DSV_DepthTo24Bit() { // assumption of it being clamped while working with the bit representation. temp = PushSystemTemp(); in_position_used_ |= 0b0100; - a_.OpMul( - dxbc::Dest::R(temp, 0b0001), - dxbc::Src::V(uint32_t(InOutRegister::kPSInPosition), dxbc::Src::kZZZZ), - dxbc::Src::LF(2.0f), true); + a_.OpMul(dxbc::Dest::R(temp, 0b0001), + dxbc::Src::V1D(uint32_t(InOutRegister::kPSInPosition), + dxbc::Src::kZZZZ), + dxbc::Src::LF(2.0f), true); } dxbc::Dest temp_x_dest(dxbc::Dest::R(temp, 0b0001)); @@ -2068,7 +2068,7 @@ void DxbcShaderTranslator::CompletePixelShader_AlphaToMask() { // temp.x = alpha to coverage offset as float 0.0...3.0. in_position_used_ |= 0b0011; a_.OpFToU(dxbc::Dest::R(temp, 0b0011), - dxbc::Src::V(uint32_t(InOutRegister::kPSInPosition))); + dxbc::Src::V1D(uint32_t(InOutRegister::kPSInPosition))); a_.OpAnd(dxbc::Dest::R(temp, 0b0010), dxbc::Src::R(temp, dxbc::Src::kYYYY), dxbc::Src::LU(1)); a_.OpBFI(temp_x_dest, dxbc::Src::LU(1), dxbc::Src::LU(1), temp_x_src,