[DXBC] Cleanup: kSysConst_*_Vec/Comp > LoadSystemConstant

This commit is contained in:
Triang3l 2021-05-16 17:43:18 +03:00
parent ff23b1d9f9
commit 9f8a432479
5 changed files with 308 additions and 569 deletions

View File

@ -290,14 +290,12 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
dxbc::Src index_src(dxbc::Src::R(reg, dxbc::Src::kXXXX));
// Check if the closing vertex of a non-indexed line loop is being processed.
system_constants_used_ |= 1ull << kSysConst_LineLoopClosingIndex_Index;
a_.OpINE(
index_dest,
dxbc::Src::V(uint32_t(InOutRegister::kVSInVertexIndex), dxbc::Src::kXXXX),
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_LineLoopClosingIndex_Vec)
.Select(kSysConst_LineLoopClosingIndex_Comp));
LoadSystemConstant(SystemConstantIndex::kLineLoopClosingIndex,
offsetof(SystemConstants, line_loop_closing_index),
dxbc::Src::kXXXX));
// Zero the index if processing the closing vertex of a line loop, or do
// nothing (replace 0 with 0) if not needed.
a_.OpAnd(
@ -307,12 +305,9 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
{
// Swap the vertex index's endianness.
system_constants_used_ |= 1ull << kSysConst_VertexIndexEndian_Index;
dxbc::Src endian_src(
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_VertexIndexEndian_Vec)
.Select(kSysConst_VertexIndexEndian_Comp));
dxbc::Src endian_src(LoadSystemConstant(
SystemConstantIndex::kVertexIndexEndian,
offsetof(SystemConstants, vertex_index_endian), dxbc::Src::kXXXX));
dxbc::Dest swap_temp_dest(dxbc::Dest::R(reg, 0b0010));
dxbc::Src swap_temp_src(dxbc::Src::R(reg, dxbc::Src::kYYYY));
@ -350,12 +345,10 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
}
// Add the base vertex index.
system_constants_used_ |= 1ull << kSysConst_VertexBaseIndex_Index;
a_.OpIAdd(index_dest, index_src,
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_VertexBaseIndex_Vec)
.Select(kSysConst_VertexBaseIndex_Comp));
LoadSystemConstant(SystemConstantIndex::kVertexBaseIndex,
offsetof(SystemConstants, vertex_base_index),
dxbc::Src::kXXXX));
// Convert to float.
a_.OpIToF(index_dest, index_src);
@ -574,13 +567,10 @@ void DxbcShaderTranslator::StartPixelShader() {
// Copy interpolants to GPRs.
uint32_t centroid_temp =
uses_register_dynamic_addressing ? PushSystemTemp() : UINT32_MAX;
system_constants_used_ |= 1ull
<< kSysConst_InterpolatorSamplingPattern_Index;
dxbc::Src sampling_pattern_src(
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_InterpolatorSamplingPattern_Vec)
.Select(kSysConst_InterpolatorSamplingPattern_Comp));
dxbc::Src sampling_pattern_src(LoadSystemConstant(
SystemConstantIndex::kInterpolatorSamplingPattern,
offsetof(SystemConstants, interpolator_sampling_pattern),
dxbc::Src::kXXXX));
for (uint32_t i = 0; i < interpolator_count; ++i) {
// With GPR dynamic addressing, first evaluate to centroid_temp r#, then
// store to the x#.
@ -615,12 +605,9 @@ void DxbcShaderTranslator::StartPixelShader() {
// Write pixel parameters - screen (XY absolute value) and point sprite (ZW
// absolute value) coordinates, facing (X sign bit) - to the specified
// interpolator register (ps_param_gen).
system_constants_used_ |= 1ull << kSysConst_PSParamGen_Index;
dxbc::Src param_gen_index_src(
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_PSParamGen_Vec)
.Select(kSysConst_PSParamGen_Comp));
dxbc::Src param_gen_index_src(LoadSystemConstant(
SystemConstantIndex::kPSParamGen,
offsetof(SystemConstants, ps_param_gen), dxbc::Src::kXXXX));
uint32_t param_gen_temp = PushSystemTemp();
// Check if pixel parameters need to be written.
a_.OpULT(dxbc::Dest::R(param_gen_temp, 0b0001), param_gen_index_src,
@ -650,12 +637,7 @@ void DxbcShaderTranslator::StartPixelShader() {
a_.OpMov(dxbc::Dest::R(param_gen_temp, 0b0011),
dxbc::Src::R(param_gen_temp).Abs());
// Check if faceness applies to the current primitive type.
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
a_.OpAnd(dxbc::Dest::R(param_gen_temp, 0b0100),
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_Flags_Vec)
.Select(kSysConst_Flags_Comp),
a_.OpAnd(dxbc::Dest::R(param_gen_temp, 0b0100), LoadFlagsSystemConstant(),
dxbc::Src::LU(kSysFlag_PrimitivePolygonal));
a_.OpIf(true, dxbc::Src::R(param_gen_temp, dxbc::Src::kZZZZ));
{
@ -675,14 +657,12 @@ void DxbcShaderTranslator::StartPixelShader() {
dxbc::Dest point_coord_r_zw_dest(dxbc::Dest::R(param_gen_temp, 0b1100));
dxbc::Src point_coord_v_xxxy_src(dxbc::Src::V(
uint32_t(InOutRegister::kPSInPointParameters), 0b01000000));
system_constants_used_ |= 1ull
<< kSysConst_InterpolatorSamplingPattern_Index;
a_.OpUBFE(dxbc::Dest::R(param_gen_temp, 0b0100), dxbc::Src::LU(1),
param_gen_index_src,
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_InterpolatorSamplingPattern_Vec)
.Select(kSysConst_InterpolatorSamplingPattern_Comp));
LoadSystemConstant(
SystemConstantIndex::kInterpolatorSamplingPattern,
offsetof(SystemConstants, interpolator_sampling_pattern),
dxbc::Src::kXXXX));
a_.OpIf(bool(xenos::SampleLocation::kCenter),
dxbc::Src::R(param_gen_temp, dxbc::Src::kZZZZ));
// At center.
@ -697,10 +677,7 @@ void DxbcShaderTranslator::StartPixelShader() {
// Copy the GPR number to r# for relative addressing.
uint32_t param_gen_copy_temp = PushSystemTemp();
a_.OpMov(dxbc::Dest::R(param_gen_copy_temp, 0b0001),
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_PSParamGen_Vec)
.Select(kSysConst_PSParamGen_Comp));
param_gen_index_src);
// Write to the GPR.
a_.OpMov(dxbc::Dest::X(0, dxbc::Index(param_gen_copy_temp, 0)),
param_gen_src);
@ -864,11 +841,7 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
dxbc::Dest temp_x_dest(dxbc::Dest::R(temp, 0b0001));
dxbc::Src temp_x_src(dxbc::Src::R(temp, dxbc::Src::kXXXX));
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
dxbc::Src flags_src(dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_Flags_Vec)
.Select(kSysConst_Flags_Comp));
dxbc::Src flags_src(LoadFlagsSystemConstant());
// Check if the shader already returns W, not 1/W, and if it doesn't, turn 1/W
// into W. Using div rather than relaxed-precision rcp for safety.
@ -911,7 +884,6 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
// Not possible to handle UCP_CULL_ONLY_ENA with the same shader though, since
// there can be only 8 SV_ClipDistance + SV_CullDistance values at most, but
// 12 would be needed.
system_constants_used_ |= 1ull << kSysConst_UserClipPlanes_Index;
for (uint32_t i = 0; i < 6; ++i) {
// Check if the clip plane is enabled - this `if` is needed, as opposed to
// just zeroing the clip planes in the constants, so Infinity and NaN in the
@ -924,30 +896,25 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
uint32_t(InOutRegister::kVSDSOutClipDistance0123) + (i >> 2),
1 << (i & 3)),
dxbc::Src::R(system_temp_position_),
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_UserClipPlanes_Vec + i));
LoadSystemConstant(SystemConstantIndex::kUserClipPlanes,
offsetof(SystemConstants, user_clip_planes) +
sizeof(float) * 4 * i,
dxbc::Src::kXYZW));
a_.OpEndIf();
}
// Apply scale for guest to host viewport and clip space conversion. Also, if
// the vertex shader is multipass, the NDC scale constant can be used to set
// position to NaN to kill all primitives.
system_constants_used_ |= 1ull << kSysConst_NDCScale_Index;
a_.OpMul(dxbc::Dest::R(system_temp_position_, 0b0111),
dxbc::Src::R(system_temp_position_),
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_NDCScale_Vec,
kSysConst_NDCScale_Comp * 0b010101 + 0b100100));
LoadSystemConstant(SystemConstantIndex::kNDCScale,
offsetof(SystemConstants, ndc_scale), 0b100100));
// Apply offset (multiplied by W) used for the same purposes.
system_constants_used_ |= 1ull << kSysConst_NDCOffset_Index;
a_.OpMAd(dxbc::Dest::R(system_temp_position_, 0b0111),
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_NDCOffset_Vec,
kSysConst_NDCOffset_Comp * 0b010101 + 0b100100),
LoadSystemConstant(SystemConstantIndex::kNDCOffset,
offsetof(SystemConstants, ndc_offset), 0b100100),
dxbc::Src::R(system_temp_position_, dxbc::Src::kWWWW),
dxbc::Src::R(system_temp_position_));
@ -1959,8 +1926,9 @@ const DxbcShaderTranslator::ShaderRdefType
dxbc::RdefVariableType::kUInt, 1, 4, 0, ShaderRdefTypeIndex::kUint4},
};
const DxbcShaderTranslator::SystemConstantRdef DxbcShaderTranslator::
system_constant_rdef_[DxbcShaderTranslator::kSysConst_Count] = {
const DxbcShaderTranslator::SystemConstantRdef
DxbcShaderTranslator::system_constant_rdef_[size_t(
DxbcShaderTranslator::SystemConstantIndex::kCount)] = {
{"xe_flags", ShaderRdefTypeIndex::kUint, sizeof(uint32_t)},
{"xe_tessellation_factor_range", ShaderRdefTypeIndex::kFloat2,
sizeof(float) * 2},
@ -2110,9 +2078,9 @@ void DxbcShaderTranslator::WriteResourceDefinition() {
// Names.
name_ptr = (uint32_t(shader_object_.size()) - blob_position_dwords) *
sizeof(uint32_t);
uint32_t constant_name_ptrs_system[kSysConst_Count];
uint32_t constant_name_ptrs_system[size_t(SystemConstantIndex::kCount)];
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
for (uint32_t i = 0; i < kSysConst_Count; ++i) {
for (size_t i = 0; i < size_t(SystemConstantIndex::kCount); ++i) {
constant_name_ptrs_system[i] = name_ptr;
name_ptr += dxbc::AppendAlignedString(shader_object_,
system_constant_rdef_[i].name);
@ -2144,11 +2112,11 @@ void DxbcShaderTranslator::WriteResourceDefinition() {
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
shader_object_.resize(constant_position_dwords_system +
sizeof(dxbc::RdefVariable) / sizeof(uint32_t) *
kSysConst_Count);
size_t(SystemConstantIndex::kCount));
auto constants_system = reinterpret_cast<dxbc::RdefVariable*>(
shader_object_.data() + constant_position_dwords_system);
uint32_t constant_offset_system = 0;
for (uint32_t i = 0; i < kSysConst_Count; ++i) {
for (size_t i = 0; i < size_t(SystemConstantIndex::kCount); ++i) {
dxbc::RdefVariable& constant_system = constants_system[i];
const SystemConstantRdef& translator_constant_system =
system_constant_rdef_[i];
@ -2303,7 +2271,7 @@ void DxbcShaderTranslator::WriteResourceDefinition() {
cbuffer.type = dxbc::RdefCbufferType::kCbuffer;
if (i == cbuffer_index_system_constants_) {
cbuffer.name_ptr = cbuffer_name_ptr_system;
cbuffer.variable_count = kSysConst_Count;
cbuffer.variable_count = uint32_t(SystemConstantIndex::kCount);
cbuffer.variables_ptr =
(constant_position_dwords_system - blob_position_dwords) *
sizeof(uint32_t);

View File

@ -202,7 +202,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
};
// IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
// - kSysConst enum (indices, registers and first components).
// - SystemConstantIndex enum.
// - system_constant_rdef_.
// - d3d12/shaders/xenos_draw.hlsli (for geometry shaders).
struct SystemConstants {
@ -507,150 +507,42 @@ class DxbcShaderTranslator : public ShaderTranslator {
void ProcessAluInstruction(const ParsedAluInstruction& instr) override;
private:
enum : uint32_t {
// Indices.
enum class SystemConstantIndex : uint32_t {
kFlags,
kTessellationFactorRange,
kLineLoopClosingIndex,
kVertexIndexEndian,
kVertexBaseIndex,
kPointSize,
kPointSizeMinMax,
kPointScreenToNDC,
kUserClipPlanes,
kNDCScale,
kInterpolatorSamplingPattern,
kNDCOffset,
kPSParamGen,
kTextureSwizzledSigns,
kTexturesResolved,
kSampleCountLog2,
kAlphaTestReference,
kColorExpBias,
kAlphaToMask,
kEdramPitchTiles,
kEdramDepthRange,
kEdramPolyOffsetFront,
kEdramPolyOffsetBack,
kEdramDepthBaseDwords,
kEdramStencil,
kEdramRTBaseDwordsScaled,
kEdramRTFormatFlags,
kEdramRTClamp,
kEdramRTKeepMask,
kEdramRTBlendFactorsOps,
kEdramBlendConstant,
kSysConst_Flags_Index,
kSysConst_TessellationFactorRange_Index,
kSysConst_LineLoopClosingIndex_Index,
kSysConst_VertexIndexEndian_Index,
kSysConst_VertexBaseIndex_Index,
kSysConst_PointSize_Index,
kSysConst_PointSizeMinMax_Index,
kSysConst_PointScreenToNDC_Index,
kSysConst_UserClipPlanes_Index,
kSysConst_NDCScale_Index,
kSysConst_InterpolatorSamplingPattern_Index,
kSysConst_NDCOffset_Index,
kSysConst_PSParamGen_Index,
kSysConst_TextureSwizzledSigns_Index,
kSysConst_TexturesResolved_Index,
kSysConst_SampleCountLog2_Index,
kSysConst_AlphaTestReference_Index,
kSysConst_ColorExpBias_Index,
kSysConst_AlphaToMask_Index,
kSysConst_EdramPitchTiles_Index,
kSysConst_EdramDepthRange_Index,
kSysConst_EdramPolyOffsetFront_Index,
kSysConst_EdramPolyOffsetBack_Index,
kSysConst_EdramDepthBaseDwords_Index,
kSysConst_EdramStencil_Index,
kSysConst_EdramRTBaseDwordsScaled_Index,
kSysConst_EdramRTFormatFlags_Index,
kSysConst_EdramRTClamp_Index,
kSysConst_EdramRTKeepMask_Index,
kSysConst_EdramRTBlendFactorsOps_Index,
kSysConst_EdramBlendConstant_Index,
kSysConst_Count,
// Vectors.
kSysConst_Flags_Vec = 0,
kSysConst_Flags_Comp = 0,
kSysConst_TessellationFactorRange_Vec = kSysConst_Flags_Vec,
kSysConst_TessellationFactorRange_Comp = 1,
kSysConst_LineLoopClosingIndex_Vec = kSysConst_Flags_Vec,
kSysConst_LineLoopClosingIndex_Comp = 3,
kSysConst_VertexIndexEndian_Vec = kSysConst_LineLoopClosingIndex_Vec + 1,
kSysConst_VertexIndexEndian_Comp = 0,
kSysConst_VertexBaseIndex_Vec = kSysConst_VertexIndexEndian_Vec,
kSysConst_VertexBaseIndex_Comp = 1,
kSysConst_PointSize_Vec = kSysConst_VertexIndexEndian_Vec,
kSysConst_PointSize_Comp = 2,
kSysConst_PointSizeMinMax_Vec = kSysConst_PointSize_Vec + 1,
kSysConst_PointSizeMinMax_Comp = 0,
kSysConst_PointScreenToNDC_Vec = kSysConst_PointSizeMinMax_Vec,
kSysConst_PointScreenToNDC_Comp = 2,
// 6 vectors.
kSysConst_UserClipPlanes_Vec = kSysConst_PointScreenToNDC_Vec + 1,
kSysConst_NDCScale_Vec = kSysConst_UserClipPlanes_Vec + 6,
kSysConst_NDCScale_Comp = 0,
kSysConst_InterpolatorSamplingPattern_Vec = kSysConst_NDCScale_Vec,
kSysConst_InterpolatorSamplingPattern_Comp = 3,
kSysConst_NDCOffset_Vec = kSysConst_InterpolatorSamplingPattern_Vec + 1,
kSysConst_NDCOffset_Comp = 0,
kSysConst_PSParamGen_Vec = kSysConst_NDCOffset_Vec,
kSysConst_PSParamGen_Comp = 3,
// 2 vectors.
kSysConst_TextureSwizzledSigns_Vec = kSysConst_PSParamGen_Vec + 1,
kSysConst_TexturesResolved_Vec = kSysConst_TextureSwizzledSigns_Vec + 2,
kSysConst_TexturesResolved_Comp = 0,
kSysConst_SampleCountLog2_Vec = kSysConst_TexturesResolved_Vec,
kSysConst_SampleCountLog2_Comp = 1,
kSysConst_AlphaTestReference_Vec = kSysConst_TexturesResolved_Vec,
kSysConst_AlphaTestReference_Comp = 3,
kSysConst_ColorExpBias_Vec = kSysConst_AlphaTestReference_Vec + 1,
kSysConst_AlphaToMask_Vec = kSysConst_ColorExpBias_Vec + 1,
kSysConst_AlphaToMask_Comp = 0,
kSysConst_EdramPitchTiles_Vec = kSysConst_AlphaToMask_Vec,
kSysConst_EdramPitchTiles_Comp = 1,
kSysConst_EdramDepthRange_Vec = kSysConst_AlphaToMask_Vec,
kSysConst_EdramDepthRangeScale_Comp = 2,
kSysConst_EdramDepthRangeOffset_Comp = 3,
kSysConst_EdramPolyOffsetFront_Vec = kSysConst_EdramDepthRange_Vec + 1,
kSysConst_EdramPolyOffsetFrontScale_Comp = 0,
kSysConst_EdramPolyOffsetFrontOffset_Comp = 1,
kSysConst_EdramPolyOffsetBack_Vec = kSysConst_EdramPolyOffsetFront_Vec,
kSysConst_EdramPolyOffsetBackScale_Comp = 2,
kSysConst_EdramPolyOffsetBackOffset_Comp = 3,
kSysConst_EdramDepthBaseDwords_Vec = kSysConst_EdramPolyOffsetBack_Vec + 1,
kSysConst_EdramDepthBaseDwords_Comp = 0,
// 2 vectors.
kSysConst_EdramStencil_Vec = kSysConst_EdramDepthBaseDwords_Vec + 1,
kSysConst_EdramStencil_Front_Vec = kSysConst_EdramStencil_Vec,
kSysConst_EdramStencil_Back_Vec,
kSysConst_EdramStencil_Reference_Comp = 0,
kSysConst_EdramStencil_ReadMask_Comp,
kSysConst_EdramStencil_WriteMask_Comp,
kSysConst_EdramStencil_FuncOps_Comp,
kSysConst_EdramRTBaseDwordsScaled_Vec = kSysConst_EdramStencil_Vec + 2,
kSysConst_EdramRTFormatFlags_Vec =
kSysConst_EdramRTBaseDwordsScaled_Vec + 1,
// 4 vectors.
kSysConst_EdramRTClamp_Vec = kSysConst_EdramRTFormatFlags_Vec + 1,
// 2 vectors (render targets 01 and 23).
kSysConst_EdramRTKeepMask_Vec = kSysConst_EdramRTClamp_Vec + 4,
kSysConst_EdramRTBlendFactorsOps_Vec = kSysConst_EdramRTKeepMask_Vec + 2,
kSysConst_EdramBlendConstant_Vec = kSysConst_EdramRTBlendFactorsOps_Vec + 1,
kCount,
};
static_assert(kSysConst_Count <= 64,
static_assert(uint32_t(SystemConstantIndex::kCount) <= 64,
"Too many system constants, can't use uint64_t for usage bits");
static constexpr uint32_t kPointParametersTexCoord = xenos::kMaxInterpolators;
@ -685,19 +577,37 @@ class DxbcShaderTranslator : public ShaderTranslator {
kPSInFrontFaceAndSampleIndex,
};
// GetSystemConstantSrc + MarkSystemConstantUsed is for special cases of
// building the source unconditionally - in general, LoadSystemConstant must
// be used instead.
void MarkSystemConstantUsed(SystemConstantIndex index) {
  // Each system constant owns a single bit of the 64-bit usage mask, located
  // at the position given by the numeric value of its SystemConstantIndex.
  const uint64_t usage_bit = uint64_t(1) << uint32_t(index);
  system_constants_used_ |= usage_bit;
}
// Offset should be offsetof(SystemConstants, field). Swizzle values are
// relative to the first component in the vector according to offsetof - to
// request a scalar, use XXXX swizzle, and if it's at +4 in its 16-byte
// vector, it will be turned into YYYY, and so on.
// TODO(Triang3l): Index to enum class.
dxbc::Src LoadSystemConstant(uint32_t index, size_t offset,
uint32_t swizzle) {
system_constants_used_ |= uint64_t(1) << index;
// vector, it will be turned into YYYY, and so on. The swizzle may include
// out-of-bounds components of the vector for simplicity of use, assuming they
// will be dropped anyway later.
dxbc::Src GetSystemConstantSrc(size_t offset, uint32_t swizzle) {
uint32_t first_component = uint32_t((offset >> 2) & 3);
return dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
uint32_t(offset >> 4),
first_component * 0b01010101 + swizzle);
return dxbc::Src::CB(
cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants), uint32_t(offset >> 4),
std::min((swizzle & 3) + first_component, uint32_t(3)) |
std::min(((swizzle >> 2) & 3) + first_component, uint32_t(3)) << 2 |
std::min(((swizzle >> 4) & 3) + first_component, uint32_t(3)) << 4 |
std::min(((swizzle >> 6) & 3) + first_component, uint32_t(3)) << 6);
}
dxbc::Src LoadSystemConstant(SystemConstantIndex index, size_t offset,
                             uint32_t swizzle) {
  // Record the usage of the constant in the usage mask first, then build the
  // constant buffer source operand for the requested offset and swizzle.
  MarkSystemConstantUsed(index);
  dxbc::Src constant_src(GetSystemConstantSrc(offset, swizzle));
  return constant_src;
}
dxbc::Src LoadFlagsSystemConstant() {
  // Shorthand for loading the scalar `flags` field of SystemConstants (XXXX
  // swizzle relative to the field's own component), marking it as used.
  const size_t flags_offset = offsetof(SystemConstants, flags);
  return LoadSystemConstant(SystemConstantIndex::kFlags, flags_offset,
                            dxbc::Src::kXXXX);
}
Modification GetDxbcShaderModification() const {
@ -1071,8 +981,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
uint32_t size;
uint32_t padding_after;
};
static const SystemConstantRdef system_constant_rdef_[kSysConst_Count];
// Mask of system constants (1 << kSysConst_#_Index) used in the shader, so
static const SystemConstantRdef
system_constant_rdef_[size_t(SystemConstantIndex::kCount)];
// Mask of system constants (1 << SystemConstantIndex) used in the shader, so
// the remaining ones can be marked as unused in RDEF.
uint64_t system_constants_used_;

View File

@ -128,12 +128,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
// memexport is used), fetch from the appropriate binding. Extract whether
// shared memory is a UAV to system_temp_result_.x and check. In the `if`, put
// the more likely case (SRV), in the `else`, the less likely one (UAV).
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001),
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_Flags_Vec)
.Select(kSysConst_Flags_Comp),
LoadFlagsSystemConstant(),
dxbc::Src::LU(kSysFlag_SharedMemoryIsUAV));
a_.OpIf(false, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
if (srv_index_shared_memory_ == kBindingIndexUnallocated) {
@ -949,7 +945,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// calculations.
assert_zero(used_result_nonzero_components & 0b1000);
a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b1000),
LoadSystemConstant(kSysConst_TexturesResolved_Index,
LoadSystemConstant(SystemConstantIndex::kTexturesResolved,
offsetof(SystemConstants, textures_resolved),
dxbc::Src::kXXXX),
dxbc::Src::LU(uint32_t(1) << tfetch_index));
@ -1003,15 +999,14 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// - Component signedness, for selecting the SRV, and if data is needed.
dxbc::Src signs_uint_src(
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_TextureSwizzledSigns_Vec + (tfetch_index >> 4))
.Select((tfetch_index >> 2) & 3));
GetSystemConstantSrc(offsetof(SystemConstants, texture_swizzled_signs) +
sizeof(uint32_t) * (tfetch_index >> 2),
dxbc::Src::kXXXX));
uint32_t signs_shift = (tfetch_index & 3) * 8;
uint32_t signs_temp = UINT32_MAX;
if (instr.opcode == FetchOpcode::kTextureFetch) {
signs_temp = PushSystemTemp();
system_constants_used_ |= 1ull << kSysConst_TextureSwizzledSigns_Index;
MarkSystemConstantUsed(SystemConstantIndex::kTextureSwizzledSigns);
a_.OpUBFE(dxbc::Dest::R(signs_temp, used_result_nonzero_components),
dxbc::Src::LU(2),
dxbc::Src::LU(signs_shift, signs_shift + 2, signs_shift + 4,
@ -1074,7 +1069,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// resolution scale inverse - sampler not loaded yet.
a_.OpAnd(
dxbc::Dest::R(coord_and_sampler_temp, 0b1000),
LoadSystemConstant(kSysConst_TexturesResolved_Index,
LoadSystemConstant(SystemConstantIndex::kTexturesResolved,
offsetof(SystemConstants, textures_resolved),
dxbc::Src::kXXXX),
dxbc::Src::LU(uint32_t(1) << tfetch_index));
@ -1140,7 +1135,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// resolution scale inverse - sampler not loaded yet.
a_.OpAnd(
dxbc::Dest::R(coord_and_sampler_temp, 0b1000),
LoadSystemConstant(kSysConst_TexturesResolved_Index,
LoadSystemConstant(SystemConstantIndex::kTexturesResolved,
offsetof(SystemConstants, textures_resolved),
dxbc::Src::kXXXX),
dxbc::Src::LU(uint32_t(1) << tfetch_index));
@ -1317,7 +1312,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// Check which SRV needs to be accessed - signed or unsigned. If there is
// at least one non-signed component, will be using the unsigned one.
uint32_t is_unsigned_temp = PushSystemTemp();
system_constants_used_ |= 1ull << kSysConst_TextureSwizzledSigns_Index;
MarkSystemConstantUsed(SystemConstantIndex::kTextureSwizzledSigns);
a_.OpUBFE(dxbc::Dest::R(is_unsigned_temp, 0b0001), dxbc::Src::LU(8),
dxbc::Src::LU(signs_shift), signs_uint_src);
a_.OpINE(
@ -2060,7 +2055,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// `if`, with `else` for sRGB resolved render targets.
a_.OpAnd(
dxbc::Dest::R(gamma_temp, 0b0001),
LoadSystemConstant(kSysConst_TexturesResolved_Index,
LoadSystemConstant(SystemConstantIndex::kTexturesResolved,
offsetof(SystemConstants, textures_resolved),
dxbc::Src::kXXXX),
dxbc::Src::LU(uint32_t(1) << tfetch_index));

View File

@ -101,13 +101,9 @@ void DxbcShaderTranslator::ExportToMemory() {
uint32_t control_temp = PushSystemTemp();
// Safety check if the shared memory is bound as UAV.
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
a_.OpUBFE(dxbc::Dest::R(control_temp, 0b0001), dxbc::Src::LU(1),
dxbc::Src::LU(kSysFlag_SharedMemoryIsUAV_Shift),
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_Flags_Vec)
.Select(kSysConst_Flags_Comp));
LoadFlagsSystemConstant());
// Open the `if` with the uniform condition for the shared memory buffer being
// bound as a UAV (more fine-grained checks are vector and likely divergent).
a_.OpIf(true, dxbc::Src::R(control_temp, dxbc::Src::kXXXX));

File diff suppressed because it is too large Load Diff