[DXBC] Cleanup: kSysConst_*_Vec/Comp > LoadSystemConstant

This commit is contained in:
Triang3l 2021-05-16 17:43:18 +03:00
parent ff23b1d9f9
commit 9f8a432479
5 changed files with 308 additions and 569 deletions

View File

@ -290,14 +290,12 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
dxbc::Src index_src(dxbc::Src::R(reg, dxbc::Src::kXXXX)); dxbc::Src index_src(dxbc::Src::R(reg, dxbc::Src::kXXXX));
// Check if the closing vertex of a non-indexed line loop is being processed. // Check if the closing vertex of a non-indexed line loop is being processed.
system_constants_used_ |= 1ull << kSysConst_LineLoopClosingIndex_Index;
a_.OpINE( a_.OpINE(
index_dest, index_dest,
dxbc::Src::V(uint32_t(InOutRegister::kVSInVertexIndex), dxbc::Src::kXXXX), dxbc::Src::V(uint32_t(InOutRegister::kVSInVertexIndex), dxbc::Src::kXXXX),
dxbc::Src::CB(cbuffer_index_system_constants_, LoadSystemConstant(SystemConstantIndex::kLineLoopClosingIndex,
uint32_t(CbufferRegister::kSystemConstants), offsetof(SystemConstants, line_loop_closing_index),
kSysConst_LineLoopClosingIndex_Vec) dxbc::Src::kXXXX));
.Select(kSysConst_LineLoopClosingIndex_Comp));
// Zero the index if processing the closing vertex of a line loop, or do // Zero the index if processing the closing vertex of a line loop, or do
// nothing (replace 0 with 0) if not needed. // nothing (replace 0 with 0) if not needed.
a_.OpAnd( a_.OpAnd(
@ -307,12 +305,9 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
{ {
// Swap the vertex index's endianness. // Swap the vertex index's endianness.
system_constants_used_ |= 1ull << kSysConst_VertexIndexEndian_Index; dxbc::Src endian_src(LoadSystemConstant(
dxbc::Src endian_src( SystemConstantIndex::kVertexIndexEndian,
dxbc::Src::CB(cbuffer_index_system_constants_, offsetof(SystemConstants, vertex_index_endian), dxbc::Src::kXXXX));
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_VertexIndexEndian_Vec)
.Select(kSysConst_VertexIndexEndian_Comp));
dxbc::Dest swap_temp_dest(dxbc::Dest::R(reg, 0b0010)); dxbc::Dest swap_temp_dest(dxbc::Dest::R(reg, 0b0010));
dxbc::Src swap_temp_src(dxbc::Src::R(reg, dxbc::Src::kYYYY)); dxbc::Src swap_temp_src(dxbc::Src::R(reg, dxbc::Src::kYYYY));
@ -350,12 +345,10 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
} }
// Add the base vertex index. // Add the base vertex index.
system_constants_used_ |= 1ull << kSysConst_VertexBaseIndex_Index;
a_.OpIAdd(index_dest, index_src, a_.OpIAdd(index_dest, index_src,
dxbc::Src::CB(cbuffer_index_system_constants_, LoadSystemConstant(SystemConstantIndex::kVertexBaseIndex,
uint32_t(CbufferRegister::kSystemConstants), offsetof(SystemConstants, vertex_base_index),
kSysConst_VertexBaseIndex_Vec) dxbc::Src::kXXXX));
.Select(kSysConst_VertexBaseIndex_Comp));
// Convert to float. // Convert to float.
a_.OpIToF(index_dest, index_src); a_.OpIToF(index_dest, index_src);
@ -574,13 +567,10 @@ void DxbcShaderTranslator::StartPixelShader() {
// Copy interpolants to GPRs. // Copy interpolants to GPRs.
uint32_t centroid_temp = uint32_t centroid_temp =
uses_register_dynamic_addressing ? PushSystemTemp() : UINT32_MAX; uses_register_dynamic_addressing ? PushSystemTemp() : UINT32_MAX;
system_constants_used_ |= 1ull dxbc::Src sampling_pattern_src(LoadSystemConstant(
<< kSysConst_InterpolatorSamplingPattern_Index; SystemConstantIndex::kInterpolatorSamplingPattern,
dxbc::Src sampling_pattern_src( offsetof(SystemConstants, interpolator_sampling_pattern),
dxbc::Src::CB(cbuffer_index_system_constants_, dxbc::Src::kXXXX));
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_InterpolatorSamplingPattern_Vec)
.Select(kSysConst_InterpolatorSamplingPattern_Comp));
for (uint32_t i = 0; i < interpolator_count; ++i) { for (uint32_t i = 0; i < interpolator_count; ++i) {
// With GPR dynamic addressing, first evaluate to centroid_temp r#, then // With GPR dynamic addressing, first evaluate to centroid_temp r#, then
// store to the x#. // store to the x#.
@ -615,12 +605,9 @@ void DxbcShaderTranslator::StartPixelShader() {
// Write pixel parameters - screen (XY absolute value) and point sprite (ZW // Write pixel parameters - screen (XY absolute value) and point sprite (ZW
// absolute value) coordinates, facing (X sign bit) - to the specified // absolute value) coordinates, facing (X sign bit) - to the specified
// interpolator register (ps_param_gen). // interpolator register (ps_param_gen).
system_constants_used_ |= 1ull << kSysConst_PSParamGen_Index; dxbc::Src param_gen_index_src(LoadSystemConstant(
dxbc::Src param_gen_index_src( SystemConstantIndex::kPSParamGen,
dxbc::Src::CB(cbuffer_index_system_constants_, offsetof(SystemConstants, ps_param_gen), dxbc::Src::kXXXX));
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_PSParamGen_Vec)
.Select(kSysConst_PSParamGen_Comp));
uint32_t param_gen_temp = PushSystemTemp(); uint32_t param_gen_temp = PushSystemTemp();
// Check if pixel parameters need to be written. // Check if pixel parameters need to be written.
a_.OpULT(dxbc::Dest::R(param_gen_temp, 0b0001), param_gen_index_src, a_.OpULT(dxbc::Dest::R(param_gen_temp, 0b0001), param_gen_index_src,
@ -650,12 +637,7 @@ void DxbcShaderTranslator::StartPixelShader() {
a_.OpMov(dxbc::Dest::R(param_gen_temp, 0b0011), a_.OpMov(dxbc::Dest::R(param_gen_temp, 0b0011),
dxbc::Src::R(param_gen_temp).Abs()); dxbc::Src::R(param_gen_temp).Abs());
// Check if faceness applies to the current primitive type. // Check if faceness applies to the current primitive type.
system_constants_used_ |= 1ull << kSysConst_Flags_Index; a_.OpAnd(dxbc::Dest::R(param_gen_temp, 0b0100), LoadFlagsSystemConstant(),
a_.OpAnd(dxbc::Dest::R(param_gen_temp, 0b0100),
dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_Flags_Vec)
.Select(kSysConst_Flags_Comp),
dxbc::Src::LU(kSysFlag_PrimitivePolygonal)); dxbc::Src::LU(kSysFlag_PrimitivePolygonal));
a_.OpIf(true, dxbc::Src::R(param_gen_temp, dxbc::Src::kZZZZ)); a_.OpIf(true, dxbc::Src::R(param_gen_temp, dxbc::Src::kZZZZ));
{ {
@ -675,14 +657,12 @@ void DxbcShaderTranslator::StartPixelShader() {
dxbc::Dest point_coord_r_zw_dest(dxbc::Dest::R(param_gen_temp, 0b1100)); dxbc::Dest point_coord_r_zw_dest(dxbc::Dest::R(param_gen_temp, 0b1100));
dxbc::Src point_coord_v_xxxy_src(dxbc::Src::V( dxbc::Src point_coord_v_xxxy_src(dxbc::Src::V(
uint32_t(InOutRegister::kPSInPointParameters), 0b01000000)); uint32_t(InOutRegister::kPSInPointParameters), 0b01000000));
system_constants_used_ |= 1ull
<< kSysConst_InterpolatorSamplingPattern_Index;
a_.OpUBFE(dxbc::Dest::R(param_gen_temp, 0b0100), dxbc::Src::LU(1), a_.OpUBFE(dxbc::Dest::R(param_gen_temp, 0b0100), dxbc::Src::LU(1),
param_gen_index_src, param_gen_index_src,
dxbc::Src::CB(cbuffer_index_system_constants_, LoadSystemConstant(
uint32_t(CbufferRegister::kSystemConstants), SystemConstantIndex::kInterpolatorSamplingPattern,
kSysConst_InterpolatorSamplingPattern_Vec) offsetof(SystemConstants, interpolator_sampling_pattern),
.Select(kSysConst_InterpolatorSamplingPattern_Comp)); dxbc::Src::kXXXX));
a_.OpIf(bool(xenos::SampleLocation::kCenter), a_.OpIf(bool(xenos::SampleLocation::kCenter),
dxbc::Src::R(param_gen_temp, dxbc::Src::kZZZZ)); dxbc::Src::R(param_gen_temp, dxbc::Src::kZZZZ));
// At center. // At center.
@ -697,10 +677,7 @@ void DxbcShaderTranslator::StartPixelShader() {
// Copy the GPR number to r# for relative addressing. // Copy the GPR number to r# for relative addressing.
uint32_t param_gen_copy_temp = PushSystemTemp(); uint32_t param_gen_copy_temp = PushSystemTemp();
a_.OpMov(dxbc::Dest::R(param_gen_copy_temp, 0b0001), a_.OpMov(dxbc::Dest::R(param_gen_copy_temp, 0b0001),
dxbc::Src::CB(cbuffer_index_system_constants_, param_gen_index_src);
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_PSParamGen_Vec)
.Select(kSysConst_PSParamGen_Comp));
// Write to the GPR. // Write to the GPR.
a_.OpMov(dxbc::Dest::X(0, dxbc::Index(param_gen_copy_temp, 0)), a_.OpMov(dxbc::Dest::X(0, dxbc::Index(param_gen_copy_temp, 0)),
param_gen_src); param_gen_src);
@ -864,11 +841,7 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
dxbc::Dest temp_x_dest(dxbc::Dest::R(temp, 0b0001)); dxbc::Dest temp_x_dest(dxbc::Dest::R(temp, 0b0001));
dxbc::Src temp_x_src(dxbc::Src::R(temp, dxbc::Src::kXXXX)); dxbc::Src temp_x_src(dxbc::Src::R(temp, dxbc::Src::kXXXX));
system_constants_used_ |= 1ull << kSysConst_Flags_Index; dxbc::Src flags_src(LoadFlagsSystemConstant());
dxbc::Src flags_src(dxbc::Src::CB(cbuffer_index_system_constants_,
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_Flags_Vec)
.Select(kSysConst_Flags_Comp));
// Check if the shader already returns W, not 1/W, and if it doesn't, turn 1/W // Check if the shader already returns W, not 1/W, and if it doesn't, turn 1/W
// into W. Using div rather than relaxed-precision rcp for safety. // into W. Using div rather than relaxed-precision rcp for safety.
@ -911,7 +884,6 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
// Not possible to handle UCP_CULL_ONLY_ENA with the same shader though, since // Not possible to handle UCP_CULL_ONLY_ENA with the same shader though, since
// there can be only 8 SV_ClipDistance + SV_CullDistance values at most, but // there can be only 8 SV_ClipDistance + SV_CullDistance values at most, but
// 12 would be needed. // 12 would be needed.
system_constants_used_ |= 1ull << kSysConst_UserClipPlanes_Index;
for (uint32_t i = 0; i < 6; ++i) { for (uint32_t i = 0; i < 6; ++i) {
// Check if the clip plane is enabled - this `if` is needed, as opposed to // Check if the clip plane is enabled - this `if` is needed, as opposed to
// just zeroing the clip planes in the constants, so Infinity and NaN in the // just zeroing the clip planes in the constants, so Infinity and NaN in the
@ -924,30 +896,25 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
uint32_t(InOutRegister::kVSDSOutClipDistance0123) + (i >> 2), uint32_t(InOutRegister::kVSDSOutClipDistance0123) + (i >> 2),
1 << (i & 3)), 1 << (i & 3)),
dxbc::Src::R(system_temp_position_), dxbc::Src::R(system_temp_position_),
dxbc::Src::CB(cbuffer_index_system_constants_, LoadSystemConstant(SystemConstantIndex::kUserClipPlanes,
uint32_t(CbufferRegister::kSystemConstants), offsetof(SystemConstants, user_clip_planes) +
kSysConst_UserClipPlanes_Vec + i)); sizeof(float) * 4 * i,
dxbc::Src::kXYZW));
a_.OpEndIf(); a_.OpEndIf();
} }
// Apply scale for guest to host viewport and clip space conversion. Also, if // Apply scale for guest to host viewport and clip space conversion. Also, if
// the vertex shader is multipass, the NDC scale constant can be used to set // the vertex shader is multipass, the NDC scale constant can be used to set
// position to NaN to kill all primitives. // position to NaN to kill all primitives.
system_constants_used_ |= 1ull << kSysConst_NDCScale_Index;
a_.OpMul(dxbc::Dest::R(system_temp_position_, 0b0111), a_.OpMul(dxbc::Dest::R(system_temp_position_, 0b0111),
dxbc::Src::R(system_temp_position_), dxbc::Src::R(system_temp_position_),
dxbc::Src::CB(cbuffer_index_system_constants_, LoadSystemConstant(SystemConstantIndex::kNDCScale,
uint32_t(CbufferRegister::kSystemConstants), offsetof(SystemConstants, ndc_scale), 0b100100));
kSysConst_NDCScale_Vec,
kSysConst_NDCScale_Comp * 0b010101 + 0b100100));
// Apply offset (multiplied by W) used for the same purposes. // Apply offset (multiplied by W) used for the same purposes.
system_constants_used_ |= 1ull << kSysConst_NDCOffset_Index;
a_.OpMAd(dxbc::Dest::R(system_temp_position_, 0b0111), a_.OpMAd(dxbc::Dest::R(system_temp_position_, 0b0111),
dxbc::Src::CB(cbuffer_index_system_constants_, LoadSystemConstant(SystemConstantIndex::kNDCOffset,
uint32_t(CbufferRegister::kSystemConstants), offsetof(SystemConstants, ndc_offset), 0b100100),
kSysConst_NDCOffset_Vec,
kSysConst_NDCOffset_Comp * 0b010101 + 0b100100),
dxbc::Src::R(system_temp_position_, dxbc::Src::kWWWW), dxbc::Src::R(system_temp_position_, dxbc::Src::kWWWW),
dxbc::Src::R(system_temp_position_)); dxbc::Src::R(system_temp_position_));
@ -1959,8 +1926,9 @@ const DxbcShaderTranslator::ShaderRdefType
dxbc::RdefVariableType::kUInt, 1, 4, 0, ShaderRdefTypeIndex::kUint4}, dxbc::RdefVariableType::kUInt, 1, 4, 0, ShaderRdefTypeIndex::kUint4},
}; };
const DxbcShaderTranslator::SystemConstantRdef DxbcShaderTranslator:: const DxbcShaderTranslator::SystemConstantRdef
system_constant_rdef_[DxbcShaderTranslator::kSysConst_Count] = { DxbcShaderTranslator::system_constant_rdef_[size_t(
DxbcShaderTranslator::SystemConstantIndex::kCount)] = {
{"xe_flags", ShaderRdefTypeIndex::kUint, sizeof(uint32_t)}, {"xe_flags", ShaderRdefTypeIndex::kUint, sizeof(uint32_t)},
{"xe_tessellation_factor_range", ShaderRdefTypeIndex::kFloat2, {"xe_tessellation_factor_range", ShaderRdefTypeIndex::kFloat2,
sizeof(float) * 2}, sizeof(float) * 2},
@ -2110,9 +2078,9 @@ void DxbcShaderTranslator::WriteResourceDefinition() {
// Names. // Names.
name_ptr = (uint32_t(shader_object_.size()) - blob_position_dwords) * name_ptr = (uint32_t(shader_object_.size()) - blob_position_dwords) *
sizeof(uint32_t); sizeof(uint32_t);
uint32_t constant_name_ptrs_system[kSysConst_Count]; uint32_t constant_name_ptrs_system[size_t(SystemConstantIndex::kCount)];
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) { if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
for (uint32_t i = 0; i < kSysConst_Count; ++i) { for (size_t i = 0; i < size_t(SystemConstantIndex::kCount); ++i) {
constant_name_ptrs_system[i] = name_ptr; constant_name_ptrs_system[i] = name_ptr;
name_ptr += dxbc::AppendAlignedString(shader_object_, name_ptr += dxbc::AppendAlignedString(shader_object_,
system_constant_rdef_[i].name); system_constant_rdef_[i].name);
@ -2144,11 +2112,11 @@ void DxbcShaderTranslator::WriteResourceDefinition() {
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) { if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
shader_object_.resize(constant_position_dwords_system + shader_object_.resize(constant_position_dwords_system +
sizeof(dxbc::RdefVariable) / sizeof(uint32_t) * sizeof(dxbc::RdefVariable) / sizeof(uint32_t) *
kSysConst_Count); size_t(SystemConstantIndex::kCount));
auto constants_system = reinterpret_cast<dxbc::RdefVariable*>( auto constants_system = reinterpret_cast<dxbc::RdefVariable*>(
shader_object_.data() + constant_position_dwords_system); shader_object_.data() + constant_position_dwords_system);
uint32_t constant_offset_system = 0; uint32_t constant_offset_system = 0;
for (uint32_t i = 0; i < kSysConst_Count; ++i) { for (size_t i = 0; i < size_t(SystemConstantIndex::kCount); ++i) {
dxbc::RdefVariable& constant_system = constants_system[i]; dxbc::RdefVariable& constant_system = constants_system[i];
const SystemConstantRdef& translator_constant_system = const SystemConstantRdef& translator_constant_system =
system_constant_rdef_[i]; system_constant_rdef_[i];
@ -2303,7 +2271,7 @@ void DxbcShaderTranslator::WriteResourceDefinition() {
cbuffer.type = dxbc::RdefCbufferType::kCbuffer; cbuffer.type = dxbc::RdefCbufferType::kCbuffer;
if (i == cbuffer_index_system_constants_) { if (i == cbuffer_index_system_constants_) {
cbuffer.name_ptr = cbuffer_name_ptr_system; cbuffer.name_ptr = cbuffer_name_ptr_system;
cbuffer.variable_count = kSysConst_Count; cbuffer.variable_count = uint32_t(SystemConstantIndex::kCount);
cbuffer.variables_ptr = cbuffer.variables_ptr =
(constant_position_dwords_system - blob_position_dwords) * (constant_position_dwords_system - blob_position_dwords) *
sizeof(uint32_t); sizeof(uint32_t);

View File

@ -202,7 +202,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
}; };
// IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED: // IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
// - kSysConst enum (indices, registers and first components). // - SystemConstantIndex enum.
// - system_constant_rdef_. // - system_constant_rdef_.
// - d3d12/shaders/xenos_draw.hlsli (for geometry shaders). // - d3d12/shaders/xenos_draw.hlsli (for geometry shaders).
struct SystemConstants { struct SystemConstants {
@ -507,150 +507,42 @@ class DxbcShaderTranslator : public ShaderTranslator {
void ProcessAluInstruction(const ParsedAluInstruction& instr) override; void ProcessAluInstruction(const ParsedAluInstruction& instr) override;
private: private:
enum : uint32_t { enum class SystemConstantIndex : uint32_t {
// Indices. kFlags,
kTessellationFactorRange,
kLineLoopClosingIndex,
kVertexIndexEndian,
kVertexBaseIndex,
kPointSize,
kPointSizeMinMax,
kPointScreenToNDC,
kUserClipPlanes,
kNDCScale,
kInterpolatorSamplingPattern,
kNDCOffset,
kPSParamGen,
kTextureSwizzledSigns,
kTexturesResolved,
kSampleCountLog2,
kAlphaTestReference,
kColorExpBias,
kAlphaToMask,
kEdramPitchTiles,
kEdramDepthRange,
kEdramPolyOffsetFront,
kEdramPolyOffsetBack,
kEdramDepthBaseDwords,
kEdramStencil,
kEdramRTBaseDwordsScaled,
kEdramRTFormatFlags,
kEdramRTClamp,
kEdramRTKeepMask,
kEdramRTBlendFactorsOps,
kEdramBlendConstant,
kSysConst_Flags_Index, kCount,
kSysConst_TessellationFactorRange_Index,
kSysConst_LineLoopClosingIndex_Index,
kSysConst_VertexIndexEndian_Index,
kSysConst_VertexBaseIndex_Index,
kSysConst_PointSize_Index,
kSysConst_PointSizeMinMax_Index,
kSysConst_PointScreenToNDC_Index,
kSysConst_UserClipPlanes_Index,
kSysConst_NDCScale_Index,
kSysConst_InterpolatorSamplingPattern_Index,
kSysConst_NDCOffset_Index,
kSysConst_PSParamGen_Index,
kSysConst_TextureSwizzledSigns_Index,
kSysConst_TexturesResolved_Index,
kSysConst_SampleCountLog2_Index,
kSysConst_AlphaTestReference_Index,
kSysConst_ColorExpBias_Index,
kSysConst_AlphaToMask_Index,
kSysConst_EdramPitchTiles_Index,
kSysConst_EdramDepthRange_Index,
kSysConst_EdramPolyOffsetFront_Index,
kSysConst_EdramPolyOffsetBack_Index,
kSysConst_EdramDepthBaseDwords_Index,
kSysConst_EdramStencil_Index,
kSysConst_EdramRTBaseDwordsScaled_Index,
kSysConst_EdramRTFormatFlags_Index,
kSysConst_EdramRTClamp_Index,
kSysConst_EdramRTKeepMask_Index,
kSysConst_EdramRTBlendFactorsOps_Index,
kSysConst_EdramBlendConstant_Index,
kSysConst_Count,
// Vectors.
kSysConst_Flags_Vec = 0,
kSysConst_Flags_Comp = 0,
kSysConst_TessellationFactorRange_Vec = kSysConst_Flags_Vec,
kSysConst_TessellationFactorRange_Comp = 1,
kSysConst_LineLoopClosingIndex_Vec = kSysConst_Flags_Vec,
kSysConst_LineLoopClosingIndex_Comp = 3,
kSysConst_VertexIndexEndian_Vec = kSysConst_LineLoopClosingIndex_Vec + 1,
kSysConst_VertexIndexEndian_Comp = 0,
kSysConst_VertexBaseIndex_Vec = kSysConst_VertexIndexEndian_Vec,
kSysConst_VertexBaseIndex_Comp = 1,
kSysConst_PointSize_Vec = kSysConst_VertexIndexEndian_Vec,
kSysConst_PointSize_Comp = 2,
kSysConst_PointSizeMinMax_Vec = kSysConst_PointSize_Vec + 1,
kSysConst_PointSizeMinMax_Comp = 0,
kSysConst_PointScreenToNDC_Vec = kSysConst_PointSizeMinMax_Vec,
kSysConst_PointScreenToNDC_Comp = 2,
// 6 vectors.
kSysConst_UserClipPlanes_Vec = kSysConst_PointScreenToNDC_Vec + 1,
kSysConst_NDCScale_Vec = kSysConst_UserClipPlanes_Vec + 6,
kSysConst_NDCScale_Comp = 0,
kSysConst_InterpolatorSamplingPattern_Vec = kSysConst_NDCScale_Vec,
kSysConst_InterpolatorSamplingPattern_Comp = 3,
kSysConst_NDCOffset_Vec = kSysConst_InterpolatorSamplingPattern_Vec + 1,
kSysConst_NDCOffset_Comp = 0,
kSysConst_PSParamGen_Vec = kSysConst_NDCOffset_Vec,
kSysConst_PSParamGen_Comp = 3,
// 2 vectors.
kSysConst_TextureSwizzledSigns_Vec = kSysConst_PSParamGen_Vec + 1,
kSysConst_TexturesResolved_Vec = kSysConst_TextureSwizzledSigns_Vec + 2,
kSysConst_TexturesResolved_Comp = 0,
kSysConst_SampleCountLog2_Vec = kSysConst_TexturesResolved_Vec,
kSysConst_SampleCountLog2_Comp = 1,
kSysConst_AlphaTestReference_Vec = kSysConst_TexturesResolved_Vec,
kSysConst_AlphaTestReference_Comp = 3,
kSysConst_ColorExpBias_Vec = kSysConst_AlphaTestReference_Vec + 1,
kSysConst_AlphaToMask_Vec = kSysConst_ColorExpBias_Vec + 1,
kSysConst_AlphaToMask_Comp = 0,
kSysConst_EdramPitchTiles_Vec = kSysConst_AlphaToMask_Vec,
kSysConst_EdramPitchTiles_Comp = 1,
kSysConst_EdramDepthRange_Vec = kSysConst_AlphaToMask_Vec,
kSysConst_EdramDepthRangeScale_Comp = 2,
kSysConst_EdramDepthRangeOffset_Comp = 3,
kSysConst_EdramPolyOffsetFront_Vec = kSysConst_EdramDepthRange_Vec + 1,
kSysConst_EdramPolyOffsetFrontScale_Comp = 0,
kSysConst_EdramPolyOffsetFrontOffset_Comp = 1,
kSysConst_EdramPolyOffsetBack_Vec = kSysConst_EdramPolyOffsetFront_Vec,
kSysConst_EdramPolyOffsetBackScale_Comp = 2,
kSysConst_EdramPolyOffsetBackOffset_Comp = 3,
kSysConst_EdramDepthBaseDwords_Vec = kSysConst_EdramPolyOffsetBack_Vec + 1,
kSysConst_EdramDepthBaseDwords_Comp = 0,
// 2 vectors.
kSysConst_EdramStencil_Vec = kSysConst_EdramDepthBaseDwords_Vec + 1,
kSysConst_EdramStencil_Front_Vec = kSysConst_EdramStencil_Vec,
kSysConst_EdramStencil_Back_Vec,
kSysConst_EdramStencil_Reference_Comp = 0,
kSysConst_EdramStencil_ReadMask_Comp,
kSysConst_EdramStencil_WriteMask_Comp,
kSysConst_EdramStencil_FuncOps_Comp,
kSysConst_EdramRTBaseDwordsScaled_Vec = kSysConst_EdramStencil_Vec + 2,
kSysConst_EdramRTFormatFlags_Vec =
kSysConst_EdramRTBaseDwordsScaled_Vec + 1,
// 4 vectors.
kSysConst_EdramRTClamp_Vec = kSysConst_EdramRTFormatFlags_Vec + 1,
// 2 vectors (render targets 01 and 23).
kSysConst_EdramRTKeepMask_Vec = kSysConst_EdramRTClamp_Vec + 4,
kSysConst_EdramRTBlendFactorsOps_Vec = kSysConst_EdramRTKeepMask_Vec + 2,
kSysConst_EdramBlendConstant_Vec = kSysConst_EdramRTBlendFactorsOps_Vec + 1,
}; };
static_assert(kSysConst_Count <= 64, static_assert(uint32_t(SystemConstantIndex::kCount) <= 64,
"Too many system constants, can't use uint64_t for usage bits"); "Too many system constants, can't use uint64_t for usage bits");
static constexpr uint32_t kPointParametersTexCoord = xenos::kMaxInterpolators; static constexpr uint32_t kPointParametersTexCoord = xenos::kMaxInterpolators;
@ -685,19 +577,37 @@ class DxbcShaderTranslator : public ShaderTranslator {
kPSInFrontFaceAndSampleIndex, kPSInFrontFaceAndSampleIndex,
}; };
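// GetSystemConstantSrc + MarkSystemConstantUsed are for the special case where
// the source operand is built unconditionally, but the constant is actually
// read (and thus must be marked as used) only on some code paths - in general,
// LoadSystemConstant must be used instead.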
void MarkSystemConstantUsed(SystemConstantIndex index) {
  // Every system constant owns one bit of the 64-bit usage mask, located at
  // the position given by its SystemConstantIndex value.
  const uint64_t usage_bit = uint64_t(1) << uint32_t(index);
  system_constants_used_ |= usage_bit;
}
// Offset should be offsetof(SystemConstants, field). Swizzle values are // Offset should be offsetof(SystemConstants, field). Swizzle values are
// relative to the first component in the vector according to offsetof - to // relative to the first component in the vector according to offsetof - to
// request a scalar, use XXXX swizzle, and if it's at +4 in its 16-byte // request a scalar, use XXXX swizzle, and if it's at +4 in its 16-byte
// vector, it will be turned into YYYY, and so on. // vector, it will be turned into YYYY, and so on. The swizzle may include
// TODO(Triang3l): Index to enum class. // out-of-bounds components of the vector for simplicity of use, assuming they
dxbc::Src LoadSystemConstant(uint32_t index, size_t offset, // will be dropped anyway later.
uint32_t swizzle) { dxbc::Src GetSystemConstantSrc(size_t offset, uint32_t swizzle) {
system_constants_used_ |= uint64_t(1) << index;
uint32_t first_component = uint32_t((offset >> 2) & 3); uint32_t first_component = uint32_t((offset >> 2) & 3);
return dxbc::Src::CB(cbuffer_index_system_constants_, return dxbc::Src::CB(
uint32_t(CbufferRegister::kSystemConstants), cbuffer_index_system_constants_,
uint32_t(offset >> 4), uint32_t(CbufferRegister::kSystemConstants), uint32_t(offset >> 4),
first_component * 0b01010101 + swizzle); std::min((swizzle & 3) + first_component, uint32_t(3)) |
std::min(((swizzle >> 2) & 3) + first_component, uint32_t(3)) << 2 |
std::min(((swizzle >> 4) & 3) + first_component, uint32_t(3)) << 4 |
std::min(((swizzle >> 6) & 3) + first_component, uint32_t(3)) << 6);
}
// Marks the system constant `index` as used by the shader and returns the
// constant buffer source operand for it. `offset` should be
// offsetof(SystemConstants, field), and `swizzle` is interpreted by
// GetSystemConstantSrc relative to the first component of that field.
dxbc::Src LoadSystemConstant(SystemConstantIndex index, size_t offset,
                             uint32_t swizzle) {
  // Record the usage first, then build the operand to hand back.
  MarkSystemConstantUsed(index);
  dxbc::Src constant_src(GetSystemConstantSrc(offset, swizzle));
  return constant_src;
}
// Convenience wrapper for the `flags` field of SystemConstants: goes through
// LoadSystemConstant, so SystemConstantIndex::kFlags is marked as used, and
// requests the field as a scalar (XXXX swizzle relative to the field's own
// position within its vector).
dxbc::Src LoadFlagsSystemConstant() {
return LoadSystemConstant(SystemConstantIndex::kFlags,
offsetof(SystemConstants, flags),
dxbc::Src::kXXXX);
} }
Modification GetDxbcShaderModification() const { Modification GetDxbcShaderModification() const {
@ -1071,8 +981,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
uint32_t size; uint32_t size;
uint32_t padding_after; uint32_t padding_after;
}; };
static const SystemConstantRdef system_constant_rdef_[kSysConst_Count]; static const SystemConstantRdef
// Mask of system constants (1 << kSysConst_#_Index) used in the shader, so system_constant_rdef_[size_t(SystemConstantIndex::kCount)];
// Mask of system constants (1 << SystemConstantIndex) used in the shader, so
// the remaining ones can be marked as unused in RDEF. // the remaining ones can be marked as unused in RDEF.
uint64_t system_constants_used_; uint64_t system_constants_used_;

View File

@ -128,12 +128,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
// memexport is used), fetch from the appropriate binding. Extract whether // memexport is used), fetch from the appropriate binding. Extract whether
// shared memory is a UAV to system_temp_result_.x and check. In the `if`, put // shared memory is a UAV to system_temp_result_.x and check. In the `if`, put
// the more likely case (SRV), in the `else`, the less likely one (UAV). // the more likely case (SRV), in the `else`, the less likely one (UAV).
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001), a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001),
dxbc::Src::CB(cbuffer_index_system_constants_, LoadFlagsSystemConstant(),
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_Flags_Vec)
.Select(kSysConst_Flags_Comp),
dxbc::Src::LU(kSysFlag_SharedMemoryIsUAV)); dxbc::Src::LU(kSysFlag_SharedMemoryIsUAV));
a_.OpIf(false, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX)); a_.OpIf(false, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
if (srv_index_shared_memory_ == kBindingIndexUnallocated) { if (srv_index_shared_memory_ == kBindingIndexUnallocated) {
@ -949,7 +945,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// calculations. // calculations.
assert_zero(used_result_nonzero_components & 0b1000); assert_zero(used_result_nonzero_components & 0b1000);
a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b1000), a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b1000),
LoadSystemConstant(kSysConst_TexturesResolved_Index, LoadSystemConstant(SystemConstantIndex::kTexturesResolved,
offsetof(SystemConstants, textures_resolved), offsetof(SystemConstants, textures_resolved),
dxbc::Src::kXXXX), dxbc::Src::kXXXX),
dxbc::Src::LU(uint32_t(1) << tfetch_index)); dxbc::Src::LU(uint32_t(1) << tfetch_index));
@ -1003,15 +999,14 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// - Component signedness, for selecting the SRV, and if data is needed. // - Component signedness, for selecting the SRV, and if data is needed.
dxbc::Src signs_uint_src( dxbc::Src signs_uint_src(
dxbc::Src::CB(cbuffer_index_system_constants_, GetSystemConstantSrc(offsetof(SystemConstants, texture_swizzled_signs) +
uint32_t(CbufferRegister::kSystemConstants), sizeof(uint32_t) * (tfetch_index >> 2),
kSysConst_TextureSwizzledSigns_Vec + (tfetch_index >> 4)) dxbc::Src::kXXXX));
.Select((tfetch_index >> 2) & 3));
uint32_t signs_shift = (tfetch_index & 3) * 8; uint32_t signs_shift = (tfetch_index & 3) * 8;
uint32_t signs_temp = UINT32_MAX; uint32_t signs_temp = UINT32_MAX;
if (instr.opcode == FetchOpcode::kTextureFetch) { if (instr.opcode == FetchOpcode::kTextureFetch) {
signs_temp = PushSystemTemp(); signs_temp = PushSystemTemp();
system_constants_used_ |= 1ull << kSysConst_TextureSwizzledSigns_Index; MarkSystemConstantUsed(SystemConstantIndex::kTextureSwizzledSigns);
a_.OpUBFE(dxbc::Dest::R(signs_temp, used_result_nonzero_components), a_.OpUBFE(dxbc::Dest::R(signs_temp, used_result_nonzero_components),
dxbc::Src::LU(2), dxbc::Src::LU(2),
dxbc::Src::LU(signs_shift, signs_shift + 2, signs_shift + 4, dxbc::Src::LU(signs_shift, signs_shift + 2, signs_shift + 4,
@ -1074,7 +1069,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// resolution scale inverse - sampler not loaded yet. // resolution scale inverse - sampler not loaded yet.
a_.OpAnd( a_.OpAnd(
dxbc::Dest::R(coord_and_sampler_temp, 0b1000), dxbc::Dest::R(coord_and_sampler_temp, 0b1000),
LoadSystemConstant(kSysConst_TexturesResolved_Index, LoadSystemConstant(SystemConstantIndex::kTexturesResolved,
offsetof(SystemConstants, textures_resolved), offsetof(SystemConstants, textures_resolved),
dxbc::Src::kXXXX), dxbc::Src::kXXXX),
dxbc::Src::LU(uint32_t(1) << tfetch_index)); dxbc::Src::LU(uint32_t(1) << tfetch_index));
@ -1140,7 +1135,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// resolution scale inverse - sampler not loaded yet. // resolution scale inverse - sampler not loaded yet.
a_.OpAnd( a_.OpAnd(
dxbc::Dest::R(coord_and_sampler_temp, 0b1000), dxbc::Dest::R(coord_and_sampler_temp, 0b1000),
LoadSystemConstant(kSysConst_TexturesResolved_Index, LoadSystemConstant(SystemConstantIndex::kTexturesResolved,
offsetof(SystemConstants, textures_resolved), offsetof(SystemConstants, textures_resolved),
dxbc::Src::kXXXX), dxbc::Src::kXXXX),
dxbc::Src::LU(uint32_t(1) << tfetch_index)); dxbc::Src::LU(uint32_t(1) << tfetch_index));
@ -1317,7 +1312,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// Check which SRV needs to be accessed - signed or unsigned. If there is // Check which SRV needs to be accessed - signed or unsigned. If there is
// at least one non-signed component, will be using the unsigned one. // at least one non-signed component, will be using the unsigned one.
uint32_t is_unsigned_temp = PushSystemTemp(); uint32_t is_unsigned_temp = PushSystemTemp();
system_constants_used_ |= 1ull << kSysConst_TextureSwizzledSigns_Index; MarkSystemConstantUsed(SystemConstantIndex::kTextureSwizzledSigns);
a_.OpUBFE(dxbc::Dest::R(is_unsigned_temp, 0b0001), dxbc::Src::LU(8), a_.OpUBFE(dxbc::Dest::R(is_unsigned_temp, 0b0001), dxbc::Src::LU(8),
dxbc::Src::LU(signs_shift), signs_uint_src); dxbc::Src::LU(signs_shift), signs_uint_src);
a_.OpINE( a_.OpINE(
@ -2060,7 +2055,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
// `if`, with `else` for sRGB resolved render targets. // `if`, with `else` for sRGB resolved render targets.
a_.OpAnd( a_.OpAnd(
dxbc::Dest::R(gamma_temp, 0b0001), dxbc::Dest::R(gamma_temp, 0b0001),
LoadSystemConstant(kSysConst_TexturesResolved_Index, LoadSystemConstant(SystemConstantIndex::kTexturesResolved,
offsetof(SystemConstants, textures_resolved), offsetof(SystemConstants, textures_resolved),
dxbc::Src::kXXXX), dxbc::Src::kXXXX),
dxbc::Src::LU(uint32_t(1) << tfetch_index)); dxbc::Src::LU(uint32_t(1) << tfetch_index));

View File

@ -101,13 +101,9 @@ void DxbcShaderTranslator::ExportToMemory() {
uint32_t control_temp = PushSystemTemp(); uint32_t control_temp = PushSystemTemp();
// Safety check if the shared memory is bound as UAV. // Safety check if the shared memory is bound as UAV.
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
a_.OpUBFE(dxbc::Dest::R(control_temp, 0b0001), dxbc::Src::LU(1), a_.OpUBFE(dxbc::Dest::R(control_temp, 0b0001), dxbc::Src::LU(1),
dxbc::Src::LU(kSysFlag_SharedMemoryIsUAV_Shift), dxbc::Src::LU(kSysFlag_SharedMemoryIsUAV_Shift),
dxbc::Src::CB(cbuffer_index_system_constants_, LoadFlagsSystemConstant());
uint32_t(CbufferRegister::kSystemConstants),
kSysConst_Flags_Vec)
.Select(kSysConst_Flags_Comp));
// Open the `if` with the uniform condition for the shared memory buffer being // Open the `if` with the uniform condition for the shared memory buffer being
// bound as a UAV (more fine-grained checks are vector and likely divergent). // bound as a UAV (more fine-grained checks are vector and likely divergent).
a_.OpIf(true, dxbc::Src::R(control_temp, dxbc::Src::kXXXX)); a_.OpIf(true, dxbc::Src::R(control_temp, dxbc::Src::kXXXX));

File diff suppressed because it is too large Load Diff