[DXBC] Cleanup: kSysConst_*_Vec/Comp > LoadSystemConstant
This commit is contained in:
parent
ff23b1d9f9
commit
9f8a432479
|
@ -290,14 +290,12 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
|
|||
dxbc::Src index_src(dxbc::Src::R(reg, dxbc::Src::kXXXX));
|
||||
|
||||
// Check if the closing vertex of a non-indexed line loop is being processed.
|
||||
system_constants_used_ |= 1ull << kSysConst_LineLoopClosingIndex_Index;
|
||||
a_.OpINE(
|
||||
index_dest,
|
||||
dxbc::Src::V(uint32_t(InOutRegister::kVSInVertexIndex), dxbc::Src::kXXXX),
|
||||
dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_LineLoopClosingIndex_Vec)
|
||||
.Select(kSysConst_LineLoopClosingIndex_Comp));
|
||||
LoadSystemConstant(SystemConstantIndex::kLineLoopClosingIndex,
|
||||
offsetof(SystemConstants, line_loop_closing_index),
|
||||
dxbc::Src::kXXXX));
|
||||
// Zero the index if processing the closing vertex of a line loop, or do
|
||||
// nothing (replace 0 with 0) if not needed.
|
||||
a_.OpAnd(
|
||||
|
@ -307,12 +305,9 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
|
|||
|
||||
{
|
||||
// Swap the vertex index's endianness.
|
||||
system_constants_used_ |= 1ull << kSysConst_VertexIndexEndian_Index;
|
||||
dxbc::Src endian_src(
|
||||
dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_VertexIndexEndian_Vec)
|
||||
.Select(kSysConst_VertexIndexEndian_Comp));
|
||||
dxbc::Src endian_src(LoadSystemConstant(
|
||||
SystemConstantIndex::kVertexIndexEndian,
|
||||
offsetof(SystemConstants, vertex_index_endian), dxbc::Src::kXXXX));
|
||||
dxbc::Dest swap_temp_dest(dxbc::Dest::R(reg, 0b0010));
|
||||
dxbc::Src swap_temp_src(dxbc::Src::R(reg, dxbc::Src::kYYYY));
|
||||
|
||||
|
@ -350,12 +345,10 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
|
|||
}
|
||||
|
||||
// Add the base vertex index.
|
||||
system_constants_used_ |= 1ull << kSysConst_VertexBaseIndex_Index;
|
||||
a_.OpIAdd(index_dest, index_src,
|
||||
dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_VertexBaseIndex_Vec)
|
||||
.Select(kSysConst_VertexBaseIndex_Comp));
|
||||
LoadSystemConstant(SystemConstantIndex::kVertexBaseIndex,
|
||||
offsetof(SystemConstants, vertex_base_index),
|
||||
dxbc::Src::kXXXX));
|
||||
|
||||
// Convert to float.
|
||||
a_.OpIToF(index_dest, index_src);
|
||||
|
@ -574,13 +567,10 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
// Copy interpolants to GPRs.
|
||||
uint32_t centroid_temp =
|
||||
uses_register_dynamic_addressing ? PushSystemTemp() : UINT32_MAX;
|
||||
system_constants_used_ |= 1ull
|
||||
<< kSysConst_InterpolatorSamplingPattern_Index;
|
||||
dxbc::Src sampling_pattern_src(
|
||||
dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_InterpolatorSamplingPattern_Vec)
|
||||
.Select(kSysConst_InterpolatorSamplingPattern_Comp));
|
||||
dxbc::Src sampling_pattern_src(LoadSystemConstant(
|
||||
SystemConstantIndex::kInterpolatorSamplingPattern,
|
||||
offsetof(SystemConstants, interpolator_sampling_pattern),
|
||||
dxbc::Src::kXXXX));
|
||||
for (uint32_t i = 0; i < interpolator_count; ++i) {
|
||||
// With GPR dynamic addressing, first evaluate to centroid_temp r#, then
|
||||
// store to the x#.
|
||||
|
@ -615,12 +605,9 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
// Write pixel parameters - screen (XY absolute value) and point sprite (ZW
|
||||
// absolute value) coordinates, facing (X sign bit) - to the specified
|
||||
// interpolator register (ps_param_gen).
|
||||
system_constants_used_ |= 1ull << kSysConst_PSParamGen_Index;
|
||||
dxbc::Src param_gen_index_src(
|
||||
dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_PSParamGen_Vec)
|
||||
.Select(kSysConst_PSParamGen_Comp));
|
||||
dxbc::Src param_gen_index_src(LoadSystemConstant(
|
||||
SystemConstantIndex::kPSParamGen,
|
||||
offsetof(SystemConstants, ps_param_gen), dxbc::Src::kXXXX));
|
||||
uint32_t param_gen_temp = PushSystemTemp();
|
||||
// Check if pixel parameters need to be written.
|
||||
a_.OpULT(dxbc::Dest::R(param_gen_temp, 0b0001), param_gen_index_src,
|
||||
|
@ -650,12 +637,7 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
a_.OpMov(dxbc::Dest::R(param_gen_temp, 0b0011),
|
||||
dxbc::Src::R(param_gen_temp).Abs());
|
||||
// Check if faceness applies to the current primitive type.
|
||||
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
|
||||
a_.OpAnd(dxbc::Dest::R(param_gen_temp, 0b0100),
|
||||
dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_Flags_Vec)
|
||||
.Select(kSysConst_Flags_Comp),
|
||||
a_.OpAnd(dxbc::Dest::R(param_gen_temp, 0b0100), LoadFlagsSystemConstant(),
|
||||
dxbc::Src::LU(kSysFlag_PrimitivePolygonal));
|
||||
a_.OpIf(true, dxbc::Src::R(param_gen_temp, dxbc::Src::kZZZZ));
|
||||
{
|
||||
|
@ -675,14 +657,12 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
dxbc::Dest point_coord_r_zw_dest(dxbc::Dest::R(param_gen_temp, 0b1100));
|
||||
dxbc::Src point_coord_v_xxxy_src(dxbc::Src::V(
|
||||
uint32_t(InOutRegister::kPSInPointParameters), 0b01000000));
|
||||
system_constants_used_ |= 1ull
|
||||
<< kSysConst_InterpolatorSamplingPattern_Index;
|
||||
a_.OpUBFE(dxbc::Dest::R(param_gen_temp, 0b0100), dxbc::Src::LU(1),
|
||||
param_gen_index_src,
|
||||
dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_InterpolatorSamplingPattern_Vec)
|
||||
.Select(kSysConst_InterpolatorSamplingPattern_Comp));
|
||||
LoadSystemConstant(
|
||||
SystemConstantIndex::kInterpolatorSamplingPattern,
|
||||
offsetof(SystemConstants, interpolator_sampling_pattern),
|
||||
dxbc::Src::kXXXX));
|
||||
a_.OpIf(bool(xenos::SampleLocation::kCenter),
|
||||
dxbc::Src::R(param_gen_temp, dxbc::Src::kZZZZ));
|
||||
// At center.
|
||||
|
@ -697,10 +677,7 @@ void DxbcShaderTranslator::StartPixelShader() {
|
|||
// Copy the GPR number to r# for relative addressing.
|
||||
uint32_t param_gen_copy_temp = PushSystemTemp();
|
||||
a_.OpMov(dxbc::Dest::R(param_gen_copy_temp, 0b0001),
|
||||
dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_PSParamGen_Vec)
|
||||
.Select(kSysConst_PSParamGen_Comp));
|
||||
param_gen_index_src);
|
||||
// Write to the GPR.
|
||||
a_.OpMov(dxbc::Dest::X(0, dxbc::Index(param_gen_copy_temp, 0)),
|
||||
param_gen_src);
|
||||
|
@ -864,11 +841,7 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
|
|||
dxbc::Dest temp_x_dest(dxbc::Dest::R(temp, 0b0001));
|
||||
dxbc::Src temp_x_src(dxbc::Src::R(temp, dxbc::Src::kXXXX));
|
||||
|
||||
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
|
||||
dxbc::Src flags_src(dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_Flags_Vec)
|
||||
.Select(kSysConst_Flags_Comp));
|
||||
dxbc::Src flags_src(LoadFlagsSystemConstant());
|
||||
|
||||
// Check if the shader already returns W, not 1/W, and if it doesn't, turn 1/W
|
||||
// into W. Using div rather than relaxed-precision rcp for safety.
|
||||
|
@ -911,7 +884,6 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
|
|||
// Not possible to handle UCP_CULL_ONLY_ENA with the same shader though, since
|
||||
// there can be only 8 SV_ClipDistance + SV_CullDistance values at most, but
|
||||
// 12 would be needed.
|
||||
system_constants_used_ |= 1ull << kSysConst_UserClipPlanes_Index;
|
||||
for (uint32_t i = 0; i < 6; ++i) {
|
||||
// Check if the clip plane is enabled - this `if` is needed, as opposed to
|
||||
// just zeroing the clip planes in the constants, so Infinity and NaN in the
|
||||
|
@ -924,30 +896,25 @@ void DxbcShaderTranslator::CompleteVertexOrDomainShader() {
|
|||
uint32_t(InOutRegister::kVSDSOutClipDistance0123) + (i >> 2),
|
||||
1 << (i & 3)),
|
||||
dxbc::Src::R(system_temp_position_),
|
||||
dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_UserClipPlanes_Vec + i));
|
||||
LoadSystemConstant(SystemConstantIndex::kUserClipPlanes,
|
||||
offsetof(SystemConstants, user_clip_planes) +
|
||||
sizeof(float) * 4 * i,
|
||||
dxbc::Src::kXYZW));
|
||||
a_.OpEndIf();
|
||||
}
|
||||
|
||||
// Apply scale for guest to host viewport and clip space conversion. Also, if
|
||||
// the vertex shader is multipass, the NDC scale constant can be used to set
|
||||
// position to NaN to kill all primitives.
|
||||
system_constants_used_ |= 1ull << kSysConst_NDCScale_Index;
|
||||
a_.OpMul(dxbc::Dest::R(system_temp_position_, 0b0111),
|
||||
dxbc::Src::R(system_temp_position_),
|
||||
dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_NDCScale_Vec,
|
||||
kSysConst_NDCScale_Comp * 0b010101 + 0b100100));
|
||||
LoadSystemConstant(SystemConstantIndex::kNDCScale,
|
||||
offsetof(SystemConstants, ndc_scale), 0b100100));
|
||||
|
||||
// Apply offset (multiplied by W) used for the same purposes.
|
||||
system_constants_used_ |= 1ull << kSysConst_NDCOffset_Index;
|
||||
a_.OpMAd(dxbc::Dest::R(system_temp_position_, 0b0111),
|
||||
dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_NDCOffset_Vec,
|
||||
kSysConst_NDCOffset_Comp * 0b010101 + 0b100100),
|
||||
LoadSystemConstant(SystemConstantIndex::kNDCOffset,
|
||||
offsetof(SystemConstants, ndc_offset), 0b100100),
|
||||
dxbc::Src::R(system_temp_position_, dxbc::Src::kWWWW),
|
||||
dxbc::Src::R(system_temp_position_));
|
||||
|
||||
|
@ -1959,8 +1926,9 @@ const DxbcShaderTranslator::ShaderRdefType
|
|||
dxbc::RdefVariableType::kUInt, 1, 4, 0, ShaderRdefTypeIndex::kUint4},
|
||||
};
|
||||
|
||||
const DxbcShaderTranslator::SystemConstantRdef DxbcShaderTranslator::
|
||||
system_constant_rdef_[DxbcShaderTranslator::kSysConst_Count] = {
|
||||
const DxbcShaderTranslator::SystemConstantRdef
|
||||
DxbcShaderTranslator::system_constant_rdef_[size_t(
|
||||
DxbcShaderTranslator::SystemConstantIndex::kCount)] = {
|
||||
{"xe_flags", ShaderRdefTypeIndex::kUint, sizeof(uint32_t)},
|
||||
{"xe_tessellation_factor_range", ShaderRdefTypeIndex::kFloat2,
|
||||
sizeof(float) * 2},
|
||||
|
@ -2110,9 +2078,9 @@ void DxbcShaderTranslator::WriteResourceDefinition() {
|
|||
// Names.
|
||||
name_ptr = (uint32_t(shader_object_.size()) - blob_position_dwords) *
|
||||
sizeof(uint32_t);
|
||||
uint32_t constant_name_ptrs_system[kSysConst_Count];
|
||||
uint32_t constant_name_ptrs_system[size_t(SystemConstantIndex::kCount)];
|
||||
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
|
||||
for (uint32_t i = 0; i < kSysConst_Count; ++i) {
|
||||
for (size_t i = 0; i < size_t(SystemConstantIndex::kCount); ++i) {
|
||||
constant_name_ptrs_system[i] = name_ptr;
|
||||
name_ptr += dxbc::AppendAlignedString(shader_object_,
|
||||
system_constant_rdef_[i].name);
|
||||
|
@ -2144,11 +2112,11 @@ void DxbcShaderTranslator::WriteResourceDefinition() {
|
|||
if (cbuffer_index_system_constants_ != kBindingIndexUnallocated) {
|
||||
shader_object_.resize(constant_position_dwords_system +
|
||||
sizeof(dxbc::RdefVariable) / sizeof(uint32_t) *
|
||||
kSysConst_Count);
|
||||
size_t(SystemConstantIndex::kCount));
|
||||
auto constants_system = reinterpret_cast<dxbc::RdefVariable*>(
|
||||
shader_object_.data() + constant_position_dwords_system);
|
||||
uint32_t constant_offset_system = 0;
|
||||
for (uint32_t i = 0; i < kSysConst_Count; ++i) {
|
||||
for (size_t i = 0; i < size_t(SystemConstantIndex::kCount); ++i) {
|
||||
dxbc::RdefVariable& constant_system = constants_system[i];
|
||||
const SystemConstantRdef& translator_constant_system =
|
||||
system_constant_rdef_[i];
|
||||
|
@ -2303,7 +2271,7 @@ void DxbcShaderTranslator::WriteResourceDefinition() {
|
|||
cbuffer.type = dxbc::RdefCbufferType::kCbuffer;
|
||||
if (i == cbuffer_index_system_constants_) {
|
||||
cbuffer.name_ptr = cbuffer_name_ptr_system;
|
||||
cbuffer.variable_count = kSysConst_Count;
|
||||
cbuffer.variable_count = uint32_t(SystemConstantIndex::kCount);
|
||||
cbuffer.variables_ptr =
|
||||
(constant_position_dwords_system - blob_position_dwords) *
|
||||
sizeof(uint32_t);
|
||||
|
|
|
@ -202,7 +202,7 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
};
|
||||
|
||||
// IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
|
||||
// - kSysConst enum (indices, registers and first components).
|
||||
// - SystemConstantIndex enum.
|
||||
// - system_constant_rdef_.
|
||||
// - d3d12/shaders/xenos_draw.hlsli (for geometry shaders).
|
||||
struct SystemConstants {
|
||||
|
@ -507,150 +507,42 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
void ProcessAluInstruction(const ParsedAluInstruction& instr) override;
|
||||
|
||||
private:
|
||||
enum : uint32_t {
|
||||
// Indices.
|
||||
enum class SystemConstantIndex : uint32_t {
|
||||
kFlags,
|
||||
kTessellationFactorRange,
|
||||
kLineLoopClosingIndex,
|
||||
kVertexIndexEndian,
|
||||
kVertexBaseIndex,
|
||||
kPointSize,
|
||||
kPointSizeMinMax,
|
||||
kPointScreenToNDC,
|
||||
kUserClipPlanes,
|
||||
kNDCScale,
|
||||
kInterpolatorSamplingPattern,
|
||||
kNDCOffset,
|
||||
kPSParamGen,
|
||||
kTextureSwizzledSigns,
|
||||
kTexturesResolved,
|
||||
kSampleCountLog2,
|
||||
kAlphaTestReference,
|
||||
kColorExpBias,
|
||||
kAlphaToMask,
|
||||
kEdramPitchTiles,
|
||||
kEdramDepthRange,
|
||||
kEdramPolyOffsetFront,
|
||||
kEdramPolyOffsetBack,
|
||||
kEdramDepthBaseDwords,
|
||||
kEdramStencil,
|
||||
kEdramRTBaseDwordsScaled,
|
||||
kEdramRTFormatFlags,
|
||||
kEdramRTClamp,
|
||||
kEdramRTKeepMask,
|
||||
kEdramRTBlendFactorsOps,
|
||||
kEdramBlendConstant,
|
||||
|
||||
kSysConst_Flags_Index,
|
||||
kSysConst_TessellationFactorRange_Index,
|
||||
kSysConst_LineLoopClosingIndex_Index,
|
||||
|
||||
kSysConst_VertexIndexEndian_Index,
|
||||
kSysConst_VertexBaseIndex_Index,
|
||||
kSysConst_PointSize_Index,
|
||||
|
||||
kSysConst_PointSizeMinMax_Index,
|
||||
kSysConst_PointScreenToNDC_Index,
|
||||
|
||||
kSysConst_UserClipPlanes_Index,
|
||||
|
||||
kSysConst_NDCScale_Index,
|
||||
kSysConst_InterpolatorSamplingPattern_Index,
|
||||
|
||||
kSysConst_NDCOffset_Index,
|
||||
kSysConst_PSParamGen_Index,
|
||||
|
||||
kSysConst_TextureSwizzledSigns_Index,
|
||||
|
||||
kSysConst_TexturesResolved_Index,
|
||||
kSysConst_SampleCountLog2_Index,
|
||||
kSysConst_AlphaTestReference_Index,
|
||||
|
||||
kSysConst_ColorExpBias_Index,
|
||||
|
||||
kSysConst_AlphaToMask_Index,
|
||||
kSysConst_EdramPitchTiles_Index,
|
||||
kSysConst_EdramDepthRange_Index,
|
||||
|
||||
kSysConst_EdramPolyOffsetFront_Index,
|
||||
kSysConst_EdramPolyOffsetBack_Index,
|
||||
|
||||
kSysConst_EdramDepthBaseDwords_Index,
|
||||
|
||||
kSysConst_EdramStencil_Index,
|
||||
|
||||
kSysConst_EdramRTBaseDwordsScaled_Index,
|
||||
|
||||
kSysConst_EdramRTFormatFlags_Index,
|
||||
|
||||
kSysConst_EdramRTClamp_Index,
|
||||
|
||||
kSysConst_EdramRTKeepMask_Index,
|
||||
|
||||
kSysConst_EdramRTBlendFactorsOps_Index,
|
||||
|
||||
kSysConst_EdramBlendConstant_Index,
|
||||
|
||||
kSysConst_Count,
|
||||
|
||||
// Vectors.
|
||||
|
||||
kSysConst_Flags_Vec = 0,
|
||||
kSysConst_Flags_Comp = 0,
|
||||
kSysConst_TessellationFactorRange_Vec = kSysConst_Flags_Vec,
|
||||
kSysConst_TessellationFactorRange_Comp = 1,
|
||||
kSysConst_LineLoopClosingIndex_Vec = kSysConst_Flags_Vec,
|
||||
kSysConst_LineLoopClosingIndex_Comp = 3,
|
||||
|
||||
kSysConst_VertexIndexEndian_Vec = kSysConst_LineLoopClosingIndex_Vec + 1,
|
||||
kSysConst_VertexIndexEndian_Comp = 0,
|
||||
kSysConst_VertexBaseIndex_Vec = kSysConst_VertexIndexEndian_Vec,
|
||||
kSysConst_VertexBaseIndex_Comp = 1,
|
||||
kSysConst_PointSize_Vec = kSysConst_VertexIndexEndian_Vec,
|
||||
kSysConst_PointSize_Comp = 2,
|
||||
|
||||
kSysConst_PointSizeMinMax_Vec = kSysConst_PointSize_Vec + 1,
|
||||
kSysConst_PointSizeMinMax_Comp = 0,
|
||||
kSysConst_PointScreenToNDC_Vec = kSysConst_PointSizeMinMax_Vec,
|
||||
kSysConst_PointScreenToNDC_Comp = 2,
|
||||
|
||||
// 6 vectors.
|
||||
kSysConst_UserClipPlanes_Vec = kSysConst_PointScreenToNDC_Vec + 1,
|
||||
|
||||
kSysConst_NDCScale_Vec = kSysConst_UserClipPlanes_Vec + 6,
|
||||
kSysConst_NDCScale_Comp = 0,
|
||||
kSysConst_InterpolatorSamplingPattern_Vec = kSysConst_NDCScale_Vec,
|
||||
kSysConst_InterpolatorSamplingPattern_Comp = 3,
|
||||
|
||||
kSysConst_NDCOffset_Vec = kSysConst_InterpolatorSamplingPattern_Vec + 1,
|
||||
kSysConst_NDCOffset_Comp = 0,
|
||||
kSysConst_PSParamGen_Vec = kSysConst_NDCOffset_Vec,
|
||||
kSysConst_PSParamGen_Comp = 3,
|
||||
|
||||
// 2 vectors.
|
||||
kSysConst_TextureSwizzledSigns_Vec = kSysConst_PSParamGen_Vec + 1,
|
||||
|
||||
kSysConst_TexturesResolved_Vec = kSysConst_TextureSwizzledSigns_Vec + 2,
|
||||
kSysConst_TexturesResolved_Comp = 0,
|
||||
kSysConst_SampleCountLog2_Vec = kSysConst_TexturesResolved_Vec,
|
||||
kSysConst_SampleCountLog2_Comp = 1,
|
||||
kSysConst_AlphaTestReference_Vec = kSysConst_TexturesResolved_Vec,
|
||||
kSysConst_AlphaTestReference_Comp = 3,
|
||||
|
||||
kSysConst_ColorExpBias_Vec = kSysConst_AlphaTestReference_Vec + 1,
|
||||
|
||||
kSysConst_AlphaToMask_Vec = kSysConst_ColorExpBias_Vec + 1,
|
||||
kSysConst_AlphaToMask_Comp = 0,
|
||||
kSysConst_EdramPitchTiles_Vec = kSysConst_AlphaToMask_Vec,
|
||||
kSysConst_EdramPitchTiles_Comp = 1,
|
||||
kSysConst_EdramDepthRange_Vec = kSysConst_AlphaToMask_Vec,
|
||||
kSysConst_EdramDepthRangeScale_Comp = 2,
|
||||
kSysConst_EdramDepthRangeOffset_Comp = 3,
|
||||
|
||||
kSysConst_EdramPolyOffsetFront_Vec = kSysConst_EdramDepthRange_Vec + 1,
|
||||
kSysConst_EdramPolyOffsetFrontScale_Comp = 0,
|
||||
kSysConst_EdramPolyOffsetFrontOffset_Comp = 1,
|
||||
kSysConst_EdramPolyOffsetBack_Vec = kSysConst_EdramPolyOffsetFront_Vec,
|
||||
kSysConst_EdramPolyOffsetBackScale_Comp = 2,
|
||||
kSysConst_EdramPolyOffsetBackOffset_Comp = 3,
|
||||
|
||||
kSysConst_EdramDepthBaseDwords_Vec = kSysConst_EdramPolyOffsetBack_Vec + 1,
|
||||
kSysConst_EdramDepthBaseDwords_Comp = 0,
|
||||
|
||||
// 2 vectors.
|
||||
kSysConst_EdramStencil_Vec = kSysConst_EdramDepthBaseDwords_Vec + 1,
|
||||
kSysConst_EdramStencil_Front_Vec = kSysConst_EdramStencil_Vec,
|
||||
kSysConst_EdramStencil_Back_Vec,
|
||||
kSysConst_EdramStencil_Reference_Comp = 0,
|
||||
kSysConst_EdramStencil_ReadMask_Comp,
|
||||
kSysConst_EdramStencil_WriteMask_Comp,
|
||||
kSysConst_EdramStencil_FuncOps_Comp,
|
||||
|
||||
kSysConst_EdramRTBaseDwordsScaled_Vec = kSysConst_EdramStencil_Vec + 2,
|
||||
|
||||
kSysConst_EdramRTFormatFlags_Vec =
|
||||
kSysConst_EdramRTBaseDwordsScaled_Vec + 1,
|
||||
|
||||
// 4 vectors.
|
||||
kSysConst_EdramRTClamp_Vec = kSysConst_EdramRTFormatFlags_Vec + 1,
|
||||
|
||||
// 2 vectors (render targets 01 and 23).
|
||||
kSysConst_EdramRTKeepMask_Vec = kSysConst_EdramRTClamp_Vec + 4,
|
||||
|
||||
kSysConst_EdramRTBlendFactorsOps_Vec = kSysConst_EdramRTKeepMask_Vec + 2,
|
||||
|
||||
kSysConst_EdramBlendConstant_Vec = kSysConst_EdramRTBlendFactorsOps_Vec + 1,
|
||||
kCount,
|
||||
};
|
||||
static_assert(kSysConst_Count <= 64,
|
||||
static_assert(uint32_t(SystemConstantIndex::kCount) <= 64,
|
||||
"Too many system constants, can't use uint64_t for usage bits");
|
||||
|
||||
static constexpr uint32_t kPointParametersTexCoord = xenos::kMaxInterpolators;
|
||||
|
@ -685,19 +577,37 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
kPSInFrontFaceAndSampleIndex,
|
||||
};
|
||||
|
||||
// GetSystemConstantSrc + MarkSystemConstantUsed is for special cases of
|
||||
// building the source unconditionally - in general, LoadSystemConstant must
|
||||
// be used instead.
|
||||
// Records that the shader being translated reads the given system constant by
// setting the bit corresponding to its index in the usage mask.
void MarkSystemConstantUsed(SystemConstantIndex index) {
  uint64_t usage_bit = uint64_t(1) << uint32_t(index);
  system_constants_used_ |= usage_bit;
}
|
||||
// Offset should be offsetof(SystemConstants, field). Swizzle values are
|
||||
// relative to the first component in the vector according to offsetof - to
|
||||
// request a scalar, use XXXX swizzle, and if it's at +4 in its 16-byte
|
||||
// vector, it will be turned into YYYY, and so on.
|
||||
// TODO(Triang3l): Index to enum class.
|
||||
dxbc::Src LoadSystemConstant(uint32_t index, size_t offset,
|
||||
uint32_t swizzle) {
|
||||
system_constants_used_ |= uint64_t(1) << index;
|
||||
// vector, it will be turned into YYYY, and so on. The swizzle may include
|
||||
// out-of-bounds components of the vector for simplicity of use, assuming they
|
||||
// will be dropped anyway later.
|
||||
dxbc::Src GetSystemConstantSrc(size_t offset, uint32_t swizzle) {
|
||||
uint32_t first_component = uint32_t((offset >> 2) & 3);
|
||||
return dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
uint32_t(offset >> 4),
|
||||
first_component * 0b01010101 + swizzle);
|
||||
return dxbc::Src::CB(
|
||||
cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants), uint32_t(offset >> 4),
|
||||
std::min((swizzle & 3) + first_component, uint32_t(3)) |
|
||||
std::min(((swizzle >> 2) & 3) + first_component, uint32_t(3)) << 2 |
|
||||
std::min(((swizzle >> 4) & 3) + first_component, uint32_t(3)) << 4 |
|
||||
std::min(((swizzle >> 6) & 3) + first_component, uint32_t(3)) << 6);
|
||||
}
|
||||
// Marks the system constant identified by `index` as used, then returns the
// source operand built by GetSystemConstantSrc for the field located at
// `offset` (expected to be offsetof(SystemConstants, field)) with `swizzle`
// interpreted relative to the first component of that field.
dxbc::Src LoadSystemConstant(SystemConstantIndex index, size_t offset,
                             uint32_t swizzle) {
  MarkSystemConstantUsed(index);
  dxbc::Src constant_src(GetSystemConstantSrc(offset, swizzle));
  return constant_src;
}
|
||||
// Shortcut for the `flags` field of SystemConstants: marks the flags constant
// as used and returns it as a scalar (XXXX-swizzled) source operand.
dxbc::Src LoadFlagsSystemConstant() {
  MarkSystemConstantUsed(SystemConstantIndex::kFlags);
  return GetSystemConstantSrc(offsetof(SystemConstants, flags),
                              dxbc::Src::kXXXX);
}
|
||||
|
||||
Modification GetDxbcShaderModification() const {
|
||||
|
@ -1071,8 +981,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
uint32_t size;
|
||||
uint32_t padding_after;
|
||||
};
|
||||
static const SystemConstantRdef system_constant_rdef_[kSysConst_Count];
|
||||
// Mask of system constants (1 << kSysConst_#_Index) used in the shader, so
|
||||
static const SystemConstantRdef
|
||||
system_constant_rdef_[size_t(SystemConstantIndex::kCount)];
|
||||
// Mask of system constants (1 << SystemConstantIndex) used in the shader, so
|
||||
// the remaining ones can be marked as unused in RDEF.
|
||||
uint64_t system_constants_used_;
|
||||
|
||||
|
|
|
@ -128,12 +128,8 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
|
|||
// memexport is used), fetch from the appropriate binding. Extract whether
|
||||
// shared memory is a UAV to system_temp_result_.x and check. In the `if`, put
|
||||
// the more likely case (SRV), in the `else`, the less likely one (UAV).
|
||||
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
|
||||
a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b0001),
|
||||
dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_Flags_Vec)
|
||||
.Select(kSysConst_Flags_Comp),
|
||||
LoadFlagsSystemConstant(),
|
||||
dxbc::Src::LU(kSysFlag_SharedMemoryIsUAV));
|
||||
a_.OpIf(false, dxbc::Src::R(system_temp_result_, dxbc::Src::kXXXX));
|
||||
if (srv_index_shared_memory_ == kBindingIndexUnallocated) {
|
||||
|
@ -949,7 +945,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// calculations.
|
||||
assert_zero(used_result_nonzero_components & 0b1000);
|
||||
a_.OpAnd(dxbc::Dest::R(system_temp_result_, 0b1000),
|
||||
LoadSystemConstant(kSysConst_TexturesResolved_Index,
|
||||
LoadSystemConstant(SystemConstantIndex::kTexturesResolved,
|
||||
offsetof(SystemConstants, textures_resolved),
|
||||
dxbc::Src::kXXXX),
|
||||
dxbc::Src::LU(uint32_t(1) << tfetch_index));
|
||||
|
@ -1003,15 +999,14 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// - Component signedness, for selecting the SRV, and if data is needed.
|
||||
|
||||
dxbc::Src signs_uint_src(
|
||||
dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_TextureSwizzledSigns_Vec + (tfetch_index >> 4))
|
||||
.Select((tfetch_index >> 2) & 3));
|
||||
GetSystemConstantSrc(offsetof(SystemConstants, texture_swizzled_signs) +
|
||||
sizeof(uint32_t) * (tfetch_index >> 2),
|
||||
dxbc::Src::kXXXX));
|
||||
uint32_t signs_shift = (tfetch_index & 3) * 8;
|
||||
uint32_t signs_temp = UINT32_MAX;
|
||||
if (instr.opcode == FetchOpcode::kTextureFetch) {
|
||||
signs_temp = PushSystemTemp();
|
||||
system_constants_used_ |= 1ull << kSysConst_TextureSwizzledSigns_Index;
|
||||
MarkSystemConstantUsed(SystemConstantIndex::kTextureSwizzledSigns);
|
||||
a_.OpUBFE(dxbc::Dest::R(signs_temp, used_result_nonzero_components),
|
||||
dxbc::Src::LU(2),
|
||||
dxbc::Src::LU(signs_shift, signs_shift + 2, signs_shift + 4,
|
||||
|
@ -1074,7 +1069,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// resolution scale inverse - sampler not loaded yet.
|
||||
a_.OpAnd(
|
||||
dxbc::Dest::R(coord_and_sampler_temp, 0b1000),
|
||||
LoadSystemConstant(kSysConst_TexturesResolved_Index,
|
||||
LoadSystemConstant(SystemConstantIndex::kTexturesResolved,
|
||||
offsetof(SystemConstants, textures_resolved),
|
||||
dxbc::Src::kXXXX),
|
||||
dxbc::Src::LU(uint32_t(1) << tfetch_index));
|
||||
|
@ -1140,7 +1135,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// resolution scale inverse - sampler not loaded yet.
|
||||
a_.OpAnd(
|
||||
dxbc::Dest::R(coord_and_sampler_temp, 0b1000),
|
||||
LoadSystemConstant(kSysConst_TexturesResolved_Index,
|
||||
LoadSystemConstant(SystemConstantIndex::kTexturesResolved,
|
||||
offsetof(SystemConstants, textures_resolved),
|
||||
dxbc::Src::kXXXX),
|
||||
dxbc::Src::LU(uint32_t(1) << tfetch_index));
|
||||
|
@ -1317,7 +1312,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// Check which SRV needs to be accessed - signed or unsigned. If there is
|
||||
// at least one non-signed component, will be using the unsigned one.
|
||||
uint32_t is_unsigned_temp = PushSystemTemp();
|
||||
system_constants_used_ |= 1ull << kSysConst_TextureSwizzledSigns_Index;
|
||||
MarkSystemConstantUsed(SystemConstantIndex::kTextureSwizzledSigns);
|
||||
a_.OpUBFE(dxbc::Dest::R(is_unsigned_temp, 0b0001), dxbc::Src::LU(8),
|
||||
dxbc::Src::LU(signs_shift), signs_uint_src);
|
||||
a_.OpINE(
|
||||
|
@ -2060,7 +2055,7 @@ void DxbcShaderTranslator::ProcessTextureFetchInstruction(
|
|||
// `if`, with `else` for sRGB resolved render targets.
|
||||
a_.OpAnd(
|
||||
dxbc::Dest::R(gamma_temp, 0b0001),
|
||||
LoadSystemConstant(kSysConst_TexturesResolved_Index,
|
||||
LoadSystemConstant(SystemConstantIndex::kTexturesResolved,
|
||||
offsetof(SystemConstants, textures_resolved),
|
||||
dxbc::Src::kXXXX),
|
||||
dxbc::Src::LU(uint32_t(1) << tfetch_index));
|
||||
|
|
|
@ -101,13 +101,9 @@ void DxbcShaderTranslator::ExportToMemory() {
|
|||
uint32_t control_temp = PushSystemTemp();
|
||||
|
||||
// Safety check if the shared memory is bound as UAV.
|
||||
system_constants_used_ |= 1ull << kSysConst_Flags_Index;
|
||||
a_.OpUBFE(dxbc::Dest::R(control_temp, 0b0001), dxbc::Src::LU(1),
|
||||
dxbc::Src::LU(kSysFlag_SharedMemoryIsUAV_Shift),
|
||||
dxbc::Src::CB(cbuffer_index_system_constants_,
|
||||
uint32_t(CbufferRegister::kSystemConstants),
|
||||
kSysConst_Flags_Vec)
|
||||
.Select(kSysConst_Flags_Comp));
|
||||
LoadFlagsSystemConstant());
|
||||
// Open the `if` with the uniform condition for the shared memory buffer being
|
||||
// bound as a UAV (more fine-grained checks are vector and likely divergent).
|
||||
a_.OpIf(true, dxbc::Src::R(control_temp, dxbc::Src::kXXXX));
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue