[D3D12] Code cleanup

This commit is contained in:
Triang3l 2018-10-23 16:42:53 +03:00
parent 31e0581304
commit b81bb4d68c
9 changed files with 163 additions and 102 deletions

View File

@ -40,6 +40,12 @@ namespace xe {
namespace gpu {
namespace d3d12 {
// Namespace-scope definitions of D3D12CommandProcessor's static constexpr
// members (their values are given at the in-class declarations). Before C++17
// such a definition is required whenever the constant is ODR-used, for example
// when it is bound to a const reference parameter.
// NOTE(review): under C++17 static constexpr members are implicitly inline and
// these definitions become redundant - confirm the project's language standard
// before removing them.
constexpr uint32_t
D3D12CommandProcessor::RootExtraParameterIndices::kUnavailable;
constexpr uint32_t D3D12CommandProcessor::kSwapTextureWidth;
constexpr uint32_t D3D12CommandProcessor::kSwapTextureHeight;
constexpr uint32_t D3D12CommandProcessor::kScratchBufferSizeIncrement;
// Constructs the command processor by forwarding graphics_system and
// kernel_state to the CommandProcessor base class. The constructor body is
// empty, so no other work is performed here.
D3D12CommandProcessor::D3D12CommandProcessor(
D3D12GraphicsSystem* graphics_system, kernel::KernelState* kernel_state)
: CommandProcessor(graphics_system, kernel_state) {}

View File

@ -18,6 +18,11 @@ namespace xe {
namespace gpu {
namespace d3d12 {
// Namespace-scope definitions of D3D12Shader's static constexpr members (their
// values are given at the in-class declarations). Before C++17 such a
// definition is required whenever the constant is ODR-used.
// NOTE(review): redundant under C++17, where static constexpr members are
// implicitly inline - confirm the project's language standard.
constexpr uint32_t D3D12Shader::kMaxTextureSRVIndexBits;
constexpr uint32_t D3D12Shader::kMaxTextureSRVs;
constexpr uint32_t D3D12Shader::kMaxSamplerBindingIndexBits;
constexpr uint32_t D3D12Shader::kMaxSamplerBindings;
// Constructs the shader by forwarding the shader type, the data hash and the
// dword pointer/count pair unchanged to the Shader base class. The constructor
// body is empty, so no other work is performed here.
D3D12Shader::D3D12Shader(ShaderType shader_type, uint64_t data_hash,
const uint32_t* dword_ptr, uint32_t dword_count)
: Shader(shader_type, data_hash, dword_ptr, dword_count) {}

View File

@ -24,6 +24,11 @@ namespace xe {
namespace gpu {
namespace d3d12 {
// Namespace-scope definitions of PrimitiveConverter's static constexpr members
// (their values are given at the in-class declarations). Before C++17 such a
// definition is required whenever the constant is ODR-used.
// NOTE(review): redundant under C++17, where static constexpr members are
// implicitly inline - confirm the project's language standard.
constexpr uint32_t PrimitiveConverter::kMaxNonIndexedVertices;
constexpr uint32_t PrimitiveConverter::kStaticIBTriangleFanOffset;
constexpr uint32_t PrimitiveConverter::kStaticIBTriangleFanCount;
constexpr uint32_t PrimitiveConverter::kStaticIBTotalCount;
PrimitiveConverter::PrimitiveConverter(D3D12CommandProcessor* command_processor,
RegisterFile* register_file,
Memory* memory)

View File

@ -46,6 +46,9 @@ namespace d3d12 {
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_ps.h"
#include "xenia/gpu/d3d12/shaders/dxbc/resolve_vs.h"
// Namespace-scope definitions of RenderTargetCache's static constexpr members
// (their values are given at the in-class declarations). Before C++17 such a
// definition is required whenever the constant is ODR-used.
// NOTE(review): redundant under C++17, where static constexpr members are
// implicitly inline - confirm the project's language standard.
constexpr uint32_t RenderTargetCache::kHeap4MBPages;
constexpr uint32_t RenderTargetCache::kRenderTargetDescriptorHeapSize;
const RenderTargetCache::EDRAMLoadStoreModeInfo
RenderTargetCache::edram_load_store_mode_info_[size_t(
RenderTargetCache::EDRAMLoadStoreMode::kCount)] = {
@ -79,7 +82,7 @@ bool RenderTargetCache::Initialize() {
// Create the buffer for reinterpreting EDRAM contents.
D3D12_RESOURCE_DESC edram_buffer_desc;
ui::d3d12::util::FillBufferResourceDesc(
edram_buffer_desc, kEDRAMBufferSize,
edram_buffer_desc, GetEDRAMBufferSize(),
D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS);
// The first operation will be a clear.
edram_buffer_state_ = D3D12_RESOURCE_STATE_UNORDERED_ACCESS;
@ -1094,7 +1097,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
return false;
}
ui::d3d12::util::CreateRawBufferSRV(device, descriptor_cpu_start,
edram_buffer_, kEDRAMBufferSize);
edram_buffer_, GetEDRAMBufferSize());
shared_memory->CreateRawUAV(
provider->OffsetViewDescriptor(descriptor_cpu_start, 1));
@ -1246,7 +1249,7 @@ bool RenderTargetCache::ResolveCopy(SharedMemory* shared_memory,
0);
ui::d3d12::util::CreateRawBufferSRV(device, descriptor_cpu_start,
edram_buffer_, kEDRAMBufferSize);
edram_buffer_, GetEDRAMBufferSize());
ui::d3d12::util::CreateRawBufferUAV(
device, provider->OffsetViewDescriptor(descriptor_cpu_start, 1),
copy_buffer, render_target->copy_buffer_size);
@ -1540,7 +1543,7 @@ bool RenderTargetCache::ResolveClear(uint32_t edram_base,
command_list->SetComputeRoot32BitConstants(
0, sizeof(root_constants) / sizeof(uint32_t), &root_constants, 0);
ui::d3d12::util::CreateRawBufferUAV(device, descriptor_cpu_start,
edram_buffer_, kEDRAMBufferSize);
edram_buffer_, GetEDRAMBufferSize());
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);
// 1 group per 80x16 samples.
command_list->Dispatch(row_width_ss_div_80, rows, 1);
@ -1722,7 +1725,7 @@ void RenderTargetCache::CreateEDRAMUint32UAV(
desc.Format = DXGI_FORMAT_R32_UINT;
desc.ViewDimension = D3D12_UAV_DIMENSION_BUFFER;
desc.Buffer.FirstElement = 0;
desc.Buffer.NumElements = kEDRAMBufferSize / sizeof(uint32_t);
desc.Buffer.NumElements = GetEDRAMBufferSize() / sizeof(uint32_t);
desc.Buffer.StructureByteStride = 0;
desc.Buffer.CounterOffsetInBytes = 0;
desc.Buffer.Flags = D3D12_BUFFER_UAV_FLAG_NONE;
@ -1774,6 +1777,17 @@ DXGI_FORMAT RenderTargetCache::GetColorDXGIFormat(
return DXGI_FORMAT_UNKNOWN;
}
// Returns the size, in bytes, of the buffer that backs the EDRAM contents.
// The result depends on whether the command processor reports that ROV is
// used for EDRAM.
uint32_t RenderTargetCache::GetEDRAMBufferSize() const {
  // A single page is 2048 * 5120 bytes (10 MB).
  const uint32_t page_size = 2048 * 5120;
  if (command_processor_->IsROVUsedForEDRAM()) {
    // With ROV a single page is all that is allocated.
    return page_size;
  }
  // Two 10 MB pages, one containing color and integer depth data, another
  // with 32-bit float depth when 20e4 depth is used to allow for multipass
  // drawing without precision loss in case of EDRAM store/load.
  return page_size * 2;
}
void RenderTargetCache::TransitionEDRAMBuffer(D3D12_RESOURCE_STATES new_state) {
command_processor_->PushTransitionBarrier(edram_buffer_, edram_buffer_state_,
new_state);
@ -2112,7 +2126,7 @@ void RenderTargetCache::StoreRenderTargetsToEDRAM() {
copy_buffer_size);
ui::d3d12::util::CreateRawBufferUAV(
device, provider->OffsetViewDescriptor(descriptor_cpu_start, 1),
edram_buffer_, kEDRAMBufferSize);
edram_buffer_, GetEDRAMBufferSize());
command_list->SetComputeRootDescriptorTable(1, descriptor_gpu_start);
// Sort the bindings in ascending order of EDRAM base so data in the render
@ -2264,7 +2278,7 @@ void RenderTargetCache::LoadRenderTargetsFromEDRAM(
auto device = provider->GetDevice();
command_list->SetComputeRootSignature(edram_load_store_root_signature_);
ui::d3d12::util::CreateRawBufferSRV(device, descriptor_cpu_start,
edram_buffer_, kEDRAMBufferSize);
edram_buffer_, GetEDRAMBufferSize());
ui::d3d12::util::CreateRawBufferUAV(
device, provider->OffsetViewDescriptor(descriptor_cpu_start, 1),
copy_buffer, copy_buffer_size);

View File

@ -390,6 +390,8 @@ class RenderTargetCache {
uint32_t copy_buffer_size;
};
uint32_t GetEDRAMBufferSize() const;
void TransitionEDRAMBuffer(D3D12_RESOURCE_STATES new_state);
void ClearBindings();
@ -460,10 +462,6 @@ class RenderTargetCache {
// The EDRAM buffer allowing color and depth data to be reinterpreted.
ID3D12Resource* edram_buffer_ = nullptr;
// Two 10 MB pages, one containing color and integer depth data, another with
// 32-bit float depth when 20e4 depth is used to allow for multipass drawing
// without precision loss in case of EDRAM store/load.
static constexpr uint32_t kEDRAMBufferSize = 2 * 2048 * 5120;
D3D12_RESOURCE_STATES edram_buffer_state_;
bool edram_buffer_cleared_;

View File

@ -32,6 +32,18 @@ namespace xe {
namespace gpu {
namespace d3d12 {
// Namespace-scope definitions of SharedMemory's static constexpr members
// (their values are given at the in-class declarations). Before C++17 such a
// definition is required whenever the constant is ODR-used.
// NOTE(review): redundant under C++17, where static constexpr members are
// implicitly inline - confirm the project's language standard.
constexpr uint32_t SharedMemory::kBufferSizeLog2;
constexpr uint32_t SharedMemory::kBufferSize;
constexpr uint32_t SharedMemory::kAddressMask;
constexpr uint32_t SharedMemory::kTileSizeLog2;
constexpr uint32_t SharedMemory::kTileSize;
constexpr uint32_t SharedMemory::kHeapSizeLog2;
constexpr uint32_t SharedMemory::kHeapSize;
constexpr uint32_t SharedMemory::kWatchBucketSizeLog2;
constexpr uint32_t SharedMemory::kWatchBucketCount;
constexpr uint32_t SharedMemory::kWatchRangePoolSize;
constexpr uint32_t SharedMemory::kWatchNodePoolSize;
SharedMemory::SharedMemory(D3D12CommandProcessor* command_processor,
Memory* memory)
: command_processor_(command_processor), memory_(memory) {

View File

@ -48,6 +48,8 @@ namespace d3d12 {
#include "xenia/gpu/d3d12/shaders/dxbc/texture_tile_64bpp_cs.h"
#include "xenia/gpu/d3d12/shaders/dxbc/texture_tile_8bpp_cs.h"
// Namespace-scope definition of the static constexpr member of the nested
// TextureCache::LoadConstants type (its value is given at the in-class
// declaration). Before C++17 such a definition is required whenever the
// constant is ODR-used.
// NOTE(review): redundant under C++17 - confirm the project's language
// standard.
constexpr uint32_t TextureCache::LoadConstants::kGuestPitchTiled;
const TextureCache::HostFormat TextureCache::host_formats_[64] = {
// k_1_REVERSE
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN,

View File

@ -61,6 +61,25 @@ using namespace ucode;
// For example, if the requested vector is located in the beginning of the
// second buffer in the descriptor array at b2, which is assigned to CB1, the
// index would be CB1[3][0].
// - Resources and samplers use 2D indices, where the first dimension is the
// S#/T#/U# binding index, and the second is the s#/t#/u# register index
// within its space.

// Namespace-scope definitions of DxbcShaderTranslator's static constexpr
// members, including one belonging to the nested DxbcSourceOperand type (their
// values are given at the in-class declarations). Before C++17 such a
// definition is required whenever the constant is ODR-used.
// NOTE(review): redundant under C++17, where static constexpr members are
// implicitly inline - confirm the project's language standard.
constexpr uint32_t DxbcShaderTranslator::kMaxTextureSRVIndexBits;
constexpr uint32_t DxbcShaderTranslator::kMaxTextureSRVs;
constexpr uint32_t DxbcShaderTranslator::kMaxSamplerBindingIndexBits;
constexpr uint32_t DxbcShaderTranslator::kMaxSamplerBindings;
constexpr uint32_t DxbcShaderTranslator::kInterpolatorCount;
constexpr uint32_t DxbcShaderTranslator::kPointParametersTexCoord;
constexpr uint32_t DxbcShaderTranslator::kSwizzleXYZW;
constexpr uint32_t DxbcShaderTranslator::kSwizzleXXXX;
constexpr uint32_t DxbcShaderTranslator::kSwizzleYYYY;
constexpr uint32_t DxbcShaderTranslator::kSwizzleZZZZ;
constexpr uint32_t DxbcShaderTranslator::kSwizzleWWWW;
constexpr uint32_t
DxbcShaderTranslator::DxbcSourceOperand::kIntermediateRegisterNone;
constexpr uint32_t DxbcShaderTranslator::kCbufferIndexUnallocated;
constexpr uint32_t DxbcShaderTranslator::kCfExecBoolConstantNone;
DxbcShaderTranslator::DxbcShaderTranslator(bool edram_rov_used)
: edram_rov_used_(edram_rov_used) {
@ -567,9 +586,9 @@ void DxbcShaderTranslator::StartVertexShader_LoadVertexIndex() {
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0111, 1));
shader_code_.push_back(reg);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_INPUT, kSwizzleXXXX, 1));
shader_code_.push_back(kVSInVertexIndexRegister);
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0, 1));
shader_code_.push_back(uint32_t(InOutRegister::kVSInVertexIndex));
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0);
@ -810,7 +829,7 @@ void DxbcShaderTranslator::StartVertexShader() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b1111, 1));
shader_code_.push_back(kVSOutInterpolatorRegister + i);
shader_code_.push_back(uint32_t(InOutRegister::kVSOutInterpolators) + i);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0);
@ -829,7 +848,7 @@ void DxbcShaderTranslator::StartVertexShader() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(8));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b0111, 1));
shader_code_.push_back(kVSOutPointParametersRegister);
shader_code_.push_back(uint32_t(InOutRegister::kVSOutPointParameters));
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0);
@ -855,7 +874,7 @@ void DxbcShaderTranslator::StartPixelShader() {
shader_code_.push_back(system_temp_depth_);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_INPUT, 2, 1));
shader_code_.push_back(kPSInPositionRegister);
shader_code_.push_back(uint32_t(InOutRegister::kPSInPosition));
++stat_.instruction_count;
++stat_.mov_instruction_count;
}
@ -878,7 +897,7 @@ void DxbcShaderTranslator::StartPixelShader() {
shader_code_.push_back(interpolator_temp_register);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_INPUT, kSwizzleXYZW, 1));
shader_code_.push_back(kPSInInterpolatorRegister + i);
shader_code_.push_back(uint32_t(InOutRegister::kPSInInterpolators) + i);
++stat_.instruction_count;
++stat_.mov_instruction_count;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_MOV) |
@ -904,7 +923,7 @@ void DxbcShaderTranslator::StartPixelShader() {
shader_code_.push_back(i);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_INPUT, kSwizzleXYZW, 1));
shader_code_.push_back(kPSInInterpolatorRegister + i);
shader_code_.push_back(uint32_t(InOutRegister::kPSInInterpolators) + i);
++stat_.instruction_count;
++stat_.mov_instruction_count;
}
@ -951,7 +970,7 @@ void DxbcShaderTranslator::StartPixelShader() {
shader_code_.push_back(param_gen_value_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_INPUT, kSwizzleXYZW, 1));
shader_code_.push_back(kPSInPositionRegister);
shader_code_.push_back(uint32_t(InOutRegister::kPSInPosition));
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
kSysConst_SSAAInvScale_Comp | ((kSysConst_SSAAInvScale_Comp + 1) << 2),
@ -975,7 +994,7 @@ void DxbcShaderTranslator::StartPixelShader() {
shader_code_.push_back(param_gen_value_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0b01000000, 1));
shader_code_.push_back(kPSInPointParametersRegister);
shader_code_.push_back(uint32_t(InOutRegister::kPSInPointParameters));
++stat_.instruction_count;
++stat_.mov_instruction_count;
if (IndexableGPRsUsed()) {
@ -1125,9 +1144,9 @@ void DxbcShaderTranslator::StartTranslation() {
++stat_.static_flow_control_count;
} else {
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(
D3D10_SB_INSTRUCTION_TEST_ZERO));
D3D10_SB_INSTRUCTION_TEST_ZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
@ -1321,7 +1340,7 @@ void DxbcShaderTranslator::CompleteVertexShader() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b1111, 1));
shader_code_.push_back(kVSOutPositionRegister);
shader_code_.push_back(uint32_t(InOutRegister::kVSOutPosition));
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(system_temp_position_);
@ -1926,7 +1945,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(pack_offset_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXXXX, 1));
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(target_temp);
++stat_.instruction_count;
++stat_.int_instruction_count;
@ -1981,7 +2000,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV_LoadColor(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
shader_code_.push_back(pack_offset_temp);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXXXX, 1));
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(high_temp);
++stat_.instruction_count;
++stat_.int_instruction_count;
@ -3671,7 +3690,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
shader_code_.push_back(edram_coord_low_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_INPUT, kSwizzleXYZW, 1));
shader_code_.push_back(kPSInPositionRegister);
shader_code_.push_back(uint32_t(InOutRegister::kPSInPosition));
++stat_.instruction_count;
++stat_.conversion_instruction_count;
@ -4252,7 +4271,7 @@ void DxbcShaderTranslator::CompletePixelShader_WriteToROV() {
shader_code_.push_back(depth_stencil_test_temp);
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0, 1));
shader_code_.push_back(kPSInFrontFaceRegister);
shader_code_.push_back(uint32_t(InOutRegister::kPSInFrontFace));
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
shader_code_.push_back(cbuffer_index_system_constants_);
@ -5774,8 +5793,8 @@ void DxbcShaderTranslator::LoadDxbcSourceOperand(
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXXXX, 3,
shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3,
D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
is_static ? D3D10_SB_OPERAND_INDEX_IMMEDIATE32
@ -5911,8 +5930,8 @@ void DxbcShaderTranslator::LoadDxbcSourceOperand(
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXXXX, 1));
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
++stat_.instruction_count;
++stat_.conversion_instruction_count;
@ -6190,7 +6209,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5) | saturate_bit);
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b0100, 1));
shader_code_.push_back(kVSOutPointParametersRegister);
shader_code_.push_back(uint32_t(InOutRegister::kVSOutPointParameters));
break;
case InstructionStorageTarget::kDepth:
writes_depth_ = true;
@ -6332,7 +6351,7 @@ void DxbcShaderTranslator::StoreResult(const InstructionResult& result,
saturate_bit);
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, mask, 1));
shader_code_.push_back(kVSOutInterpolatorRegister +
shader_code_.push_back(uint32_t(InOutRegister::kVSOutInterpolators) +
uint32_t(result.storage_index));
break;
@ -6482,8 +6501,8 @@ void DxbcShaderTranslator::CheckPredicate(
instruction_predicate_condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO
: D3D10_SB_INSTRUCTION_TEST_ZERO;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test));
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
@ -6535,8 +6554,8 @@ void DxbcShaderTranslator::SetExecBoolConstant(uint32_t index, bool condition) {
condition ? D3D10_SB_INSTRUCTION_TEST_NONZERO
: D3D10_SB_INSTRUCTION_TEST_ZERO;
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test));
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(bool_constant_test_register);
@ -6735,7 +6754,7 @@ void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index,
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, write_mask, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXXXX, 1));
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(temp2);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
@ -6803,7 +6822,7 @@ void DxbcShaderTranslator::SwapVertexData(uint32_t vfetch_index,
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, write_mask, 1));
shader_code_.push_back(system_temp_pv_);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleYYYY, 1));
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(temp2);
shader_code_.push_back(
EncodeVectorSwizzledOperand(D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXYZW, 1));
@ -7050,8 +7069,8 @@ void DxbcShaderTranslator::ProcessLoopStartInstruction(
// Short-circuit if loop counter is 0.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(D3D10_SB_INSTRUCTION_TEST_ZERO));
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(D3D10_SB_INSTRUCTION_TEST_ZERO) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(system_temp_loop_count_);
@ -7246,8 +7265,8 @@ void DxbcShaderTranslator::ProcessJumpInstruction(
++stat_.uint_instruction_count;
// Open the `if`.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test));
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(bool_constant_test_register);
@ -7258,8 +7277,8 @@ void DxbcShaderTranslator::ProcessJumpInstruction(
} else if (instr.type == ParsedJumpInstruction::Type::kPredicated) {
// Called outside of exec - need to check the predicate explicitly.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) |
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test));
ENCODE_D3D10_SB_INSTRUCTION_TEST_BOOLEAN(test) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 2, 1));
shader_code_.push_back(system_temp_ps_pc_p0_a0_);
@ -9863,8 +9882,8 @@ void DxbcShaderTranslator::ProcessVectorAluInstruction(
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0110, 1));
shader_code_.push_back(cube_mask_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_TEMP, kSwizzleXXXX, 1));
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(cube_mask_temp);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
@ -12283,7 +12302,7 @@ void DxbcShaderTranslator::WriteInputSignature() {
shader_object_.push_back(6);
// D3D_REGISTER_COMPONENT_UINT32.
shader_object_.push_back(1);
shader_object_.push_back(kVSInVertexIndexRegister);
shader_object_.push_back(uint32_t(InOutRegister::kVSInVertexIndex));
// x present, x used (always written to GPR 0).
shader_object_.push_back(0x1 | (0x1 << 8));
@ -12306,7 +12325,7 @@ void DxbcShaderTranslator::WriteInputSignature() {
shader_object_.push_back(0);
// D3D_REGISTER_COMPONENT_FLOAT32.
shader_object_.push_back(3);
shader_object_.push_back(kPSInInterpolatorRegister + i);
shader_object_.push_back(uint32_t(InOutRegister::kPSInInterpolators) + i);
// Interpolators are copied to GPRs in the beginning of the shader. If
// there's a register to copy to, this interpolator is used.
uint32_t interpolator_used =
@ -12322,7 +12341,7 @@ void DxbcShaderTranslator::WriteInputSignature() {
shader_object_.push_back(kPointParametersTexCoord);
shader_object_.push_back(0);
shader_object_.push_back(3);
shader_object_.push_back(kPSInPointParametersRegister);
shader_object_.push_back(uint32_t(InOutRegister::kPSInPointParameters));
shader_object_.push_back(0x7 |
(is_depth_only_pixel_shader_ ? 0 : (0x3 << 8)));
@ -12334,7 +12353,7 @@ void DxbcShaderTranslator::WriteInputSignature() {
// D3D_NAME_POSITION.
shader_object_.push_back(1);
shader_object_.push_back(3);
shader_object_.push_back(kPSInPositionRegister);
shader_object_.push_back(uint32_t(InOutRegister::kPSInPosition));
shader_object_.push_back(0xF | ((edram_rov_used_ ? 0x7 : 0x3) << 8));
// Is front face. Always used because ps_param_gen is handled dynamically.
@ -12343,7 +12362,7 @@ void DxbcShaderTranslator::WriteInputSignature() {
// D3D_NAME_IS_FRONT_FACE.
shader_object_.push_back(9);
shader_object_.push_back(1);
shader_object_.push_back(kPSInFrontFaceRegister);
shader_object_.push_back(uint32_t(InOutRegister::kPSInFrontFace));
if (edram_rov_used_) {
shader_object_.push_back(0x1 | (0x1 << 8));
} else {
@ -12398,7 +12417,8 @@ void DxbcShaderTranslator::WriteOutputSignature() {
shader_object_.push_back(0);
// D3D_REGISTER_COMPONENT_FLOAT32.
shader_object_.push_back(3);
shader_object_.push_back(kVSOutInterpolatorRegister + i);
shader_object_.push_back(uint32_t(InOutRegister::kVSOutInterpolators) +
i);
// Unlike in ISGN, the second byte contains the unused components, not the
// used ones. All components are always used because they are reset to 0.
shader_object_.push_back(0xF);
@ -12410,7 +12430,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
shader_object_.push_back(kPointParametersTexCoord);
shader_object_.push_back(0);
shader_object_.push_back(3);
shader_object_.push_back(kVSOutPointParametersRegister);
shader_object_.push_back(uint32_t(InOutRegister::kVSOutPointParameters));
shader_object_.push_back(0x7 | (0x8 << 8));
// Position.
@ -12419,7 +12439,7 @@ void DxbcShaderTranslator::WriteOutputSignature() {
// D3D_NAME_POSITION.
shader_object_.push_back(1);
shader_object_.push_back(3);
shader_object_.push_back(kVSOutPositionRegister);
shader_object_.push_back(uint32_t(InOutRegister::kVSOutPosition));
shader_object_.push_back(0xF);
// Write the semantic names.
@ -12505,10 +12525,9 @@ void DxbcShaderTranslator::WriteOutputSignature() {
void DxbcShaderTranslator::WriteShaderCode() {
uint32_t chunk_position_dwords = uint32_t(shader_object_.size());
D3D10_SB_TOKENIZED_PROGRAM_TYPE program_type =
IsDXBCVertexShader() ? D3D10_SB_VERTEX_SHADER : D3D10_SB_PIXEL_SHADER;
shader_object_.push_back(
ENCODE_D3D10_SB_TOKENIZED_PROGRAM_VERSION_TOKEN(program_type, 5, 1));
shader_object_.push_back(ENCODE_D3D10_SB_TOKENIZED_PROGRAM_VERSION_TOKEN(
IsDXBCVertexShader() ? D3D10_SB_VERTEX_SHADER : D3D10_SB_PIXEL_SHADER, 5,
1));
// Reserve space for the length token.
shader_object_.push_back(0);
@ -12528,8 +12547,8 @@ void DxbcShaderTranslator::WriteShaderCode() {
// invariance (needed even in pixel shaders for oDepth invariance).
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1) |
D3D11_1_SB_GLOBAL_FLAG_SKIP_OPTIMIZATION);
D3D11_1_SB_GLOBAL_FLAG_SKIP_OPTIMIZATION |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(1));
// Constant buffers, from most frequently accessed to least frequently accessed
// (the order is a hint to the driver according to the DXBC header).
@ -12541,11 +12560,11 @@ void DxbcShaderTranslator::WriteShaderCode() {
}
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7) |
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
float_constants_dynamic_indexed_
? D3D10_SB_CONSTANT_BUFFER_DYNAMIC_INDEXED
: D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED));
: D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_object_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
shader_object_.push_back(cbuffer_index_float_constants_);
@ -12557,9 +12576,9 @@ void DxbcShaderTranslator::WriteShaderCode() {
if (cbuffer_index_system_constants_ != kCbufferIndexUnallocated) {
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7) |
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED));
D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_object_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
shader_object_.push_back(cbuffer_index_system_constants_);
@ -12571,9 +12590,9 @@ void DxbcShaderTranslator::WriteShaderCode() {
if (cbuffer_index_fetch_constants_ != kCbufferIndexUnallocated) {
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7) |
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED));
D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_object_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
shader_object_.push_back(cbuffer_index_fetch_constants_);
@ -12585,11 +12604,11 @@ void DxbcShaderTranslator::WriteShaderCode() {
if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) {
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7) |
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
bool_loop_constants_dynamic_indexed_
? D3D10_SB_CONSTANT_BUFFER_DYNAMIC_INDEXED
: D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED));
: D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_object_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
shader_object_.push_back(cbuffer_index_bool_loop_constants_);
@ -12604,8 +12623,8 @@ void DxbcShaderTranslator::WriteShaderCode() {
const SamplerBinding& sampler_binding = sampler_bindings_[i];
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_SAMPLER) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6) |
ENCODE_D3D10_SB_SAMPLER_MODE(D3D10_SB_SAMPLER_MODE_DEFAULT));
ENCODE_D3D10_SB_SAMPLER_MODE(D3D10_SB_SAMPLER_MODE_DEFAULT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(6));
shader_object_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_SAMPLER, kSwizzleXYZW, 3));
shader_object_.push_back(i);
@ -12689,7 +12708,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
shader_object_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0b0001, 1));
shader_object_.push_back(kVSInVertexIndexRegister);
shader_object_.push_back(uint32_t(InOutRegister::kVSInVertexIndex));
shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_VERTEX_ID));
++stat_.dcl_count;
// Interpolator output.
@ -12699,7 +12718,8 @@ void DxbcShaderTranslator::WriteShaderCode() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_object_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b1111, 1));
shader_object_.push_back(kVSOutInterpolatorRegister + i);
shader_object_.push_back(uint32_t(InOutRegister::kVSOutInterpolators) +
i);
++stat_.dcl_count;
}
// Point parameters output.
@ -12708,7 +12728,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_object_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b0111, 1));
shader_object_.push_back(kVSOutPointParametersRegister);
shader_object_.push_back(uint32_t(InOutRegister::kVSOutPointParameters));
++stat_.dcl_count;
// Position output.
shader_object_.push_back(
@ -12716,7 +12736,7 @@ void DxbcShaderTranslator::WriteShaderCode() {
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
shader_object_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_OUTPUT, 0b1111, 1));
shader_object_.push_back(kVSOutPositionRegister);
shader_object_.push_back(uint32_t(InOutRegister::kVSOutPosition));
shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_POSITION));
++stat_.dcl_count;
} else if (IsDXBCPixelShader()) {
@ -12727,50 +12747,51 @@ void DxbcShaderTranslator::WriteShaderCode() {
for (uint32_t i = 0; i < interpolator_count; ++i) {
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) |
ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(
D3D10_SB_INTERPOLATION_LINEAR));
D3D10_SB_INTERPOLATION_LINEAR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_object_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0b1111, 1));
shader_object_.push_back(kPSInInterpolatorRegister + i);
shader_object_.push_back(uint32_t(InOutRegister::kPSInInterpolators) +
i);
++stat_.dcl_count;
}
// Point parameters input (only coordinates, not size, needed).
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3) |
ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(
D3D10_SB_INTERPOLATION_LINEAR));
D3D10_SB_INTERPOLATION_LINEAR) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(3));
shader_object_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0b0011, 1));
shader_object_.push_back(kPSInPointParametersRegister);
shader_object_.push_back(uint32_t(InOutRegister::kPSInPointParameters));
++stat_.dcl_count;
}
// Position input (only XY needed for ps_param_gen, but for ROV access, XYZ
// are needed).
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS_SIV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4) |
ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(
D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE));
D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
shader_object_.push_back(EncodeVectorMaskedOperand(
D3D10_SB_OPERAND_TYPE_INPUT, edram_rov_used_ ? 0b0111 : 0b0011, 1));
shader_object_.push_back(kPSInPositionRegister);
shader_object_.push_back(uint32_t(InOutRegister::kPSInPosition));
shader_object_.push_back(ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_POSITION));
++stat_.dcl_count;
if (edram_rov_used_ || !is_depth_only_pixel_shader_) {
// Is front face.
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_INPUT_PS_SGV) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4) |
// This needs to be set according to FXC output, despite the
// description in d3d12TokenizedProgramFormat.hpp saying bits 11:23
// are ignored.
ENCODE_D3D10_SB_INPUT_INTERPOLATION_MODE(
D3D10_SB_INTERPOLATION_CONSTANT));
D3D10_SB_INTERPOLATION_CONSTANT) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(4));
shader_object_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_INPUT, 0b0001, 1));
shader_object_.push_back(kPSInFrontFaceRegister);
shader_object_.push_back(uint32_t(InOutRegister::kPSInFrontFace));
shader_object_.push_back(
ENCODE_D3D10_SB_NAME(D3D10_SB_NAME_IS_FRONT_FACE));
++stat_.dcl_count;

View File

@ -626,22 +626,20 @@ class DxbcShaderTranslator : public ShaderTranslator {
static constexpr uint32_t kInterpolatorCount = 16;
static constexpr uint32_t kPointParametersTexCoord = kInterpolatorCount;
enum class InOutRegister : uint32_t {
// IF ANY OF THESE ARE CHANGED, WriteInputSignature and WriteOutputSignature
// MUST BE UPDATED!
kVSInVertexIndex = 0,
static constexpr uint32_t kVSInVertexIndexRegister = 0;
static constexpr uint32_t kVSOutInterpolatorRegister = 0;
static constexpr uint32_t kVSOutPointParametersRegister =
kVSOutInterpolatorRegister + kInterpolatorCount;
static constexpr uint32_t kVSOutPositionRegister =
kVSOutPointParametersRegister + 1;
kVSOutInterpolators = 0,
kVSOutPointParameters = kVSOutInterpolators + kInterpolatorCount,
kVSOutPosition,
static constexpr uint32_t kPSInInterpolatorRegister = 0;
static constexpr uint32_t kPSInPointParametersRegister =
kPSInInterpolatorRegister + kInterpolatorCount;
static constexpr uint32_t kPSInPositionRegister =
kPSInPointParametersRegister + 1;
static constexpr uint32_t kPSInFrontFaceRegister = kPSInPositionRegister + 1;
kPSInInterpolators = 0,
kPSInPointParameters = kPSInInterpolators + kInterpolatorCount,
kPSInPosition,
kPSInFrontFace,
};
static constexpr uint32_t kSwizzleXYZW = 0b11100100;
static constexpr uint32_t kSwizzleXXXX = 0b00000000;