[GPU] Remove unused kConstantInt/Bool shader operands

This commit is contained in:
Triang3l 2020-02-02 21:05:00 +03:00
parent 38bf6c8822
commit 4061445087
7 changed files with 34 additions and 226 deletions

View File

@ -3053,23 +3053,15 @@ bool D3D12CommandProcessor::UpdateBindings(
write_float_constant_view_pixel = true;
}
if (!cbuffer_bindings_bool_loop_.up_to_date) {
uint32_t* bool_loop_constants =
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
frame_current_, 768, nullptr, nullptr,
&cbuffer_bindings_bool_loop_.buffer_address));
uint8_t* bool_loop_constants = constant_buffer_pool_->Request(
frame_current_, 256, nullptr, nullptr,
&cbuffer_bindings_bool_loop_.buffer_address);
if (bool_loop_constants == nullptr) {
return false;
}
// Bool and loop constants are quadrupled to allow dynamic indexing.
for (uint32_t i = 0; i < 40; ++i) {
uint32_t bool_loop_constant =
regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 + i].u32;
uint32_t* bool_loop_constant_vector = bool_loop_constants + (i << 2);
bool_loop_constant_vector[0] = bool_loop_constant;
bool_loop_constant_vector[1] = bool_loop_constant;
bool_loop_constant_vector[2] = bool_loop_constant;
bool_loop_constant_vector[3] = bool_loop_constant;
}
std::memcpy(bool_loop_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
(8 + 32) * sizeof(uint32_t));
cbuffer_bindings_bool_loop_.up_to_date = true;
write_bool_loop_constant_view = true;
}
@ -3226,7 +3218,7 @@ bool D3D12CommandProcessor::UpdateBindings(
gpu_handle_bool_loop_constants_ = view_gpu_handle;
constant_buffer_desc.BufferLocation =
cbuffer_bindings_bool_loop_.buffer_address;
constant_buffer_desc.SizeInBytes = 768;
constant_buffer_desc.SizeInBytes = 256;
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view;
view_gpu_handle.ptr += descriptor_size_view;

View File

@ -162,7 +162,6 @@ void DxbcShaderTranslator::Reset() {
system_constants_used_ = 0;
float_constants_dynamic_indexed_ = false;
bool_loop_constants_dynamic_indexed_ = false;
float_constant_index_offsets_.clear();
system_temp_count_current_ = 0;
@ -2039,173 +2038,6 @@ void DxbcShaderTranslator::LoadDxbcSourceOperand(
}
break;
case InstructionStorageSource::kConstantInt: {
// ***********************************************************************
// Loop constant
// ***********************************************************************
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
}
// Convert to float and store in the intermediate register.
// The constant buffer contains each integer replicated in XYZW so dynamic
// indexing is possible.
dxbc_operand.type = DxbcSourceOperand::Type::kIntermediateRegister;
if (dxbc_operand.intermediate_register ==
DxbcSourceOperand::kIntermediateRegisterNone) {
dxbc_operand.intermediate_register = PushSystemTemp();
}
bool is_static = operand.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic;
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ITOF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(is_static ? 7 : 9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3,
D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
is_static ? D3D10_SB_OPERAND_INDEX_IMMEDIATE32
: D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE));
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
// 8 to skip bool constants.
shader_code_.push_back(8 + uint32_t(operand.storage_index));
if (!is_static) {
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, dynamic_address_component, 1));
shader_code_.push_back(dynamic_address_register);
bool_loop_constants_dynamic_indexed_ = true;
}
++stat_.instruction_count;
++stat_.conversion_instruction_count;
} break;
case InstructionStorageSource::kConstantBool: {
// ***********************************************************************
// Boolean constant
// ***********************************************************************
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
}
// Extract, convert to float and store in the intermediate register.
// The constant buffer contains each 32-bit vector replicated in XYZW so
// dynamic indexing is possible.
dxbc_operand.type = DxbcSourceOperand::Type::kIntermediateRegister;
if (dxbc_operand.intermediate_register ==
DxbcSourceOperand::kIntermediateRegisterNone) {
dxbc_operand.intermediate_register = PushSystemTemp();
}
if (operand.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic) {
// Extract the bit directly.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(uint32_t(operand.storage_index) & 31);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
shader_code_.push_back(uint32_t(operand.storage_index) >> 5);
++stat_.instruction_count;
++stat_.uint_instruction_count;
} else {
bool_loop_constants_dynamic_indexed_ = true;
uint32_t constant_address_register = dynamic_address_register;
uint32_t constant_address_component = dynamic_address_component;
if (operand.storage_index != 0) {
// Has an offset - add it.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, constant_address_component, 1));
shader_code_.push_back(constant_address_register);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(uint32_t(operand.storage_index));
++stat_.instruction_count;
++stat_.int_instruction_count;
constant_address_register = dxbc_operand.intermediate_register;
constant_address_component = 0;
}
// Split the index into constant index and bit offset and store them in
// the intermediate register.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(5);
shader_code_.push_back(3);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0);
shader_code_.push_back(5);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, constant_address_component, 1));
shader_code_.push_back(constant_address_register);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Extract the bits.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0,
3, D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
D3D10_SB_OPERAND_INDEX_RELATIVE));
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
++stat_.instruction_count;
++stat_.uint_instruction_count;
}
// Convert the bit to float and replicate it.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UTOF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
++stat_.instruction_count;
++stat_.conversion_instruction_count;
} break;
default:
// Fall back to constant zeros for invalid types.
dxbc_operand.index = constant_component_values;
@ -2819,10 +2651,11 @@ void DxbcShaderTranslator::UpdateExecConditionals(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(bool_constant_test_register);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
(bool_constant_index >> 5) & 3, 3));
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
shader_code_.push_back(bool_constant_index >> 5);
shader_code_.push_back(bool_constant_index >> 7);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1u << (bool_constant_index & 31));
@ -3109,11 +2942,11 @@ void DxbcShaderTranslator::ProcessLoopStartInstruction(
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, instr.loop_constant_index & 3, 3));
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
// 8 because of bool constants.
shader_code_.push_back(8 + instr.loop_constant_index);
// Skip the 2 vectors holding the 8 packed bool constant dwords.
shader_code_.push_back(2 + (instr.loop_constant_index >> 2));
++stat_.instruction_count;
++stat_.uint_instruction_count;
@ -3312,12 +3145,12 @@ void DxbcShaderTranslator::ProcessLoopEndInstruction(
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(16);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, instr.loop_constant_index & 3, 3));
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
// 8 because of bool constants.
shader_code_.push_back(8 + instr.loop_constant_index);
// Skip the 2 vectors holding the 8 packed bool constant dwords.
shader_code_.push_back(2 + (instr.loop_constant_index >> 2));
++stat_.instruction_count;
++stat_.uint_instruction_count;
@ -3431,8 +3264,6 @@ const DxbcShaderTranslator::RdefType DxbcShaderTranslator::rdef_types_[size_t(
{nullptr, 1, 19, 1, 4, 2, 0, RdefTypeIndex::kUint4, nullptr},
// kUint4Array8
{nullptr, 1, 19, 1, 4, 8, 0, RdefTypeIndex::kUint4, nullptr},
// kUint4Array32
{nullptr, 1, 19, 1, 4, 32, 0, RdefTypeIndex::kUint4, nullptr},
// kUint4Array48
{nullptr, 1, 19, 1, 4, 48, 0, RdefTypeIndex::kUint4, nullptr},
};
@ -3720,10 +3551,10 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) {
shader_object_.push_back(constant_name_offset_bool);
shader_object_.push_back(0);
shader_object_.push_back(8 * 4 * sizeof(uint32_t));
shader_object_.push_back(2 * 4 * sizeof(uint32_t));
shader_object_.push_back(0x2);
shader_object_.push_back(types_offset +
uint32_t(RdefTypeIndex::kUint4Array8) * type_size);
uint32_t(RdefTypeIndex::kUint4Array2) * type_size);
shader_object_.push_back(0);
shader_object_.push_back(0xFFFFFFFFu);
shader_object_.push_back(0);
@ -3731,11 +3562,11 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
shader_object_.push_back(0);
new_offset += constant_size;
shader_object_.push_back(constant_name_offset_loop);
shader_object_.push_back(2 * 4 * sizeof(uint32_t));
shader_object_.push_back(8 * 4 * sizeof(uint32_t));
shader_object_.push_back(32 * 4 * sizeof(uint32_t));
shader_object_.push_back(0x2);
shader_object_.push_back(
types_offset + uint32_t(RdefTypeIndex::kUint4Array32) * type_size);
shader_object_.push_back(types_offset +
uint32_t(RdefTypeIndex::kUint4Array8) * type_size);
shader_object_.push_back(0);
shader_object_.push_back(0xFFFFFFFFu);
shader_object_.push_back(0);
@ -3813,7 +3644,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
// Bool constants and loop constants are separate for easier debugging.
shader_object_.push_back(2);
shader_object_.push_back(constant_offset_bool_loop);
shader_object_.push_back((8 + 32) * 4 * sizeof(uint32_t));
shader_object_.push_back((2 + 8) * 4 * sizeof(uint32_t));
shader_object_.push_back(0);
shader_object_.push_back(0);
} else if (i == cbuffer_index_fetch_constants_) {
@ -4493,16 +4324,14 @@ void DxbcShaderTranslator::WriteShaderCode() {
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
bool_loop_constants_dynamic_indexed_
? D3D10_SB_CONSTANT_BUFFER_DYNAMIC_INDEXED
: D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) |
D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_object_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
shader_object_.push_back(cbuffer_index_bool_loop_constants_);
shader_object_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
shader_object_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
shader_object_.push_back(40);
shader_object_.push_back(10);
shader_object_.push_back(0);
}

View File

@ -919,12 +919,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
kFloat4Array6,
// Float constants - size written dynamically.
kFloat4ConstantArray,
// Front/back stencil, render target keep masks.
// Bool constants, front/back stencil, render target keep masks.
kUint4Array2,
// Bool constants.
kUint4Array8,
// Loop constants.
kUint4Array32,
kUint4Array8,
// Fetch constants.
kUint4Array48,
@ -978,7 +976,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
// Whether constants are dynamically indexed and need to be marked as such in
// dcl_constantBuffer.
bool float_constants_dynamic_indexed_;
bool bool_loop_constants_dynamic_indexed_;
// Offsets of float constant indices in shader_code_, for remapping in
// CompleteTranslation (initially, at these offsets, guest float constant

View File

@ -139,10 +139,6 @@ enum class InstructionStorageSource {
kRegister,
// Source is stored in a float constant indexed by storage_index [0-511].
kConstantFloat,
// Source is stored in a float constant indexed by storage_index [0-31].
kConstantInt,
// Source is stored in a float constant indexed by storage_index [0-255].
kConstantBool,
// Source is stored in a vertex fetch constant indexed by storage_index
// [0-95].
kVertexFetchConstant,
@ -568,10 +564,10 @@ class Shader {
// base, so bit 0 in a vertex shader is register 0, and bit 0 in a fragment
// shader is register 256.
uint64_t float_bitmap[256 / 64];
// Bitmap of all kConstantInt registers read by the shader.
// Bitmap of all loop constants read by the shader.
// Each bit corresponds to a storage index [0-31].
uint32_t int_bitmap;
// Bitmap of all kConstantBool registers read by the shader.
uint32_t loop_bitmap;
// Bitmap of all bool constants read by the shader.
// Each bit corresponds to a storage index [0-255].
uint32_t bool_bitmap[256 / 32];

View File

@ -183,7 +183,7 @@ bool ShaderTranslator::TranslateInternal(Shader* shader,
4 * 4 * constant_register_map_.float_count;
// Each bit indicates a single word.
constant_register_map_.packed_byte_length +=
4 * xe::bit_count(constant_register_map_.int_bitmap);
4 * xe::bit_count(constant_register_map_.loop_bitmap);
// Direct map between words and words we upload.
for (int i = 0; i < 8; ++i) {
if (constant_register_map_.bool_bitmap[i]) {
@ -714,7 +714,7 @@ void ShaderTranslator::TranslateControlFlowLoopStart(
ParsedLoopStartInstruction i;
i.dword_index = cf_index_;
i.loop_constant_index = cf.loop_id();
constant_register_map_.int_bitmap |= 1 << i.loop_constant_index;
constant_register_map_.loop_bitmap |= 1 << i.loop_constant_index;
i.is_repeat = cf.is_repeat();
i.loop_skip_address = cf.address();
@ -730,7 +730,7 @@ void ShaderTranslator::TranslateControlFlowLoopEnd(
i.is_predicated_break = cf.is_predicated_break();
i.predicate_condition = cf.condition();
i.loop_constant_index = cf.loop_id();
constant_register_map_.int_bitmap |= 1 << i.loop_constant_index;
constant_register_map_.loop_bitmap |= 1 << i.loop_constant_index;
i.loop_body_address = cf.address();
i.Disassemble(&ucode_disasm_buffer_);

View File

@ -93,12 +93,6 @@ void DisassembleSourceOperand(const InstructionOperand& op, StringBuffer* out) {
case InstructionStorageSource::kConstantFloat:
out->Append('c');
break;
case InstructionStorageSource::kConstantInt:
out->Append('i');
break;
case InstructionStorageSource::kConstantBool:
out->Append('b');
break;
case InstructionStorageSource::kTextureFetchConstant:
case InstructionStorageSource::kVertexFetchConstant:
assert_always();

View File

@ -400,7 +400,7 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
}
}
for (int i = 0; i < 32; ++i) {
if (constant_register_map.int_bitmap & (1 << i)) {
if (constant_register_map.loop_bitmap & (1 << i)) {
xe::store<uint32_t>(dest_ptr,
values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + i].u32);
dest_ptr += 4;