Count packed bytes used by a constant map.

This commit is contained in:
Ben Vanik 2016-02-18 01:06:28 -08:00
parent 5ab0af9e6d
commit 618ca80011
3 changed files with 45 additions and 5 deletions

View File

@ -64,6 +64,22 @@ constexpr uint32_t select_bits(uint32_t value, uint32_t a, uint32_t b) {
return (value & make_bitmask(a, b)) >> a;
}
// Population count: returns the number of bits set to 1 in a 32-bit value.
// Branch-free parallel reduction: counts are first formed per 2-bit pair, then
// merged into 4-bit and 8-bit fields, and finally the four byte counts are
// summed into the top byte by a multiply and shifted down.
inline uint32_t bit_count(uint32_t v) {
  // Each 2-bit field now holds the number of set bits it originally contained.
  const uint32_t pair_counts = v - ((v >> 1) & 0x55555555u);
  // Merge neighbouring pairs so that each nibble holds a count in [0, 4].
  const uint32_t nibble_counts =
      (pair_counts & 0x33333333u) + ((pair_counts >> 2) & 0x33333333u);
  // Merge neighbouring nibbles so that each byte holds a count in [0, 8].
  const uint32_t byte_counts =
      (nibble_counts + (nibble_counts >> 4)) & 0x0F0F0F0Fu;
  // The multiply accumulates all four byte counts into the most significant
  // byte; the shift moves that sum into the low bits.
  return (byte_counts * 0x01010101u) >> 24;
}
// Population count: returns the number of bits set to 1 in a 64-bit value.
// Branch-free parallel reduction: counts are formed per 2-bit pair, merged
// into nibbles and bytes, and then the eight byte counts are folded together
// with shifts until the total sits in the low byte.
inline uint32_t bit_count(uint64_t v) {
  constexpr uint64_t kPairMask = 0x5555555555555555ull;
  constexpr uint64_t kNibbleMask = 0x3333333333333333ull;
  constexpr uint64_t kByteMask = 0x0F0F0F0F0F0F0F0Full;

  // Per-pair counts, then per-nibble counts, then per-byte counts.
  uint64_t acc = (v & kPairMask) + ((v >> 1) & kPairMask);
  acc = (acc & kNibbleMask) + ((acc >> 2) & kNibbleMask);
  acc = (acc + (acc >> 4)) & kByteMask;

  // Fold the byte counts downwards; only the low byte is meaningful after
  // this, and nothing can carry into it from above.
  acc += acc >> 8;
  acc += acc >> 16;
  acc += acc >> 32;

  // The total is at most 64, so seven bits are enough to hold it.
  return static_cast<uint32_t>(acc & 0x7F);
}
// lzcnt instruction, typed for integers of all sizes.
// The number of leading zero bits in the value parameter. If value is zero, the
// return value is the size of the input operand (8, 16, 32, or 64). If the most

View File

@ -504,8 +504,11 @@ class Shader {
// Each bit corresponds to a storage index [0-31].
uint32_t int_bitmap;
// Bitmap of all kConstantBool registers read by the shader.
// Each bit corresponds to a storage index [0-31].
uint32_t bool_bitmap;
// Each bit corresponds to a storage index [0-255].
uint32_t bool_bitmap[256 / 32];
// Computed byte count of all registers required when packed.
uint32_t packed_byte_length;
};
Shader(ShaderType shader_type, uint64_t ucode_data_hash,

View File

@ -125,6 +125,24 @@ bool ShaderTranslator::Translate(Shader* shader) {
TranslateBlocks();
// Compute total bytes used by the register map.
// This saves us work later when we need to pack them.
constant_register_map_.packed_byte_length = 0;
for (int i = 0; i < 4; ++i) {
// Each bit indicates a vec4 (4 floats).
constant_register_map_.packed_byte_length +=
4 * 4 * xe::bit_count(constant_register_map_.float_bitmap[i]);
}
// Each bit indicates a single word.
constant_register_map_.packed_byte_length +=
4 * xe::bit_count(constant_register_map_.int_bitmap);
// Direct map between words and words we upload.
for (int i = 0; i < 4; ++i) {
if (constant_register_map_.bool_bitmap[i]) {
constant_register_map_.packed_byte_length += 4;
}
}
shader->errors_ = std::move(errors_);
shader->translated_binary_ = CompleteTranslation();
shader->ucode_disassembly_ = ucode_disasm_buffer_.to_string();
@ -490,7 +508,8 @@ void ShaderTranslator::TranslateControlFlowCondExec(
i.instruction_count = cf.count();
i.type = ParsedExecInstruction::Type::kConditional;
i.bool_constant_index = cf.bool_address();
constant_register_map_.bool_bitmap |= 1 << i.bool_constant_index;
constant_register_map_.bool_bitmap[i.bool_constant_index / 32] |=
1 << (i.bool_constant_index % 32);
i.condition = cf.condition();
switch (cf.opcode()) {
case ControlFlowOpcode::kCondExec:
@ -567,7 +586,8 @@ void ShaderTranslator::TranslateControlFlowCondCall(
} else {
i.type = ParsedCallInstruction::Type::kConditional;
i.bool_constant_index = cf.bool_address();
constant_register_map_.bool_bitmap |= 1 << i.bool_constant_index;
constant_register_map_.bool_bitmap[i.bool_constant_index / 32] |=
1 << (i.bool_constant_index % 32);
i.condition = cf.condition();
}
@ -599,7 +619,8 @@ void ShaderTranslator::TranslateControlFlowCondJmp(
} else {
i.type = ParsedJumpInstruction::Type::kConditional;
i.bool_constant_index = cf.bool_address();
constant_register_map_.bool_bitmap |= 1 << i.bool_constant_index;
constant_register_map_.bool_bitmap[i.bool_constant_index / 32] |=
1 << (i.bool_constant_index % 32);
i.condition = cf.condition();
}