Merge remote-tracking branch 'upstream/master' into canary-old-update

This commit is contained in:
illusion98 2020-02-06 04:38:47 -05:00
commit 4ddfffd009
11 changed files with 152 additions and 333 deletions

View File

@ -580,11 +580,14 @@ void X64ThunkEmitter::EmitSaveVolatileRegs() {
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rax);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rdx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], r8);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], r9);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r10);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r11);
#if XE_PLATFORM_LINUX
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi);
#endif
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r8);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r9);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r10);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r11);
// vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm0);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm1);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm2);
@ -594,35 +597,40 @@ void X64ThunkEmitter::EmitSaveVolatileRegs() {
}
// Emits loads that restore volatile registers from the StackLayout::Thunk
// frame addressed through rsp: rcx, rdx, r8-r11 and xmm1-xmm5 on every
// platform, plus rsi and rdi when XE_PLATFORM_LINUX is set.
// The rax and xmm0 loads are commented out, so those two registers are left
// as they are (presumably because they carry return values - TODO confirm).
void X64ThunkEmitter::EmitLoadVolatileRegs() {
// Load volatile registers from our stack frame.
// mov(rax, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
// Slots r[3]/r[4] are only read on Linux; r8-r11 use r[5]-r[8] regardless.
#if XE_PLATFORM_LINUX
mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
#endif
mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
// vmovaps(xmm0, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
vmovaps(xmm1, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
vmovaps(xmm2, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
vmovaps(xmm3, qword[rsp + offsetof(StackLayout::Thunk, xmm[3])]);
vmovaps(xmm4, qword[rsp + offsetof(StackLayout::Thunk, xmm[4])]);
vmovaps(xmm5, qword[rsp + offsetof(StackLayout::Thunk, xmm[5])]);
// NOTE(review): the loads below repeat rcx/rdx/r8-r11 but read r8-r11 from
// slots r[3]-r[6], which disagrees with the r[5]-r[8] slots used above. They
// look like pre-merge lines kept by the diff rendering - confirm which set is
// current before relying on this function.
// mov(rax, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
}
// Emits stores that save nonvolatile registers into the StackLayout::Thunk
// frame addressed through rsp. rbx, rbp and r12-r15 are stored on every
// platform; rcx, rsi, rdi and the xmm6.. registers are stored only when
// XE_PLATFORM_WIN32 is set.
void X64ThunkEmitter::EmitSaveNonvolatileRegs() {
// Preserve nonvolatile registers.
mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rbx);
// NOTE(review): the next two stores put rcx in r[1] and rbp in r[2], while the
// lines after them put rbp in r[1] and (on Win32) rcx in r[2]. The first pair
// looks like pre-merge lines kept by the diff rendering - confirm.
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rbp);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rbp);
// Slots r[2]-r[4] are only written on Win32; r12-r15 always use r[5]-r[8].
#if XE_PLATFORM_WIN32
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rcx);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi);
#endif
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r12);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r13);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r14);
mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r15);
// SysV does not have nonvolatile XMM registers.
#if XE_PLATFORM_WIN32
// xmm[N] holds xmm(6 + N) in the stores visible here.
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm6);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm7);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm8);
@ -633,9 +641,23 @@ void X64ThunkEmitter::EmitSaveNonvolatileRegs() {
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[7])], xmm13);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[8])], xmm14);
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[9])], xmm15);
#endif
}
// Emits loads that restore nonvolatile registers from the StackLayout::Thunk
// frame addressed through rsp, reading the same slots that
// EmitSaveNonvolatileRegs writes: rbx, rbp and r12-r15 on every platform;
// rcx, rsi, rdi and the xmm6.. registers only when XE_PLATFORM_WIN32 is set.
void X64ThunkEmitter::EmitLoadNonvolatileRegs() {
mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
// Slots r[2]-r[4] are only read on Win32; r12-r15 always use r[5]-r[8].
#if XE_PLATFORM_WIN32
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
#endif
mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
#if XE_PLATFORM_WIN32
// xmm(6 + N) is read back from xmm[N] in the loads visible here.
vmovaps(xmm6, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
vmovaps(xmm7, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
vmovaps(xmm8, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
@ -646,16 +668,7 @@ void X64ThunkEmitter::EmitLoadNonvolatileRegs() {
vmovaps(xmm13, qword[rsp + offsetof(StackLayout::Thunk, xmm[7])]);
vmovaps(xmm14, qword[rsp + offsetof(StackLayout::Thunk, xmm[8])]);
vmovaps(xmm15, qword[rsp + offsetof(StackLayout::Thunk, xmm[9])]);
// NOTE(review): the general-purpose loads below repeat the ones at the top of
// the function but read rcx from r[1] and rbp from r[2], the reverse of the
// rbp/r[1] and rcx/r[2] pairing used above. They look like pre-merge lines
// kept by the diff rendering - confirm which set is current.
mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
#endif
}
} // namespace x64

View File

@ -27,78 +27,71 @@ class StackLayout {
* NOTE: stack must always be 16b aligned.
*
* Thunk stack:
* +------------------+
* | arg temp, 3 * 8 | rsp + 0
* | |
* | |
* +------------------+
* | scratch, 16b | rsp + 24
* | |
* +------------------+
* | rbx | rsp + 40
* +------------------+
* | rcx / context | rsp + 48
* +------------------+
* | rbp | rsp + 56
* +------------------+
* | rsi | rsp + 64
* +------------------+
* | rdi | rsp + 72
* +------------------+
* | r12 | rsp + 80
* +------------------+
* | r13 | rsp + 88
* +------------------+
* | r14 | rsp + 96
* +------------------+
* | r15 | rsp + 104
* +------------------+
* | xmm6/0 | rsp + 112
* | |
* +------------------+
* | xmm7/1 | rsp + 128
* | |
* +------------------+
* | xmm8/2 | rsp + 144
* | |
* +------------------+
* | xmm9/3 | rsp + 160
* | |
* +------------------+
* | xmm10/4 | rsp + 176
* | |
* +------------------+
* | xmm11/5 | rsp + 192
* | |
* +------------------+
* | xmm12 | rsp + 208
* | |
* +------------------+
* | xmm13 | rsp + 224
* | |
* +------------------+
* | xmm14 | rsp + 240
* | |
* +------------------+
* | xmm15 | rsp + 256
* | |
* +------------------+
* | scratch, 8b | rsp + 272
* | |
* +------------------+
* | (return address) | rsp + 280
* +------------------+
* | (rcx home) | rsp + 288
* +------------------+
* | (rdx home) | rsp + 296
* +------------------+
* Non-Volatile Volatile
* +------------------+------------------+
* | arg temp, 3 * 8 | arg temp, 3 * 8 | rsp + 0x000
* | | |
* | | |
* +------------------+------------------+
* | rbx | (unused) | rsp + 0x018
* +------------------+------------------+
* | rbp | rcx | rsp + 0x020
* +------------------+------------------+
* | rcx (Win32) | rdx | rsp + 0x028
* +------------------+------------------+
* | rsi (Win32) | rsi (Linux) | rsp + 0x030
* +------------------+------------------+
* | rdi (Win32) | rdi (Linux) | rsp + 0x038
* +------------------+------------------+
* | r12 | r8 | rsp + 0x040
* +------------------+------------------+
* | r13 | r9 | rsp + 0x048
* +------------------+------------------+
* | r14 | r10 | rsp + 0x050
* +------------------+------------------+
* | r15 | r11 | rsp + 0x058
* +------------------+------------------+
* | xmm6 (Win32) | (unused) | rsp + 0x060
* | | |
* +------------------+------------------+
* | xmm7 (Win32) | xmm1 | rsp + 0x070
* | | |
* +------------------+------------------+
* | xmm8 (Win32) | xmm2 | rsp + 0x080
* | | |
* +------------------+------------------+
* | xmm9 (Win32) | xmm3 | rsp + 0x090
* | | |
* +------------------+------------------+
* | xmm10 (Win32) | xmm4 | rsp + 0x0A0
* | | |
* +------------------+------------------+
* | xmm11 (Win32) | xmm5 | rsp + 0x0B0
* | | |
* +------------------+------------------+
* | xmm12 (Win32) | (unused) | rsp + 0x0C0
* | | |
* +------------------+------------------+
* | xmm13 (Win32) | (unused) | rsp + 0x0D0
* | | |
* +------------------+------------------+
* | xmm14 (Win32) | (unused) | rsp + 0x0E0
* | | |
* +------------------+------------------+
* | xmm15 (Win32) | (unused) | rsp + 0x0F0
* | | |
* +------------------+------------------+
* | (return address) | (return address) | rsp + 0x100
* +------------------+------------------+
* | (rcx home) | (rcx home) | rsp + 0x108
* +------------------+------------------+
* | (rdx home) | (rdx home) | rsp + 0x110
* +------------------+------------------+
*/
// Packed description of the thunk stack frame drawn in the diagram above.
// Members are meant to be addressed as rsp + offsetof(StackLayout::Thunk, ...).
XEPACKEDSTRUCT(Thunk, {
// Argument temporaries: 3 * 8 bytes at rsp + 0 in the diagram.
uint64_t arg_temp[3];
// NOTE(review): the two-column diagram places the first register slot at
// rsp + 0x018, directly after arg_temp, so this 16-byte scratch area does not
// appear in it. It looks like a pre-merge line kept by the diff rendering -
// confirm.
uint8_t scratch[16];
// General-purpose register save slots, 8 bytes each.
// NOTE(review): `r` is declared twice here. Nine slots (0x018..0x058) match
// the two-column diagram; r[10] presumably belongs to the older layout -
// confirm which declaration is current.
uint64_t r[10];
uint64_t r[9];
// XMM register save slots, 16 bytes each (0x060..0x0F0 in the two-column
// diagram).
vec128_t xmm[10];
// NOTE(review): the two-column diagram goes straight from the last xmm slot
// to the return address at 0x100, leaving no room for this member; presumably
// padding from the older layout - confirm.
uint64_t dummy;
});
// Keeps the frame size a multiple of 16 bytes, in line with the "stack must
// always be 16b aligned" note in the layout comment.
static_assert(sizeof(Thunk) % 16 == 0,
"sizeof(Thunk) must be a multiple of 16!");

View File

@ -3053,23 +3053,15 @@ bool D3D12CommandProcessor::UpdateBindings(
write_float_constant_view_pixel = true;
}
if (!cbuffer_bindings_bool_loop_.up_to_date) {
uint32_t* bool_loop_constants =
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
frame_current_, 768, nullptr, nullptr,
&cbuffer_bindings_bool_loop_.buffer_address));
uint8_t* bool_loop_constants = constant_buffer_pool_->Request(
frame_current_, 256, nullptr, nullptr,
&cbuffer_bindings_bool_loop_.buffer_address);
if (bool_loop_constants == nullptr) {
return false;
}
// Bool and loop constants are quadrupled to allow dynamic indexing.
for (uint32_t i = 0; i < 40; ++i) {
uint32_t bool_loop_constant =
regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 + i].u32;
uint32_t* bool_loop_constant_vector = bool_loop_constants + (i << 2);
bool_loop_constant_vector[0] = bool_loop_constant;
bool_loop_constant_vector[1] = bool_loop_constant;
bool_loop_constant_vector[2] = bool_loop_constant;
bool_loop_constant_vector[3] = bool_loop_constant;
}
std::memcpy(bool_loop_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
(8 + 32) * sizeof(uint32_t));
cbuffer_bindings_bool_loop_.up_to_date = true;
write_bool_loop_constant_view = true;
}
@ -3226,7 +3218,7 @@ bool D3D12CommandProcessor::UpdateBindings(
gpu_handle_bool_loop_constants_ = view_gpu_handle;
constant_buffer_desc.BufferLocation =
cbuffer_bindings_bool_loop_.buffer_address;
constant_buffer_desc.SizeInBytes = 768;
constant_buffer_desc.SizeInBytes = 256;
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view;
view_gpu_handle.ptr += descriptor_size_view;

View File

@ -162,7 +162,6 @@ void DxbcShaderTranslator::Reset() {
system_constants_used_ = 0;
float_constants_dynamic_indexed_ = false;
bool_loop_constants_dynamic_indexed_ = false;
float_constant_index_offsets_.clear();
system_temp_count_current_ = 0;
@ -2039,173 +2038,6 @@ void DxbcShaderTranslator::LoadDxbcSourceOperand(
}
break;
case InstructionStorageSource::kConstantInt: {
// ***********************************************************************
// Loop constant
// ***********************************************************************
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
}
// Convert to float and store in the intermediate register.
// The constant buffer contains each integer replicated in XYZW so dynamic
// indexing is possible.
dxbc_operand.type = DxbcSourceOperand::Type::kIntermediateRegister;
if (dxbc_operand.intermediate_register ==
DxbcSourceOperand::kIntermediateRegisterNone) {
dxbc_operand.intermediate_register = PushSystemTemp();
}
bool is_static = operand.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic;
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ITOF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(is_static ? 7 : 9));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3,
D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
is_static ? D3D10_SB_OPERAND_INDEX_IMMEDIATE32
: D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE));
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
// 8 to skip bool constants.
shader_code_.push_back(8 + uint32_t(operand.storage_index));
if (!is_static) {
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, dynamic_address_component, 1));
shader_code_.push_back(dynamic_address_register);
bool_loop_constants_dynamic_indexed_ = true;
}
++stat_.instruction_count;
++stat_.conversion_instruction_count;
} break;
case InstructionStorageSource::kConstantBool: {
// ***********************************************************************
// Boolean constant
// ***********************************************************************
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
}
// Extract, convert to float and store in the intermediate register.
// The constant buffer contains each 32-bit vector replicated in XYZW so
// dynamic indexing is possible.
dxbc_operand.type = DxbcSourceOperand::Type::kIntermediateRegister;
if (dxbc_operand.intermediate_register ==
DxbcSourceOperand::kIntermediateRegisterNone) {
dxbc_operand.intermediate_register = PushSystemTemp();
}
if (operand.storage_addressing_mode ==
InstructionStorageAddressingMode::kStatic) {
// Extract the bit directly.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(uint32_t(operand.storage_index) & 31);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
shader_code_.push_back(uint32_t(operand.storage_index) >> 5);
++stat_.instruction_count;
++stat_.uint_instruction_count;
} else {
bool_loop_constants_dynamic_indexed_ = true;
uint32_t constant_address_register = dynamic_address_register;
uint32_t constant_address_component = dynamic_address_component;
if (operand.storage_index != 0) {
// Has an offset - add it.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_TEMP, constant_address_component, 1));
shader_code_.push_back(constant_address_register);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(uint32_t(operand.storage_index));
++stat_.instruction_count;
++stat_.int_instruction_count;
constant_address_register = dxbc_operand.intermediate_register;
constant_address_component = 0;
}
// Split the index into constant index and bit offset and store them in
// the intermediate register.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(5);
shader_code_.push_back(3);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
shader_code_.push_back(0);
shader_code_.push_back(5);
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D10_SB_OPERAND_TYPE_TEMP, constant_address_component, 1));
shader_code_.push_back(constant_address_register);
++stat_.instruction_count;
++stat_.uint_instruction_count;
// Extract the bits.
shader_code_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0,
3, D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
D3D10_SB_OPERAND_INDEX_RELATIVE));
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
++stat_.instruction_count;
++stat_.uint_instruction_count;
}
// Convert the bit to float and replicate it.
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UTOF) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
shader_code_.push_back(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
shader_code_.push_back(
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
shader_code_.push_back(dxbc_operand.intermediate_register);
++stat_.instruction_count;
++stat_.conversion_instruction_count;
} break;
default:
// Fall back to constant zeros for invalid types.
dxbc_operand.index = constant_component_values;
@ -2819,10 +2651,11 @@ void DxbcShaderTranslator::UpdateExecConditionals(
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
shader_code_.push_back(bool_constant_test_register);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
(bool_constant_index >> 5) & 3, 3));
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
shader_code_.push_back(bool_constant_index >> 5);
shader_code_.push_back(bool_constant_index >> 7);
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(1u << (bool_constant_index & 31));
@ -3109,11 +2942,11 @@ void DxbcShaderTranslator::ProcessLoopStartInstruction(
shader_code_.push_back(0);
shader_code_.push_back(0);
shader_code_.push_back(EncodeVectorReplicatedOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, instr.loop_constant_index & 3, 3));
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
// 8 because of bool constants.
shader_code_.push_back(8 + instr.loop_constant_index);
// 2 because of bool constants.
shader_code_.push_back(2 + (instr.loop_constant_index >> 2));
++stat_.instruction_count;
++stat_.uint_instruction_count;
@ -3312,12 +3145,12 @@ void DxbcShaderTranslator::ProcessLoopEndInstruction(
shader_code_.push_back(
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
shader_code_.push_back(16);
shader_code_.push_back(
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
shader_code_.push_back(EncodeVectorSelectOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, instr.loop_constant_index & 3, 3));
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
// 8 because of bool constants.
shader_code_.push_back(8 + instr.loop_constant_index);
// 2 because of bool constants.
shader_code_.push_back(2 + (instr.loop_constant_index >> 2));
++stat_.instruction_count;
++stat_.uint_instruction_count;
@ -3431,8 +3264,6 @@ const DxbcShaderTranslator::RdefType DxbcShaderTranslator::rdef_types_[size_t(
{nullptr, 1, 19, 1, 4, 2, 0, RdefTypeIndex::kUint4, nullptr},
// kUint4Array8
{nullptr, 1, 19, 1, 4, 8, 0, RdefTypeIndex::kUint4, nullptr},
// kUint4Array32
{nullptr, 1, 19, 1, 4, 32, 0, RdefTypeIndex::kUint4, nullptr},
// kUint4Array48
{nullptr, 1, 19, 1, 4, 48, 0, RdefTypeIndex::kUint4, nullptr},
};
@ -3720,10 +3551,10 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) {
shader_object_.push_back(constant_name_offset_bool);
shader_object_.push_back(0);
shader_object_.push_back(8 * 4 * sizeof(uint32_t));
shader_object_.push_back(2 * 4 * sizeof(uint32_t));
shader_object_.push_back(0x2);
shader_object_.push_back(types_offset +
uint32_t(RdefTypeIndex::kUint4Array8) * type_size);
uint32_t(RdefTypeIndex::kUint4Array2) * type_size);
shader_object_.push_back(0);
shader_object_.push_back(0xFFFFFFFFu);
shader_object_.push_back(0);
@ -3731,11 +3562,11 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
shader_object_.push_back(0);
new_offset += constant_size;
shader_object_.push_back(constant_name_offset_loop);
shader_object_.push_back(2 * 4 * sizeof(uint32_t));
shader_object_.push_back(8 * 4 * sizeof(uint32_t));
shader_object_.push_back(32 * 4 * sizeof(uint32_t));
shader_object_.push_back(0x2);
shader_object_.push_back(
types_offset + uint32_t(RdefTypeIndex::kUint4Array32) * type_size);
shader_object_.push_back(types_offset +
uint32_t(RdefTypeIndex::kUint4Array8) * type_size);
shader_object_.push_back(0);
shader_object_.push_back(0xFFFFFFFFu);
shader_object_.push_back(0);
@ -3813,7 +3644,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
// Bool constants and loop constants are separate for easier debugging.
shader_object_.push_back(2);
shader_object_.push_back(constant_offset_bool_loop);
shader_object_.push_back((8 + 32) * 4 * sizeof(uint32_t));
shader_object_.push_back((2 + 8) * 4 * sizeof(uint32_t));
shader_object_.push_back(0);
shader_object_.push_back(0);
} else if (i == cbuffer_index_fetch_constants_) {
@ -4493,16 +4324,14 @@ void DxbcShaderTranslator::WriteShaderCode() {
shader_object_.push_back(
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
bool_loop_constants_dynamic_indexed_
? D3D10_SB_CONSTANT_BUFFER_DYNAMIC_INDEXED
: D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) |
D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) |
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
shader_object_.push_back(EncodeVectorSwizzledOperand(
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
shader_object_.push_back(cbuffer_index_bool_loop_constants_);
shader_object_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
shader_object_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
shader_object_.push_back(40);
shader_object_.push_back(10);
shader_object_.push_back(0);
}

View File

@ -919,12 +919,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
kFloat4Array6,
// Float constants - size written dynamically.
kFloat4ConstantArray,
// Front/back stencil, render target keep masks.
// Bool constants, front/back stencil, render target keep masks.
kUint4Array2,
// Bool constants.
kUint4Array8,
// Loop constants.
kUint4Array32,
kUint4Array8,
// Fetch constants.
kUint4Array48,
@ -978,7 +976,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
// Whether constants are dynamically indexed and need to be marked as such in
// dcl_constantBuffer.
bool float_constants_dynamic_indexed_;
bool bool_loop_constants_dynamic_indexed_;
// Offsets of float constant indices in shader_code_, for remapping in
// CompleteTranslation (initially, at these offsets, guest float constant

View File

@ -139,10 +139,6 @@ enum class InstructionStorageSource {
kRegister,
// Source is stored in a float constant indexed by storage_index [0-511].
kConstantFloat,
// Source is stored in a float constant indexed by storage_index [0-31].
kConstantInt,
// Source is stored in a float constant indexed by storage_index [0-255].
kConstantBool,
// Source is stored in a vertex fetch constant indexed by storage_index
// [0-95].
kVertexFetchConstant,
@ -568,10 +564,10 @@ class Shader {
// base, so bit 0 in a vertex shader is register 0, and bit 0 in a fragment
// shader is register 256.
uint64_t float_bitmap[256 / 64];
// Bitmap of all kConstantInt registers read by the shader.
// Bitmap of all loop constants read by the shader.
// Each bit corresponds to a storage index [0-31].
uint32_t int_bitmap;
// Bitmap of all kConstantBool registers read by the shader.
uint32_t loop_bitmap;
// Bitmap of all bool constants read by the shader.
// Each bit corresponds to a storage index [0-255].
uint32_t bool_bitmap[256 / 32];

View File

@ -183,7 +183,7 @@ bool ShaderTranslator::TranslateInternal(Shader* shader,
4 * 4 * constant_register_map_.float_count;
// Each bit indicates a single word.
constant_register_map_.packed_byte_length +=
4 * xe::bit_count(constant_register_map_.int_bitmap);
4 * xe::bit_count(constant_register_map_.loop_bitmap);
// Direct map between words and words we upload.
for (int i = 0; i < 8; ++i) {
if (constant_register_map_.bool_bitmap[i]) {
@ -714,7 +714,7 @@ void ShaderTranslator::TranslateControlFlowLoopStart(
ParsedLoopStartInstruction i;
i.dword_index = cf_index_;
i.loop_constant_index = cf.loop_id();
constant_register_map_.int_bitmap |= 1 << i.loop_constant_index;
constant_register_map_.loop_bitmap |= 1 << i.loop_constant_index;
i.is_repeat = cf.is_repeat();
i.loop_skip_address = cf.address();
@ -730,7 +730,7 @@ void ShaderTranslator::TranslateControlFlowLoopEnd(
i.is_predicated_break = cf.is_predicated_break();
i.predicate_condition = cf.condition();
i.loop_constant_index = cf.loop_id();
constant_register_map_.int_bitmap |= 1 << i.loop_constant_index;
constant_register_map_.loop_bitmap |= 1 << i.loop_constant_index;
i.loop_body_address = cf.address();
i.Disassemble(&ucode_disasm_buffer_);

View File

@ -93,12 +93,6 @@ void DisassembleSourceOperand(const InstructionOperand& op, StringBuffer* out) {
case InstructionStorageSource::kConstantFloat:
out->Append('c');
break;
case InstructionStorageSource::kConstantInt:
out->Append('i');
break;
case InstructionStorageSource::kConstantBool:
out->Append('b');
break;
case InstructionStorageSource::kTextureFetchConstant:
case InstructionStorageSource::kVertexFetchConstant:
assert_always();

View File

@ -400,7 +400,7 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
}
}
for (int i = 0; i < 32; ++i) {
if (constant_register_map.int_bitmap & (1 << i)) {
if (constant_register_map.loop_bitmap & (1 << i)) {
xe::store<uint32_t>(dest_ptr,
values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + i].u32);
dest_ptr += 4;

View File

@ -178,13 +178,16 @@ void Window::OnPaint(UIEvent* e) {
++frame_count_;
++fps_frame_count_;
uint64_t now_ns = xe::Clock::QueryHostSystemTime();
if (now_ns > fps_update_time_ns_ + 1000 * 10000) {
static auto tick_frequency = Clock::QueryHostTickFrequency();
auto now_ticks = Clock::QueryHostTickCount();
// Average fps over 1 second.
if (now_ticks > fps_update_time_ticks_ + tick_frequency * 1) {
fps_ = static_cast<uint32_t>(
fps_frame_count_ /
(static_cast<double>(now_ns - fps_update_time_ns_) / 10000000.0));
fps_update_time_ns_ = now_ns;
(static_cast<double>(now_ticks - fps_update_time_ticks_) /
tick_frequency));
fps_update_time_ticks_ = now_ticks;
fps_frame_count_ = 0;
#if XE_OPTION_PROFILING
// This means FPS counter will not work with profiling disabled (e.g. on
@ -220,16 +223,16 @@ void Window::OnPaint(UIEvent* e) {
// Prepare ImGui for use this frame.
auto& io = imgui_drawer_->GetIO();
if (!last_paint_time_ns_) {
if (!last_paint_time_ticks_) {
io.DeltaTime = 0.0f;
last_paint_time_ns_ = now_ns;
last_paint_time_ticks_ = now_ticks;
} else {
io.DeltaTime = (now_ns - last_paint_time_ns_) / 10000000.0f;
last_paint_time_ns_ = now_ns;
io.DeltaTime = (now_ticks - last_paint_time_ticks_) /
static_cast<float>(tick_frequency);
last_paint_time_ticks_ = now_ticks;
}
io.DisplaySize = ImVec2(static_cast<float>(scaled_width()),
static_cast<float>(scaled_height()));
ImGui::NewFrame();
context_->BeginSwap();
if (context_->WasLost()) {
@ -237,6 +240,8 @@ void Window::OnPaint(UIEvent* e) {
return;
}
ImGui::NewFrame();
ForEachListener([e](auto listener) { listener->OnPainting(e); });
on_painting(e);
ForEachListener([e](auto listener) { listener->OnPaint(e); });

View File

@ -192,9 +192,9 @@ class Window {
uint32_t frame_count_ = 0;
uint32_t fps_ = 0;
uint64_t fps_update_time_ns_ = 0;
uint64_t fps_update_time_ticks_ = 0;
uint64_t fps_frame_count_ = 0;
uint64_t last_paint_time_ns_ = 0;
uint64_t last_paint_time_ticks_ = 0;
bool display_fps_ = false;
uint32_t game_fps_ = 0;