Merge remote-tracking branch 'upstream/master' into canary-old-update
This commit is contained in:
commit
4ddfffd009
|
@ -580,11 +580,14 @@ void X64ThunkEmitter::EmitSaveVolatileRegs() {
|
|||
// mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rax);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rdx);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], r8);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], r9);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r10);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r11);
|
||||
|
||||
#if XE_PLATFORM_LINUX
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi);
|
||||
#endif
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r8);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r9);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r10);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r11);
|
||||
// vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm0);
|
||||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm1);
|
||||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm2);
|
||||
|
@ -594,35 +597,40 @@ void X64ThunkEmitter::EmitSaveVolatileRegs() {
|
|||
}
|
||||
|
||||
void X64ThunkEmitter::EmitLoadVolatileRegs() {
|
||||
// Load volatile registers from our stack frame.
|
||||
// mov(rax, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
|
||||
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
|
||||
mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
|
||||
#if XE_PLATFORM_LINUX
|
||||
mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
|
||||
mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
|
||||
#endif
|
||||
mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
|
||||
mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
|
||||
mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
|
||||
mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
|
||||
// vmovaps(xmm0, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
|
||||
vmovaps(xmm1, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
|
||||
vmovaps(xmm2, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
|
||||
vmovaps(xmm3, qword[rsp + offsetof(StackLayout::Thunk, xmm[3])]);
|
||||
vmovaps(xmm4, qword[rsp + offsetof(StackLayout::Thunk, xmm[4])]);
|
||||
vmovaps(xmm5, qword[rsp + offsetof(StackLayout::Thunk, xmm[5])]);
|
||||
|
||||
// mov(rax, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
|
||||
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
|
||||
mov(rdx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
|
||||
mov(r8, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
|
||||
mov(r9, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
|
||||
mov(r10, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
|
||||
mov(r11, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
|
||||
}
|
||||
|
||||
void X64ThunkEmitter::EmitSaveNonvolatileRegs() {
|
||||
// Preserve nonvolatile registers.
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[0])], rbx);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rcx);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rbp);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[1])], rbp);
|
||||
#if XE_PLATFORM_WIN32
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[2])], rcx);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[3])], rsi);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[4])], rdi);
|
||||
#endif
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[5])], r12);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[6])], r13);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[7])], r14);
|
||||
mov(qword[rsp + offsetof(StackLayout::Thunk, r[8])], r15);
|
||||
|
||||
// SysV does not have nonvolatile XMM registers.
|
||||
#if XE_PLATFORM_WIN32
|
||||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[0])], xmm6);
|
||||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[1])], xmm7);
|
||||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[2])], xmm8);
|
||||
|
@ -633,9 +641,23 @@ void X64ThunkEmitter::EmitSaveNonvolatileRegs() {
|
|||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[7])], xmm13);
|
||||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[8])], xmm14);
|
||||
vmovaps(qword[rsp + offsetof(StackLayout::Thunk, xmm[9])], xmm15);
|
||||
#endif
|
||||
}
|
||||
|
||||
void X64ThunkEmitter::EmitLoadNonvolatileRegs() {
|
||||
mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
|
||||
mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
|
||||
#if XE_PLATFORM_WIN32
|
||||
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
|
||||
mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
|
||||
mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
|
||||
#endif
|
||||
mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
|
||||
mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
|
||||
mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
|
||||
mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
|
||||
|
||||
#if XE_PLATFORM_WIN32
|
||||
vmovaps(xmm6, qword[rsp + offsetof(StackLayout::Thunk, xmm[0])]);
|
||||
vmovaps(xmm7, qword[rsp + offsetof(StackLayout::Thunk, xmm[1])]);
|
||||
vmovaps(xmm8, qword[rsp + offsetof(StackLayout::Thunk, xmm[2])]);
|
||||
|
@ -646,16 +668,7 @@ void X64ThunkEmitter::EmitLoadNonvolatileRegs() {
|
|||
vmovaps(xmm13, qword[rsp + offsetof(StackLayout::Thunk, xmm[7])]);
|
||||
vmovaps(xmm14, qword[rsp + offsetof(StackLayout::Thunk, xmm[8])]);
|
||||
vmovaps(xmm15, qword[rsp + offsetof(StackLayout::Thunk, xmm[9])]);
|
||||
|
||||
mov(rbx, qword[rsp + offsetof(StackLayout::Thunk, r[0])]);
|
||||
mov(rcx, qword[rsp + offsetof(StackLayout::Thunk, r[1])]);
|
||||
mov(rbp, qword[rsp + offsetof(StackLayout::Thunk, r[2])]);
|
||||
mov(rsi, qword[rsp + offsetof(StackLayout::Thunk, r[3])]);
|
||||
mov(rdi, qword[rsp + offsetof(StackLayout::Thunk, r[4])]);
|
||||
mov(r12, qword[rsp + offsetof(StackLayout::Thunk, r[5])]);
|
||||
mov(r13, qword[rsp + offsetof(StackLayout::Thunk, r[6])]);
|
||||
mov(r14, qword[rsp + offsetof(StackLayout::Thunk, r[7])]);
|
||||
mov(r15, qword[rsp + offsetof(StackLayout::Thunk, r[8])]);
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace x64
|
||||
|
|
|
@ -27,78 +27,71 @@ class StackLayout {
|
|||
* NOTE: stack must always be 16b aligned.
|
||||
*
|
||||
* Thunk stack:
|
||||
* +------------------+
|
||||
* | arg temp, 3 * 8 | rsp + 0
|
||||
* | |
|
||||
* | |
|
||||
* +------------------+
|
||||
* | scratch, 16b | rsp + 24
|
||||
* | |
|
||||
* +------------------+
|
||||
* | rbx | rsp + 40
|
||||
* +------------------+
|
||||
* | rcx / context | rsp + 48
|
||||
* +------------------+
|
||||
* | rbp | rsp + 56
|
||||
* +------------------+
|
||||
* | rsi | rsp + 64
|
||||
* +------------------+
|
||||
* | rdi | rsp + 72
|
||||
* +------------------+
|
||||
* | r12 | rsp + 80
|
||||
* +------------------+
|
||||
* | r13 | rsp + 88
|
||||
* +------------------+
|
||||
* | r14 | rsp + 96
|
||||
* +------------------+
|
||||
* | r15 | rsp + 104
|
||||
* +------------------+
|
||||
* | xmm6/0 | rsp + 112
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm7/1 | rsp + 128
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm8/2 | rsp + 144
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm9/3 | rsp + 160
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm10/4 | rsp + 176
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm11/5 | rsp + 192
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm12 | rsp + 208
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm13 | rsp + 224
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm14 | rsp + 240
|
||||
* | |
|
||||
* +------------------+
|
||||
* | xmm15 | rsp + 256
|
||||
* | |
|
||||
* +------------------+
|
||||
* | scratch, 8b | rsp + 272
|
||||
* | |
|
||||
* +------------------+
|
||||
* | (return address) | rsp + 280
|
||||
* +------------------+
|
||||
* | (rcx home) | rsp + 288
|
||||
* +------------------+
|
||||
* | (rdx home) | rsp + 296
|
||||
* +------------------+
|
||||
* Non-Volatile Volatile
|
||||
* +------------------+------------------+
|
||||
* | arg temp, 3 * 8 | arg temp, 3 * 8 | rsp + 0x000
|
||||
* | | |
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | rbx | (unused) | rsp + 0x018
|
||||
* +------------------+------------------+
|
||||
* | rbp | rcx | rsp + 0x020
|
||||
* +------------------+------------------+
|
||||
* | rcx (Win32) | rdx | rsp + 0x028
|
||||
* +------------------+------------------+
|
||||
* | rsi (Win32) | rsi (Linux) | rsp + 0x030
|
||||
* +------------------+------------------+
|
||||
* | rdi (Win32) | rdi (Linux) | rsp + 0x038
|
||||
* +------------------+------------------+
|
||||
* | r12 | r8 | rsp + 0x040
|
||||
* +------------------+------------------+
|
||||
* | r13 | r9 | rsp + 0x048
|
||||
* +------------------+------------------+
|
||||
* | r14 | r10 | rsp + 0x050
|
||||
* +------------------+------------------+
|
||||
* | r15 | r11 | rsp + 0x058
|
||||
* +------------------+------------------+
|
||||
* | xmm6 (Win32) | (unused) | rsp + 0x060
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm7 (Win32) | xmm1 | rsp + 0x070
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm8 (Win32) | xmm2 | rsp + 0x080
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm9 (Win32) | xmm3 | rsp + 0x090
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm10 (Win32) | xmm4 | rsp + 0x0A0
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm11 (Win32) | xmm5 | rsp + 0x0B0
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm12 (Win32) | (unused) | rsp + 0x0C0
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm13 (Win32) | (unused) | rsp + 0x0D0
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm14 (Win32) | (unused) | rsp + 0x0E0
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | xmm15 (Win32) | (unused) | rsp + 0x0F0
|
||||
* | | |
|
||||
* +------------------+------------------+
|
||||
* | (return address) | (return address) | rsp + 0x100
|
||||
* +------------------+------------------+
|
||||
* | (rcx home) | (rcx home) | rsp + 0x108
|
||||
* +------------------+------------------+
|
||||
* | (rdx home) | (rdx home) | rsp + 0x110
|
||||
* +------------------+------------------+
|
||||
*/
|
||||
XEPACKEDSTRUCT(Thunk, {
|
||||
uint64_t arg_temp[3];
|
||||
uint8_t scratch[16];
|
||||
uint64_t r[10];
|
||||
uint64_t r[9];
|
||||
vec128_t xmm[10];
|
||||
uint64_t dummy;
|
||||
});
|
||||
static_assert(sizeof(Thunk) % 16 == 0,
|
||||
"sizeof(Thunk) must be a multiple of 16!");
|
||||
|
|
|
@ -3053,23 +3053,15 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
write_float_constant_view_pixel = true;
|
||||
}
|
||||
if (!cbuffer_bindings_bool_loop_.up_to_date) {
|
||||
uint32_t* bool_loop_constants =
|
||||
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
|
||||
frame_current_, 768, nullptr, nullptr,
|
||||
&cbuffer_bindings_bool_loop_.buffer_address));
|
||||
uint8_t* bool_loop_constants = constant_buffer_pool_->Request(
|
||||
frame_current_, 256, nullptr, nullptr,
|
||||
&cbuffer_bindings_bool_loop_.buffer_address);
|
||||
if (bool_loop_constants == nullptr) {
|
||||
return false;
|
||||
}
|
||||
// Bool and loop constants are quadrupled to allow dynamic indexing.
|
||||
for (uint32_t i = 0; i < 40; ++i) {
|
||||
uint32_t bool_loop_constant =
|
||||
regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 + i].u32;
|
||||
uint32_t* bool_loop_constant_vector = bool_loop_constants + (i << 2);
|
||||
bool_loop_constant_vector[0] = bool_loop_constant;
|
||||
bool_loop_constant_vector[1] = bool_loop_constant;
|
||||
bool_loop_constant_vector[2] = bool_loop_constant;
|
||||
bool_loop_constant_vector[3] = bool_loop_constant;
|
||||
}
|
||||
std::memcpy(bool_loop_constants,
|
||||
&regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
|
||||
(8 + 32) * sizeof(uint32_t));
|
||||
cbuffer_bindings_bool_loop_.up_to_date = true;
|
||||
write_bool_loop_constant_view = true;
|
||||
}
|
||||
|
@ -3226,7 +3218,7 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
gpu_handle_bool_loop_constants_ = view_gpu_handle;
|
||||
constant_buffer_desc.BufferLocation =
|
||||
cbuffer_bindings_bool_loop_.buffer_address;
|
||||
constant_buffer_desc.SizeInBytes = 768;
|
||||
constant_buffer_desc.SizeInBytes = 256;
|
||||
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
|
||||
view_cpu_handle.ptr += descriptor_size_view;
|
||||
view_gpu_handle.ptr += descriptor_size_view;
|
||||
|
|
|
@ -162,7 +162,6 @@ void DxbcShaderTranslator::Reset() {
|
|||
|
||||
system_constants_used_ = 0;
|
||||
float_constants_dynamic_indexed_ = false;
|
||||
bool_loop_constants_dynamic_indexed_ = false;
|
||||
float_constant_index_offsets_.clear();
|
||||
|
||||
system_temp_count_current_ = 0;
|
||||
|
@ -2039,173 +2038,6 @@ void DxbcShaderTranslator::LoadDxbcSourceOperand(
|
|||
}
|
||||
break;
|
||||
|
||||
case InstructionStorageSource::kConstantInt: {
|
||||
// ***********************************************************************
|
||||
// Loop constant
|
||||
// ***********************************************************************
|
||||
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
|
||||
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
|
||||
}
|
||||
// Convert to float and store in the intermediate register.
|
||||
// The constant buffer contains each integer replicated in XYZW so dynamic
|
||||
// indexing is possible.
|
||||
dxbc_operand.type = DxbcSourceOperand::Type::kIntermediateRegister;
|
||||
if (dxbc_operand.intermediate_register ==
|
||||
DxbcSourceOperand::kIntermediateRegisterNone) {
|
||||
dxbc_operand.intermediate_register = PushSystemTemp();
|
||||
}
|
||||
bool is_static = operand.storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic;
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_ITOF) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(is_static ? 7 : 9));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(dxbc_operand.intermediate_register);
|
||||
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3,
|
||||
D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
|
||||
D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
|
||||
is_static ? D3D10_SB_OPERAND_INDEX_IMMEDIATE32
|
||||
: D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE));
|
||||
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
|
||||
// 8 to skip bool constants.
|
||||
shader_code_.push_back(8 + uint32_t(operand.storage_index));
|
||||
if (!is_static) {
|
||||
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, dynamic_address_component, 1));
|
||||
shader_code_.push_back(dynamic_address_register);
|
||||
bool_loop_constants_dynamic_indexed_ = true;
|
||||
}
|
||||
++stat_.instruction_count;
|
||||
++stat_.conversion_instruction_count;
|
||||
} break;
|
||||
|
||||
case InstructionStorageSource::kConstantBool: {
|
||||
// ***********************************************************************
|
||||
// Boolean constant
|
||||
// ***********************************************************************
|
||||
if (cbuffer_index_bool_loop_constants_ == kCbufferIndexUnallocated) {
|
||||
cbuffer_index_bool_loop_constants_ = cbuffer_count_++;
|
||||
}
|
||||
// Extract, convert to float and store in the intermediate register.
|
||||
// The constant buffer contains each 32-bit vector replicated in XYZW so
|
||||
// dynamic indexing is possible.
|
||||
dxbc_operand.type = DxbcSourceOperand::Type::kIntermediateRegister;
|
||||
if (dxbc_operand.intermediate_register ==
|
||||
DxbcSourceOperand::kIntermediateRegisterNone) {
|
||||
dxbc_operand.intermediate_register = PushSystemTemp();
|
||||
}
|
||||
if (operand.storage_addressing_mode ==
|
||||
InstructionStorageAddressingMode::kStatic) {
|
||||
// Extract the bit directly.
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(11));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(dxbc_operand.intermediate_register);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(1);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(uint32_t(operand.storage_index) & 31);
|
||||
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
|
||||
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
|
||||
shader_code_.push_back(uint32_t(operand.storage_index) >> 5);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
} else {
|
||||
bool_loop_constants_dynamic_indexed_ = true;
|
||||
uint32_t constant_address_register = dynamic_address_register;
|
||||
uint32_t constant_address_component = dynamic_address_component;
|
||||
if (operand.storage_index != 0) {
|
||||
// Has an offset - add it.
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_IADD) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(dxbc_operand.intermediate_register);
|
||||
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, constant_address_component, 1));
|
||||
shader_code_.push_back(constant_address_register);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(uint32_t(operand.storage_index));
|
||||
++stat_.instruction_count;
|
||||
++stat_.int_instruction_count;
|
||||
constant_address_register = dxbc_operand.intermediate_register;
|
||||
constant_address_component = 0;
|
||||
}
|
||||
// Split the index into constant index and bit offset and store them in
|
||||
// the intermediate register.
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(15));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0011, 1));
|
||||
shader_code_.push_back(dxbc_operand.intermediate_register);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(5);
|
||||
shader_code_.push_back(3);
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_IMMEDIATE32, kSwizzleXYZW, 0));
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(5);
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_TEMP, constant_address_component, 1));
|
||||
shader_code_.push_back(constant_address_register);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
// Extract the bits.
|
||||
shader_code_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D11_SB_OPCODE_UBFE) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(12));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(dxbc_operand.intermediate_register);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(1);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 1, 1));
|
||||
shader_code_.push_back(dxbc_operand.intermediate_register);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0,
|
||||
3, D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
|
||||
D3D10_SB_OPERAND_INDEX_IMMEDIATE32,
|
||||
D3D10_SB_OPERAND_INDEX_RELATIVE));
|
||||
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(dxbc_operand.intermediate_register);
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
}
|
||||
// Convert the bit to float and replicate it.
|
||||
shader_code_.push_back(ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_UTOF) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(5));
|
||||
shader_code_.push_back(
|
||||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b1111, 1));
|
||||
shader_code_.push_back(dxbc_operand.intermediate_register);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorReplicatedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0, 1));
|
||||
shader_code_.push_back(dxbc_operand.intermediate_register);
|
||||
++stat_.instruction_count;
|
||||
++stat_.conversion_instruction_count;
|
||||
} break;
|
||||
|
||||
default:
|
||||
// Fall back to constant zeros for invalid types.
|
||||
dxbc_operand.index = constant_component_values;
|
||||
|
@ -2819,10 +2651,11 @@ void DxbcShaderTranslator::UpdateExecConditionals(
|
|||
EncodeVectorMaskedOperand(D3D10_SB_OPERAND_TYPE_TEMP, 0b0001, 1));
|
||||
shader_code_.push_back(bool_constant_test_register);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER,
|
||||
(bool_constant_index >> 5) & 3, 3));
|
||||
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
|
||||
shader_code_.push_back(bool_constant_index >> 5);
|
||||
shader_code_.push_back(bool_constant_index >> 7);
|
||||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(1u << (bool_constant_index & 31));
|
||||
|
@ -3109,11 +2942,11 @@ void DxbcShaderTranslator::ProcessLoopStartInstruction(
|
|||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(0);
|
||||
shader_code_.push_back(EncodeVectorReplicatedOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, instr.loop_constant_index & 3, 3));
|
||||
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
|
||||
// 8 because of bool constants.
|
||||
shader_code_.push_back(8 + instr.loop_constant_index);
|
||||
// 2 because of bool constants.
|
||||
shader_code_.push_back(2 + (instr.loop_constant_index >> 2));
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
|
@ -3312,12 +3145,12 @@ void DxbcShaderTranslator::ProcessLoopEndInstruction(
|
|||
shader_code_.push_back(
|
||||
EncodeScalarOperand(D3D10_SB_OPERAND_TYPE_IMMEDIATE32, 0));
|
||||
shader_code_.push_back(16);
|
||||
shader_code_.push_back(
|
||||
EncodeVectorSelectOperand(D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, 0, 3));
|
||||
shader_code_.push_back(EncodeVectorSelectOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, instr.loop_constant_index & 3, 3));
|
||||
shader_code_.push_back(cbuffer_index_bool_loop_constants_);
|
||||
shader_code_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
|
||||
// 8 because of bool constants.
|
||||
shader_code_.push_back(8 + instr.loop_constant_index);
|
||||
// 2 because of bool constants.
|
||||
shader_code_.push_back(2 + (instr.loop_constant_index >> 2));
|
||||
++stat_.instruction_count;
|
||||
++stat_.uint_instruction_count;
|
||||
|
||||
|
@ -3431,8 +3264,6 @@ const DxbcShaderTranslator::RdefType DxbcShaderTranslator::rdef_types_[size_t(
|
|||
{nullptr, 1, 19, 1, 4, 2, 0, RdefTypeIndex::kUint4, nullptr},
|
||||
// kUint4Array8
|
||||
{nullptr, 1, 19, 1, 4, 8, 0, RdefTypeIndex::kUint4, nullptr},
|
||||
// kUint4Array32
|
||||
{nullptr, 1, 19, 1, 4, 32, 0, RdefTypeIndex::kUint4, nullptr},
|
||||
// kUint4Array48
|
||||
{nullptr, 1, 19, 1, 4, 48, 0, RdefTypeIndex::kUint4, nullptr},
|
||||
};
|
||||
|
@ -3720,10 +3551,10 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
if (cbuffer_index_bool_loop_constants_ != kCbufferIndexUnallocated) {
|
||||
shader_object_.push_back(constant_name_offset_bool);
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(8 * 4 * sizeof(uint32_t));
|
||||
shader_object_.push_back(2 * 4 * sizeof(uint32_t));
|
||||
shader_object_.push_back(0x2);
|
||||
shader_object_.push_back(types_offset +
|
||||
uint32_t(RdefTypeIndex::kUint4Array8) * type_size);
|
||||
uint32_t(RdefTypeIndex::kUint4Array2) * type_size);
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(0xFFFFFFFFu);
|
||||
shader_object_.push_back(0);
|
||||
|
@ -3731,11 +3562,11 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
shader_object_.push_back(0);
|
||||
new_offset += constant_size;
|
||||
shader_object_.push_back(constant_name_offset_loop);
|
||||
shader_object_.push_back(2 * 4 * sizeof(uint32_t));
|
||||
shader_object_.push_back(8 * 4 * sizeof(uint32_t));
|
||||
shader_object_.push_back(32 * 4 * sizeof(uint32_t));
|
||||
shader_object_.push_back(0x2);
|
||||
shader_object_.push_back(
|
||||
types_offset + uint32_t(RdefTypeIndex::kUint4Array32) * type_size);
|
||||
shader_object_.push_back(types_offset +
|
||||
uint32_t(RdefTypeIndex::kUint4Array8) * type_size);
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(0xFFFFFFFFu);
|
||||
shader_object_.push_back(0);
|
||||
|
@ -3813,7 +3644,7 @@ void DxbcShaderTranslator::WriteResourceDefinitions() {
|
|||
// Bool constants and loop constants are separate for easier debugging.
|
||||
shader_object_.push_back(2);
|
||||
shader_object_.push_back(constant_offset_bool_loop);
|
||||
shader_object_.push_back((8 + 32) * 4 * sizeof(uint32_t));
|
||||
shader_object_.push_back((2 + 8) * 4 * sizeof(uint32_t));
|
||||
shader_object_.push_back(0);
|
||||
shader_object_.push_back(0);
|
||||
} else if (i == cbuffer_index_fetch_constants_) {
|
||||
|
@ -4493,16 +4324,14 @@ void DxbcShaderTranslator::WriteShaderCode() {
|
|||
shader_object_.push_back(
|
||||
ENCODE_D3D10_SB_OPCODE_TYPE(D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER) |
|
||||
ENCODE_D3D10_SB_D3D10_SB_CONSTANT_BUFFER_ACCESS_PATTERN(
|
||||
bool_loop_constants_dynamic_indexed_
|
||||
? D3D10_SB_CONSTANT_BUFFER_DYNAMIC_INDEXED
|
||||
: D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) |
|
||||
D3D10_SB_CONSTANT_BUFFER_IMMEDIATE_INDEXED) |
|
||||
ENCODE_D3D10_SB_TOKENIZED_INSTRUCTION_LENGTH(7));
|
||||
shader_object_.push_back(EncodeVectorSwizzledOperand(
|
||||
D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER, kSwizzleXYZW, 3));
|
||||
shader_object_.push_back(cbuffer_index_bool_loop_constants_);
|
||||
shader_object_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
|
||||
shader_object_.push_back(uint32_t(CbufferRegister::kBoolLoopConstants));
|
||||
shader_object_.push_back(40);
|
||||
shader_object_.push_back(10);
|
||||
shader_object_.push_back(0);
|
||||
}
|
||||
|
||||
|
|
|
@ -919,12 +919,10 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
kFloat4Array6,
|
||||
// Float constants - size written dynamically.
|
||||
kFloat4ConstantArray,
|
||||
// Front/back stencil, render target keep masks.
|
||||
// Bool constants, front/back stencil, render target keep masks.
|
||||
kUint4Array2,
|
||||
// Bool constants.
|
||||
kUint4Array8,
|
||||
// Loop constants.
|
||||
kUint4Array32,
|
||||
kUint4Array8,
|
||||
// Fetch constants.
|
||||
kUint4Array48,
|
||||
|
||||
|
@ -978,7 +976,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
// Whether constants are dynamically indexed and need to be marked as such in
|
||||
// dcl_constantBuffer.
|
||||
bool float_constants_dynamic_indexed_;
|
||||
bool bool_loop_constants_dynamic_indexed_;
|
||||
|
||||
// Offsets of float constant indices in shader_code_, for remapping in
|
||||
// CompleteTranslation (initially, at these offsets, guest float constant
|
||||
|
|
|
@ -139,10 +139,6 @@ enum class InstructionStorageSource {
|
|||
kRegister,
|
||||
// Source is stored in a float constant indexed by storage_index [0-511].
|
||||
kConstantFloat,
|
||||
// Source is stored in a float constant indexed by storage_index [0-31].
|
||||
kConstantInt,
|
||||
// Source is stored in a float constant indexed by storage_index [0-255].
|
||||
kConstantBool,
|
||||
// Source is stored in a vertex fetch constant indexed by storage_index
|
||||
// [0-95].
|
||||
kVertexFetchConstant,
|
||||
|
@ -568,10 +564,10 @@ class Shader {
|
|||
// base, so bit 0 in a vertex shader is register 0, and bit 0 in a fragment
|
||||
// shader is register 256.
|
||||
uint64_t float_bitmap[256 / 64];
|
||||
// Bitmap of all kConstantInt registers read by the shader.
|
||||
// Bitmap of all loop constants read by the shader.
|
||||
// Each bit corresponds to a storage index [0-31].
|
||||
uint32_t int_bitmap;
|
||||
// Bitmap of all kConstantBool registers read by the shader.
|
||||
uint32_t loop_bitmap;
|
||||
// Bitmap of all bool constants read by the shader.
|
||||
// Each bit corresponds to a storage index [0-255].
|
||||
uint32_t bool_bitmap[256 / 32];
|
||||
|
||||
|
|
|
@ -183,7 +183,7 @@ bool ShaderTranslator::TranslateInternal(Shader* shader,
|
|||
4 * 4 * constant_register_map_.float_count;
|
||||
// Each bit indicates a single word.
|
||||
constant_register_map_.packed_byte_length +=
|
||||
4 * xe::bit_count(constant_register_map_.int_bitmap);
|
||||
4 * xe::bit_count(constant_register_map_.loop_bitmap);
|
||||
// Direct map between words and words we upload.
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
if (constant_register_map_.bool_bitmap[i]) {
|
||||
|
@ -714,7 +714,7 @@ void ShaderTranslator::TranslateControlFlowLoopStart(
|
|||
ParsedLoopStartInstruction i;
|
||||
i.dword_index = cf_index_;
|
||||
i.loop_constant_index = cf.loop_id();
|
||||
constant_register_map_.int_bitmap |= 1 << i.loop_constant_index;
|
||||
constant_register_map_.loop_bitmap |= 1 << i.loop_constant_index;
|
||||
i.is_repeat = cf.is_repeat();
|
||||
i.loop_skip_address = cf.address();
|
||||
|
||||
|
@ -730,7 +730,7 @@ void ShaderTranslator::TranslateControlFlowLoopEnd(
|
|||
i.is_predicated_break = cf.is_predicated_break();
|
||||
i.predicate_condition = cf.condition();
|
||||
i.loop_constant_index = cf.loop_id();
|
||||
constant_register_map_.int_bitmap |= 1 << i.loop_constant_index;
|
||||
constant_register_map_.loop_bitmap |= 1 << i.loop_constant_index;
|
||||
i.loop_body_address = cf.address();
|
||||
|
||||
i.Disassemble(&ucode_disasm_buffer_);
|
||||
|
|
|
@ -93,12 +93,6 @@ void DisassembleSourceOperand(const InstructionOperand& op, StringBuffer* out) {
|
|||
case InstructionStorageSource::kConstantFloat:
|
||||
out->Append('c');
|
||||
break;
|
||||
case InstructionStorageSource::kConstantInt:
|
||||
out->Append('i');
|
||||
break;
|
||||
case InstructionStorageSource::kConstantBool:
|
||||
out->Append('b');
|
||||
break;
|
||||
case InstructionStorageSource::kTextureFetchConstant:
|
||||
case InstructionStorageSource::kVertexFetchConstant:
|
||||
assert_always();
|
||||
|
|
|
@ -400,7 +400,7 @@ std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
|
|||
}
|
||||
}
|
||||
for (int i = 0; i < 32; ++i) {
|
||||
if (constant_register_map.int_bitmap & (1 << i)) {
|
||||
if (constant_register_map.loop_bitmap & (1 << i)) {
|
||||
xe::store<uint32_t>(dest_ptr,
|
||||
values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + i].u32);
|
||||
dest_ptr += 4;
|
||||
|
|
|
@ -178,13 +178,16 @@ void Window::OnPaint(UIEvent* e) {
|
|||
|
||||
++frame_count_;
|
||||
++fps_frame_count_;
|
||||
uint64_t now_ns = xe::Clock::QueryHostSystemTime();
|
||||
|
||||
if (now_ns > fps_update_time_ns_ + 1000 * 10000) {
|
||||
static auto tick_frequency = Clock::QueryHostTickFrequency();
|
||||
auto now_ticks = Clock::QueryHostTickCount();
|
||||
// Average fps over 1 second.
|
||||
if (now_ticks > fps_update_time_ticks_ + tick_frequency * 1) {
|
||||
fps_ = static_cast<uint32_t>(
|
||||
fps_frame_count_ /
|
||||
(static_cast<double>(now_ns - fps_update_time_ns_) / 10000000.0));
|
||||
fps_update_time_ns_ = now_ns;
|
||||
(static_cast<double>(now_ticks - fps_update_time_ticks_) /
|
||||
tick_frequency));
|
||||
fps_update_time_ticks_ = now_ticks;
|
||||
|
||||
fps_frame_count_ = 0;
|
||||
#if XE_OPTION_PROFILING
|
||||
// This means FPS counter will not work with profiling disabled (e.g. on
|
||||
|
@ -220,16 +223,16 @@ void Window::OnPaint(UIEvent* e) {
|
|||
|
||||
// Prepare ImGui for use this frame.
|
||||
auto& io = imgui_drawer_->GetIO();
|
||||
if (!last_paint_time_ns_) {
|
||||
if (!last_paint_time_ticks_) {
|
||||
io.DeltaTime = 0.0f;
|
||||
last_paint_time_ns_ = now_ns;
|
||||
last_paint_time_ticks_ = now_ticks;
|
||||
} else {
|
||||
io.DeltaTime = (now_ns - last_paint_time_ns_) / 10000000.0f;
|
||||
last_paint_time_ns_ = now_ns;
|
||||
io.DeltaTime = (now_ticks - last_paint_time_ticks_) /
|
||||
static_cast<float>(tick_frequency);
|
||||
last_paint_time_ticks_ = now_ticks;
|
||||
}
|
||||
io.DisplaySize = ImVec2(static_cast<float>(scaled_width()),
|
||||
static_cast<float>(scaled_height()));
|
||||
ImGui::NewFrame();
|
||||
|
||||
context_->BeginSwap();
|
||||
if (context_->WasLost()) {
|
||||
|
@ -237,6 +240,8 @@ void Window::OnPaint(UIEvent* e) {
|
|||
return;
|
||||
}
|
||||
|
||||
ImGui::NewFrame();
|
||||
|
||||
ForEachListener([e](auto listener) { listener->OnPainting(e); });
|
||||
on_painting(e);
|
||||
ForEachListener([e](auto listener) { listener->OnPaint(e); });
|
||||
|
|
|
@ -192,9 +192,9 @@ class Window {
|
|||
|
||||
uint32_t frame_count_ = 0;
|
||||
uint32_t fps_ = 0;
|
||||
uint64_t fps_update_time_ns_ = 0;
|
||||
uint64_t fps_update_time_ticks_ = 0;
|
||||
uint64_t fps_frame_count_ = 0;
|
||||
uint64_t last_paint_time_ns_ = 0;
|
||||
uint64_t last_paint_time_ticks_ = 0;
|
||||
|
||||
bool display_fps_ = false;
|
||||
uint32_t game_fps_ = 0;
|
||||
|
|
Loading…
Reference in New Issue