[D3D12] Compact float constants and don't split them into pages

This commit is contained in:
Triang3l 2018-09-30 20:17:26 +03:00
parent eb50ebd885
commit c4599ff211
7 changed files with 733 additions and 485 deletions

View File

@ -175,7 +175,8 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
range.NumDescriptors = 1; range.NumDescriptors = 1;
range.BaseShaderRegister = 2; range.BaseShaderRegister =
uint32_t(DxbcShaderTranslator::CbufferRegister::kFetchConstants);
range.RegisterSpace = 0; range.RegisterSpace = 0;
range.OffsetInDescriptorsFromTableStart = 0; range.OffsetInDescriptorsFromTableStart = 0;
} }
@ -189,8 +190,9 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
parameter.DescriptorTable.pDescriptorRanges = &range; parameter.DescriptorTable.pDescriptorRanges = &range;
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
range.NumDescriptors = 8; range.NumDescriptors = 1;
range.BaseShaderRegister = 3; range.BaseShaderRegister =
uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
range.RegisterSpace = 0; range.RegisterSpace = 0;
range.OffsetInDescriptorsFromTableStart = 0; range.OffsetInDescriptorsFromTableStart = 0;
} }
@ -204,8 +206,9 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
parameter.DescriptorTable.pDescriptorRanges = &range; parameter.DescriptorTable.pDescriptorRanges = &range;
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
range.NumDescriptors = 8; range.NumDescriptors = 1;
range.BaseShaderRegister = 3; range.BaseShaderRegister =
uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
range.RegisterSpace = 0; range.RegisterSpace = 0;
range.OffsetInDescriptorsFromTableStart = 0; range.OffsetInDescriptorsFromTableStart = 0;
} }
@ -220,7 +223,8 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL; parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
range.NumDescriptors = 2; range.NumDescriptors = 2;
range.BaseShaderRegister = 0; range.BaseShaderRegister =
uint32_t(DxbcShaderTranslator::CbufferRegister::kSystemConstants);
range.RegisterSpace = 0; range.RegisterSpace = 0;
range.OffsetInDescriptorsFromTableStart = 0; range.OffsetInDescriptorsFromTableStart = 0;
} }
@ -741,8 +745,22 @@ void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X && if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X &&
index <= XE_GPU_REG_SHADER_CONSTANT_511_W) { index <= XE_GPU_REG_SHADER_CONSTANT_511_W) {
uint32_t component_index = index - XE_GPU_REG_SHADER_CONSTANT_000_X; if (current_queue_frame_ != UINT32_MAX) {
cbuffer_bindings_float_[component_index >> 7].up_to_date = false; uint32_t float_constant_index =
(index - XE_GPU_REG_SHADER_CONSTANT_000_X) >> 2;
if (float_constant_index >= 256) {
float_constant_index -= 256;
if (float_constant_map_pixel_[float_constant_index >> 6] &
(1ull << (float_constant_index & 63))) {
cbuffer_bindings_pixel_float_.up_to_date = false;
}
} else {
if (float_constant_map_vertex_[float_constant_index >> 6] &
(1ull << (float_constant_index & 63))) {
cbuffer_bindings_vertex_float_.up_to_date = false;
}
}
}
} else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 && } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 &&
index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) { index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) {
cbuffer_bindings_bool_loop_.up_to_date = false; cbuffer_bindings_bool_loop_.up_to_date = false;
@ -1145,10 +1163,12 @@ bool D3D12CommandProcessor::BeginFrame() {
current_graphics_root_up_to_date_ = 0; current_graphics_root_up_to_date_ = 0;
current_view_heap_ = nullptr; current_view_heap_ = nullptr;
current_sampler_heap_ = nullptr; current_sampler_heap_ = nullptr;
std::memset(float_constant_map_vertex_, 0,
sizeof(float_constant_map_vertex_));
std::memset(float_constant_map_pixel_, 0, sizeof(float_constant_map_pixel_));
cbuffer_bindings_system_.up_to_date = false; cbuffer_bindings_system_.up_to_date = false;
for (uint32_t i = 0; i < xe::countof(cbuffer_bindings_float_); ++i) { cbuffer_bindings_vertex_float_.up_to_date = false;
cbuffer_bindings_float_[i].up_to_date = false; cbuffer_bindings_pixel_float_.up_to_date = false;
}
cbuffer_bindings_bool_loop_.up_to_date = false; cbuffer_bindings_bool_loop_.up_to_date = false;
cbuffer_bindings_fetch_.up_to_date = false; cbuffer_bindings_fetch_.up_to_date = false;
draw_view_full_update_ = 0; draw_view_full_update_ = 0;
@ -1657,14 +1677,64 @@ bool D3D12CommandProcessor::UpdateBindings(
// Begin updating descriptors. // Begin updating descriptors.
bool write_common_constant_views = false; bool write_common_constant_views = false;
bool write_vertex_float_constant_view = false;
bool write_pixel_float_constant_view = false;
bool write_fetch_constant_view = false; bool write_fetch_constant_view = false;
bool write_vertex_float_constant_views = false;
bool write_pixel_float_constant_views = false;
// TODO(Triang3l): Update textures and samplers only if shaders or binding // TODO(Triang3l): Update textures and samplers only if shaders or binding
// hash change. // hash change.
bool write_textures = texture_count != 0; bool write_textures = texture_count != 0;
bool write_samplers = sampler_count != 0; bool write_samplers = sampler_count != 0;
// Check if the float constant layout is still the same and get the counts.
const Shader::ConstantRegisterMap& vertex_shader_float_constant_map =
vertex_shader->constant_register_map();
uint32_t vertex_shader_float_constant_count =
vertex_shader_float_constant_map.float_count;
// Even if the shader doesn't need any float constants, a valid binding must
// still be provided, so if the first draw in the frame with the current root
// signature doesn't have float constants at all, still allocate an empty
// buffer.
uint32_t vertex_shader_float_constant_size =
xe::align(uint32_t(std::max(vertex_shader_float_constant_count, 1u) * 4 *
sizeof(float)),
256u);
for (uint32_t i = 0; i < 4; ++i) {
if (float_constant_map_vertex_[i] !=
vertex_shader_float_constant_map.float_bitmap[i]) {
float_constant_map_vertex_[i] =
vertex_shader_float_constant_map.float_bitmap[i];
// If no float constants at all, we can reuse any buffer for them, so not
// invalidating.
if (vertex_shader_float_constant_map.float_count != 0) {
cbuffer_bindings_vertex_float_.up_to_date = false;
}
}
}
uint32_t pixel_shader_float_constant_count = 0;
if (pixel_shader != nullptr) {
const Shader::ConstantRegisterMap& pixel_shader_float_constant_map =
pixel_shader->constant_register_map();
pixel_shader_float_constant_count =
pixel_shader_float_constant_map.float_count;
for (uint32_t i = 0; i < 4; ++i) {
if (float_constant_map_pixel_[i] !=
pixel_shader_float_constant_map.float_bitmap[i]) {
float_constant_map_pixel_[i] =
pixel_shader_float_constant_map.float_bitmap[i];
if (pixel_shader_float_constant_map.float_count != 0) {
cbuffer_bindings_pixel_float_.up_to_date = false;
}
}
}
} else {
std::memset(float_constant_map_pixel_, 0,
sizeof(float_constant_map_pixel_));
}
uint32_t pixel_shader_float_constant_size =
xe::align(uint32_t(std::max(pixel_shader_float_constant_count, 1u) * 4 *
sizeof(float)),
256u);
// Update constant buffers. // Update constant buffers.
if (!cbuffer_bindings_system_.up_to_date) { if (!cbuffer_bindings_system_.up_to_date) {
uint8_t* system_constants = constant_buffer_pool_->RequestFull( uint8_t* system_constants = constant_buffer_pool_->RequestFull(
@ -1678,6 +1748,60 @@ bool D3D12CommandProcessor::UpdateBindings(
cbuffer_bindings_system_.up_to_date = true; cbuffer_bindings_system_.up_to_date = true;
write_common_constant_views = true; write_common_constant_views = true;
} }
if (!cbuffer_bindings_vertex_float_.up_to_date) {
uint8_t* float_constants = constant_buffer_pool_->RequestFull(
vertex_shader_float_constant_size, nullptr, nullptr,
&cbuffer_bindings_vertex_float_.buffer_address);
if (float_constants == nullptr) {
return false;
}
for (uint32_t i = 0; i < 4; ++i) {
uint64_t float_constant_map_entry =
vertex_shader_float_constant_map.float_bitmap[i];
uint32_t float_constant_index;
while (xe::bit_scan_forward(float_constant_map_entry,
&float_constant_index)) {
float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(float_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
(float_constant_index << 2)]
.f32,
4 * sizeof(float));
float_constants += 4 * sizeof(float);
}
}
cbuffer_bindings_vertex_float_.up_to_date = true;
write_vertex_float_constant_view = true;
}
if (!cbuffer_bindings_pixel_float_.up_to_date) {
uint8_t* float_constants = constant_buffer_pool_->RequestFull(
pixel_shader_float_constant_size, nullptr, nullptr,
&cbuffer_bindings_pixel_float_.buffer_address);
if (float_constants == nullptr) {
return false;
}
if (pixel_shader != nullptr) {
const Shader::ConstantRegisterMap& pixel_shader_float_constant_map =
pixel_shader->constant_register_map();
for (uint32_t i = 0; i < 4; ++i) {
uint64_t float_constant_map_entry =
pixel_shader_float_constant_map.float_bitmap[i];
uint32_t float_constant_index;
while (xe::bit_scan_forward(float_constant_map_entry,
&float_constant_index)) {
float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(float_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
(float_constant_index << 2)]
.f32,
4 * sizeof(float));
float_constants += 4 * sizeof(float);
}
}
}
cbuffer_bindings_pixel_float_.up_to_date = true;
write_pixel_float_constant_view = true;
}
if (!cbuffer_bindings_bool_loop_.up_to_date) { if (!cbuffer_bindings_bool_loop_.up_to_date) {
uint32_t* bool_loop_constants = uint32_t* bool_loop_constants =
reinterpret_cast<uint32_t*>(constant_buffer_pool_->RequestFull( reinterpret_cast<uint32_t*>(constant_buffer_pool_->RequestFull(
@ -1711,26 +1835,6 @@ bool D3D12CommandProcessor::UpdateBindings(
cbuffer_bindings_fetch_.up_to_date = true; cbuffer_bindings_fetch_.up_to_date = true;
write_fetch_constant_view = true; write_fetch_constant_view = true;
} }
for (uint32_t i = 0; i < 16; ++i) {
ConstantBufferBinding& float_binding = cbuffer_bindings_float_[i];
if (float_binding.up_to_date) {
continue;
}
uint8_t* float_constants = constant_buffer_pool_->RequestFull(
512, nullptr, nullptr, &float_binding.buffer_address);
if (float_constants == nullptr) {
return false;
}
std::memcpy(float_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 7)].f32,
32 * 4 * sizeof(uint32_t));
float_binding.up_to_date = true;
if (i < 8) {
write_vertex_float_constant_views = true;
} else {
write_pixel_float_constant_views = true;
}
}
// Allocate the descriptors. // Allocate the descriptors.
uint32_t view_count_partial_update = 0; uint32_t view_count_partial_update = 0;
@ -1738,18 +1842,18 @@ bool D3D12CommandProcessor::UpdateBindings(
// System and bool/loop constants. // System and bool/loop constants.
view_count_partial_update += 2; view_count_partial_update += 2;
} }
if (write_vertex_float_constant_view) {
// Vertex float constants.
++view_count_partial_update;
}
if (write_pixel_float_constant_view) {
// Pixel float constants.
++view_count_partial_update;
}
if (write_fetch_constant_view) { if (write_fetch_constant_view) {
// Fetch constants. // Fetch constants.
++view_count_partial_update; ++view_count_partial_update;
} }
if (write_vertex_float_constant_views) {
// Vertex float constants.
view_count_partial_update += 8;
}
if (write_pixel_float_constant_views) {
// Pixel float constants.
view_count_partial_update += 8;
}
if (write_textures) { if (write_textures) {
view_count_partial_update += texture_count; view_count_partial_update += texture_count;
} }
@ -1783,8 +1887,8 @@ bool D3D12CommandProcessor::UpdateBindings(
draw_view_full_update_ = view_full_update_index; draw_view_full_update_ = view_full_update_index;
write_common_constant_views = true; write_common_constant_views = true;
write_fetch_constant_view = true; write_fetch_constant_view = true;
write_vertex_float_constant_views = true; write_vertex_float_constant_view = true;
write_pixel_float_constant_views = true; write_pixel_float_constant_view = true;
write_textures = texture_count != 0; write_textures = texture_count != 0;
// If updating fully, write the shared memory descriptor (t0, space1). // If updating fully, write the shared memory descriptor (t0, space1).
shared_memory_->CreateSRV(view_cpu_handle); shared_memory_->CreateSRV(view_cpu_handle);
@ -1821,9 +1925,33 @@ bool D3D12CommandProcessor::UpdateBindings(
current_graphics_root_up_to_date_ &= current_graphics_root_up_to_date_ &=
~(1u << kRootParameter_CommonConstants); ~(1u << kRootParameter_CommonConstants);
} }
if (write_vertex_float_constant_view) {
gpu_handle_vertex_float_constants_ = view_gpu_handle;
// Vertex float constants (b2).
constant_buffer_desc.BufferLocation =
cbuffer_bindings_vertex_float_.buffer_address;
constant_buffer_desc.SizeInBytes = vertex_shader_float_constant_size;
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view;
view_gpu_handle.ptr += descriptor_size_view;
current_graphics_root_up_to_date_ &=
~(1u << kRootParameter_VertexFloatConstants);
}
if (write_pixel_float_constant_view) {
gpu_handle_pixel_float_constants_ = view_gpu_handle;
// Pixel float constants (b2).
constant_buffer_desc.BufferLocation =
cbuffer_bindings_pixel_float_.buffer_address;
constant_buffer_desc.SizeInBytes = pixel_shader_float_constant_size;
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view;
view_gpu_handle.ptr += descriptor_size_view;
current_graphics_root_up_to_date_ &=
~(1u << kRootParameter_PixelFloatConstants);
}
if (write_fetch_constant_view) { if (write_fetch_constant_view) {
gpu_handle_fetch_constants_ = view_gpu_handle; gpu_handle_fetch_constants_ = view_gpu_handle;
// Fetch constants (b2). // Fetch constants (b3).
constant_buffer_desc.BufferLocation = constant_buffer_desc.BufferLocation =
cbuffer_bindings_fetch_.buffer_address; cbuffer_bindings_fetch_.buffer_address;
constant_buffer_desc.SizeInBytes = 768; constant_buffer_desc.SizeInBytes = 768;
@ -1832,34 +1960,6 @@ bool D3D12CommandProcessor::UpdateBindings(
view_gpu_handle.ptr += descriptor_size_view; view_gpu_handle.ptr += descriptor_size_view;
current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_FetchConstants); current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_FetchConstants);
} }
if (write_vertex_float_constant_views) {
gpu_handle_vertex_float_constants_ = view_gpu_handle;
// Vertex float constants (b3-b10).
for (uint32_t i = 0; i < 8; ++i) {
constant_buffer_desc.BufferLocation =
cbuffer_bindings_float_[i].buffer_address;
constant_buffer_desc.SizeInBytes = 512;
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view;
view_gpu_handle.ptr += descriptor_size_view;
}
current_graphics_root_up_to_date_ &=
~(1u << kRootParameter_VertexFloatConstants);
}
if (write_pixel_float_constant_views) {
gpu_handle_pixel_float_constants_ = view_gpu_handle;
// Pixel float constants (b3-b10).
for (uint32_t i = 0; i < 8; ++i) {
constant_buffer_desc.BufferLocation =
cbuffer_bindings_float_[8 + i].buffer_address;
constant_buffer_desc.SizeInBytes = 512;
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
view_cpu_handle.ptr += descriptor_size_view;
view_gpu_handle.ptr += descriptor_size_view;
}
current_graphics_root_up_to_date_ &=
~(1u << kRootParameter_PixelFloatConstants);
}
if (write_textures) { if (write_textures) {
if (pixel_texture_count != 0) { if (pixel_texture_count != 0) {
assert_true(current_graphics_root_extras_.pixel_textures != assert_true(current_graphics_root_extras_.pixel_textures !=

View File

@ -142,28 +142,26 @@ class D3D12CommandProcessor : public CommandProcessor {
// These are always present. // These are always present.
// Very frequently changed, especially for UI draws, and for models drawn in // Very frequently changed, especially for UI draws, and for models drawn in
// multiple parts - contains vertex and texture fetch constants (b2). // multiple parts - contains vertex and texture fetch constants (b3).
kRootParameter_FetchConstants, kRootParameter_FetchConstants,
// Quite frequently changed (for one object drawn multiple times, for // Quite frequently changed (for one object drawn multiple times, for
// instance - may contain projection matrices) - 8 pages of float constants // instance - may contain projection matrices) (b2).
// (b3-b10).
kRootParameter_VertexFloatConstants, kRootParameter_VertexFloatConstants,
// Less frequently changed (per-material) - 8 pages of float constants // Less frequently changed (per-material) (b2).
// (b3-b10).
kRootParameter_PixelFloatConstants, kRootParameter_PixelFloatConstants,
// Rarely changed - system constants like viewport and alpha testing (b0) // Rarely changed - system constants like viewport and alpha testing (b0)
// and loop and bool constants (b1). // and loop and bool constants (b1).
kRootParameter_CommonConstants, kRootParameter_CommonConstants,
// Never changed - shared memory byte address buffer (t0, space1). // Never changed - shared memory byte address buffer (t0).
kRootParameter_SharedMemory, kRootParameter_SharedMemory,
kRootParameter_Count_Base, kRootParameter_Count_Base,
// Extra parameter that may or may not exist: // Extra parameter that may or may not exist:
// - Pixel textures. // - Pixel textures (t1+).
// - Pixel samplers. // - Pixel samplers (s0+).
// - Vertex textures. // - Vertex textures (t1+).
// - Vertex samplers. // - Vertex samplers (s0+).
kRootParameter_Count_Max = kRootParameter_Count_Base + 4, kRootParameter_Count_Max = kRootParameter_Count_Base + 4,
}; };
@ -271,13 +269,18 @@ class D3D12CommandProcessor : public CommandProcessor {
// System shader constants. // System shader constants.
DxbcShaderTranslator::SystemConstants system_constants_; DxbcShaderTranslator::SystemConstants system_constants_;
// Float constant usage masks of the last draw call.
uint64_t float_constant_map_vertex_[4];
uint64_t float_constant_map_pixel_[4];
// Constant buffer bindings. // Constant buffer bindings.
struct ConstantBufferBinding { struct ConstantBufferBinding {
D3D12_GPU_VIRTUAL_ADDRESS buffer_address; D3D12_GPU_VIRTUAL_ADDRESS buffer_address;
bool up_to_date; bool up_to_date;
}; };
ConstantBufferBinding cbuffer_bindings_system_; ConstantBufferBinding cbuffer_bindings_system_;
ConstantBufferBinding cbuffer_bindings_float_[16]; ConstantBufferBinding cbuffer_bindings_vertex_float_;
ConstantBufferBinding cbuffer_bindings_pixel_float_;
ConstantBufferBinding cbuffer_bindings_bool_loop_; ConstantBufferBinding cbuffer_bindings_bool_loop_;
ConstantBufferBinding cbuffer_bindings_fetch_; ConstantBufferBinding cbuffer_bindings_fetch_;

File diff suppressed because it is too large Load Diff

View File

@ -26,6 +26,17 @@ class DxbcShaderTranslator : public ShaderTranslator {
DxbcShaderTranslator(); DxbcShaderTranslator();
~DxbcShaderTranslator() override; ~DxbcShaderTranslator() override;
// Constant buffer bindings in space 0.
// The enumerators carry no explicit values, so they number the b# registers
// consecutively from b0 in declaration order. The D3D12 command processor
// casts them to uint32_t for BaseShaderRegister in its root signature, so
// reordering entries here changes the register every range is bound to.
enum class CbufferRegister {
// The D3D12 command processor has system and bool/loop constants in a
// single descriptor range.
// TODO(Triang3l): Make them root CBVs for speed.
// b0 - first register of the shared 2-descriptor range.
kSystemConstants,
// b1 - must stay directly after kSystemConstants, as the shared range covers
// two consecutive registers starting at kSystemConstants.
kBoolLoopConstants,
// b2 - a single buffer; the vertex and pixel stages each bind their own
// descriptor to this register.
kFloatConstants,
// b3.
kFetchConstants,
};
enum : uint32_t { enum : uint32_t {
kSysFlag_XYDividedByW = 1, kSysFlag_XYDividedByW = 1,
kSysFlag_ZDividedByW = kSysFlag_XYDividedByW << 1, kSysFlag_ZDividedByW = kSysFlag_XYDividedByW << 1,
@ -38,9 +49,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
}; };
// IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED: // IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
// - kSysConst enum (registers and first components). // - kSysConst enum (indices, registers and first components).
// - rdef_constants_. // - system_constant_rdef_.
// - rdef_constant_buffers_ system constant buffer size.
// - d3d12/shaders/xenos_draw.hlsli (for geometry shaders). // - d3d12/shaders/xenos_draw.hlsli (for geometry shaders).
struct SystemConstants { struct SystemConstants {
// vec4 0 // vec4 0
@ -144,54 +154,59 @@ class DxbcShaderTranslator : public ShaderTranslator {
void ProcessAluInstruction(const ParsedAluInstruction& instr) override; void ProcessAluInstruction(const ParsedAluInstruction& instr) override;
private: private:
static constexpr uint32_t kFloatConstantsPerPage = 32;
static constexpr uint32_t kFloatConstantPageCount = 8;
// Constant buffer bindings in space 0.
enum class CbufferRegister {
kSystemConstants,
kBoolLoopConstants,
kFetchConstants,
kFloatConstantsFirst,
kFloatConstantsLast = kFloatConstantsFirst + kFloatConstantPageCount - 1,
};
enum : uint32_t { enum : uint32_t {
kSysConst_Flags_Index = 0,
kSysConst_Flags_Vec = 0, kSysConst_Flags_Vec = 0,
kSysConst_Flags_Comp = 0, kSysConst_Flags_Comp = 0,
kSysConst_VertexIndexEndian_Index = kSysConst_Flags_Index + 1,
kSysConst_VertexIndexEndian_Vec = 0, kSysConst_VertexIndexEndian_Vec = 0,
kSysConst_VertexIndexEndian_Comp = 1, kSysConst_VertexIndexEndian_Comp = 1,
kSysConst_VertexBaseIndex_Index = kSysConst_VertexIndexEndian_Index + 1,
kSysConst_VertexBaseIndex_Vec = 0, kSysConst_VertexBaseIndex_Vec = 0,
kSysConst_VertexBaseIndex_Comp = 2, kSysConst_VertexBaseIndex_Comp = 2,
kSysConst_PixelPosReg_Index = kSysConst_VertexBaseIndex_Index + 1,
kSysConst_PixelPosReg_Vec = 0, kSysConst_PixelPosReg_Vec = 0,
kSysConst_PixelPosReg_Comp = 3, kSysConst_PixelPosReg_Comp = 3,
kSysConst_NDCScale_Index = kSysConst_PixelPosReg_Index + 1,
kSysConst_NDCScale_Vec = 1, kSysConst_NDCScale_Vec = 1,
kSysConst_NDCScale_Comp = 0, kSysConst_NDCScale_Comp = 0,
kSysConst_PixelHalfPixelOffset_Index = kSysConst_NDCScale_Index + 1,
kSysConst_PixelHalfPixelOffset_Vec = 1, kSysConst_PixelHalfPixelOffset_Vec = 1,
kSysConst_PixelHalfPixelOffset_Comp = 3, kSysConst_PixelHalfPixelOffset_Comp = 3,
kSysConst_NDCOffset_Index = kSysConst_PixelHalfPixelOffset_Index + 1,
kSysConst_NDCOffset_Vec = 2, kSysConst_NDCOffset_Vec = 2,
kSysConst_NDCOffset_Comp = 0, kSysConst_NDCOffset_Comp = 0,
kSysConst_AlphaTest_Index = kSysConst_NDCOffset_Index + 1,
kSysConst_AlphaTest_Vec = 2, kSysConst_AlphaTest_Vec = 2,
kSysConst_AlphaTest_Comp = 3, kSysConst_AlphaTest_Comp = 3,
kSysConst_PointSize_Index = kSysConst_AlphaTest_Index + 1,
kSysConst_PointSize_Vec = 3, kSysConst_PointSize_Vec = 3,
kSysConst_PointSize_Comp = 0, kSysConst_PointSize_Comp = 0,
kSysConst_PointSizeMinMax_Index = kSysConst_PointSize_Index + 1,
kSysConst_PointSizeMinMax_Vec = 3, kSysConst_PointSizeMinMax_Vec = 3,
kSysConst_PointSizeMinMax_Comp = 2, kSysConst_PointSizeMinMax_Comp = 2,
kSysConst_PointScreenToNDC_Index = kSysConst_PointSizeMinMax_Index + 1,
kSysConst_PointScreenToNDC_Vec = 4, kSysConst_PointScreenToNDC_Vec = 4,
kSysConst_PointScreenToNDC_Comp = 0, kSysConst_PointScreenToNDC_Comp = 0,
kSysConst_SSAAInvScale_Index = kSysConst_PointScreenToNDC_Index + 1,
kSysConst_SSAAInvScale_Vec = 4, kSysConst_SSAAInvScale_Vec = 4,
kSysConst_SSAAInvScale_Comp = 2, kSysConst_SSAAInvScale_Comp = 2,
kSysConst_AlphaTestRange_Index = kSysConst_SSAAInvScale_Index + 1,
kSysConst_AlphaTestRange_Vec = 5, kSysConst_AlphaTestRange_Vec = 5,
kSysConst_AlphaTestRange_Comp = 0, kSysConst_AlphaTestRange_Comp = 0,
kSysConst_ColorExpBias_Index = kSysConst_AlphaTestRange_Index + 1,
kSysConst_ColorExpBias_Vec = 6, kSysConst_ColorExpBias_Vec = 6,
kSysConst_ColorOutputMap_Index = kSysConst_ColorExpBias_Index + 1,
kSysConst_ColorOutputMap_Vec = 7, kSysConst_ColorOutputMap_Vec = 7,
kSysConst_Count = kSysConst_ColorOutputMap_Index + 1
}; };
static constexpr uint32_t kInterpolatorCount = 16; static constexpr uint32_t kInterpolatorCount = 16;
@ -316,7 +331,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
Type type; Type type;
uint32_t index; uint32_t index;
bool is_dynamic_indexed; // If the operand is dynamically indexed directly when it's used as an
// operand in DXBC instructions.
InstructionStorageAddressingMode addressing_mode;
uint32_t swizzle; uint32_t swizzle;
bool is_negated; bool is_negated;
@ -421,15 +438,14 @@ class DxbcShaderTranslator : public ShaderTranslator {
kInt, kInt,
kUint, kUint,
kUint4, kUint4,
// Float constants - size written dynamically.
kFloat4ConstantArray,
// Bool constants. // Bool constants.
kUint4Array8, kUint4Array8,
// Loop constants. // Loop constants.
kUint4Array32, kUint4Array32,
// Fetch constants. // Fetch constants.
kUint4Array48, kUint4Array48,
// Float constants in one page.
kFloatConstantPageArray,
kFloatConstantPageStruct,
kCount, kCount,
kUnknown = kCount kUnknown = kCount
@ -440,7 +456,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
RdefTypeIndex type; RdefTypeIndex type;
uint32_t offset; uint32_t offset;
}; };
static const RdefStructMember rdef_float_constant_page_member_;
struct RdefType { struct RdefType {
// Name ignored for arrays. // Name ignored for arrays.
@ -459,73 +474,35 @@ class DxbcShaderTranslator : public ShaderTranslator {
}; };
static const RdefType rdef_types_[size_t(RdefTypeIndex::kCount)]; static const RdefType rdef_types_[size_t(RdefTypeIndex::kCount)];
enum class RdefConstantIndex { // Number of constant buffer bindings used in this shader - also used for
kSystemConstantFirst, // generation of indices of constant buffers that are optional.
kSysFlags = kSystemConstantFirst, uint32_t cbuffer_count_;
kSysVertexBaseIndex, static constexpr uint32_t kCbufferIndexUnallocated = UINT32_MAX;
kSysVertexIndexEndian, uint32_t cbuffer_index_system_constants_;
kSysPixelPosReg, uint32_t cbuffer_index_float_constants_;
kSysNDCScale, uint32_t cbuffer_index_bool_loop_constants_;
kSysPixelHalfPixelOffset, uint32_t cbuffer_index_fetch_constants_;
kSysNDCOffset,
kSysAlphaTest,
kSysPointSize,
kSysPointSizeMinMax,
kSysPointScreenToNDC,
kSysSSAAInvScale,
kSysAlphaTestRange,
kSysColorExpBias,
kSysColorOutputMap,
kSystemConstantLast = kSysColorOutputMap,
kBoolConstants, struct SystemConstantRdef {
kLoopConstants,
kFetchConstants,
kFloatConstants,
kCount,
kSystemConstantCount = kSystemConstantLast - kSystemConstantFirst + 1,
};
struct RdefConstant {
const char* name; const char* name;
RdefTypeIndex type; RdefTypeIndex type;
uint32_t offset; uint32_t offset;
uint32_t size; uint32_t size;
}; };
static const RdefConstant rdef_constants_[size_t(RdefConstantIndex::kCount)]; static const SystemConstantRdef system_constant_rdef_[kSysConst_Count];
static_assert(uint32_t(RdefConstantIndex::kCount) <= 64, // Mask of system constants (1 << kSysConst_#_Index) used in the shader, so
"Too many constants in all constant buffers - can't use a 64 " // the remaining ones can be marked as unused in RDEF.
"bit vector to store which constants are used"); uint32_t system_constants_used_;
uint64_t rdef_constants_used_;
enum class RdefConstantBufferIndex { // Whether constants are dynamically indexed and need to be marked as such in
kSystemConstants, // dcl_constantBuffer.
kBoolLoopConstants, bool float_constants_dynamic_indexed_;
kFetchConstants, bool bool_loop_constants_dynamic_indexed_;
kFloatConstants,
kCount // Offsets of float constant indices in shader_code_, for remapping in
}; // CompleteTranslation (initially, at these offsets, guest float constant
struct RdefConstantBuffer { // indices are written).
const char* name; std::vector<uint32_t> float_constant_index_offsets_;
RdefConstantIndex first_constant;
uint32_t constant_count;
uint32_t size;
CbufferRegister register_index;
uint32_t binding_count;
// True if created like `cbuffer`, false for `ConstantBuffer<T>`.
bool user_packed;
bool dynamic_indexed;
};
static const RdefConstantBuffer
rdef_constant_buffers_[size_t(RdefConstantBufferIndex::kCount)];
// Order of dcl_constantbuffer instructions, from most frequenly accessed to
// least frequently accessed (hint to driver according to the DXBC header).
static const RdefConstantBufferIndex
constant_buffer_dcl_order_[size_t(RdefConstantBufferIndex::kCount)];
// Number of currently allocated Xenia internal r# registers. // Number of currently allocated Xenia internal r# registers.
uint32_t system_temp_count_current_; uint32_t system_temp_count_current_;

View File

@ -524,6 +524,9 @@ class Shader {
// Each bit corresponds to a storage index [0-255]. // Each bit corresponds to a storage index [0-255].
uint32_t bool_bitmap[256 / 32]; uint32_t bool_bitmap[256 / 32];
// Total number of kConstantFloat registers read by the shader.
uint32_t float_count;
// Computed byte count of all registers required when packed. // Computed byte count of all registers required when packed.
uint32_t packed_byte_length; uint32_t packed_byte_length;
}; };

View File

@ -143,14 +143,17 @@ bool ShaderTranslator::TranslateInternal(Shader* shader) {
TranslateBlocks(); TranslateBlocks();
// Compute total bytes used by the register map. // Compute total number of float registers and total bytes used by the
// This saves us work later when we need to pack them. // register map. This saves us work later when we need to pack them.
constant_register_map_.packed_byte_length = 0; constant_register_map_.packed_byte_length = 0;
constant_register_map_.float_count = 0;
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
// Each bit indicates a vec4 (4 floats). // Each bit indicates a vec4 (4 floats).
constant_register_map_.packed_byte_length += constant_register_map_.float_count +=
4 * 4 * xe::bit_count(constant_register_map_.float_bitmap[i]); xe::bit_count(constant_register_map_.float_bitmap[i]);
} }
constant_register_map_.packed_byte_length +=
4 * 4 * constant_register_map_.float_count;
// Each bit indicates a single word. // Each bit indicates a single word.
constant_register_map_.packed_byte_length += constant_register_map_.packed_byte_length +=
4 * xe::bit_count(constant_register_map_.int_bitmap); 4 * xe::bit_count(constant_register_map_.int_bitmap);
@ -1303,9 +1306,16 @@ void ShaderTranslator::ParseAluVectorInstruction(
// Track constant float register loads. // Track constant float register loads.
if (i.operands[j].storage_source == if (i.operands[j].storage_source ==
InstructionStorageSource::kConstantFloat) { InstructionStorageSource::kConstantFloat) {
auto register_index = i.operands[j].storage_index; if (i.operands[j].storage_addressing_mode !=
constant_register_map_.float_bitmap[register_index / 64] |= InstructionStorageAddressingMode::kStatic) {
1ull << (register_index % 64); // Dynamic addressing makes all constants required.
std::memset(constant_register_map_.float_bitmap, 0xFF,
sizeof(constant_register_map_.float_bitmap));
} else {
auto register_index = i.operands[j].storage_index;
constant_register_map_.float_bitmap[register_index / 64] |=
1ull << (register_index % 64);
}
} }
} }
@ -1438,8 +1448,15 @@ void ShaderTranslator::ParseAluScalarInstruction(
// Track constant float register loads. // Track constant float register loads.
auto register_index = i.operands[0].storage_index; auto register_index = i.operands[0].storage_index;
constant_register_map_.float_bitmap[register_index / 64] |= if (i.operands[0].storage_addressing_mode !=
1ull << (register_index % 64); InstructionStorageAddressingMode::kStatic) {
// Dynamic addressing makes all constants required.
std::memset(constant_register_map_.float_bitmap, 0xFF,
sizeof(constant_register_map_.float_bitmap));
} else {
constant_register_map_.float_bitmap[register_index / 64] |=
1ull << (register_index % 64);
}
ParseAluInstructionOperandSpecial(op, InstructionStorageSource::kRegister, ParseAluInstructionOperandSpecial(op, InstructionStorageSource::kRegister,
reg2, op.src_negate(3), const_slot, reg2, op.src_negate(3), const_slot,

View File

@ -45,6 +45,9 @@ class ShaderTranslator {
bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; } bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; }
// True if the current shader is a pixel shader. // True if the current shader is a pixel shader.
bool is_pixel_shader() const { return shader_type_ == ShaderType::kPixel; } bool is_pixel_shader() const { return shader_type_ == ShaderType::kPixel; }
// Constant register usage map of the current shader. Its float bitmap is
// accumulated while ALU instructions are parsed, and the float count and
// packed byte length are computed once the blocks have been translated.
const Shader::ConstantRegisterMap& constant_register_map() const {
return constant_register_map_;
}
// True if the current shader addresses general-purpose registers with dynamic // True if the current shader addresses general-purpose registers with dynamic
// indices. // indices.
bool uses_register_dynamic_addressing() const { bool uses_register_dynamic_addressing() const {