[D3D12] Compact float constants and don't split them into pages
This commit is contained in:
parent
eb50ebd885
commit
c4599ff211
|
@ -175,7 +175,8 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
|
|||
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
|
||||
range.NumDescriptors = 1;
|
||||
range.BaseShaderRegister = 2;
|
||||
range.BaseShaderRegister =
|
||||
uint32_t(DxbcShaderTranslator::CbufferRegister::kFetchConstants);
|
||||
range.RegisterSpace = 0;
|
||||
range.OffsetInDescriptorsFromTableStart = 0;
|
||||
}
|
||||
|
@ -189,8 +190,9 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
|
|||
parameter.DescriptorTable.pDescriptorRanges = &range;
|
||||
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_VERTEX;
|
||||
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
|
||||
range.NumDescriptors = 8;
|
||||
range.BaseShaderRegister = 3;
|
||||
range.NumDescriptors = 1;
|
||||
range.BaseShaderRegister =
|
||||
uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
|
||||
range.RegisterSpace = 0;
|
||||
range.OffsetInDescriptorsFromTableStart = 0;
|
||||
}
|
||||
|
@ -204,8 +206,9 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
|
|||
parameter.DescriptorTable.pDescriptorRanges = &range;
|
||||
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_PIXEL;
|
||||
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
|
||||
range.NumDescriptors = 8;
|
||||
range.BaseShaderRegister = 3;
|
||||
range.NumDescriptors = 1;
|
||||
range.BaseShaderRegister =
|
||||
uint32_t(DxbcShaderTranslator::CbufferRegister::kFloatConstants);
|
||||
range.RegisterSpace = 0;
|
||||
range.OffsetInDescriptorsFromTableStart = 0;
|
||||
}
|
||||
|
@ -220,7 +223,8 @@ ID3D12RootSignature* D3D12CommandProcessor::GetRootSignature(
|
|||
parameter.ShaderVisibility = D3D12_SHADER_VISIBILITY_ALL;
|
||||
range.RangeType = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
|
||||
range.NumDescriptors = 2;
|
||||
range.BaseShaderRegister = 0;
|
||||
range.BaseShaderRegister =
|
||||
uint32_t(DxbcShaderTranslator::CbufferRegister::kSystemConstants);
|
||||
range.RegisterSpace = 0;
|
||||
range.OffsetInDescriptorsFromTableStart = 0;
|
||||
}
|
||||
|
@ -741,8 +745,22 @@ void D3D12CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
|
|||
|
||||
if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X &&
|
||||
index <= XE_GPU_REG_SHADER_CONSTANT_511_W) {
|
||||
uint32_t component_index = index - XE_GPU_REG_SHADER_CONSTANT_000_X;
|
||||
cbuffer_bindings_float_[component_index >> 7].up_to_date = false;
|
||||
if (current_queue_frame_ != UINT32_MAX) {
|
||||
uint32_t float_constant_index =
|
||||
(index - XE_GPU_REG_SHADER_CONSTANT_000_X) >> 2;
|
||||
if (float_constant_index >= 256) {
|
||||
float_constant_index -= 256;
|
||||
if (float_constant_map_pixel_[float_constant_index >> 6] &
|
||||
(1ull << (float_constant_index & 63))) {
|
||||
cbuffer_bindings_pixel_float_.up_to_date = false;
|
||||
}
|
||||
} else {
|
||||
if (float_constant_map_vertex_[float_constant_index >> 6] &
|
||||
(1ull << (float_constant_index & 63))) {
|
||||
cbuffer_bindings_vertex_float_.up_to_date = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 &&
|
||||
index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) {
|
||||
cbuffer_bindings_bool_loop_.up_to_date = false;
|
||||
|
@ -1145,10 +1163,12 @@ bool D3D12CommandProcessor::BeginFrame() {
|
|||
current_graphics_root_up_to_date_ = 0;
|
||||
current_view_heap_ = nullptr;
|
||||
current_sampler_heap_ = nullptr;
|
||||
std::memset(float_constant_map_vertex_, 0,
|
||||
sizeof(float_constant_map_vertex_));
|
||||
std::memset(float_constant_map_pixel_, 0, sizeof(float_constant_map_pixel_));
|
||||
cbuffer_bindings_system_.up_to_date = false;
|
||||
for (uint32_t i = 0; i < xe::countof(cbuffer_bindings_float_); ++i) {
|
||||
cbuffer_bindings_float_[i].up_to_date = false;
|
||||
}
|
||||
cbuffer_bindings_vertex_float_.up_to_date = false;
|
||||
cbuffer_bindings_pixel_float_.up_to_date = false;
|
||||
cbuffer_bindings_bool_loop_.up_to_date = false;
|
||||
cbuffer_bindings_fetch_.up_to_date = false;
|
||||
draw_view_full_update_ = 0;
|
||||
|
@ -1657,14 +1677,64 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
|
||||
// Begin updating descriptors.
|
||||
bool write_common_constant_views = false;
|
||||
bool write_vertex_float_constant_view = false;
|
||||
bool write_pixel_float_constant_view = false;
|
||||
bool write_fetch_constant_view = false;
|
||||
bool write_vertex_float_constant_views = false;
|
||||
bool write_pixel_float_constant_views = false;
|
||||
// TODO(Triang3l): Update textures and samplers only if shaders or binding
|
||||
// hash change.
|
||||
bool write_textures = texture_count != 0;
|
||||
bool write_samplers = sampler_count != 0;
|
||||
|
||||
// Check if the float constant layout is still the same and get the counts.
|
||||
const Shader::ConstantRegisterMap& vertex_shader_float_constant_map =
|
||||
vertex_shader->constant_register_map();
|
||||
uint32_t vertex_shader_float_constant_count =
|
||||
vertex_shader_float_constant_map.float_count;
|
||||
// Even if the shader doesn't need any float constants, a valid binding must
|
||||
// still be provided, so if the first draw in the frame with the current root
|
||||
// signature doesn't have float constants at all, still allocate an empty
|
||||
// buffer.
|
||||
uint32_t vertex_shader_float_constant_size =
|
||||
xe::align(uint32_t(std::max(vertex_shader_float_constant_count, 1u) * 4 *
|
||||
sizeof(float)),
|
||||
256u);
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (float_constant_map_vertex_[i] !=
|
||||
vertex_shader_float_constant_map.float_bitmap[i]) {
|
||||
float_constant_map_vertex_[i] =
|
||||
vertex_shader_float_constant_map.float_bitmap[i];
|
||||
// If no float constants at all, we can reuse any buffer for them, so not
|
||||
// invalidating.
|
||||
if (vertex_shader_float_constant_map.float_count != 0) {
|
||||
cbuffer_bindings_vertex_float_.up_to_date = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
uint32_t pixel_shader_float_constant_count = 0;
|
||||
if (pixel_shader != nullptr) {
|
||||
const Shader::ConstantRegisterMap& pixel_shader_float_constant_map =
|
||||
pixel_shader->constant_register_map();
|
||||
pixel_shader_float_constant_count =
|
||||
pixel_shader_float_constant_map.float_count;
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
if (float_constant_map_pixel_[i] !=
|
||||
pixel_shader_float_constant_map.float_bitmap[i]) {
|
||||
float_constant_map_pixel_[i] =
|
||||
pixel_shader_float_constant_map.float_bitmap[i];
|
||||
if (pixel_shader_float_constant_map.float_count != 0) {
|
||||
cbuffer_bindings_pixel_float_.up_to_date = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
std::memset(float_constant_map_pixel_, 0,
|
||||
sizeof(float_constant_map_pixel_));
|
||||
}
|
||||
uint32_t pixel_shader_float_constant_size =
|
||||
xe::align(uint32_t(std::max(pixel_shader_float_constant_count, 1u) * 4 *
|
||||
sizeof(float)),
|
||||
256u);
|
||||
|
||||
// Update constant buffers.
|
||||
if (!cbuffer_bindings_system_.up_to_date) {
|
||||
uint8_t* system_constants = constant_buffer_pool_->RequestFull(
|
||||
|
@ -1678,6 +1748,60 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
cbuffer_bindings_system_.up_to_date = true;
|
||||
write_common_constant_views = true;
|
||||
}
|
||||
if (!cbuffer_bindings_vertex_float_.up_to_date) {
|
||||
uint8_t* float_constants = constant_buffer_pool_->RequestFull(
|
||||
vertex_shader_float_constant_size, nullptr, nullptr,
|
||||
&cbuffer_bindings_vertex_float_.buffer_address);
|
||||
if (float_constants == nullptr) {
|
||||
return false;
|
||||
}
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
uint64_t float_constant_map_entry =
|
||||
vertex_shader_float_constant_map.float_bitmap[i];
|
||||
uint32_t float_constant_index;
|
||||
while (xe::bit_scan_forward(float_constant_map_entry,
|
||||
&float_constant_index)) {
|
||||
float_constant_map_entry &= ~(1ull << float_constant_index);
|
||||
std::memcpy(float_constants,
|
||||
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
|
||||
(float_constant_index << 2)]
|
||||
.f32,
|
||||
4 * sizeof(float));
|
||||
float_constants += 4 * sizeof(float);
|
||||
}
|
||||
}
|
||||
cbuffer_bindings_vertex_float_.up_to_date = true;
|
||||
write_vertex_float_constant_view = true;
|
||||
}
|
||||
if (!cbuffer_bindings_pixel_float_.up_to_date) {
|
||||
uint8_t* float_constants = constant_buffer_pool_->RequestFull(
|
||||
pixel_shader_float_constant_size, nullptr, nullptr,
|
||||
&cbuffer_bindings_pixel_float_.buffer_address);
|
||||
if (float_constants == nullptr) {
|
||||
return false;
|
||||
}
|
||||
if (pixel_shader != nullptr) {
|
||||
const Shader::ConstantRegisterMap& pixel_shader_float_constant_map =
|
||||
pixel_shader->constant_register_map();
|
||||
for (uint32_t i = 0; i < 4; ++i) {
|
||||
uint64_t float_constant_map_entry =
|
||||
pixel_shader_float_constant_map.float_bitmap[i];
|
||||
uint32_t float_constant_index;
|
||||
while (xe::bit_scan_forward(float_constant_map_entry,
|
||||
&float_constant_index)) {
|
||||
float_constant_map_entry &= ~(1ull << float_constant_index);
|
||||
std::memcpy(float_constants,
|
||||
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
|
||||
(float_constant_index << 2)]
|
||||
.f32,
|
||||
4 * sizeof(float));
|
||||
float_constants += 4 * sizeof(float);
|
||||
}
|
||||
}
|
||||
}
|
||||
cbuffer_bindings_pixel_float_.up_to_date = true;
|
||||
write_pixel_float_constant_view = true;
|
||||
}
|
||||
if (!cbuffer_bindings_bool_loop_.up_to_date) {
|
||||
uint32_t* bool_loop_constants =
|
||||
reinterpret_cast<uint32_t*>(constant_buffer_pool_->RequestFull(
|
||||
|
@ -1711,26 +1835,6 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
cbuffer_bindings_fetch_.up_to_date = true;
|
||||
write_fetch_constant_view = true;
|
||||
}
|
||||
for (uint32_t i = 0; i < 16; ++i) {
|
||||
ConstantBufferBinding& float_binding = cbuffer_bindings_float_[i];
|
||||
if (float_binding.up_to_date) {
|
||||
continue;
|
||||
}
|
||||
uint8_t* float_constants = constant_buffer_pool_->RequestFull(
|
||||
512, nullptr, nullptr, &float_binding.buffer_address);
|
||||
if (float_constants == nullptr) {
|
||||
return false;
|
||||
}
|
||||
std::memcpy(float_constants,
|
||||
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 7)].f32,
|
||||
32 * 4 * sizeof(uint32_t));
|
||||
float_binding.up_to_date = true;
|
||||
if (i < 8) {
|
||||
write_vertex_float_constant_views = true;
|
||||
} else {
|
||||
write_pixel_float_constant_views = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate the descriptors.
|
||||
uint32_t view_count_partial_update = 0;
|
||||
|
@ -1738,18 +1842,18 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
// System and bool/loop constants.
|
||||
view_count_partial_update += 2;
|
||||
}
|
||||
if (write_vertex_float_constant_view) {
|
||||
// Vertex float constants.
|
||||
++view_count_partial_update;
|
||||
}
|
||||
if (write_pixel_float_constant_view) {
|
||||
// Pixel float constants.
|
||||
++view_count_partial_update;
|
||||
}
|
||||
if (write_fetch_constant_view) {
|
||||
// Fetch constants.
|
||||
++view_count_partial_update;
|
||||
}
|
||||
if (write_vertex_float_constant_views) {
|
||||
// Vertex float constants.
|
||||
view_count_partial_update += 8;
|
||||
}
|
||||
if (write_pixel_float_constant_views) {
|
||||
// Pixel float constants.
|
||||
view_count_partial_update += 8;
|
||||
}
|
||||
if (write_textures) {
|
||||
view_count_partial_update += texture_count;
|
||||
}
|
||||
|
@ -1783,8 +1887,8 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
draw_view_full_update_ = view_full_update_index;
|
||||
write_common_constant_views = true;
|
||||
write_fetch_constant_view = true;
|
||||
write_vertex_float_constant_views = true;
|
||||
write_pixel_float_constant_views = true;
|
||||
write_vertex_float_constant_view = true;
|
||||
write_pixel_float_constant_view = true;
|
||||
write_textures = texture_count != 0;
|
||||
// If updating fully, write the shared memory descriptor (t0, space1).
|
||||
shared_memory_->CreateSRV(view_cpu_handle);
|
||||
|
@ -1821,9 +1925,33 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
current_graphics_root_up_to_date_ &=
|
||||
~(1u << kRootParameter_CommonConstants);
|
||||
}
|
||||
if (write_vertex_float_constant_view) {
|
||||
gpu_handle_vertex_float_constants_ = view_gpu_handle;
|
||||
// Vertex float constants (b2).
|
||||
constant_buffer_desc.BufferLocation =
|
||||
cbuffer_bindings_vertex_float_.buffer_address;
|
||||
constant_buffer_desc.SizeInBytes = vertex_shader_float_constant_size;
|
||||
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
|
||||
view_cpu_handle.ptr += descriptor_size_view;
|
||||
view_gpu_handle.ptr += descriptor_size_view;
|
||||
current_graphics_root_up_to_date_ &=
|
||||
~(1u << kRootParameter_VertexFloatConstants);
|
||||
}
|
||||
if (write_pixel_float_constant_view) {
|
||||
gpu_handle_pixel_float_constants_ = view_gpu_handle;
|
||||
// Pixel float constants (b2).
|
||||
constant_buffer_desc.BufferLocation =
|
||||
cbuffer_bindings_pixel_float_.buffer_address;
|
||||
constant_buffer_desc.SizeInBytes = pixel_shader_float_constant_size;
|
||||
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
|
||||
view_cpu_handle.ptr += descriptor_size_view;
|
||||
view_gpu_handle.ptr += descriptor_size_view;
|
||||
current_graphics_root_up_to_date_ &=
|
||||
~(1u << kRootParameter_PixelFloatConstants);
|
||||
}
|
||||
if (write_fetch_constant_view) {
|
||||
gpu_handle_fetch_constants_ = view_gpu_handle;
|
||||
// Fetch constants (b2).
|
||||
// Fetch constants (b3).
|
||||
constant_buffer_desc.BufferLocation =
|
||||
cbuffer_bindings_fetch_.buffer_address;
|
||||
constant_buffer_desc.SizeInBytes = 768;
|
||||
|
@ -1832,34 +1960,6 @@ bool D3D12CommandProcessor::UpdateBindings(
|
|||
view_gpu_handle.ptr += descriptor_size_view;
|
||||
current_graphics_root_up_to_date_ &= ~(1u << kRootParameter_FetchConstants);
|
||||
}
|
||||
if (write_vertex_float_constant_views) {
|
||||
gpu_handle_vertex_float_constants_ = view_gpu_handle;
|
||||
// Vertex float constants (b3-b10).
|
||||
for (uint32_t i = 0; i < 8; ++i) {
|
||||
constant_buffer_desc.BufferLocation =
|
||||
cbuffer_bindings_float_[i].buffer_address;
|
||||
constant_buffer_desc.SizeInBytes = 512;
|
||||
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
|
||||
view_cpu_handle.ptr += descriptor_size_view;
|
||||
view_gpu_handle.ptr += descriptor_size_view;
|
||||
}
|
||||
current_graphics_root_up_to_date_ &=
|
||||
~(1u << kRootParameter_VertexFloatConstants);
|
||||
}
|
||||
if (write_pixel_float_constant_views) {
|
||||
gpu_handle_pixel_float_constants_ = view_gpu_handle;
|
||||
// Pixel float constants (b3-b10).
|
||||
for (uint32_t i = 0; i < 8; ++i) {
|
||||
constant_buffer_desc.BufferLocation =
|
||||
cbuffer_bindings_float_[8 + i].buffer_address;
|
||||
constant_buffer_desc.SizeInBytes = 512;
|
||||
device->CreateConstantBufferView(&constant_buffer_desc, view_cpu_handle);
|
||||
view_cpu_handle.ptr += descriptor_size_view;
|
||||
view_gpu_handle.ptr += descriptor_size_view;
|
||||
}
|
||||
current_graphics_root_up_to_date_ &=
|
||||
~(1u << kRootParameter_PixelFloatConstants);
|
||||
}
|
||||
if (write_textures) {
|
||||
if (pixel_texture_count != 0) {
|
||||
assert_true(current_graphics_root_extras_.pixel_textures !=
|
||||
|
|
|
@ -142,28 +142,26 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
// These are always present.
|
||||
|
||||
// Very frequently changed, especially for UI draws, and for models drawn in
|
||||
// multiple parts - contains vertex and texture fetch constants (b2).
|
||||
// multiple parts - contains vertex and texture fetch constants (b3).
|
||||
kRootParameter_FetchConstants,
|
||||
// Quite frequently changed (for one object drawn multiple times, for
|
||||
// instance - may contain projection matrices) - 8 pages of float constants
|
||||
// (b3-b10).
|
||||
// instance - may contain projection matrices) (b2).
|
||||
kRootParameter_VertexFloatConstants,
|
||||
// Less frequently changed (per-material) - 8 pages of float constants
|
||||
// (b3-b10).
|
||||
// Less frequently changed (per-material) (b2).
|
||||
kRootParameter_PixelFloatConstants,
|
||||
// Rarely changed - system constants like viewport and alpha testing (b0)
|
||||
// and loop and bool constants (b1).
|
||||
kRootParameter_CommonConstants,
|
||||
// Never changed - shared memory byte address buffer (t0, space1).
|
||||
// Never changed - shared memory byte address buffer (t0).
|
||||
kRootParameter_SharedMemory,
|
||||
|
||||
kRootParameter_Count_Base,
|
||||
|
||||
// Extra parameter that may or may not exist:
|
||||
// - Pixel textures.
|
||||
// - Pixel samplers.
|
||||
// - Vertex textures.
|
||||
// - Vertex samplers.
|
||||
// - Pixel textures (t1+).
|
||||
// - Pixel samplers (s0+).
|
||||
// - Vertex textures (t1+).
|
||||
// - Vertex samplers (s0+).
|
||||
|
||||
kRootParameter_Count_Max = kRootParameter_Count_Base + 4,
|
||||
};
|
||||
|
@ -271,13 +269,18 @@ class D3D12CommandProcessor : public CommandProcessor {
|
|||
// System shader constants.
|
||||
DxbcShaderTranslator::SystemConstants system_constants_;
|
||||
|
||||
// Float constant usage masks of the last draw call.
|
||||
uint64_t float_constant_map_vertex_[4];
|
||||
uint64_t float_constant_map_pixel_[4];
|
||||
|
||||
// Constant buffer bindings.
|
||||
struct ConstantBufferBinding {
|
||||
D3D12_GPU_VIRTUAL_ADDRESS buffer_address;
|
||||
bool up_to_date;
|
||||
};
|
||||
ConstantBufferBinding cbuffer_bindings_system_;
|
||||
ConstantBufferBinding cbuffer_bindings_float_[16];
|
||||
ConstantBufferBinding cbuffer_bindings_vertex_float_;
|
||||
ConstantBufferBinding cbuffer_bindings_pixel_float_;
|
||||
ConstantBufferBinding cbuffer_bindings_bool_loop_;
|
||||
ConstantBufferBinding cbuffer_bindings_fetch_;
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -26,6 +26,17 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
DxbcShaderTranslator();
|
||||
~DxbcShaderTranslator() override;
|
||||
|
||||
// Constant buffer bindings in space 0.
|
||||
enum class CbufferRegister {
|
||||
// The D3D12 command processor has system and bool/loop constants in a
|
||||
// single descriptor range.
|
||||
// TODO(Triang3l): Make them root CBVs for speed.
|
||||
kSystemConstants,
|
||||
kBoolLoopConstants,
|
||||
kFloatConstants,
|
||||
kFetchConstants,
|
||||
};
|
||||
|
||||
enum : uint32_t {
|
||||
kSysFlag_XYDividedByW = 1,
|
||||
kSysFlag_ZDividedByW = kSysFlag_XYDividedByW << 1,
|
||||
|
@ -38,9 +49,8 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
};
|
||||
|
||||
// IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
|
||||
// - kSysConst enum (registers and first components).
|
||||
// - rdef_constants_.
|
||||
// - rdef_constant_buffers_ system constant buffer size.
|
||||
// - kSysConst enum (indices, registers and first components).
|
||||
// - system_constant_rdef_.
|
||||
// - d3d12/shaders/xenos_draw.hlsli (for geometry shaders).
|
||||
struct SystemConstants {
|
||||
// vec4 0
|
||||
|
@ -144,54 +154,59 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
void ProcessAluInstruction(const ParsedAluInstruction& instr) override;
|
||||
|
||||
private:
|
||||
static constexpr uint32_t kFloatConstantsPerPage = 32;
|
||||
static constexpr uint32_t kFloatConstantPageCount = 8;
|
||||
|
||||
// Constant buffer bindings in space 0.
|
||||
enum class CbufferRegister {
|
||||
kSystemConstants,
|
||||
kBoolLoopConstants,
|
||||
kFetchConstants,
|
||||
kFloatConstantsFirst,
|
||||
kFloatConstantsLast = kFloatConstantsFirst + kFloatConstantPageCount - 1,
|
||||
};
|
||||
|
||||
enum : uint32_t {
|
||||
kSysConst_Flags_Index = 0,
|
||||
kSysConst_Flags_Vec = 0,
|
||||
kSysConst_Flags_Comp = 0,
|
||||
kSysConst_VertexIndexEndian_Index = kSysConst_Flags_Index + 1,
|
||||
kSysConst_VertexIndexEndian_Vec = 0,
|
||||
kSysConst_VertexIndexEndian_Comp = 1,
|
||||
kSysConst_VertexBaseIndex_Index = kSysConst_VertexIndexEndian_Index + 1,
|
||||
kSysConst_VertexBaseIndex_Vec = 0,
|
||||
kSysConst_VertexBaseIndex_Comp = 2,
|
||||
kSysConst_PixelPosReg_Index = kSysConst_VertexBaseIndex_Index + 1,
|
||||
kSysConst_PixelPosReg_Vec = 0,
|
||||
kSysConst_PixelPosReg_Comp = 3,
|
||||
|
||||
kSysConst_NDCScale_Index = kSysConst_PixelPosReg_Index + 1,
|
||||
kSysConst_NDCScale_Vec = 1,
|
||||
kSysConst_NDCScale_Comp = 0,
|
||||
kSysConst_PixelHalfPixelOffset_Index = kSysConst_NDCScale_Index + 1,
|
||||
kSysConst_PixelHalfPixelOffset_Vec = 1,
|
||||
kSysConst_PixelHalfPixelOffset_Comp = 3,
|
||||
|
||||
kSysConst_NDCOffset_Index = kSysConst_PixelHalfPixelOffset_Index + 1,
|
||||
kSysConst_NDCOffset_Vec = 2,
|
||||
kSysConst_NDCOffset_Comp = 0,
|
||||
kSysConst_AlphaTest_Index = kSysConst_NDCOffset_Index + 1,
|
||||
kSysConst_AlphaTest_Vec = 2,
|
||||
kSysConst_AlphaTest_Comp = 3,
|
||||
|
||||
kSysConst_PointSize_Index = kSysConst_AlphaTest_Index + 1,
|
||||
kSysConst_PointSize_Vec = 3,
|
||||
kSysConst_PointSize_Comp = 0,
|
||||
kSysConst_PointSizeMinMax_Index = kSysConst_PointSize_Index + 1,
|
||||
kSysConst_PointSizeMinMax_Vec = 3,
|
||||
kSysConst_PointSizeMinMax_Comp = 2,
|
||||
|
||||
kSysConst_PointScreenToNDC_Index = kSysConst_PointSizeMinMax_Index + 1,
|
||||
kSysConst_PointScreenToNDC_Vec = 4,
|
||||
kSysConst_PointScreenToNDC_Comp = 0,
|
||||
kSysConst_SSAAInvScale_Index = kSysConst_PointScreenToNDC_Index + 1,
|
||||
kSysConst_SSAAInvScale_Vec = 4,
|
||||
kSysConst_SSAAInvScale_Comp = 2,
|
||||
|
||||
kSysConst_AlphaTestRange_Index = kSysConst_SSAAInvScale_Index + 1,
|
||||
kSysConst_AlphaTestRange_Vec = 5,
|
||||
kSysConst_AlphaTestRange_Comp = 0,
|
||||
|
||||
kSysConst_ColorExpBias_Index = kSysConst_AlphaTestRange_Index + 1,
|
||||
kSysConst_ColorExpBias_Vec = 6,
|
||||
|
||||
kSysConst_ColorOutputMap_Index = kSysConst_ColorExpBias_Index + 1,
|
||||
kSysConst_ColorOutputMap_Vec = 7,
|
||||
|
||||
kSysConst_Count = kSysConst_ColorOutputMap_Index + 1
|
||||
};
|
||||
|
||||
static constexpr uint32_t kInterpolatorCount = 16;
|
||||
|
@ -316,7 +331,9 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
|
||||
Type type;
|
||||
uint32_t index;
|
||||
bool is_dynamic_indexed;
|
||||
// If the operand is dynamically indexed directly when it's used as an
|
||||
// operand in DXBC instructions.
|
||||
InstructionStorageAddressingMode addressing_mode;
|
||||
|
||||
uint32_t swizzle;
|
||||
bool is_negated;
|
||||
|
@ -421,15 +438,14 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
kInt,
|
||||
kUint,
|
||||
kUint4,
|
||||
// Float constants - size written dynamically.
|
||||
kFloat4ConstantArray,
|
||||
// Bool constants.
|
||||
kUint4Array8,
|
||||
// Loop constants.
|
||||
kUint4Array32,
|
||||
// Fetch constants.
|
||||
kUint4Array48,
|
||||
// Float constants in one page.
|
||||
kFloatConstantPageArray,
|
||||
kFloatConstantPageStruct,
|
||||
|
||||
kCount,
|
||||
kUnknown = kCount
|
||||
|
@ -440,7 +456,6 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
RdefTypeIndex type;
|
||||
uint32_t offset;
|
||||
};
|
||||
static const RdefStructMember rdef_float_constant_page_member_;
|
||||
|
||||
struct RdefType {
|
||||
// Name ignored for arrays.
|
||||
|
@ -459,73 +474,35 @@ class DxbcShaderTranslator : public ShaderTranslator {
|
|||
};
|
||||
static const RdefType rdef_types_[size_t(RdefTypeIndex::kCount)];
|
||||
|
||||
enum class RdefConstantIndex {
|
||||
kSystemConstantFirst,
|
||||
kSysFlags = kSystemConstantFirst,
|
||||
kSysVertexBaseIndex,
|
||||
kSysVertexIndexEndian,
|
||||
kSysPixelPosReg,
|
||||
kSysNDCScale,
|
||||
kSysPixelHalfPixelOffset,
|
||||
kSysNDCOffset,
|
||||
kSysAlphaTest,
|
||||
kSysPointSize,
|
||||
kSysPointSizeMinMax,
|
||||
kSysPointScreenToNDC,
|
||||
kSysSSAAInvScale,
|
||||
kSysAlphaTestRange,
|
||||
kSysColorExpBias,
|
||||
kSysColorOutputMap,
|
||||
kSystemConstantLast = kSysColorOutputMap,
|
||||
// Number of constant buffer bindings used in this shader - also used for
|
||||
// generation of indices of constant buffers that are optional.
|
||||
uint32_t cbuffer_count_;
|
||||
static constexpr uint32_t kCbufferIndexUnallocated = UINT32_MAX;
|
||||
uint32_t cbuffer_index_system_constants_;
|
||||
uint32_t cbuffer_index_float_constants_;
|
||||
uint32_t cbuffer_index_bool_loop_constants_;
|
||||
uint32_t cbuffer_index_fetch_constants_;
|
||||
|
||||
kBoolConstants,
|
||||
kLoopConstants,
|
||||
|
||||
kFetchConstants,
|
||||
|
||||
kFloatConstants,
|
||||
|
||||
kCount,
|
||||
kSystemConstantCount = kSystemConstantLast - kSystemConstantFirst + 1,
|
||||
};
|
||||
struct RdefConstant {
|
||||
struct SystemConstantRdef {
|
||||
const char* name;
|
||||
RdefTypeIndex type;
|
||||
uint32_t offset;
|
||||
uint32_t size;
|
||||
};
|
||||
static const RdefConstant rdef_constants_[size_t(RdefConstantIndex::kCount)];
|
||||
static_assert(uint32_t(RdefConstantIndex::kCount) <= 64,
|
||||
"Too many constants in all constant buffers - can't use a 64 "
|
||||
"bit vector to store which constants are used");
|
||||
uint64_t rdef_constants_used_;
|
||||
static const SystemConstantRdef system_constant_rdef_[kSysConst_Count];
|
||||
// Mask of system constants (1 << kSysConst_#_Index) used in the shader, so
|
||||
// the remaining ones can be marked as unused in RDEF.
|
||||
uint32_t system_constants_used_;
|
||||
|
||||
enum class RdefConstantBufferIndex {
|
||||
kSystemConstants,
|
||||
kBoolLoopConstants,
|
||||
kFetchConstants,
|
||||
kFloatConstants,
|
||||
// Whether constants are dynamically indexed and need to be marked as such in
|
||||
// dcl_constantBuffer.
|
||||
bool float_constants_dynamic_indexed_;
|
||||
bool bool_loop_constants_dynamic_indexed_;
|
||||
|
||||
kCount
|
||||
};
|
||||
struct RdefConstantBuffer {
|
||||
const char* name;
|
||||
RdefConstantIndex first_constant;
|
||||
uint32_t constant_count;
|
||||
uint32_t size;
|
||||
CbufferRegister register_index;
|
||||
uint32_t binding_count;
|
||||
// True if created like `cbuffer`, false for `ConstantBuffer<T>`.
|
||||
bool user_packed;
|
||||
bool dynamic_indexed;
|
||||
};
|
||||
static const RdefConstantBuffer
|
||||
rdef_constant_buffers_[size_t(RdefConstantBufferIndex::kCount)];
|
||||
|
||||
// Order of dcl_constantbuffer instructions, from most frequently accessed to
|
||||
// least frequently accessed (hint to driver according to the DXBC header).
|
||||
static const RdefConstantBufferIndex
|
||||
constant_buffer_dcl_order_[size_t(RdefConstantBufferIndex::kCount)];
|
||||
// Offsets of float constant indices in shader_code_, for remapping in
|
||||
// CompleteTranslation (initially, at these offsets, guest float constant
|
||||
// indices are written).
|
||||
std::vector<uint32_t> float_constant_index_offsets_;
|
||||
|
||||
// Number of currently allocated Xenia internal r# registers.
|
||||
uint32_t system_temp_count_current_;
|
||||
|
|
|
@ -524,6 +524,9 @@ class Shader {
|
|||
// Each bit corresponds to a storage index [0-255].
|
||||
uint32_t bool_bitmap[256 / 32];
|
||||
|
||||
// Total number of kConstantFloat registers read by the shader.
|
||||
uint32_t float_count;
|
||||
|
||||
// Computed byte count of all registers required when packed.
|
||||
uint32_t packed_byte_length;
|
||||
};
|
||||
|
|
|
@ -143,14 +143,17 @@ bool ShaderTranslator::TranslateInternal(Shader* shader) {
|
|||
|
||||
TranslateBlocks();
|
||||
|
||||
// Compute total bytes used by the register map.
|
||||
// This saves us work later when we need to pack them.
|
||||
// Compute total number of float registers and total bytes used by the
|
||||
// register map. This saves us work later when we need to pack them.
|
||||
constant_register_map_.packed_byte_length = 0;
|
||||
constant_register_map_.float_count = 0;
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
// Each bit indicates a vec4 (4 floats).
|
||||
constant_register_map_.packed_byte_length +=
|
||||
4 * 4 * xe::bit_count(constant_register_map_.float_bitmap[i]);
|
||||
constant_register_map_.float_count +=
|
||||
xe::bit_count(constant_register_map_.float_bitmap[i]);
|
||||
}
|
||||
constant_register_map_.packed_byte_length +=
|
||||
4 * 4 * constant_register_map_.float_count;
|
||||
// Each bit indicates a single word.
|
||||
constant_register_map_.packed_byte_length +=
|
||||
4 * xe::bit_count(constant_register_map_.int_bitmap);
|
||||
|
@ -1303,9 +1306,16 @@ void ShaderTranslator::ParseAluVectorInstruction(
|
|||
// Track constant float register loads.
|
||||
if (i.operands[j].storage_source ==
|
||||
InstructionStorageSource::kConstantFloat) {
|
||||
auto register_index = i.operands[j].storage_index;
|
||||
constant_register_map_.float_bitmap[register_index / 64] |=
|
||||
1ull << (register_index % 64);
|
||||
if (i.operands[j].storage_addressing_mode !=
|
||||
InstructionStorageAddressingMode::kStatic) {
|
||||
// Dynamic addressing makes all constants required.
|
||||
std::memset(constant_register_map_.float_bitmap, 0xFF,
|
||||
sizeof(constant_register_map_.float_bitmap));
|
||||
} else {
|
||||
auto register_index = i.operands[j].storage_index;
|
||||
constant_register_map_.float_bitmap[register_index / 64] |=
|
||||
1ull << (register_index % 64);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1438,8 +1448,15 @@ void ShaderTranslator::ParseAluScalarInstruction(
|
|||
|
||||
// Track constant float register loads.
|
||||
auto register_index = i.operands[0].storage_index;
|
||||
constant_register_map_.float_bitmap[register_index / 64] |=
|
||||
1ull << (register_index % 64);
|
||||
if (i.operands[0].storage_addressing_mode !=
|
||||
InstructionStorageAddressingMode::kStatic) {
|
||||
// Dynamic addressing makes all constants required.
|
||||
std::memset(constant_register_map_.float_bitmap, 0xFF,
|
||||
sizeof(constant_register_map_.float_bitmap));
|
||||
} else {
|
||||
constant_register_map_.float_bitmap[register_index / 64] |=
|
||||
1ull << (register_index % 64);
|
||||
}
|
||||
|
||||
ParseAluInstructionOperandSpecial(op, InstructionStorageSource::kRegister,
|
||||
reg2, op.src_negate(3), const_slot,
|
||||
|
|
|
@ -45,6 +45,9 @@ class ShaderTranslator {
|
|||
bool is_vertex_shader() const { return shader_type_ == ShaderType::kVertex; }
|
||||
// True if the current shader is a pixel shader.
|
||||
bool is_pixel_shader() const { return shader_type_ == ShaderType::kPixel; }
|
||||
const Shader::ConstantRegisterMap& constant_register_map() const {
|
||||
return constant_register_map_;
|
||||
}
|
||||
// True if the current shader addresses general-purpose registers with dynamic
|
||||
// indices.
|
||||
bool uses_register_dynamic_addressing() const {
|
||||
|
|
Loading…
Reference in New Issue