[D3D12] Unify UploadBufferPool page size (2 MB), add alignment parameter
This commit is contained in:
parent
dfbe36a8aa
commit
2cebd3cabe
|
@ -26,23 +26,28 @@
|
||||||
namespace xe {
|
namespace xe {
|
||||||
|
|
||||||
template <typename T, size_t N>
|
template <typename T, size_t N>
|
||||||
size_t countof(T (&arr)[N]) {
|
constexpr size_t countof(T (&arr)[N]) {
|
||||||
return std::extent<T[N]>::value;
|
return std::extent<T[N]>::value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
constexpr bool is_pow2(T value) {
|
||||||
|
return (value & (value - 1)) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Rounds up the given value to the given alignment.
|
// Rounds up the given value to the given alignment.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T align(T value, T alignment) {
|
constexpr T align(T value, T alignment) {
|
||||||
return (value + alignment - 1) & ~(alignment - 1);
|
return (value + alignment - 1) & ~(alignment - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rounds the given number up to the next highest multiple.
|
// Rounds the given number up to the next highest multiple.
|
||||||
template <typename T, typename V>
|
template <typename T, typename V>
|
||||||
T round_up(T value, V multiple) {
|
constexpr T round_up(T value, V multiple) {
|
||||||
return value ? (((value + multiple - 1) / multiple) * multiple) : multiple;
|
return value ? (((value + multiple - 1) / multiple) * multiple) : multiple;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline float saturate(float value) {
|
constexpr float saturate(float value) {
|
||||||
return std::max(std::min(1.0f, value), -1.0f);
|
return std::max(std::min(1.0f, value), -1.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -62,7 +67,7 @@ T next_pow2(T value) {
|
||||||
|
|
||||||
#if __cpp_lib_gcd_lcm
|
#if __cpp_lib_gcd_lcm
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline constexpr T greatest_common_divisor(T a, T b) {
|
constexpr T greatest_common_divisor(T a, T b) {
|
||||||
return std::gcd(a, b);
|
return std::gcd(a, b);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
@ -77,14 +82,14 @@ constexpr T greatest_common_divisor(T a, T b) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline constexpr void reduce_fraction(T& numerator, T& denominator) {
|
constexpr void reduce_fraction(T& numerator, T& denominator) {
|
||||||
auto gcd = greatest_common_divisor(numerator, denominator);
|
auto gcd = greatest_common_divisor(numerator, denominator);
|
||||||
numerator /= gcd;
|
numerator /= gcd;
|
||||||
denominator /= gcd;
|
denominator /= gcd;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline constexpr void reduce_fraction(std::pair<T, T>& fraction) {
|
constexpr void reduce_fraction(std::pair<T, T>& fraction) {
|
||||||
reduce_fraction<T>(fraction.first, fraction.second);
|
reduce_fraction<T>(fraction.first, fraction.second);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -890,8 +890,10 @@ bool D3D12CommandProcessor::SetupContext() {
|
||||||
cvars::d3d12_edram_rov && provider.AreRasterizerOrderedViewsSupported();
|
cvars::d3d12_edram_rov && provider.AreRasterizerOrderedViewsSupported();
|
||||||
|
|
||||||
// Initialize resource binding.
|
// Initialize resource binding.
|
||||||
constant_buffer_pool_ =
|
constant_buffer_pool_ = std::make_unique<ui::d3d12::UploadBufferPool>(
|
||||||
std::make_unique<ui::d3d12::UploadBufferPool>(provider, 1024 * 1024);
|
provider, std::max(ui::d3d12::UploadBufferPool::kDefaultPageSize,
|
||||||
|
uint32_t(D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 4 *
|
||||||
|
sizeof(float))));
|
||||||
if (bindless_resources_used_) {
|
if (bindless_resources_used_) {
|
||||||
D3D12_DESCRIPTOR_HEAP_DESC view_bindless_heap_desc;
|
D3D12_DESCRIPTOR_HEAP_DESC view_bindless_heap_desc;
|
||||||
view_bindless_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
|
view_bindless_heap_desc.Type = D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV;
|
||||||
|
@ -3519,13 +3521,6 @@ bool D3D12CommandProcessor::UpdateBindings(
|
||||||
const Shader::ConstantRegisterMap& float_constant_map_vertex =
|
const Shader::ConstantRegisterMap& float_constant_map_vertex =
|
||||||
vertex_shader->constant_register_map();
|
vertex_shader->constant_register_map();
|
||||||
uint32_t float_constant_count_vertex = float_constant_map_vertex.float_count;
|
uint32_t float_constant_count_vertex = float_constant_map_vertex.float_count;
|
||||||
// Even if the shader doesn't need any float constants, a valid binding must
|
|
||||||
// still be provided, so if the first draw in the frame with the current root
|
|
||||||
// signature doesn't have float constants at all, still allocate an empty
|
|
||||||
// buffer.
|
|
||||||
uint32_t float_constant_size_vertex = xe::align(
|
|
||||||
uint32_t(std::max(float_constant_count_vertex, 1u) * 4 * sizeof(float)),
|
|
||||||
256u);
|
|
||||||
for (uint32_t i = 0; i < 4; ++i) {
|
for (uint32_t i = 0; i < 4; ++i) {
|
||||||
if (current_float_constant_map_vertex_[i] !=
|
if (current_float_constant_map_vertex_[i] !=
|
||||||
float_constant_map_vertex.float_bitmap[i]) {
|
float_constant_map_vertex.float_bitmap[i]) {
|
||||||
|
@ -3557,15 +3552,13 @@ bool D3D12CommandProcessor::UpdateBindings(
|
||||||
std::memset(current_float_constant_map_pixel_, 0,
|
std::memset(current_float_constant_map_pixel_, 0,
|
||||||
sizeof(current_float_constant_map_pixel_));
|
sizeof(current_float_constant_map_pixel_));
|
||||||
}
|
}
|
||||||
uint32_t float_constant_size_pixel = xe::align(
|
|
||||||
uint32_t(std::max(float_constant_count_pixel, 1u) * 4 * sizeof(float)),
|
|
||||||
256u);
|
|
||||||
|
|
||||||
// Write the constant buffer data.
|
// Write the constant buffer data.
|
||||||
if (!cbuffer_binding_system_.up_to_date) {
|
if (!cbuffer_binding_system_.up_to_date) {
|
||||||
uint8_t* system_constants = constant_buffer_pool_->Request(
|
uint8_t* system_constants = constant_buffer_pool_->Request(
|
||||||
frame_current_, xe::align(uint32_t(sizeof(system_constants_)), 256u),
|
frame_current_, sizeof(system_constants_),
|
||||||
nullptr, nullptr, &cbuffer_binding_system_.address);
|
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
|
||||||
|
&cbuffer_binding_system_.address);
|
||||||
if (system_constants == nullptr) {
|
if (system_constants == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -3576,8 +3569,15 @@ bool D3D12CommandProcessor::UpdateBindings(
|
||||||
~(1u << root_parameter_system_constants);
|
~(1u << root_parameter_system_constants);
|
||||||
}
|
}
|
||||||
if (!cbuffer_binding_float_vertex_.up_to_date) {
|
if (!cbuffer_binding_float_vertex_.up_to_date) {
|
||||||
|
// Even if the shader doesn't need any float constants, a valid binding must
|
||||||
|
// still be provided, so if the first draw in the frame with the current
|
||||||
|
// root signature doesn't have float constants at all, still allocate an
|
||||||
|
// empty buffer.
|
||||||
uint8_t* float_constants = constant_buffer_pool_->Request(
|
uint8_t* float_constants = constant_buffer_pool_->Request(
|
||||||
frame_current_, float_constant_size_vertex, nullptr, nullptr,
|
frame_current_,
|
||||||
|
uint32_t(std::max(float_constant_count_vertex, uint32_t(1)) * 4 *
|
||||||
|
sizeof(float)),
|
||||||
|
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
|
||||||
&cbuffer_binding_float_vertex_.address);
|
&cbuffer_binding_float_vertex_.address);
|
||||||
if (float_constants == nullptr) {
|
if (float_constants == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -3603,7 +3603,10 @@ bool D3D12CommandProcessor::UpdateBindings(
|
||||||
}
|
}
|
||||||
if (!cbuffer_binding_float_pixel_.up_to_date) {
|
if (!cbuffer_binding_float_pixel_.up_to_date) {
|
||||||
uint8_t* float_constants = constant_buffer_pool_->Request(
|
uint8_t* float_constants = constant_buffer_pool_->Request(
|
||||||
frame_current_, float_constant_size_pixel, nullptr, nullptr,
|
frame_current_,
|
||||||
|
uint32_t(std::max(float_constant_count_pixel, uint32_t(1)) * 4 *
|
||||||
|
sizeof(float)),
|
||||||
|
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
|
||||||
&cbuffer_binding_float_pixel_.address);
|
&cbuffer_binding_float_pixel_.address);
|
||||||
if (float_constants == nullptr) {
|
if (float_constants == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -3632,28 +3635,33 @@ bool D3D12CommandProcessor::UpdateBindings(
|
||||||
~(1u << root_parameter_float_constants_pixel);
|
~(1u << root_parameter_float_constants_pixel);
|
||||||
}
|
}
|
||||||
if (!cbuffer_binding_bool_loop_.up_to_date) {
|
if (!cbuffer_binding_bool_loop_.up_to_date) {
|
||||||
uint8_t* bool_loop_constants =
|
constexpr uint32_t kBoolLoopConstantsSize = (8 + 32) * sizeof(uint32_t);
|
||||||
constant_buffer_pool_->Request(frame_current_, 256, nullptr, nullptr,
|
uint8_t* bool_loop_constants = constant_buffer_pool_->Request(
|
||||||
&cbuffer_binding_bool_loop_.address);
|
frame_current_, kBoolLoopConstantsSize,
|
||||||
|
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
|
||||||
|
&cbuffer_binding_bool_loop_.address);
|
||||||
if (bool_loop_constants == nullptr) {
|
if (bool_loop_constants == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
std::memcpy(bool_loop_constants,
|
std::memcpy(bool_loop_constants,
|
||||||
&regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
|
&regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
|
||||||
(8 + 32) * sizeof(uint32_t));
|
kBoolLoopConstantsSize);
|
||||||
cbuffer_binding_bool_loop_.up_to_date = true;
|
cbuffer_binding_bool_loop_.up_to_date = true;
|
||||||
current_graphics_root_up_to_date_ &=
|
current_graphics_root_up_to_date_ &=
|
||||||
~(1u << root_parameter_bool_loop_constants);
|
~(1u << root_parameter_bool_loop_constants);
|
||||||
}
|
}
|
||||||
if (!cbuffer_binding_fetch_.up_to_date) {
|
if (!cbuffer_binding_fetch_.up_to_date) {
|
||||||
|
constexpr uint32_t kFetchConstantsSize = 32 * 6 * sizeof(uint32_t);
|
||||||
uint8_t* fetch_constants = constant_buffer_pool_->Request(
|
uint8_t* fetch_constants = constant_buffer_pool_->Request(
|
||||||
frame_current_, 768, nullptr, nullptr, &cbuffer_binding_fetch_.address);
|
frame_current_, kFetchConstantsSize,
|
||||||
|
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
|
||||||
|
&cbuffer_binding_fetch_.address);
|
||||||
if (fetch_constants == nullptr) {
|
if (fetch_constants == nullptr) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
std::memcpy(fetch_constants,
|
std::memcpy(fetch_constants,
|
||||||
&regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
|
&regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
|
||||||
32 * 6 * sizeof(uint32_t));
|
kFetchConstantsSize);
|
||||||
cbuffer_binding_fetch_.up_to_date = true;
|
cbuffer_binding_fetch_.up_to_date = true;
|
||||||
current_graphics_root_up_to_date_ &=
|
current_graphics_root_up_to_date_ &=
|
||||||
~(1u << root_parameter_fetch_constants);
|
~(1u << root_parameter_fetch_constants);
|
||||||
|
@ -3885,12 +3893,10 @@ bool D3D12CommandProcessor::UpdateBindings(
|
||||||
uint32_t* descriptor_indices =
|
uint32_t* descriptor_indices =
|
||||||
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
|
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
|
||||||
frame_current_,
|
frame_current_,
|
||||||
xe::align(
|
uint32_t(std::max(texture_count_vertex + sampler_count_vertex,
|
||||||
uint32_t(std::max(texture_count_vertex + sampler_count_vertex,
|
uint32_t(1)) *
|
||||||
uint32_t(1)) *
|
sizeof(uint32_t)),
|
||||||
sizeof(uint32_t)),
|
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
|
||||||
uint32_t(256)),
|
|
||||||
nullptr, nullptr,
|
|
||||||
&cbuffer_binding_descriptor_indices_vertex_.address));
|
&cbuffer_binding_descriptor_indices_vertex_.address));
|
||||||
if (!descriptor_indices) {
|
if (!descriptor_indices) {
|
||||||
return false;
|
return false;
|
||||||
|
@ -3923,12 +3929,10 @@ bool D3D12CommandProcessor::UpdateBindings(
|
||||||
uint32_t* descriptor_indices =
|
uint32_t* descriptor_indices =
|
||||||
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
|
reinterpret_cast<uint32_t*>(constant_buffer_pool_->Request(
|
||||||
frame_current_,
|
frame_current_,
|
||||||
xe::align(
|
uint32_t(std::max(texture_count_pixel + sampler_count_pixel,
|
||||||
uint32_t(std::max(texture_count_pixel + sampler_count_pixel,
|
uint32_t(1)) *
|
||||||
uint32_t(1)) *
|
sizeof(uint32_t)),
|
||||||
sizeof(uint32_t)),
|
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
|
||||||
uint32_t(256)),
|
|
||||||
nullptr, nullptr,
|
|
||||||
&cbuffer_binding_descriptor_indices_pixel_.address));
|
&cbuffer_binding_descriptor_indices_pixel_.address));
|
||||||
if (!descriptor_indices) {
|
if (!descriptor_indices) {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -52,11 +52,13 @@ bool PrimitiveConverter::Initialize() {
|
||||||
D3D12_HEAP_FLAGS heap_flag_create_not_zeroed =
|
D3D12_HEAP_FLAGS heap_flag_create_not_zeroed =
|
||||||
provider.GetHeapFlagCreateNotZeroed();
|
provider.GetHeapFlagCreateNotZeroed();
|
||||||
|
|
||||||
// There can be at most 65535 indices in a Xenos draw call, but they can be up
|
// There can be at most 65535 indices in a Xenos draw call (16 bit index
|
||||||
// to 4 bytes large, and conversion can add more indices (almost triple the
|
// count), but they can be up to 4 bytes large, and conversion can add more
|
||||||
// count for triangle strips, for instance).
|
// indices (almost triple the count for triangle strips or fans, for
|
||||||
buffer_pool_ =
|
// instance).
|
||||||
std::make_unique<ui::d3d12::UploadBufferPool>(provider, 4 * 1024 * 1024);
|
buffer_pool_ = std::make_unique<ui::d3d12::UploadBufferPool>(
|
||||||
|
provider, std::max(uint32_t(65535 * 3 * sizeof(uint32_t)),
|
||||||
|
ui::d3d12::UploadBufferPool::kDefaultPageSize));
|
||||||
|
|
||||||
// Create the static index buffer for non-indexed drawing.
|
// Create the static index buffer for non-indexed drawing.
|
||||||
D3D12_RESOURCE_DESC static_ib_desc;
|
D3D12_RESOURCE_DESC static_ib_desc;
|
||||||
|
@ -697,8 +699,8 @@ void* PrimitiveConverter::AllocateIndices(
|
||||||
}
|
}
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
|
D3D12_GPU_VIRTUAL_ADDRESS gpu_address;
|
||||||
uint8_t* mapping =
|
uint8_t* mapping =
|
||||||
buffer_pool_->Request(command_processor_.GetCurrentFrame(), size, nullptr,
|
buffer_pool_->Request(command_processor_.GetCurrentFrame(), size, 16,
|
||||||
nullptr, &gpu_address);
|
nullptr, nullptr, &gpu_address);
|
||||||
if (mapping == nullptr) {
|
if (mapping == nullptr) {
|
||||||
XELOGE("Failed to allocate space for {} converted {}-bit vertex indices",
|
XELOGE("Failed to allocate space for {} converted {}-bit vertex indices",
|
||||||
count, format == xenos::IndexFormat::kInt32 ? 32 : 16);
|
count, format == xenos::IndexFormat::kInt32 ? 32 : 16);
|
||||||
|
|
|
@ -1507,7 +1507,7 @@ void RenderTargetCache::RestoreEdramSnapshot(const void* snapshot) {
|
||||||
ID3D12Resource* upload_buffer;
|
ID3D12Resource* upload_buffer;
|
||||||
uint32_t upload_buffer_offset;
|
uint32_t upload_buffer_offset;
|
||||||
void* upload_buffer_mapping = edram_snapshot_restore_pool_->Request(
|
void* upload_buffer_mapping = edram_snapshot_restore_pool_->Request(
|
||||||
command_processor_.GetCurrentSubmission(), xenos::kEdramSizeBytes,
|
command_processor_.GetCurrentSubmission(), xenos::kEdramSizeBytes, 1,
|
||||||
&upload_buffer, &upload_buffer_offset, nullptr);
|
&upload_buffer, &upload_buffer_offset, nullptr);
|
||||||
if (!upload_buffer_mapping) {
|
if (!upload_buffer_mapping) {
|
||||||
XELOGE("Failed to get a buffer for restoring a EDRAM snapshot");
|
XELOGE("Failed to get a buffer for restoring a EDRAM snapshot");
|
||||||
|
|
|
@ -154,8 +154,8 @@ bool SharedMemory::Initialize() {
|
||||||
system_page_flags_.resize((page_count_ + 63) / 64);
|
system_page_flags_.resize((page_count_ + 63) / 64);
|
||||||
|
|
||||||
upload_buffer_pool_ = std::make_unique<ui::d3d12::UploadBufferPool>(
|
upload_buffer_pool_ = std::make_unique<ui::d3d12::UploadBufferPool>(
|
||||||
provider,
|
provider, xe::align(ui::d3d12::UploadBufferPool::kDefaultPageSize,
|
||||||
xe::align(uint32_t(4 * 1024 * 1024), uint32_t(1) << page_size_log2_));
|
uint32_t(1) << page_size_log2_));
|
||||||
|
|
||||||
memory_invalidation_callback_handle_ =
|
memory_invalidation_callback_handle_ =
|
||||||
memory_.RegisterPhysicalMemoryInvalidationCallback(
|
memory_.RegisterPhysicalMemoryInvalidationCallback(
|
||||||
|
@ -442,8 +442,9 @@ bool SharedMemory::RequestRange(uint32_t start, uint32_t length) {
|
||||||
uint32_t upload_buffer_offset, upload_buffer_size;
|
uint32_t upload_buffer_offset, upload_buffer_size;
|
||||||
uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial(
|
uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial(
|
||||||
command_processor_.GetCurrentSubmission(),
|
command_processor_.GetCurrentSubmission(),
|
||||||
upload_range_length << page_size_log2_, &upload_buffer,
|
upload_range_length << page_size_log2_,
|
||||||
&upload_buffer_offset, &upload_buffer_size, nullptr);
|
uint32_t(1) << page_size_log2_, &upload_buffer, &upload_buffer_offset,
|
||||||
|
&upload_buffer_size, nullptr);
|
||||||
if (upload_buffer_mapping == nullptr) {
|
if (upload_buffer_mapping == nullptr) {
|
||||||
XELOGE("Shared memory: Failed to get an upload buffer");
|
XELOGE("Shared memory: Failed to get an upload buffer");
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -2396,9 +2396,9 @@ bool TextureCache::LoadTextureData(Texture* texture) {
|
||||||
}
|
}
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS cbuffer_gpu_address;
|
D3D12_GPU_VIRTUAL_ADDRESS cbuffer_gpu_address;
|
||||||
uint8_t* cbuffer_mapping = cbuffer_pool.Request(
|
uint8_t* cbuffer_mapping = cbuffer_pool.Request(
|
||||||
command_processor_.GetCurrentFrame(),
|
command_processor_.GetCurrentFrame(), sizeof(load_constants),
|
||||||
xe::align(uint32_t(sizeof(load_constants)), uint32_t(256)), nullptr,
|
D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, nullptr, nullptr,
|
||||||
nullptr, &cbuffer_gpu_address);
|
&cbuffer_gpu_address);
|
||||||
if (cbuffer_mapping == nullptr) {
|
if (cbuffer_mapping == nullptr) {
|
||||||
command_processor_.ReleaseScratchGPUBuffer(copy_buffer,
|
command_processor_.ReleaseScratchGPUBuffer(copy_buffer,
|
||||||
copy_buffer_state);
|
copy_buffer_state);
|
||||||
|
|
|
@ -287,8 +287,7 @@ bool D3D12ImmediateDrawer::Initialize() {
|
||||||
device->CreateSampler(&sampler_desc, sampler_handle);
|
device->CreateSampler(&sampler_desc, sampler_handle);
|
||||||
|
|
||||||
// Create pools for draws.
|
// Create pools for draws.
|
||||||
vertex_buffer_pool_ =
|
vertex_buffer_pool_ = std::make_unique<UploadBufferPool>(provider);
|
||||||
std::make_unique<UploadBufferPool>(provider, 2 * 1024 * 1024);
|
|
||||||
texture_descriptor_pool_ = std::make_unique<DescriptorHeapPool>(
|
texture_descriptor_pool_ = std::make_unique<DescriptorHeapPool>(
|
||||||
device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2048);
|
device, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV, 2048);
|
||||||
texture_descriptor_pool_heap_index_ = DescriptorHeapPool::kHeapIndexInvalid;
|
texture_descriptor_pool_heap_index_ = DescriptorHeapPool::kHeapIndexInvalid;
|
||||||
|
@ -506,8 +505,8 @@ void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) {
|
||||||
vertex_buffer_view.SizeInBytes =
|
vertex_buffer_view.SizeInBytes =
|
||||||
batch.vertex_count * uint32_t(sizeof(ImmediateVertex));
|
batch.vertex_count * uint32_t(sizeof(ImmediateVertex));
|
||||||
void* vertex_buffer_mapping = vertex_buffer_pool_->Request(
|
void* vertex_buffer_mapping = vertex_buffer_pool_->Request(
|
||||||
current_fence_value, vertex_buffer_view.SizeInBytes, nullptr, nullptr,
|
current_fence_value, vertex_buffer_view.SizeInBytes, sizeof(uint32_t),
|
||||||
&vertex_buffer_view.BufferLocation);
|
nullptr, nullptr, &vertex_buffer_view.BufferLocation);
|
||||||
if (vertex_buffer_mapping == nullptr) {
|
if (vertex_buffer_mapping == nullptr) {
|
||||||
XELOGE("Failed to get a buffer for {} vertices in the immediate drawer",
|
XELOGE("Failed to get a buffer for {} vertices in the immediate drawer",
|
||||||
batch.vertex_count);
|
batch.vertex_count);
|
||||||
|
@ -524,8 +523,7 @@ void D3D12ImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) {
|
||||||
index_buffer_view.SizeInBytes = batch.index_count * sizeof(uint16_t);
|
index_buffer_view.SizeInBytes = batch.index_count * sizeof(uint16_t);
|
||||||
index_buffer_view.Format = DXGI_FORMAT_R16_UINT;
|
index_buffer_view.Format = DXGI_FORMAT_R16_UINT;
|
||||||
void* index_buffer_mapping = vertex_buffer_pool_->Request(
|
void* index_buffer_mapping = vertex_buffer_pool_->Request(
|
||||||
current_fence_value,
|
current_fence_value, index_buffer_view.SizeInBytes, sizeof(uint16_t),
|
||||||
xe::align(index_buffer_view.SizeInBytes, UINT(sizeof(uint32_t))),
|
|
||||||
nullptr, nullptr, &index_buffer_view.BufferLocation);
|
nullptr, nullptr, &index_buffer_view.BufferLocation);
|
||||||
if (index_buffer_mapping == nullptr) {
|
if (index_buffer_mapping == nullptr) {
|
||||||
XELOGE("Failed to get a buffer for {} indices in the immediate drawer",
|
XELOGE("Failed to get a buffer for {} indices in the immediate drawer",
|
||||||
|
|
|
@ -13,14 +13,20 @@
|
||||||
|
|
||||||
#include "xenia/base/assert.h"
|
#include "xenia/base/assert.h"
|
||||||
#include "xenia/base/logging.h"
|
#include "xenia/base/logging.h"
|
||||||
|
#include "xenia/base/math.h"
|
||||||
#include "xenia/ui/d3d12/d3d12_util.h"
|
#include "xenia/ui/d3d12/d3d12_util.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace ui {
|
namespace ui {
|
||||||
namespace d3d12 {
|
namespace d3d12 {
|
||||||
|
|
||||||
|
// Align to D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT not to waste any space if
|
||||||
|
// it's smaller (the size of the heap backing the buffer will be aligned to
|
||||||
|
// D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT anyway).
|
||||||
UploadBufferPool::UploadBufferPool(D3D12Provider& provider, uint32_t page_size)
|
UploadBufferPool::UploadBufferPool(D3D12Provider& provider, uint32_t page_size)
|
||||||
: provider_(provider), page_size_(page_size) {}
|
: provider_(provider),
|
||||||
|
page_size_(xe::align(
|
||||||
|
page_size, uint32_t(D3D12_DEFAULT_RESOURCE_PLACEMENT_ALIGNMENT))) {}
|
||||||
|
|
||||||
UploadBufferPool::~UploadBufferPool() { ClearCache(); }
|
UploadBufferPool::~UploadBufferPool() { ClearCache(); }
|
||||||
|
|
||||||
|
@ -68,9 +74,13 @@ void UploadBufferPool::ClearCache() {
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t* UploadBufferPool::Request(uint64_t submission_index, uint32_t size,
|
uint8_t* UploadBufferPool::Request(uint64_t submission_index, uint32_t size,
|
||||||
|
uint32_t alignment,
|
||||||
ID3D12Resource** buffer_out,
|
ID3D12Resource** buffer_out,
|
||||||
uint32_t* offset_out,
|
uint32_t* offset_out,
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) {
|
D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) {
|
||||||
|
assert_not_zero(alignment);
|
||||||
|
assert_true(xe::is_pow2(alignment));
|
||||||
|
size = xe::align(size, alignment);
|
||||||
assert_true(size <= page_size_);
|
assert_true(size <= page_size_);
|
||||||
if (size > page_size_) {
|
if (size > page_size_) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
|
@ -79,7 +89,8 @@ uint8_t* UploadBufferPool::Request(uint64_t submission_index, uint32_t size,
|
||||||
submission_index >= writable_first_->last_submission_index);
|
submission_index >= writable_first_->last_submission_index);
|
||||||
assert_true(!submitted_last_ ||
|
assert_true(!submitted_last_ ||
|
||||||
submission_index >= submitted_last_->last_submission_index);
|
submission_index >= submitted_last_->last_submission_index);
|
||||||
if (page_size_ - current_page_used_ < size || !writable_first_) {
|
uint32_t current_page_used_aligned = xe::align(current_page_used_, alignment);
|
||||||
|
if (current_page_used_aligned + size > page_size_ || !writable_first_) {
|
||||||
// Start a new page if can't fit all the bytes or don't have an open page.
|
// Start a new page if can't fit all the bytes or don't have an open page.
|
||||||
if (writable_first_) {
|
if (writable_first_) {
|
||||||
// Close the page that was current.
|
// Close the page that was current.
|
||||||
|
@ -128,33 +139,39 @@ uint8_t* UploadBufferPool::Request(uint64_t submission_index, uint32_t size,
|
||||||
writable_last_ = writable_first_;
|
writable_last_ = writable_first_;
|
||||||
}
|
}
|
||||||
current_page_used_ = 0;
|
current_page_used_ = 0;
|
||||||
|
current_page_used_aligned = 0;
|
||||||
}
|
}
|
||||||
writable_first_->last_submission_index = submission_index;
|
writable_first_->last_submission_index = submission_index;
|
||||||
if (buffer_out) {
|
if (buffer_out) {
|
||||||
*buffer_out = writable_first_->buffer;
|
*buffer_out = writable_first_->buffer;
|
||||||
}
|
}
|
||||||
if (offset_out) {
|
if (offset_out) {
|
||||||
*offset_out = current_page_used_;
|
*offset_out = current_page_used_aligned;
|
||||||
}
|
}
|
||||||
if (gpu_address_out) {
|
if (gpu_address_out) {
|
||||||
*gpu_address_out = writable_first_->gpu_address + current_page_used_;
|
*gpu_address_out = writable_first_->gpu_address + current_page_used_aligned;
|
||||||
}
|
}
|
||||||
uint8_t* mapping =
|
uint8_t* mapping = reinterpret_cast<uint8_t*>(writable_first_->mapping) +
|
||||||
reinterpret_cast<uint8_t*>(writable_first_->mapping) + current_page_used_;
|
current_page_used_aligned;
|
||||||
current_page_used_ += size;
|
current_page_used_ = current_page_used_aligned + size;
|
||||||
return mapping;
|
return mapping;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t* UploadBufferPool::RequestPartial(
|
uint8_t* UploadBufferPool::RequestPartial(
|
||||||
uint64_t submission_index, uint32_t size, ID3D12Resource** buffer_out,
|
uint64_t submission_index, uint32_t size, uint32_t alignment,
|
||||||
uint32_t* offset_out, uint32_t* size_out,
|
ID3D12Resource** buffer_out, uint32_t* offset_out, uint32_t* size_out,
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) {
|
D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out) {
|
||||||
|
assert_not_zero(alignment);
|
||||||
|
assert_true(xe::is_pow2(alignment));
|
||||||
|
size = xe::align(size, alignment);
|
||||||
size = std::min(size, page_size_);
|
size = std::min(size, page_size_);
|
||||||
if (current_page_used_ < page_size_) {
|
uint32_t current_page_used_aligned = xe::align(current_page_used_, alignment);
|
||||||
size = std::min(size, page_size_ - current_page_used_);
|
if (current_page_used_aligned + alignment <= page_size_) {
|
||||||
|
size = std::min(
|
||||||
|
size, (page_size_ - current_page_used_aligned) & ~(alignment - 1));
|
||||||
}
|
}
|
||||||
uint8_t* mapping =
|
uint8_t* mapping = Request(submission_index, size, alignment, buffer_out,
|
||||||
Request(submission_index, size, buffer_out, offset_out, gpu_address_out);
|
offset_out, gpu_address_out);
|
||||||
if (!mapping) {
|
if (!mapping) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,7 +23,12 @@ namespace d3d12 {
|
||||||
|
|
||||||
class UploadBufferPool {
|
class UploadBufferPool {
|
||||||
public:
|
public:
|
||||||
UploadBufferPool(D3D12Provider& provider, uint32_t page_size);
|
// Taken from the Direct3D 12 MiniEngine sample (LinearAllocator
|
||||||
|
// kCpuAllocatorPageSize). Large enough for most cases.
|
||||||
|
static constexpr uint32_t kDefaultPageSize = 2 * 1024 * 1024;
|
||||||
|
|
||||||
|
UploadBufferPool(D3D12Provider& provider,
|
||||||
|
uint32_t page_size = kDefaultPageSize);
|
||||||
~UploadBufferPool();
|
~UploadBufferPool();
|
||||||
|
|
||||||
void Reclaim(uint64_t completed_submission_index);
|
void Reclaim(uint64_t completed_submission_index);
|
||||||
|
@ -31,13 +36,13 @@ class UploadBufferPool {
|
||||||
|
|
||||||
// Request to write data in a single piece, creating a new page if the current
|
// Request to write data in a single piece, creating a new page if the current
|
||||||
// one doesn't have enough free space.
|
// one doesn't have enough free space.
|
||||||
uint8_t* Request(uint64_t submission_index, uint32_t size,
|
uint8_t* Request(uint64_t submission_index, uint32_t size, uint32_t alignment,
|
||||||
ID3D12Resource** buffer_out, uint32_t* offset_out,
|
ID3D12Resource** buffer_out, uint32_t* offset_out,
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out);
|
D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out);
|
||||||
// Request to write data in multiple parts, filling the buffer entirely.
|
// Request to write data in multiple parts, filling the buffer entirely.
|
||||||
uint8_t* RequestPartial(uint64_t submission_index, uint32_t size,
|
uint8_t* RequestPartial(uint64_t submission_index, uint32_t size,
|
||||||
ID3D12Resource** buffer_out, uint32_t* offset_out,
|
uint32_t alignment, ID3D12Resource** buffer_out,
|
||||||
uint32_t* size_out,
|
uint32_t* offset_out, uint32_t* size_out,
|
||||||
D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out);
|
D3D12_GPU_VIRTUAL_ADDRESS* gpu_address_out);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
Loading…
Reference in New Issue