[Vulkan] Texture loading

Triang3l 2022-05-24 22:42:22 +03:00
parent 9c445d397b
commit 6c9a06b2da
6 changed files with 1186 additions and 215 deletions

src/xenia/gpu/vulkan/deferred_command_buffer.cc

@@ -154,6 +154,17 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) {
xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy))));
} break;
case Command::kVkCopyBufferToImage: {
auto& args = *reinterpret_cast<const ArgsVkCopyBufferToImage*>(stream);
dfn.vkCmdCopyBufferToImage(
command_buffer, args.src_buffer, args.dst_image,
args.dst_image_layout, args.region_count,
reinterpret_cast<const VkBufferImageCopy*>(
reinterpret_cast<const uint8_t*>(stream) +
xe::align(sizeof(ArgsVkCopyBufferToImage),
alignof(VkBufferImageCopy))));
} break;
case Command::kVkDispatch: {
auto& args = *reinterpret_cast<const ArgsVkDispatch*>(stream);
dfn.vkCmdDispatch(command_buffer, args.group_count_x,

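The kVkCopyBufferToImage case above reads each deferred command as a fixed-size argument header followed by a trailing array, with the header size rounded up so the array lands on a correctly aligned offset. A minimal standalone sketch of that layout — Align and the struct names are stand-ins for xe::align and the Vulkan types, not Xenia's actual code:

#include <cstddef>
#include <cstdint>

// Stand-in for xe::align: round value up to a multiple of alignment.
constexpr size_t Align(size_t value, size_t alignment) {
  return (value + alignment - 1) / alignment * alignment;
}

struct ArgsHeader {           // stands in for ArgsVkCopyBufferToImage
  uint64_t src_buffer;        // VkBuffer
  uint64_t dst_image;         // VkImage
  uint32_t dst_image_layout;  // VkImageLayout
  uint32_t region_count;
};

struct Region {               // stands in for VkBufferImageCopy
  uint64_t buffer_offset;
  uint32_t image_extent[3];
};

// The regions start right after the aligned header - exactly the offset the
// reinterpret_casts in Execute compute.
const Region* GetTrailingRegions(const void* stream) {
  return reinterpret_cast<const Region*>(
      reinterpret_cast<const uint8_t*>(stream) +
      Align(sizeof(ArgsHeader), alignof(Region)));
}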
src/xenia/gpu/vulkan/deferred_command_buffer.h

@@ -206,6 +206,31 @@ class DeferredCommandBuffer {
regions, sizeof(VkBufferCopy) * region_count);
}
VkBufferImageCopy* CmdCopyBufferToImageEmplace(VkBuffer src_buffer,
VkImage dst_image,
VkImageLayout dst_image_layout,
uint32_t region_count) {
const size_t header_size =
xe::align(sizeof(ArgsVkCopyBufferToImage), alignof(VkBufferImageCopy));
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkCopyBufferToImage,
header_size + sizeof(VkBufferImageCopy) * region_count));
auto& args = *reinterpret_cast<ArgsVkCopyBufferToImage*>(args_ptr);
args.src_buffer = src_buffer;
args.dst_image = dst_image;
args.dst_image_layout = dst_image_layout;
args.region_count = region_count;
return reinterpret_cast<VkBufferImageCopy*>(args_ptr + header_size);
}
void CmdVkCopyBufferToImage(VkBuffer src_buffer, VkImage dst_image,
VkImageLayout dst_image_layout,
uint32_t region_count,
const VkBufferImageCopy* regions) {
std::memcpy(CmdCopyBufferToImageEmplace(src_buffer, dst_image,
dst_image_layout, region_count),
regions, sizeof(VkBufferImageCopy) * region_count);
}
void CmdVkDispatch(uint32_t group_count_x, uint32_t group_count_y,
uint32_t group_count_z) {
auto& args = *reinterpret_cast<ArgsVkDispatch*>(
@@ -342,6 +367,7 @@ class DeferredCommandBuffer {
kVkClearAttachments,
kVkClearColorImage,
kVkCopyBuffer,
kVkCopyBufferToImage,
kVkDispatch,
kVkDraw,
kVkDrawIndexed,
@@ -428,6 +454,15 @@ class DeferredCommandBuffer {
static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t));
};
struct ArgsVkCopyBufferToImage {
VkBuffer src_buffer;
VkImage dst_image;
VkImageLayout dst_image_layout;
uint32_t region_count;
// Followed by aligned VkBufferImageCopy[].
static_assert(alignof(VkBufferImageCopy) <= alignof(uintmax_t));
};
struct ArgsVkDispatch {
uint32_t group_count_x;
uint32_t group_count_y;

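The emplace variant above lets the caller construct the VkBufferImageCopy array directly in the deferred stream, avoiding the temporary array that CmdVkCopyBufferToImage copies with memcpy. A hypothetical recording sketch — RecordTextureUpload and all of its parameters are placeholders, not names from this commit (requires <vulkan/vulkan.h> and <algorithm>):

void RecordTextureUpload(DeferredCommandBuffer& deferred_command_buffer,
                         VkBuffer staging_buffer, VkImage texture_image,
                         uint32_t width, uint32_t height, uint32_t mip_count,
                         const VkDeviceSize* mip_offsets) {
  VkBufferImageCopy* regions =
      deferred_command_buffer.CmdCopyBufferToImageEmplace(
          staging_buffer, texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
          mip_count);
  for (uint32_t i = 0; i < mip_count; ++i) {
    VkBufferImageCopy& region = regions[i];
    region.bufferOffset = mip_offsets[i];
    region.bufferRowLength = 0;    // 0 = rows tightly packed
    region.bufferImageHeight = 0;  // 0 = slices tightly packed
    region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, i, 0, 1};
    region.imageOffset = {0, 0, 0};
    region.imageExtent = {std::max(width >> i, 1u), std::max(height >> i, 1u),
                          1u};
  }
}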
src/xenia/gpu/vulkan/vulkan_command_processor.cc

@@ -306,6 +306,7 @@ bool VulkanCommandProcessor::SetupContext() {
return false;
}
// Requires the transient descriptor set layouts.
// TODO(Triang3l): Actual draw resolution scale.
texture_cache_ =
VulkanTextureCache::Create(*register_file_, *shared_memory_, 1, 1, *this,
@@ -603,10 +604,11 @@ void VulkanCommandProcessor::ShutdownContext() {
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
for (const auto& framebuffer_pair : swap_framebuffers_outdated_) {
dfn.vkDestroyFramebuffer(device, framebuffer_pair.second, nullptr);
}
swap_framebuffers_outdated_.clear();
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
scratch_buffer_);
ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
scratch_buffer_memory_);
for (SwapFramebuffer& swap_framebuffer : swap_framebuffers_) {
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyFramebuffer, device,
swap_framebuffer.framebuffer);
@@ -675,6 +677,19 @@ void VulkanCommandProcessor::ShutdownContext() {
}
command_buffers_writable_.clear();
for (const auto& destroy_pair : destroy_framebuffers_) {
dfn.vkDestroyFramebuffer(device, destroy_pair.second, nullptr);
}
destroy_framebuffers_.clear();
for (const auto& destroy_pair : destroy_buffers_) {
dfn.vkDestroyBuffer(device, destroy_pair.second, nullptr);
}
destroy_buffers_.clear();
for (const auto& destroy_pair : destroy_memory_) {
dfn.vkFreeMemory(device, destroy_pair.second, nullptr);
}
destroy_memory_.clear();
std::memset(closed_frame_submissions_, 0, sizeof(closed_frame_submissions_));
frame_completed_ = 0;
frame_current_ = 1;
@@ -843,7 +858,7 @@ void VulkanCommandProcessor::IssueSwap(uint32_t frontbuffer_ptr,
dfn.vkDestroyFramebuffer(device, new_swap_framebuffer.framebuffer,
nullptr);
} else {
swap_framebuffers_outdated_.emplace_back(
destroy_framebuffers_.emplace_back(
new_swap_framebuffer.last_submission,
new_swap_framebuffer.framebuffer);
}
@@ -1387,6 +1402,83 @@ VulkanCommandProcessor::GetPipelineLayout(size_t texture_count_pixel,
return &emplaced_pair.first->second;
}
VulkanCommandProcessor::ScratchBufferAcquisition
VulkanCommandProcessor::AcquireScratchGpuBuffer(
VkDeviceSize size, VkPipelineStageFlags initial_stage_mask,
VkAccessFlags initial_access_mask) {
assert_true(submission_open_);
assert_false(scratch_buffer_used_);
if (!submission_open_ || scratch_buffer_used_ || !size) {
return ScratchBufferAcquisition();
}
uint64_t submission_current = GetCurrentSubmission();
if (scratch_buffer_ != VK_NULL_HANDLE && size <= scratch_buffer_size_) {
// Already used previously - transition.
PushBufferMemoryBarrier(scratch_buffer_, 0, VK_WHOLE_SIZE,
scratch_buffer_last_stage_mask_, initial_stage_mask,
scratch_buffer_last_access_mask_,
initial_access_mask);
scratch_buffer_last_stage_mask_ = initial_stage_mask;
scratch_buffer_last_access_mask_ = initial_access_mask;
scratch_buffer_last_usage_submission_ = submission_current;
scratch_buffer_used_ = true;
return ScratchBufferAcquisition(*this, scratch_buffer_, initial_stage_mask,
initial_access_mask);
}
size = xe::align(size, kScratchBufferSizeIncrement);
const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
VkDeviceMemory new_scratch_buffer_memory;
VkBuffer new_scratch_buffer;
// VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT for
// texture loading.
if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
provider, size,
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
ui::vulkan::util::MemoryPurpose::kDeviceLocal, new_scratch_buffer,
new_scratch_buffer_memory)) {
XELOGE(
"VulkanCommandProcessor: Failed to create a {} MB scratch GPU buffer",
size >> 20);
return ScratchBufferAcquisition();
}
if (submission_completed_ >= scratch_buffer_last_usage_submission_) {
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
if (scratch_buffer_ != VK_NULL_HANDLE) {
dfn.vkDestroyBuffer(device, scratch_buffer_, nullptr);
}
if (scratch_buffer_memory_ != VK_NULL_HANDLE) {
dfn.vkFreeMemory(device, scratch_buffer_memory_, nullptr);
}
} else {
if (scratch_buffer_ != VK_NULL_HANDLE) {
destroy_buffers_.emplace_back(scratch_buffer_last_usage_submission_,
scratch_buffer_);
}
if (scratch_buffer_memory_ != VK_NULL_HANDLE) {
destroy_memory_.emplace_back(scratch_buffer_last_usage_submission_,
scratch_buffer_memory_);
}
}
scratch_buffer_memory_ = new_scratch_buffer_memory;
scratch_buffer_ = new_scratch_buffer;
scratch_buffer_size_ = size;
// Not used yet, no need for a barrier.
scratch_buffer_last_stage_mask_ = initial_stage_mask;
scratch_buffer_last_access_mask_ = initial_access_mask;
scratch_buffer_last_usage_submission_ = submission_current;
scratch_buffer_used_ = true;
return ScratchBufferAcquisition(*this, new_scratch_buffer, initial_stage_mask,
initial_access_mask);
}
void VulkanCommandProcessor::BindExternalGraphicsPipeline(
VkPipeline pipeline, bool keep_dynamic_depth_bias,
bool keep_dynamic_blend_constants, bool keep_dynamic_stencil_mask_ref) {
@@ -1915,14 +2007,30 @@ void VulkanCommandProcessor::CheckSubmissionFenceAndDeviceLoss(
texture_cache_->CompletedSubmissionUpdated(submission_completed_);
// Destroy outdated swap objects.
while (!swap_framebuffers_outdated_.empty()) {
const auto& framebuffer_pair = swap_framebuffers_outdated_.front();
if (framebuffer_pair.first > submission_completed_) {
// Destroy objects scheduled for destruction.
while (!destroy_framebuffers_.empty()) {
const auto& destroy_pair = destroy_framebuffers_.front();
if (destroy_pair.first > submission_completed_) {
break;
}
dfn.vkDestroyFramebuffer(device, framebuffer_pair.second, nullptr);
swap_framebuffers_outdated_.pop_front();
dfn.vkDestroyFramebuffer(device, destroy_pair.second, nullptr);
destroy_framebuffers_.pop_front();
}
while (!destroy_buffers_.empty()) {
const auto& destroy_pair = destroy_buffers_.front();
if (destroy_pair.first > submission_completed_) {
break;
}
dfn.vkDestroyBuffer(device, destroy_pair.second, nullptr);
destroy_buffers_.pop_front();
}
while (!destroy_memory_.empty()) {
const auto& destroy_pair = destroy_memory_.front();
if (destroy_pair.first > submission_completed_) {
break;
}
dfn.vkFreeMemory(device, destroy_pair.second, nullptr);
destroy_memory_.pop_front();
}
}
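The three loops share one shape: pop from the front of a submission-sorted deque while the completed fence value has reached the recorded submission. A generic sketch of the pattern — the helper and its name are illustrative, not part of the commit:

#include <cstdint>
#include <deque>
#include <utility>

template <typename Handle, typename DestroyFn>
void FlushDeferredDestroys(std::deque<std::pair<uint64_t, Handle>>& queue,
                           uint64_t submission_completed, DestroyFn&& destroy) {
  // The deque stays sorted because entries are pushed with the monotonically
  // increasing current submission index, so checking the front is sufficient.
  while (!queue.empty() && queue.front().first <= submission_completed) {
    destroy(queue.front().second);
    queue.pop_front();
  }
}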
@@ -2136,6 +2244,8 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
}
if (submission_open_) {
assert_false(scratch_buffer_used_);
EndRenderPass();
render_target_cache_->EndSubmission();
@@ -3120,6 +3230,25 @@ uint8_t* VulkanCommandProcessor::WriteTransientUniformBufferBinding(
return mapping;
}
uint8_t* VulkanCommandProcessor::WriteTransientUniformBufferBinding(
size_t size, SingleTransientDescriptorLayout transient_descriptor_layout,
VkDescriptorSet& descriptor_set_out) {
VkDescriptorBufferInfo write_descriptor_buffer_info;
VkWriteDescriptorSet write_descriptor_set;
uint8_t* mapping = WriteTransientUniformBufferBinding(
size, transient_descriptor_layout, write_descriptor_buffer_info,
write_descriptor_set);
if (!mapping) {
return nullptr;
}
const ui::vulkan::VulkanProvider& provider = GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
dfn.vkUpdateDescriptorSets(device, 1, &write_descriptor_set, 0, nullptr);
descriptor_set_out = write_descriptor_set.dstSet;
return mapping;
}
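// A hypothetical call-site sketch for the overload above; LoadConstants, the
// layout enumerator, and load_constants are placeholders, not names from this
// commit:
//   VkDescriptorSet constants_descriptor_set;
//   uint8_t* constants_mapping = WriteTransientUniformBufferBinding(
//       sizeof(LoadConstants),
//       SingleTransientDescriptorLayout::kUniformBufferCompute,
//       constants_descriptor_set);
//   if (constants_mapping) {
//     std::memcpy(constants_mapping, &load_constants, sizeof(LoadConstants));
//     // constants_descriptor_set is now ready for vkCmdBindDescriptorSets.
//   }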
bool VulkanCommandProcessor::WriteTransientTextureBindings(
bool is_samplers, bool is_vertex, uint32_t binding_count,
VkDescriptorSetLayout descriptor_set_layout,

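AcquireScratchGpuBuffer, defined earlier in this file, pairs with the RAII ScratchBufferAcquisition declared in the header below: the destructor records the final stage and access masks so the next acquisition can emit the right barrier. A hypothetical compute-untile usage sketch — UntileWithScratch, untile_bytes, and the elided dispatch are placeholders:

bool UntileWithScratch(VulkanCommandProcessor& command_processor,
                       VkDeviceSize untile_bytes) {
  VulkanCommandProcessor::ScratchBufferAcquisition scratch =
      command_processor.AcquireScratchGpuBuffer(
          untile_bytes, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
          VK_ACCESS_SHADER_WRITE_BIT);
  if (scratch.buffer() == VK_NULL_HANDLE) {
    return false;  // Submission not open, buffer already acquired, or OOM.
  }
  // ... bind scratch.buffer() as a storage buffer and record the dispatch ...
  // If a transfer then reads the result, record the new masks so the
  // destructor stores the true last usage for the next acquisition's barrier:
  scratch.SetStageMask(VK_PIPELINE_STAGE_TRANSFER_BIT);
  scratch.SetAccessMask(VK_ACCESS_TRANSFER_READ_BIT);
  return true;
}  // The destructor records the masks and releases the scratch buffer here.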
src/xenia/gpu/vulkan/vulkan_command_processor.h

@@ -58,6 +58,84 @@ class VulkanCommandProcessor : public CommandProcessor {
kCount,
};
class ScratchBufferAcquisition {
public:
explicit ScratchBufferAcquisition() = default;
explicit ScratchBufferAcquisition(VulkanCommandProcessor& command_processor,
VkBuffer buffer,
VkPipelineStageFlags stage_mask,
VkAccessFlags access_mask)
: command_processor_(&command_processor),
buffer_(buffer),
stage_mask_(stage_mask),
access_mask_(access_mask) {}
ScratchBufferAcquisition(const ScratchBufferAcquisition& acquisition) =
delete;
ScratchBufferAcquisition& operator=(
const ScratchBufferAcquisition& acquisition) = delete;
ScratchBufferAcquisition(ScratchBufferAcquisition&& acquisition) {
command_processor_ = acquisition.command_processor_;
buffer_ = acquisition.buffer_;
stage_mask_ = acquisition.stage_mask_;
access_mask_ = acquisition.access_mask_;
acquisition.command_processor_ = nullptr;
acquisition.buffer_ = VK_NULL_HANDLE;
acquisition.stage_mask_ = 0;
acquisition.access_mask_ = 0;
}
ScratchBufferAcquisition& operator=(
ScratchBufferAcquisition&& acquisition) {
if (this == &acquisition) {
return *this;
}
command_processor_ = acquisition.command_processor_;
buffer_ = acquisition.buffer_;
stage_mask_ = acquisition.stage_mask_;
access_mask_ = acquisition.access_mask_;
acquisition.command_processor_ = nullptr;
acquisition.buffer_ = VK_NULL_HANDLE;
acquisition.stage_mask_ = 0;
acquisition.access_mask_ = 0;
return *this;
}
~ScratchBufferAcquisition() {
if (buffer_ != VK_NULL_HANDLE) {
assert_true(command_processor_->scratch_buffer_used_);
assert_true(command_processor_->scratch_buffer_ == buffer_);
command_processor_->scratch_buffer_last_stage_mask_ = stage_mask_;
command_processor_->scratch_buffer_last_access_mask_ = access_mask_;
command_processor_->scratch_buffer_last_usage_submission_ =
command_processor_->GetCurrentSubmission();
command_processor_->scratch_buffer_used_ = false;
}
}
// VK_NULL_HANDLE if the acquisition failed or if this has been moved from.
VkBuffer buffer() const { return buffer_; }
VkPipelineStageFlags GetStageMask() const { return stage_mask_; }
VkPipelineStageFlags SetStageMask(VkPipelineStageFlags new_stage_mask) {
VkPipelineStageFlags old_stage_mask = stage_mask_;
stage_mask_ = new_stage_mask;
return old_stage_mask;
}
VkAccessFlags GetAccessMask() const { return access_mask_; }
VkAccessFlags SetAccessMask(VkAccessFlags new_access_mask) {
VkAccessFlags old_access_mask = access_mask_;
access_mask_ = new_access_mask;
return old_access_mask;
}
private:
VulkanCommandProcessor* command_processor_ = nullptr;
VkBuffer buffer_ = VK_NULL_HANDLE;
VkPipelineStageFlags stage_mask_ = 0;
VkAccessFlags access_mask_ = 0;
};
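// Being move-only, exactly one owner runs the releasing destructor; transfer
// is explicit, e.g. (hypothetical): UseScratch(std::move(acquisition));
// after which acquisition.buffer() is VK_NULL_HANDLE.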
VulkanCommandProcessor(VulkanGraphicsSystem* graphics_system,
kernel::KernelState* kernel_state);
~VulkanCommandProcessor();
@@ -140,6 +218,16 @@ class VulkanCommandProcessor : public CommandProcessor {
// A frame must be open.
VkDescriptorSet AllocateSingleTransientDescriptor(
SingleTransientDescriptorLayout transient_descriptor_layout);
// Allocates a descriptor, space in the uniform buffer pool, and fills the
// VkWriteDescriptorSet structure and VkDescriptorBufferInfo referenced by it.
// Returns null in case of failure.
uint8_t* WriteTransientUniformBufferBinding(
size_t size, SingleTransientDescriptorLayout transient_descriptor_layout,
VkDescriptorBufferInfo& descriptor_buffer_info_out,
VkWriteDescriptorSet& write_descriptor_set_out);
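// Same as above, but also performs the descriptor update immediately and
// returns only the resulting descriptor set.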
uint8_t* WriteTransientUniformBufferBinding(
size_t size, SingleTransientDescriptorLayout transient_descriptor_layout,
VkDescriptorSet& descriptor_set_out);
// The returned reference is valid until a cache clear.
VkDescriptorSetLayout GetTextureDescriptorSetLayout(bool is_samplers,
@@ -150,6 +238,13 @@ class VulkanCommandProcessor : public CommandProcessor {
size_t texture_count_pixel, size_t sampler_count_pixel,
size_t texture_count_vertex, size_t sampler_count_vertex);
// Returns a single temporary GPU-side buffer within a submission for tasks
// like texture untiling and resolving. May push a buffer memory barrier for
// the transition into the initial usage. A submission must be open.
ScratchBufferAcquisition AcquireScratchGpuBuffer(
VkDeviceSize size, VkPipelineStageFlags initial_stage_mask,
VkAccessFlags initial_access_mask);
// Binds a graphics pipeline for host-specific purposes, invalidating the
// affected state. keep_dynamic_* must be false (to invalidate the dynamic
// state after binding the pipeline with the same state being static, or if
@@ -340,13 +435,6 @@ class VulkanCommandProcessor : public CommandProcessor {
const draw_util::ViewportInfo& viewport_info);
bool UpdateBindings(const VulkanShader* vertex_shader,
const VulkanShader* pixel_shader);
// Allocates a descriptor, space in the uniform buffer pool, and fills the
// VkWriteDescriptorSet structure and VkDescriptorBufferInfo referenced by it.
// Returns null in case of failure.
uint8_t* WriteTransientUniformBufferBinding(
size_t size, SingleTransientDescriptorLayout transient_descriptor_layout,
VkDescriptorBufferInfo& descriptor_buffer_info_out,
VkWriteDescriptorSet& write_descriptor_set_out);
// Allocates a descriptor set and fills the VkWriteDescriptorSet structure.
// The descriptor set layout must be the one for the given is_samplers,
// is_vertex, binding_count (from GetTextureDescriptorSetLayout - may be
@@ -390,6 +478,11 @@ class VulkanCommandProcessor : public CommandProcessor {
// Submission indices of frames that have already been submitted.
uint64_t closed_frame_submissions_[kMaxFramesInFlight] = {};
// <Submission where last used, resource>, sorted by the submission number.
std::deque<std::pair<uint64_t, VkDeviceMemory>> destroy_memory_;
std::deque<std::pair<uint64_t, VkBuffer>> destroy_buffers_;
std::deque<std::pair<uint64_t, VkFramebuffer>> destroy_framebuffers_;
std::vector<CommandBuffer> command_buffers_writable_;
std::deque<std::pair<uint64_t, CommandBuffer>> command_buffers_submitted_;
DeferredCommandBuffer deferred_command_buffer_;
@@ -491,6 +584,16 @@ class VulkanCommandProcessor : public CommandProcessor {
std::vector<PendingBarrier> pending_barriers_;
PendingBarrier current_pending_barrier_;
// GPU-local scratch buffer.
static constexpr VkDeviceSize kScratchBufferSizeIncrement = 16 * 1024 * 1024;
VkDeviceMemory scratch_buffer_memory_ = VK_NULL_HANDLE;
VkBuffer scratch_buffer_ = VK_NULL_HANDLE;
VkDeviceSize scratch_buffer_size_ = 0;
VkPipelineStageFlags scratch_buffer_last_stage_mask_ = 0;
VkAccessFlags scratch_buffer_last_access_mask_ = 0;
uint64_t scratch_buffer_last_usage_submission_ = 0;
bool scratch_buffer_used_ = false;
// The current dynamic state of the graphics pipeline bind point. Note that
// binding any pipeline to the bind point with static state (even if it's
// unused, like depth bias being disabled, but the values themselves still not

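The scratch buffer members above grow in fixed 16 MiB steps; a short worked example of the policy in AcquireScratchGpuBuffer (request sizes illustrative):

// xe::align(5 MiB, kScratchBufferSizeIncrement)  -> a 16 MiB buffer.
// xe::align(17 MiB, kScratchBufferSizeIncrement) -> a 32 MiB replacement; the
//   old buffer is destroyed immediately if its last usage submission has
//   completed, otherwise it is queued on destroy_buffers_/destroy_memory_.
// A request with size <= scratch_buffer_size_ reuses the buffer, with only a
//   barrier from the previous usage to the new one.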
src/xenia/gpu/vulkan/vulkan_texture_cache.cc (file diff suppressed because it is too large)

src/xenia/gpu/vulkan/vulkan_texture_cache.h

@@ -28,6 +28,8 @@ class VulkanCommandProcessor;
class VulkanTextureCache final : public TextureCache {
public:
// Transient descriptor set layouts must be initialized in the command
// processor.
static std::unique_ptr<VulkanTextureCache> Create(
const RegisterFile& register_file, VulkanSharedMemory& shared_memory,
uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y,
@@ -75,46 +77,15 @@ class VulkanTextureCache final : public TextureCache {
void UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) override;
private:
enum class LoadMode {
k8bpb,
k16bpb,
k32bpb,
k64bpb,
k128bpb,
kR5G5B5A1ToB5G5R5A1,
kR5G6B5ToB5G6R5,
kR5G5B6ToB5G6R5WithRBGASwizzle,
kRGBA4ToARGB4,
kGBGR8ToRGB8,
kBGRG8ToRGB8,
kR10G11B11ToRGBA16,
kR10G11B11ToRGBA16SNorm,
kR11G11B10ToRGBA16,
kR11G11B10ToRGBA16SNorm,
kR16UNormToFloat,
kR16SNormToFloat,
kRG16UNormToFloat,
kRG16SNormToFloat,
kRGBA16UNormToFloat,
kRGBA16SNormToFloat,
kDXT1ToRGBA8,
kDXT3ToRGBA8,
kDXT5ToRGBA8,
kDXNToRG8,
kDXT3A,
kDXT3AAs1111ToARGB4,
kDXT5AToR8,
kCTX1,
kDepthUnorm,
kDepthFloat,
kCount,
kUnknown = kCount
enum LoadDescriptorSetIndex {
kLoadDescriptorSetIndexDestination,
kLoadDescriptorSetIndexSource,
kLoadDescriptorSetIndexConstants,
kLoadDescriptorSetCount,
};
struct HostFormat {
LoadMode load_mode;
LoadShaderIndex load_shader;
// Do NOT add integer formats to this - they are not filterable, can only be
// read with ImageFetch, not ImageSample! If any game is seen using
// num_format 1 for fixed-point formats (for floating-point, it's normally
@@ -275,6 +246,10 @@ class VulkanTextureCache final : public TextureCache {
static const HostFormatPair kBestHostFormats[64];
HostFormatPair host_formats_[64];
VkPipelineLayout load_pipeline_layout_ = VK_NULL_HANDLE;
std::array<VkPipeline, kLoadShaderCount> load_pipelines_{};
std::array<VkPipeline, kLoadShaderCount> load_pipelines_scaled_{};
// If both images can be placed in the same allocation, it's one allocation;
// otherwise, it's two separate ones.
std::array<VkDeviceMemory, 2> null_images_memory_{};
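The LoadDescriptorSetIndex values above define the set numbers of the texture load pipelines. A hypothetical sketch of combining them into the load pipeline layout; the *_set_layout variables stand for whatever layouts the texture cache actually creates:

VkDescriptorSetLayout load_set_layouts[kLoadDescriptorSetCount];
load_set_layouts[kLoadDescriptorSetIndexDestination] = destination_set_layout;
load_set_layouts[kLoadDescriptorSetIndexSource] = source_set_layout;
load_set_layouts[kLoadDescriptorSetIndexConstants] = constants_set_layout;
VkPipelineLayoutCreateInfo load_pipeline_layout_create_info = {};
load_pipeline_layout_create_info.sType =
    VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
load_pipeline_layout_create_info.setLayoutCount = kLoadDescriptorSetCount;
load_pipeline_layout_create_info.pSetLayouts = load_set_layouts;
// load_pipeline_layout_ would then come from vkCreatePipelineLayout.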