[Vulkan] Render target resolving
This commit is contained in:
parent
c6ec6d8239
commit
0dc480721f
|
@ -289,10 +289,11 @@ bool VulkanCommandProcessor::SetupContext() {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Requires the transient descriptor set layouts.
|
||||
// TODO(Triang3l): Get the actual draw resolution scale when the texture cache
|
||||
// supports resolution scaling.
|
||||
render_target_cache_ = std::make_unique<VulkanRenderTargetCache>(
|
||||
*register_file_, *memory_, &trace_writer_, 1, 1, *this);
|
||||
*register_file_, *memory_, trace_writer_, 1, 1, *this);
|
||||
if (!render_target_cache_->Initialize()) {
|
||||
XELOGE("Failed to initialize the render target cache");
|
||||
return false;
|
||||
|
@ -1884,6 +1885,14 @@ bool VulkanCommandProcessor::IssueCopy() {
|
|||
return false;
|
||||
}
|
||||
|
||||
uint32_t written_address, written_length;
|
||||
if (!render_target_cache_->Resolve(*memory_, *shared_memory_, *texture_cache_,
|
||||
written_address, written_length)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// TODO(Triang3l): CPU readback.
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1893,6 +1902,7 @@ void VulkanCommandProcessor::InitializeTrace() {
|
|||
if (!BeginSubmission(true)) {
|
||||
return;
|
||||
}
|
||||
// TODO(Triang3l): Write the EDRAM.
|
||||
bool shared_memory_submitted =
|
||||
shared_memory_->InitializeTraceSubmitDownloads();
|
||||
if (!shared_memory_submitted) {
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -20,6 +20,8 @@
|
|||
#include "xenia/base/hash.h"
|
||||
#include "xenia/base/xxhash.h"
|
||||
#include "xenia/gpu/render_target_cache.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
|
||||
#include "xenia/gpu/vulkan/vulkan_texture_cache.h"
|
||||
#include "xenia/gpu/xenos.h"
|
||||
#include "xenia/ui/vulkan/single_layout_descriptor_set_pool.h"
|
||||
#include "xenia/ui/vulkan/vulkan_provider.h"
|
||||
|
@ -86,12 +88,14 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
|||
};
|
||||
|
||||
VulkanRenderTargetCache(const RegisterFile& register_file,
|
||||
const Memory& memory, TraceWriter* trace_writer,
|
||||
const Memory& memory, TraceWriter& trace_writer,
|
||||
uint32_t draw_resolution_scale_x,
|
||||
uint32_t draw_resolution_scale_y,
|
||||
VulkanCommandProcessor& command_processor);
|
||||
~VulkanRenderTargetCache();
|
||||
|
||||
// Transient descriptor set layouts must be initialized in the command
|
||||
// processor.
|
||||
bool Initialize();
|
||||
void Shutdown(bool from_destructor = false);
|
||||
void ClearCache() override;
|
||||
|
@ -102,6 +106,13 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
|||
// TODO(Triang3l): Fragment shader interlock.
|
||||
Path GetPath() const override { return Path::kHostRenderTargets; }
|
||||
|
||||
// Performs the resolve to a shared memory area according to the current
|
||||
// register values, and also clears the render targets if needed. Must be in a
|
||||
// frame for calling.
|
||||
bool Resolve(const Memory& memory, VulkanSharedMemory& shared_memory,
|
||||
VulkanTextureCache& texture_cache, uint32_t& written_address_out,
|
||||
uint32_t& written_length_out);
|
||||
|
||||
bool Update(bool is_rasterization_done,
|
||||
reg::RB_DEPTHCONTROL normalized_depth_control,
|
||||
uint32_t normalized_color_mask,
|
||||
|
@ -182,6 +193,7 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
|||
// Trace playback.
|
||||
kTransferWrite,
|
||||
};
|
||||
|
||||
enum class EdramBufferModificationStatus {
|
||||
// The values are ordered by how strong the barrier conditions are.
|
||||
// No uncommitted shader writes.
|
||||
|
@ -192,6 +204,23 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
|||
// Need to commit before any next fragment shader interlock usage.
|
||||
kViaUnordered,
|
||||
};
|
||||
|
||||
// Descriptor set slots used when copying resolved data out of the EDRAM
// buffer. Presumably these are the set numbers of
// resolve_copy_pipeline_layout_ - confirm against the implementation.
enum ResolveCopyDescriptorSet : uint32_t {
  // EDRAM source; this binding never changes.
  kResolveCopyDescriptorSetEdram = 0,
  // Destination: the shared memory, or a region in it.
  kResolveCopyDescriptorSetDest = 1,

  // Number of descriptor sets, not a set itself.
  kResolveCopyDescriptorSetCount = 2,
};
|
||||
|
||||
// Code of one resolve copy shader in two variants. The member order is part of
// the interface (the struct is an aggregate), so it must not be changed.
// NOTE(review): "unscaled" / "scaled" presumably refer to draw resolution
// scaling, and the pointers presumably reference SPIR-V words - confirm.
struct ResolveCopyShaderCode {
  // Variant without scaling, and its size in bytes.
  const uint32_t* unscaled;
  size_t unscaled_size_bytes;

  // Variant with scaling, and its size in bytes.
  const uint32_t* scaled;
  size_t scaled_size_bytes;
};
|
||||
|
||||
static void GetEdramBufferUsageMasks(EdramBufferUsage usage,
|
||||
VkPipelineStageFlags& stage_mask_out,
|
||||
VkAccessFlags& access_mask_out);
|
||||
|
@ -204,6 +233,7 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
|||
EdramBufferModificationStatus::kViaFragmentShaderInterlock);
|
||||
|
||||
VulkanCommandProcessor& command_processor_;
|
||||
TraceWriter& trace_writer_;
|
||||
|
||||
// Accessible in fragment and compute shaders.
|
||||
VkDescriptorSetLayout descriptor_set_layout_storage_buffer_ = VK_NULL_HANDLE;
|
||||
|
@ -224,6 +254,12 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
|||
VkDescriptorPool edram_storage_buffer_descriptor_pool_ = VK_NULL_HANDLE;
|
||||
VkDescriptorSet edram_storage_buffer_descriptor_set_;
|
||||
|
||||
VkPipelineLayout resolve_copy_pipeline_layout_ = VK_NULL_HANDLE;
|
||||
static const ResolveCopyShaderCode
|
||||
kResolveCopyShaders[size_t(draw_util::ResolveCopyShaderIndex::kCount)];
|
||||
std::array<VkPipeline, size_t(draw_util::ResolveCopyShaderIndex::kCount)>
|
||||
resolve_copy_pipelines_{};
|
||||
|
||||
// RenderPassKey::key -> VkRenderPass.
|
||||
// VK_NULL_HANDLE if failed to create.
|
||||
std::unordered_map<uint32_t, VkRenderPass> render_passes_;
|
||||
|
@ -627,6 +663,136 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
|||
}
|
||||
};
|
||||
|
||||
union DumpPipelineKey {
|
||||
uint32_t key;
|
||||
struct {
|
||||
xenos::MsaaSamples msaa_samples : 2;
|
||||
uint32_t resource_format : 4;
|
||||
// Last bit because this affects the pipeline - after sorting, only change
|
||||
// it at most once. Depth buffers have an additional stencil SRV.
|
||||
uint32_t is_depth : 1;
|
||||
};
|
||||
|
||||
DumpPipelineKey() : key(0) { static_assert_size(*this, sizeof(key)); }
|
||||
|
||||
struct Hasher {
|
||||
size_t operator()(const DumpPipelineKey& key) const {
|
||||
return std::hash<uint32_t>{}(key.key);
|
||||
}
|
||||
};
|
||||
bool operator==(const DumpPipelineKey& other_key) const {
|
||||
return key == other_key.key;
|
||||
}
|
||||
bool operator!=(const DumpPipelineKey& other_key) const {
|
||||
return !(*this == other_key);
|
||||
}
|
||||
bool operator<(const DumpPipelineKey& other_key) const {
|
||||
return key < other_key.key;
|
||||
}
|
||||
|
||||
xenos::ColorRenderTargetFormat GetColorFormat() const {
|
||||
assert_false(is_depth);
|
||||
return xenos::ColorRenderTargetFormat(resource_format);
|
||||
}
|
||||
xenos::DepthRenderTargetFormat GetDepthFormat() const {
|
||||
assert_true(is_depth);
|
||||
return xenos::DepthRenderTargetFormat(resource_format);
|
||||
}
|
||||
};
|
||||
|
||||
// Dumping has no strict dependency on the group size. To keep the
// calculations simple, especially with resolution scaling, the work is divided
// into groups manually (as the group size is not unlimited). The only
// restriction is that an integer multiple of the group size must be 80x16
// samples (and no larger than that) for 32bpp, or 40x16 samples for 64bpp
// (because only a half of the pair of tiles may need to be dumped). 8x16 is
// used since that's 128 - the minimum required group size on Vulkan, and the
// maximum number of lanes in a subgroup on Vulkan.
|
||||
static constexpr uint32_t kDumpSamplesPerGroupX = 8;
|
||||
static constexpr uint32_t kDumpSamplesPerGroupY = 16;
|
||||
|
||||
union DumpPitches {
|
||||
uint32_t pitches;
|
||||
struct {
|
||||
// Both in tiles.
|
||||
uint32_t dest_pitch : xenos::kEdramPitchTilesBits;
|
||||
uint32_t source_pitch : xenos::kEdramPitchTilesBits;
|
||||
};
|
||||
DumpPitches() : pitches(0) { static_assert_size(*this, sizeof(pitches)); }
|
||||
bool operator==(const DumpPitches& other_pitches) const {
|
||||
return pitches == other_pitches.pitches;
|
||||
}
|
||||
bool operator!=(const DumpPitches& other_pitches) const {
|
||||
return !(*this == other_pitches);
|
||||
}
|
||||
};
|
||||
|
||||
union DumpOffsets {
|
||||
uint32_t offsets;
|
||||
struct {
|
||||
uint32_t dispatch_first_tile : xenos::kEdramBaseTilesBits;
|
||||
uint32_t source_base_tiles : xenos::kEdramBaseTilesBits;
|
||||
};
|
||||
DumpOffsets() : offsets(0) { static_assert_size(*this, sizeof(offsets)); }
|
||||
bool operator==(const DumpOffsets& other_offsets) const {
|
||||
return offsets == other_offsets.offsets;
|
||||
}
|
||||
bool operator!=(const DumpOffsets& other_offsets) const {
|
||||
return !(*this == other_offsets);
|
||||
}
|
||||
};
|
||||
|
||||
// Descriptor set slots used by the dump pipelines.
enum DumpDescriptorSet : uint32_t {
  // EDRAM buffer; never changes. It is the same in the color and the depth
  // pipeline layouts, and is kept first for pipeline layout compatibility, so
  // that it only has to be set once.
  kDumpDescriptorSetEdram = 0,
  // Dump source. A single resolve may need multiple sources, and the
  // descriptor set layouts are different for color and depth.
  kDumpDescriptorSetSource = 1,

  // Number of descriptor sets, not a set itself.
  kDumpDescriptorSetCount = 2,
};
|
||||
|
||||
// Indices of the push constants used by the dump pipelines.
enum DumpPushConstant : uint32_t {
  // Pitches; may be different for different sources.
  kDumpPushConstantPitches = 0,
  // Offsets; may be changed multiple times for the same source.
  kDumpPushConstantOffsets = 1,

  // Number of push constants, not a constant itself.
  kDumpPushConstantCount = 2,
};
|
||||
|
||||
struct DumpInvocation {
|
||||
ResolveCopyDumpRectangle rectangle;
|
||||
DumpPipelineKey pipeline_key;
|
||||
DumpInvocation(const ResolveCopyDumpRectangle& rectangle,
|
||||
const DumpPipelineKey& pipeline_key)
|
||||
: rectangle(rectangle), pipeline_key(pipeline_key) {}
|
||||
bool operator<(const DumpInvocation& other_invocation) {
|
||||
// Sort by the pipeline key primarily to reduce pipeline state (context)
|
||||
// switches.
|
||||
if (pipeline_key != other_invocation.pipeline_key) {
|
||||
return pipeline_key < other_invocation.pipeline_key;
|
||||
}
|
||||
assert_not_null(rectangle.render_target);
|
||||
uint32_t render_target_index =
|
||||
static_cast<const VulkanRenderTarget*>(rectangle.render_target)
|
||||
->temporary_sort_index();
|
||||
const ResolveCopyDumpRectangle& other_rectangle =
|
||||
other_invocation.rectangle;
|
||||
uint32_t other_render_target_index =
|
||||
static_cast<const VulkanRenderTarget*>(other_rectangle.render_target)
|
||||
->temporary_sort_index();
|
||||
if (render_target_index != other_render_target_index) {
|
||||
return render_target_index < other_render_target_index;
|
||||
}
|
||||
if (rectangle.row_first != other_rectangle.row_first) {
|
||||
return rectangle.row_first < other_rectangle.row_first;
|
||||
}
|
||||
return rectangle.row_first_start < other_rectangle.row_first_start;
|
||||
}
|
||||
};
|
||||
|
||||
// Returns the framebuffer object, or VK_NULL_HANDLE if failed to create.
|
||||
const Framebuffer* GetFramebuffer(
|
||||
RenderPassKey render_pass_key, uint32_t pitch_tiles_at_32bpp,
|
||||
|
@ -649,6 +815,13 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
|||
const uint64_t* render_target_resolve_clear_values = nullptr,
|
||||
const Transfer::Rectangle* resolve_clear_rectangle = nullptr);
|
||||
|
||||
VkPipeline GetDumpPipeline(DumpPipelineKey key);
|
||||
|
||||
// Writes contents of host render targets within rectangles from
|
||||
// ResolveInfo::GetCopyEdramTileSpan to edram_buffer_.
|
||||
void DumpRenderTargets(uint32_t dump_base, uint32_t dump_row_length_used,
|
||||
uint32_t dump_rows, uint32_t dump_pitch);
|
||||
|
||||
bool gamma_render_target_as_srgb_ = false;
|
||||
|
||||
bool msaa_2x_attachments_supported_ = false;
|
||||
|
@ -688,8 +861,22 @@ class VulkanRenderTargetCache final : public RenderTargetCache {
|
|||
TransferPipelineKey::Hasher>
|
||||
transfer_pipelines_;
|
||||
|
||||
VkPipelineLayout dump_pipeline_layout_color_ = VK_NULL_HANDLE;
|
||||
VkPipelineLayout dump_pipeline_layout_depth_ = VK_NULL_HANDLE;
|
||||
// Compute pipelines for copying host render target contents to the EDRAM
|
||||
// buffer. VK_NULL_HANDLE if failed to create.
|
||||
std::unordered_map<DumpPipelineKey, VkPipeline, DumpPipelineKey::Hasher>
|
||||
dump_pipelines_;
|
||||
|
||||
// Temporary storage for Resolve.
|
||||
std::vector<Transfer> clear_transfers_[2];
|
||||
|
||||
// Temporary storage for PerformTransfersAndResolveClears.
|
||||
std::vector<TransferInvocation> current_transfer_invocations_;
|
||||
|
||||
// Temporary storage for DumpRenderTargets.
|
||||
std::vector<ResolveCopyDumpRectangle> dump_rectangles_;
|
||||
std::vector<DumpInvocation> dump_invocations_;
|
||||
};
|
||||
|
||||
} // namespace vulkan
|
||||
|
|
Loading…
Reference in New Issue