RenderCache::dirty() - used to tell if we need to begin a new pass

Round all pixel pitch/heights up before dividing.
This commit is contained in:
Dr. Chat 2016-03-25 16:31:12 -05:00
parent 181b2af5a4
commit 0e41774e36
2 changed files with 66 additions and 37 deletions

View File

@ -39,7 +39,7 @@ VkFormat ColorRenderTargetFormatToVkFormat(ColorRenderTargetFormat format) {
case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown: case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown:
// WARNING: this is wrong, most likely - no float form in vulkan? // WARNING: this is wrong, most likely - no float form in vulkan?
XELOGW("Unsupported EDRAM format k_2_10_10_10_FLOAT used"); XELOGW("Unsupported EDRAM format k_2_10_10_10_FLOAT used");
return VK_FORMAT_A2R10G10B10_SSCALED_PACK32; return VK_FORMAT_A2R10G10B10_UNORM_PACK32;
case ColorRenderTargetFormat::k_16_16: case ColorRenderTargetFormat::k_16_16:
return VK_FORMAT_R16G16_UNORM; return VK_FORMAT_R16G16_UNORM;
case ColorRenderTargetFormat::k_16_16_16_16: case ColorRenderTargetFormat::k_16_16_16_16:
@ -451,10 +451,7 @@ RenderCache::RenderCache(RegisterFile* register_file,
CheckResult(status, "vkBindBufferMemory"); CheckResult(status, "vkBindBufferMemory");
if (status == VK_SUCCESS) { if (status == VK_SUCCESS) {
status = vkBindBufferMemory(*device_, edram_buffer_, edram_memory_, 0); // For debugging, upload a grid into the EDRAM buffer.
CheckResult(status, "vkBindBufferMemory");
// Upload a grid into the EDRAM buffer.
uint32_t* gpu_data = nullptr; uint32_t* gpu_data = nullptr;
status = vkMapMemory(*device_, edram_memory_, 0, buffer_requirements.size, status = vkMapMemory(*device_, edram_memory_, 0, buffer_requirements.size,
0, reinterpret_cast<void**>(&gpu_data)); 0, reinterpret_cast<void**>(&gpu_data));
@ -490,6 +487,25 @@ RenderCache::~RenderCache() {
vkFreeMemory(*device_, edram_memory_, nullptr); vkFreeMemory(*device_, edram_memory_, nullptr);
} }
bool RenderCache::dirty() const {
auto& regs = *register_file_;
auto& cur_regs = shadow_registers_;
bool dirty = false;
dirty |= cur_regs.rb_modecontrol != regs[XE_GPU_REG_RB_MODECONTROL].u32;
dirty |= cur_regs.rb_surface_info != regs[XE_GPU_REG_RB_SURFACE_INFO].u32;
dirty |= cur_regs.rb_color_info != regs[XE_GPU_REG_RB_COLOR_INFO].u32;
dirty |= cur_regs.rb_color1_info != regs[XE_GPU_REG_RB_COLOR1_INFO].u32;
dirty |= cur_regs.rb_color2_info != regs[XE_GPU_REG_RB_COLOR2_INFO].u32;
dirty |= cur_regs.rb_color3_info != regs[XE_GPU_REG_RB_COLOR3_INFO].u32;
dirty |= cur_regs.rb_depth_info != regs[XE_GPU_REG_RB_DEPTH_INFO].u32;
dirty |= cur_regs.pa_sc_window_scissor_tl !=
regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
dirty |= cur_regs.pa_sc_window_scissor_br !=
regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
return dirty;
}
const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer, const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
VulkanShader* vertex_shader, VulkanShader* vertex_shader,
VulkanShader* pixel_shader) { VulkanShader* pixel_shader) {
@ -739,8 +755,8 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer,
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
TileViewKey color_key; TileViewKey color_key;
color_key.tile_offset = config->color[i].edram_base; color_key.tile_offset = config->color[i].edram_base;
color_key.tile_width = config->surface_pitch_px / 80; color_key.tile_width = xe::round_up(config->surface_pitch_px, 80) / 80;
color_key.tile_height = config->surface_height_px / 16; color_key.tile_height = xe::round_up(config->surface_height_px, 16) / 16;
color_key.color_or_depth = 1; color_key.color_or_depth = 1;
color_key.edram_format = static_cast<uint16_t>(config->color[i].format); color_key.edram_format = static_cast<uint16_t>(config->color[i].format);
target_color_attachments[i] = target_color_attachments[i] =
@ -753,8 +769,10 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer,
TileViewKey depth_stencil_key; TileViewKey depth_stencil_key;
depth_stencil_key.tile_offset = config->depth_stencil.edram_base; depth_stencil_key.tile_offset = config->depth_stencil.edram_base;
depth_stencil_key.tile_width = config->surface_pitch_px / 80; depth_stencil_key.tile_width =
depth_stencil_key.tile_height = config->surface_height_px / 16; xe::round_up(config->surface_pitch_px, 80) / 80;
depth_stencil_key.tile_height =
xe::round_up(config->surface_height_px, 16) / 16;
depth_stencil_key.color_or_depth = 0; depth_stencil_key.color_or_depth = 0;
depth_stencil_key.edram_format = depth_stencil_key.edram_format =
static_cast<uint16_t>(config->depth_stencil.format); static_cast<uint16_t>(config->depth_stencil.format);
@ -960,6 +978,7 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer,
&buffer_barrier, 0, nullptr); &buffer_barrier, 0, nullptr);
// Update the tile view with current EDRAM contents. // Update the tile view with current EDRAM contents.
// TODO: Heuristics to determine if this copy is avoidable.
VkBufferImageCopy buffer_copy; VkBufferImageCopy buffer_copy;
buffer_copy.bufferOffset = edram_base * 5120; buffer_copy.bufferOffset = edram_base * 5120;
buffer_copy.bufferImageHeight = 0; buffer_copy.bufferImageHeight = 0;
@ -980,29 +999,26 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer,
image_barrier.pNext = nullptr; image_barrier.pNext = nullptr;
image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
if (image_layout != VK_IMAGE_LAYOUT_GENERAL && image_barrier.srcAccessMask = 0;
image_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
image_barrier.srcAccessMask = 0; image_barrier.oldLayout = image_layout;
image_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
image_barrier.oldLayout = image_layout; image_barrier.image = image;
image_barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; image_barrier.subresourceRange = {0, 0, 1, 0, 1};
image_barrier.image = image; image_barrier.subresourceRange.aspectMask =
image_barrier.subresourceRange = {0, 0, 1, 0, 1}; color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT
image_barrier.subresourceRange.aspectMask = : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
color_or_depth
? VK_IMAGE_ASPECT_COLOR_BIT
: VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
nullptr, 1, &image_barrier); nullptr, 1, &image_barrier);
}
// If we overflow we'll lose the device here. // If we overflow we'll lose the device here.
assert_true(extents.width <= key.tile_width * 80u); assert_true(extents.width <= key.tile_width * 80u);
assert_true(extents.height <= key.tile_height * 16u); assert_true(extents.height <= key.tile_height * 16u);
// Now issue the blit to the destination. // Now issue the blit to the destination.
// TODO: Resolve to destination if necessary.
VkImageBlit image_blit; VkImageBlit image_blit;
image_blit.srcSubresource = {0, 0, 0, 1}; image_blit.srcSubresource = {0, 0, 0, 1};
image_blit.srcSubresource.aspectMask = image_blit.srcSubresource.aspectMask =
@ -1024,15 +1040,12 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer,
image, image_layout, 1, &image_blit, filter); image, image_layout, 1, &image_blit, filter);
// Transition the image back into its previous layout. // Transition the image back into its previous layout.
if (image_layout != VK_IMAGE_LAYOUT_GENERAL && image_barrier.srcAccessMask = image_barrier.dstAccessMask;
image_layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { image_barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
image_barrier.srcAccessMask = image_barrier.dstAccessMask; std::swap(image_barrier.oldLayout, image_barrier.newLayout);
image_barrier.dstAccessMask = 0; vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
std::swap(image_barrier.oldLayout, image_barrier.newLayout); VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, nullptr, 1, &image_barrier);
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 0,
nullptr, 1, &image_barrier);
}
} }
void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer, void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer,
@ -1040,6 +1053,9 @@ void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer,
ColorRenderTargetFormat format, ColorRenderTargetFormat format,
uint32_t pitch, uint32_t height, uint32_t pitch, uint32_t height,
float* color) { float* color) {
// TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just
// need to detect this and calculate a value.
// Grab a tile view (as we need to clear an image first) // Grab a tile view (as we need to clear an image first)
TileViewKey key; TileViewKey key;
key.color_or_depth = 1; key.color_or_depth = 1;
@ -1076,6 +1092,9 @@ void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer,
DepthRenderTargetFormat format, DepthRenderTargetFormat format,
uint32_t pitch, uint32_t height, uint32_t pitch, uint32_t height,
float depth, uint32_t stencil) { float depth, uint32_t stencil) {
// TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just
// need to detect this and calculate a value.
// Grab a tile view (as we need to clear an image first) // Grab a tile view (as we need to clear an image first)
TileViewKey key; TileViewKey key;
key.color_or_depth = 0; key.color_or_depth = 0;

View File

@ -37,8 +37,10 @@ struct TileViewKey {
uint16_t tile_height; uint16_t tile_height;
// 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat. // 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat.
uint16_t color_or_depth : 1; uint16_t color_or_depth : 1;
// Surface MSAA samples
// uint16_t msaa_samples : 2;
// Either ColorRenderTargetFormat or DepthRenderTargetFormat. // Either ColorRenderTargetFormat or DepthRenderTargetFormat.
uint16_t edram_format : 15; uint16_t edram_format : 15; // 13;
}; };
static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed"); static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed");
@ -249,6 +251,10 @@ class RenderCache {
RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device); RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device);
~RenderCache(); ~RenderCache();
// Call this to determine if you should start a new render pass or continue
// with an already open pass.
bool dirty() const;
// Begins a render pass targeting the state-specified framebuffer formats. // Begins a render pass targeting the state-specified framebuffer formats.
// The command buffer will be transitioned into the render pass phase. // The command buffer will be transitioned into the render pass phase.
const RenderState* BeginRenderPass(VkCommandBuffer command_buffer, const RenderState* BeginRenderPass(VkCommandBuffer command_buffer,
@ -263,23 +269,27 @@ class RenderCache {
void ClearCache(); void ClearCache();
// Queues commands to copy EDRAM contents into an image. // Queues commands to copy EDRAM contents into an image.
// The command buffer must not be inside of a render pass when calling this.
void RawCopyToImage(VkCommandBuffer command_buffer, uint32_t edram_base, void RawCopyToImage(VkCommandBuffer command_buffer, uint32_t edram_base,
VkImage image, VkImageLayout image_layout, VkImage image, VkImageLayout image_layout,
bool color_or_depth, VkOffset3D offset, bool color_or_depth, VkOffset3D offset,
VkExtent3D extents); VkExtent3D extents);
// Queues commands to blit EDRAM contents into an image. // Queues commands to blit EDRAM contents into an image.
// The command buffer must not be inside of a render pass when calling this.
void BlitToImage(VkCommandBuffer command_buffer, uint32_t edram_base, void BlitToImage(VkCommandBuffer command_buffer, uint32_t edram_base,
uint32_t pitch, uint32_t height, VkImage image, uint32_t pitch, uint32_t height, VkImage image,
VkImageLayout image_layout, bool color_or_depth, VkImageLayout image_layout, bool color_or_depth,
uint32_t format, VkFilter filter, VkOffset3D offset, uint32_t format, VkFilter filter, VkOffset3D offset,
VkExtent3D extents); VkExtent3D extents);
// Queues commands to clear EDRAM contents with a solid color // Queues commands to clear EDRAM contents with a solid color.
// The command buffer must not be inside of a render pass when calling this.
void ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base, void ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base,
ColorRenderTargetFormat format, uint32_t pitch, ColorRenderTargetFormat format, uint32_t pitch,
uint32_t height, float* color); uint32_t height, float* color);
// Queues commands to clear EDRAM contents with depth/stencil values. // Queues commands to clear EDRAM contents with depth/stencil values.
// The command buffer must not be inside of a render pass when calling this.
void ClearEDRAMDepthStencil(VkCommandBuffer command_buffer, void ClearEDRAMDepthStencil(VkCommandBuffer command_buffer,
uint32_t edram_base, uint32_t edram_base,
DepthRenderTargetFormat format, uint32_t pitch, DepthRenderTargetFormat format, uint32_t pitch,
@ -307,7 +317,7 @@ class RenderCache {
RegisterFile* register_file_ = nullptr; RegisterFile* register_file_ = nullptr;
ui::vulkan::VulkanDevice* device_ = nullptr; ui::vulkan::VulkanDevice* device_ = nullptr;
// Entire 10MiB of EDRAM, aliased to hell by various VkImages. // Entire 10MiB of EDRAM.
VkDeviceMemory edram_memory_ = nullptr; VkDeviceMemory edram_memory_ = nullptr;
// Buffer overlayed 1:1 with edram_memory_ to allow raw access. // Buffer overlayed 1:1 with edram_memory_ to allow raw access.
VkBuffer edram_buffer_ = nullptr; VkBuffer edram_buffer_ = nullptr;