Enable native MSAA

Copy back EDRAM buffers in order by base offset.
commit 50f72b4e42
parent 2eca3ce9e6
Author: Dr. Chat
Date:   2016-04-01 21:52:39 -05:00
6 changed files with 333 additions and 167 deletions

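The hunks below repeat the same mapping from the guest MsaaSamples value to a Vulkan sample count in three places (tile view images, render pass attachments, and the pipeline multisample state). As a reading aid only, the shared logic amounts to a helper like the following sketch; SampleCountFromMsaa is a hypothetical name and the commit itself inlines the switch each time:

    // Sketch only: maps the Xenos MSAA mode onto a Vulkan sample-count flag.
    // Hypothetical helper, not part of this commit.
    VkSampleCountFlagBits SampleCountFromMsaa(MsaaSamples samples) {
      switch (samples) {
        case MsaaSamples::k1X:
          return VK_SAMPLE_COUNT_1_BIT;
        case MsaaSamples::k2X:
          return VK_SAMPLE_COUNT_2_BIT;
        case MsaaSamples::k4X:
          return VK_SAMPLE_COUNT_4_BIT;
        default:
          assert_unhandled_case(samples);
          return VK_SAMPLE_COUNT_1_BIT;
      }
    }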
File 1 of 6

@@ -187,6 +187,10 @@ PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline(
     VkCommandBuffer command_buffer, const RenderState* render_state,
     VulkanShader* vertex_shader, VulkanShader* pixel_shader,
     PrimitiveType primitive_type, VkPipeline* pipeline_out) {
+#if FINE_GRAINED_DRAW_SCOPES
+  SCOPE_profile_cpu_f("gpu");
+#endif  // FINE_GRAINED_DRAW_SCOPES
+
   assert_not_null(pipeline_out);

   // Perform a pass over all registers and state updating our cached structures.
@@ -323,6 +327,10 @@ VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type,
 bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
                                     bool full_update) {
+#if FINE_GRAINED_DRAW_SCOPES
+  SCOPE_profile_cpu_f("gpu");
+#endif  // FINE_GRAINED_DRAW_SCOPES
+
   auto& regs = set_dynamic_state_registers_;

   bool window_offset_dirty = SetShadowRegister(&regs.pa_sc_window_offset,
@@ -393,20 +401,25 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
   auto surface_msaa =
       static_cast<MsaaSamples>((regs.rb_surface_info >> 16) & 0x3);
   // TODO(benvanik): ??
+  // FIXME: Some games depend on these for proper clears (e.g. only clearing
+  // half the size they actually want with 4x MSAA), but others don't.
+  // Figure out how these games are expecting clears to be done.
   float window_width_scalar = 1;
   float window_height_scalar = 1;
   switch (surface_msaa) {
     case MsaaSamples::k1X:
       break;
     case MsaaSamples::k2X:
-      window_width_scalar = 2;
+      // ??
+      window_width_scalar = window_height_scalar = 1.41421356f;
       break;
     case MsaaSamples::k4X:
-      window_width_scalar = 2;
-      window_height_scalar = 2;
+      window_width_scalar = window_height_scalar = 2;
       break;
   }
+  // window_width_scalar = window_height_scalar = 1;

   // Whether each of the viewport settings are enabled.
   // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
   bool vport_xscale_enable = (regs.pa_cl_vte_cntl & (1 << 0)) > 0;
@@ -434,6 +447,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
   float voy = vport_yoffset_enable ? regs.pa_cl_vport_yoffset : 0;
   float vsx = vport_xscale_enable ? regs.pa_cl_vport_xscale : 1;
   float vsy = vport_yscale_enable ? regs.pa_cl_vport_yscale : 1;
   window_width_scalar = window_height_scalar = 1;
   float vpw = 2 * window_width_scalar * vsx;
   float vph = -2 * window_height_scalar * vsy;
@@ -481,25 +495,25 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
     vkCmdSetBlendConstants(command_buffer, regs.rb_blend_rgba);
   }

-  // VK_DYNAMIC_STATE_LINE_WIDTH
-  vkCmdSetLineWidth(command_buffer, 1.0f);
-
-  // VK_DYNAMIC_STATE_DEPTH_BIAS
-  vkCmdSetDepthBias(command_buffer, 0.0f, 0.0f, 0.0f);
-
-  // VK_DYNAMIC_STATE_DEPTH_BOUNDS
-  vkCmdSetDepthBounds(command_buffer, 0.0f, 1.0f);
-
-  // VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK
-  vkCmdSetStencilCompareMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0);
-
-  // VK_DYNAMIC_STATE_STENCIL_REFERENCE
-  vkCmdSetStencilReference(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0);
-
-  // VK_DYNAMIC_STATE_STENCIL_WRITE_MASK
-  vkCmdSetStencilWriteMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0);
-
-  // TODO(benvanik): push constants.
+  if (full_update) {
+    // VK_DYNAMIC_STATE_LINE_WIDTH
+    vkCmdSetLineWidth(command_buffer, 1.0f);
+
+    // VK_DYNAMIC_STATE_DEPTH_BIAS
+    vkCmdSetDepthBias(command_buffer, 0.0f, 0.0f, 0.0f);
+
+    // VK_DYNAMIC_STATE_DEPTH_BOUNDS
+    vkCmdSetDepthBounds(command_buffer, 0.0f, 1.0f);
+
+    // VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK
+    vkCmdSetStencilCompareMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0);
+
+    // VK_DYNAMIC_STATE_STENCIL_REFERENCE
+    vkCmdSetStencilReference(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0);
+
+    // VK_DYNAMIC_STATE_STENCIL_WRITE_MASK
+    vkCmdSetStencilWriteMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0);
+  }

   bool push_constants_dirty = full_update || viewport_state_dirty;
   push_constants_dirty |=
@@ -530,7 +544,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
       push_constants.window_scale[1] = -1.0f;
     } else {
       push_constants.window_scale[0] = 1.0f / 2560.0f;
-      push_constants.window_scale[1] = -1.0f / 2560.0f;
+      push_constants.window_scale[1] = 1.0f / 2560.0f;
     }

     // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
@@ -756,7 +770,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState(
                        : VK_FORMAT_A2R10G10B10_UNORM_PACK32;
       break;
     case VertexFormat::k_10_11_11:
-      assert_always("unsupported?");
+      // assert_always("unsupported?");
       vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32;
       break;
     case VertexFormat::k_11_11_10:
@@ -934,6 +948,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState(
                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR);
   dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index,
                              XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX);
+  dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
   regs.primitive_type = primitive_type;
   XXH64_update(&hash_state_, &regs, sizeof(regs));
   if (!dirty) {
@@ -947,7 +962,13 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState(
   // TODO(benvanik): right setting?
   state_info.depthClampEnable = VK_FALSE;

-  // TODO(benvanik): use in depth-only mode?
+  // Discard rasterizer output in depth-only mode.
+  // TODO(DrChat): Figure out how to make this work properly.
+  /*
+  auto enable_mode = static_cast<xenos::ModeControl>(regs.rb_modecontrol & 0x7);
+  state_info.rasterizerDiscardEnable =
+      enable_mode == xenos::ModeControl::kColorDepth ? VK_FALSE : VK_TRUE;
+  //*/
   state_info.rasterizerDiscardEnable = VK_FALSE;

   bool poly_mode = ((regs.pa_su_sc_mode_cntl >> 3) & 0x3) != 0;
@@ -1004,20 +1025,49 @@ PipelineCache::UpdateStatus PipelineCache::UpdateMultisampleState() {
   auto& regs = update_multisample_state_regs_;
   auto& state_info = update_multisample_state_info_;

+  bool dirty = false;
+  dirty |= SetShadowRegister(&regs.pa_sc_aa_config, XE_GPU_REG_PA_SC_AA_CONFIG);
+  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
+                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
+  dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
+  XXH64_update(&hash_state_, &regs, sizeof(regs));
+  if (!dirty) {
+    return UpdateStatus::kCompatible;
+  }
+
   state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
   state_info.pNext = nullptr;
   state_info.flags = 0;

   // PA_SC_AA_CONFIG MSAA_NUM_SAMPLES
   // PA_SU_SC_MODE_CNTL MSAA_ENABLE
-  state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
+  // state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
+  //*
+  auto msaa_num_samples =
+      static_cast<MsaaSamples>((regs.rb_surface_info >> 16) & 0x3);
+  switch (msaa_num_samples) {
+    case MsaaSamples::k1X:
+      state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
+      break;
+    case MsaaSamples::k2X:
+      state_info.rasterizationSamples = VK_SAMPLE_COUNT_2_BIT;
+      break;
+    case MsaaSamples::k4X:
+      state_info.rasterizationSamples = VK_SAMPLE_COUNT_4_BIT;
+      break;
+    default:
+      assert_unhandled_case(msaa_num_samples);
+      break;
+  }
+  //*/
+
   state_info.sampleShadingEnable = VK_FALSE;
   state_info.minSampleShading = 0;
   state_info.pSampleMask = nullptr;
   state_info.alphaToCoverageEnable = VK_FALSE;
   state_info.alphaToOneEnable = VK_FALSE;
-  return UpdateStatus::kCompatible;
+  return UpdateStatus::kMismatch;
 }

 PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {

File 2 of 6

@@ -211,6 +211,7 @@ class PipelineCache {
     uint32_t pa_sc_screen_scissor_tl;
     uint32_t pa_sc_screen_scissor_br;
     uint32_t multi_prim_ib_reset_index;
+    uint32_t rb_modecontrol;

     UpdateRasterizationStateRegisters() { Reset(); }
     void Reset() { std::memset(this, 0, sizeof(*this)); }
@@ -218,6 +219,10 @@ class PipelineCache {
   VkPipelineRasterizationStateCreateInfo update_rasterization_state_info_;

   struct UpdateMultisampleStateeRegisters {
+    uint32_t pa_sc_aa_config;
+    uint32_t pa_su_sc_mode_cntl;
+    uint32_t rb_surface_info;
+
     UpdateMultisampleStateeRegisters() { Reset(); }
     void Reset() { std::memset(this, 0, sizeof(*this)); }
   } update_multisample_state_regs_;

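The three registers added to UpdateMultisampleStateeRegisters feed the same shadow-register dirty-tracking idiom the other Update*State() methods of PipelineCache use: copy the current register value into the shadow struct, OR the "changed" result into a dirty flag, hash the struct into the pipeline key, and bail out early if nothing moved. A simplified sketch of that idiom, assuming the usual xenia register-file layout (details are illustrative, not the exact implementation):

    // Simplified sketch of the dirty-tracking idiom used by Update*State().
    bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) {
      uint32_t value = register_file_->values[register_name].u32;
      if (*dest == value) {
        return false;  // Register unchanged since the last draw.
      }
      *dest = value;
      return true;  // Caller ORs this into its dirty flag.
    }

    // Usage pattern inside UpdateMultisampleState():
    //   bool dirty = false;
    //   dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
    //   XXH64_update(&hash_state_, &regs, sizeof(regs));
    //   if (!dirty) return UpdateStatus::kCompatible;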
File 3 of 6

@@ -165,8 +165,23 @@ CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device,
   image_info.extent.depth = 1;
   image_info.mipLevels = 1;
   image_info.arrayLayers = 1;
-  image_info.samples =
-      static_cast<VkSampleCountFlagBits>(VK_SAMPLE_COUNT_1_BIT);
+  // image_info.samples = VK_SAMPLE_COUNT_1_BIT;
+  //*
+  auto msaa_samples = static_cast<MsaaSamples>(key.msaa_samples);
+  switch (msaa_samples) {
+    case MsaaSamples::k1X:
+      image_info.samples = VK_SAMPLE_COUNT_1_BIT;
+      break;
+    case MsaaSamples::k2X:
+      image_info.samples = VK_SAMPLE_COUNT_2_BIT;
+      break;
+    case MsaaSamples::k4X:
+      image_info.samples = VK_SAMPLE_COUNT_4_BIT;
+      break;
+    default:
+      assert_unhandled_case(msaa_samples);
+  }
+  //*/
   image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
   image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                      VK_IMAGE_USAGE_TRANSFER_DST_BIT |
@@ -322,13 +337,29 @@ CachedRenderPass::CachedRenderPass(VkDevice device,
     : device_(device) {
   std::memcpy(&config, &desired_config, sizeof(config));

+  VkSampleCountFlagBits sample_count;
+  switch (desired_config.surface_msaa) {
+    case MsaaSamples::k1X:
+      sample_count = VK_SAMPLE_COUNT_1_BIT;
+      break;
+    case MsaaSamples::k2X:
+      sample_count = VK_SAMPLE_COUNT_2_BIT;
+      break;
+    case MsaaSamples::k4X:
+      sample_count = VK_SAMPLE_COUNT_4_BIT;
+      break;
+    default:
+      assert_unhandled_case(desired_config.surface_msaa);
+      break;
+  }
+
   // Initialize all attachments to default unused.
   // As we set layout(location=RT) in shaders we must always provide 4.
   VkAttachmentDescription attachments[5];
   for (int i = 0; i < 4; ++i) {
     attachments[i].flags = 0;
     attachments[i].format = VK_FORMAT_UNDEFINED;
-    attachments[i].samples = VK_SAMPLE_COUNT_1_BIT;
+    attachments[i].samples = sample_count;
     attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
     attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
     attachments[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
@@ -339,7 +370,7 @@ CachedRenderPass::CachedRenderPass(VkDevice device,
   auto& depth_stencil_attachment = attachments[4];
   depth_stencil_attachment.flags = 0;
   depth_stencil_attachment.format = VK_FORMAT_UNDEFINED;
-  depth_stencil_attachment.samples = VK_SAMPLE_COUNT_1_BIT;
+  depth_stencil_attachment.samples = sample_count;
   depth_stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
   depth_stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
   depth_stencil_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
@@ -404,6 +435,10 @@ CachedRenderPass::~CachedRenderPass() {
 bool CachedRenderPass::IsCompatible(
     const RenderConfiguration& desired_config) const {
+  if (config.surface_msaa != desired_config.surface_msaa) {
+    return false;
+  }
+
   for (int i = 0; i < 4; ++i) {
     // TODO(benvanik): allow compatible vulkan formats.
     if (config.color[i].format != desired_config.color[i].format) {
@@ -503,12 +538,18 @@ bool RenderCache::dirty() const {
            regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
   dirty |= cur_regs.pa_sc_window_scissor_br !=
            regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
+  dirty |= (cur_regs.rb_depthcontrol & (0x4 | 0x2)) !=
+           (regs[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x4 | 0x2));
   return dirty;
 }

 const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
                                                 VulkanShader* vertex_shader,
                                                 VulkanShader* pixel_shader) {
+#if FINE_GRAINED_DRAW_SCOPES
+  SCOPE_profile_cpu_f("gpu");
+#endif  // FINE_GRAINED_DRAW_SCOPES
+
   assert_null(current_command_buffer_);
   current_command_buffer_ = command_buffer;
@@ -520,6 +561,7 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
   bool dirty = false;
   dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
   dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
+  dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK);
   dirty |= SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO);
   dirty |= SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO);
   dirty |= SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO);
@@ -529,7 +571,11 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
                              XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL);
   dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br,
                              XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR);
-  regs.rb_depthcontrol = register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32;
+  dirty |=
+      (regs.rb_depthcontrol & (0x4 | 0x2)) !=
+      (register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x4 | 0x2));
+  regs.rb_depthcontrol =
+      register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x4 | 0x2);
   if (!dirty && current_state_.render_pass) {
     // No registers have changed so we can reuse the previous render pass -
     // just begin with what we had.
@@ -549,7 +595,10 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
   // Speculatively see if targets are actually used so we can skip copies
   for (int i = 0; i < 4; i++) {
-    config->color[i].used = pixel_shader->writes_color_target(i);
+    uint32_t color_mask = (regs.rb_color_mask >> (i * 4)) & 0xF;
+    config->color[i].used =
+        config->mode_control == xenos::ModeControl::kColorDepth &&
+        color_mask != 0;
   }
   config->depth_stencil.used = !!(regs.rb_depthcontrol & (0x4 | 0x2));
@@ -558,66 +607,20 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
     current_state_.framebuffer = framebuffer;
     current_state_.framebuffer_handle = framebuffer->handle;

-    VkBufferMemoryBarrier barrier;
-    barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
-    barrier.pNext = nullptr;
-    barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
-    barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
-    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
-    barrier.buffer = edram_buffer_;
-    barrier.offset = 0;
-    barrier.size = 0;
-
-    // Copy EDRAM buffer into render targets with tight packing.
-    VkBufferImageCopy region;
-    region.bufferRowLength = 0;
-    region.bufferImageHeight = 0;
-    region.imageOffset = {0, 0, 0};
-
     // Depth
     auto depth_target = current_state_.framebuffer->depth_stencil_attachment;
     if (depth_target && current_state_.config.depth_stencil.used) {
-      region.imageSubresource = {
-          VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1};
-      region.bufferOffset = depth_target->key.tile_offset * 5120;
-
-      // Wait for any potential copies to finish.
-      barrier.offset = region.bufferOffset;
-      barrier.size = depth_target->key.tile_width * 80 *
-                     depth_target->key.tile_height * 16 * 4;
-      vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                           VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
-                           &barrier, 0, nullptr);
-
-      region.imageExtent = {depth_target->key.tile_width * 80u,
-                            depth_target->key.tile_height * 16u, 1};
-      vkCmdCopyBufferToImage(command_buffer, edram_buffer_, depth_target->image,
-                             VK_IMAGE_LAYOUT_GENERAL, 1, &region);
+      UpdateTileView(command_buffer, depth_target, true);
     }

     // Color
-    region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
     for (int i = 0; i < 4; i++) {
       auto target = current_state_.framebuffer->color_attachments[i];
       if (!target || !current_state_.config.color[i].used) {
         continue;
       }

-      region.bufferOffset = target->key.tile_offset * 5120;
-
-      // Wait for any potential copies to finish.
-      barrier.offset = region.bufferOffset;
-      barrier.size =
-          target->key.tile_width * 80 * target->key.tile_height * 16 * 4;
-      vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
-                           VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
-                           &barrier, 0, nullptr);
-
-      region.imageExtent = {target->key.tile_width * 80u,
-                            target->key.tile_height * 16u, 1};
-      vkCmdCopyBufferToImage(command_buffer, edram_buffer_, target->image,
-                             VK_IMAGE_LAYOUT_GENERAL, 1, &region);
+      UpdateTileView(command_buffer, target, true);
     }
   }

   if (!render_pass) {
@@ -758,6 +761,7 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer,
     color_key.tile_width = xe::round_up(config->surface_pitch_px, 80) / 80;
     color_key.tile_height = xe::round_up(config->surface_height_px, 16) / 16;
     color_key.color_or_depth = 1;
+    color_key.msaa_samples = static_cast<uint16_t>(config->surface_msaa);
    color_key.edram_format = static_cast<uint16_t>(config->color[i].format);
     target_color_attachments[i] =
         FindOrCreateTileView(command_buffer, color_key);
@@ -774,6 +778,8 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer,
   depth_stencil_key.tile_height =
       xe::round_up(config->surface_height_px, 16) / 16;
   depth_stencil_key.color_or_depth = 0;
+  depth_stencil_key.msaa_samples =
+      static_cast<uint16_t>(config->surface_msaa);
   depth_stencil_key.edram_format =
       static_cast<uint16_t>(config->depth_stencil.format);
   auto target_depth_stencil_attachment =
@@ -810,6 +816,51 @@ CachedTileView* RenderCache::FindOrCreateTileView(
   return tile_view;
 }

+void RenderCache::UpdateTileView(VkCommandBuffer command_buffer,
+                                 CachedTileView* view, bool load,
+                                 bool insert_barrier) {
+  if (insert_barrier) {
+    VkBufferMemoryBarrier barrier;
+    barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
+    barrier.pNext = nullptr;
+    if (load) {
+      barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+      barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+    } else {
+      barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
+      barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
+    }
+    barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
+    barrier.buffer = edram_buffer_;
+    barrier.offset = view->key.tile_offset * 5120;
+    barrier.size = view->key.tile_width * 80 * view->key.tile_height * 16 * 4;
+    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
+                         VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
+                         &barrier, 0, nullptr);
+  }
+
+  VkBufferImageCopy region;
+  region.bufferOffset = view->key.tile_offset * 5120;
+  region.bufferRowLength = 0;
+  region.bufferImageHeight = 0;
+  region.imageSubresource = {0, 0, 0, 1};
+  region.imageSubresource.aspectMask =
+      view->key.color_or_depth
+          ? VK_IMAGE_ASPECT_COLOR_BIT
+          : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+  region.imageOffset = {0, 0, 0};
+  region.imageExtent = {view->key.tile_width * 80u, view->key.tile_height * 16u,
+                        1};
+  if (load) {
+    vkCmdCopyBufferToImage(command_buffer, edram_buffer_, view->image,
+                           VK_IMAGE_LAYOUT_GENERAL, 1, &region);
+  } else {
+    vkCmdCopyImageToBuffer(command_buffer, view->image, VK_IMAGE_LAYOUT_GENERAL,
+                           edram_buffer_, 1, &region);
+  }
+}
+
 CachedTileView* RenderCache::FindTileView(const TileViewKey& view_key) const {
   // Check the cache.
   // TODO(benvanik): better lookup.
@@ -837,35 +888,31 @@ void RenderCache::EndRenderPass() {
   // can't get the correct height atm) and we may end up overwriting the valid
   // contents of another render target by mistake! Need to reorder copy commands
   // to avoid this.
-  VkBufferImageCopy region;
-  region.bufferRowLength = 0;
-  region.bufferImageHeight = 0;
-  region.imageOffset = {0, 0, 0};
+  std::vector<CachedTileView*> cached_views;

-  // Depth/stencil
+  // Depth
   auto depth_target = current_state_.framebuffer->depth_stencil_attachment;
   if (depth_target && current_state_.config.depth_stencil.used) {
-    region.imageSubresource = {
-        VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1};
-    region.bufferOffset = depth_target->key.tile_offset * 5120;
-    region.imageExtent = {depth_target->key.tile_width * 80u,
-                          depth_target->key.tile_height * 16u, 1};
-    vkCmdCopyImageToBuffer(current_command_buffer_, depth_target->image,
-                           VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, &region);
+    cached_views.push_back(depth_target);
   }

   // Color
-  region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
   for (int i = 0; i < 4; i++) {
     auto target = current_state_.framebuffer->color_attachments[i];
     if (!target || !current_state_.config.color[i].used) {
       continue;
     }

-    region.bufferOffset = target->key.tile_offset * 5120;
-    region.imageExtent = {target->key.tile_width * 80u,
-                          target->key.tile_height * 16u, 1};
-    vkCmdCopyImageToBuffer(current_command_buffer_, target->image,
-                           VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, &region);
+    cached_views.push_back(target);
+  }
+
+  std::sort(
+      cached_views.begin(), cached_views.end(),
+      [](CachedTileView const* a, CachedTileView const* b) { return *a < *b; });
+
+  for (auto view : cached_views) {
+    UpdateTileView(current_command_buffer_, view, false, false);
   }

   current_command_buffer_ = nullptr;
@@ -920,6 +967,7 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer,
                        &buffer_barrier, 0, nullptr);

   // Issue the copy command.
+  // TODO(DrChat): Stencil copies.
   VkBufferImageCopy region;
   region.bufferOffset = edram_base * 5120;
   region.bufferImageHeight = 0;
@@ -928,8 +976,7 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer,
   region.imageExtent = extents;
   region.imageSubresource = {0, 0, 0, 1};
   region.imageSubresource.aspectMask =
-      color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT
-                     : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+      color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
   vkCmdCopyBufferToImage(command_buffer, edram_buffer_, image, image_layout, 1,
                          &region);
@@ -947,13 +994,15 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer,
 void RenderCache::BlitToImage(VkCommandBuffer command_buffer,
                               uint32_t edram_base, uint32_t pitch,
-                              uint32_t height, VkImage image,
-                              VkImageLayout image_layout, bool color_or_depth,
-                              uint32_t format, VkFilter filter,
-                              VkOffset3D offset, VkExtent3D extents) {
+                              uint32_t height, MsaaSamples num_samples,
+                              VkImage image, VkImageLayout image_layout,
+                              bool color_or_depth, uint32_t format,
+                              VkFilter filter, VkOffset3D offset,
+                              VkExtent3D extents) {
   // Grab a tile view that represents the source image.
   TileViewKey key;
   key.color_or_depth = color_or_depth ? 1 : 0;
+  key.msaa_samples = static_cast<uint16_t>(num_samples);
   key.edram_format = format;
   key.tile_offset = edram_base;
   key.tile_width = xe::round_up(pitch, 80) / 80;
@@ -979,14 +1028,14 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer,
   // Update the tile view with current EDRAM contents.
   // TODO: Heuristics to determine if this copy is avoidable.
+  // TODO(DrChat): Stencil copies.
   VkBufferImageCopy buffer_copy;
   buffer_copy.bufferOffset = edram_base * 5120;
   buffer_copy.bufferImageHeight = 0;
   buffer_copy.bufferRowLength = 0;
   buffer_copy.imageSubresource = {0, 0, 0, 1};
   buffer_copy.imageSubresource.aspectMask =
-      color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT
-                     : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+      color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
   buffer_copy.imageExtent = {key.tile_width * 80u, key.tile_height * 16u, 1u};
   buffer_copy.imageOffset = {0, 0, 0};
   vkCmdCopyBufferToImage(command_buffer, edram_buffer_, tile_view->image,
@@ -1018,26 +1067,48 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer,
   assert_true(extents.height <= key.tile_height * 16u);

   // Now issue the blit to the destination.
-  // TODO: Resolve to destination if necessary.
-  VkImageBlit image_blit;
-  image_blit.srcSubresource = {0, 0, 0, 1};
-  image_blit.srcSubresource.aspectMask =
-      color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT
-                     : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
-  image_blit.srcOffsets[0] = {0, 0, 0};
-  image_blit.srcOffsets[1] = {int32_t(extents.width), int32_t(extents.height),
-                              int32_t(extents.depth)};
-
-  image_blit.dstSubresource = {0, 0, 0, 1};
-  image_blit.dstSubresource.aspectMask =
-      color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT
-                     : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
-  image_blit.dstOffsets[0] = offset;
-  image_blit.dstOffsets[1] = {offset.x + int32_t(extents.width),
-                              offset.y + int32_t(extents.height),
-                              offset.z + int32_t(extents.depth)};
-  vkCmdBlitImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL,
-                 image, image_layout, 1, &image_blit, filter);
+  if (num_samples == MsaaSamples::k1X) {
+    VkImageBlit image_blit;
+    image_blit.srcSubresource = {0, 0, 0, 1};
+    image_blit.srcSubresource.aspectMask =
+        color_or_depth
+            ? VK_IMAGE_ASPECT_COLOR_BIT
+            : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+    image_blit.srcOffsets[0] = {0, 0, 0};
+    image_blit.srcOffsets[1] = {int32_t(extents.width), int32_t(extents.height),
+                                int32_t(extents.depth)};
+
+    image_blit.dstSubresource = {0, 0, 0, 1};
+    image_blit.dstSubresource.aspectMask =
+        color_or_depth
+            ? VK_IMAGE_ASPECT_COLOR_BIT
+            : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+    image_blit.dstOffsets[0] = offset;
+    image_blit.dstOffsets[1] = {offset.x + int32_t(extents.width),
+                                offset.y + int32_t(extents.height),
+                                offset.z + int32_t(extents.depth)};
+    vkCmdBlitImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL,
+                   image, image_layout, 1, &image_blit, filter);
+  } else {
+    VkImageResolve image_resolve;
+    image_resolve.srcSubresource = {0, 0, 0, 1};
+    image_resolve.srcSubresource.aspectMask =
+        color_or_depth
+            ? VK_IMAGE_ASPECT_COLOR_BIT
+            : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+    image_resolve.srcOffset = {0, 0, 0};
+
+    image_resolve.dstSubresource = {0, 0, 0, 1};
+    image_resolve.dstSubresource.aspectMask =
+        color_or_depth
+            ? VK_IMAGE_ASPECT_COLOR_BIT
+            : VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
+    image_resolve.dstOffset = offset;
+    image_resolve.extent = extents;
+
+    vkCmdResolveImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL,
+                      image, image_layout, 1, &image_resolve);
+  }

   // Transition the image back into its previous layout.
   image_barrier.srcAccessMask = image_barrier.dstAccessMask;
@@ -1052,13 +1123,14 @@ void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer,
                                   uint32_t edram_base,
                                   ColorRenderTargetFormat format,
                                   uint32_t pitch, uint32_t height,
-                                  float* color) {
+                                  MsaaSamples num_samples, float* color) {
   // TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just
   // need to detect this and calculate a value.

   // Grab a tile view (as we need to clear an image first)
   TileViewKey key;
   key.color_or_depth = 1;
+  key.msaa_samples = static_cast<uint16_t>(num_samples);
   key.edram_format = static_cast<uint16_t>(format);
   key.tile_offset = edram_base;
   key.tile_width = xe::round_up(pitch, 80) / 80;
@@ -1091,13 +1163,15 @@ void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer,
                                          uint32_t edram_base,
                                          DepthRenderTargetFormat format,
                                          uint32_t pitch, uint32_t height,
-                                         float depth, uint32_t stencil) {
+                                         MsaaSamples num_samples, float depth,
+                                         uint32_t stencil) {
   // TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just
   // need to detect this and calculate a value.

   // Grab a tile view (as we need to clear an image first)
   TileViewKey key;
   key.color_or_depth = 0;
+  key.msaa_samples = static_cast<uint16_t>(num_samples);
   key.edram_format = static_cast<uint16_t>(format);
   key.tile_offset = edram_base;
   key.tile_width = xe::round_up(pitch, 80) / 80;
@@ -1117,12 +1191,13 @@ void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer,
                          VK_IMAGE_LAYOUT_GENERAL, &clear_value, 1, &range);

   // Copy image back into EDRAM buffer
+  // TODO(DrChat): Stencil copies.
   VkBufferImageCopy copy_range;
   copy_range.bufferOffset = edram_base * 5120;
   copy_range.bufferImageHeight = 0;
   copy_range.bufferRowLength = 0;
   copy_range.imageSubresource = {
-      VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1,
+      VK_IMAGE_ASPECT_DEPTH_BIT, 0, 0, 1,
   };
   copy_range.imageExtent = {key.tile_width * 80u, key.tile_height * 16u, 1u};
   copy_range.imageOffset = {0, 0, 0};
@@ -1131,6 +1206,11 @@ void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer,
                          &copy_range);
 }

+void RenderCache::FillEDRAM(VkCommandBuffer command_buffer, uint32_t value) {
+  vkCmdFillBuffer(command_buffer, edram_buffer_, 0, kEdramBufferCapacity,
+                  value);
+}
+
 bool RenderCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) {
   uint32_t value = register_file_->values[register_name].u32;
   if (*dest == value) {

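Two details in the RenderCache changes above are worth spelling out. First, the EDRAM byte arithmetic that keeps recurring: one EDRAM tile holds 80 x 16 samples at 4 bytes each, i.e. 5120 bytes, which is where tile_offset * 5120 and tile_width * 80 * tile_height * 16 * 4 come from. Second, EndRenderPass() now gathers the used tile views and copies them back ordered by their EDRAM base offset (via the new CachedTileView::operator<), presumably so overlapping ranges are written back in a predictable order. A small sketch under those assumptions; the helper names are illustrative and not part of the commit, which computes these values inline:

    // Illustrative helpers only.
    VkDeviceSize TileViewByteOffset(const TileViewKey& key) {
      return VkDeviceSize(key.tile_offset) * 5120;  // 80 * 16 * 4 bytes per tile
    }
    VkDeviceSize TileViewByteSize(const TileViewKey& key) {
      return VkDeviceSize(key.tile_width) * 80 * key.tile_height * 16 * 4;
    }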
File 4 of 6

@@ -38,9 +38,9 @@ struct TileViewKey {
   // 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat.
   uint16_t color_or_depth : 1;
   // Surface MSAA samples
-  // uint16_t msaa_samples : 2;
+  uint16_t msaa_samples : 2;
   // Either ColorRenderTargetFormat or DepthRenderTargetFormat.
-  uint16_t edram_format : 15;  // 13;
+  uint16_t edram_format : 13;
 };
 static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed");
@@ -69,6 +69,10 @@ class CachedTileView {
     return *a == *b;
   }

+  bool operator<(const CachedTileView& other) const {
+    return key.tile_offset < other.key.tile_offset;
+  }
+
  private:
   VkDevice device_ = nullptr;
 };
@@ -278,22 +282,26 @@ class RenderCache {
   // Queues commands to blit EDRAM contents into an image.
   // The command buffer must not be inside of a render pass when calling this.
   void BlitToImage(VkCommandBuffer command_buffer, uint32_t edram_base,
-                   uint32_t pitch, uint32_t height, VkImage image,
-                   VkImageLayout image_layout, bool color_or_depth,
-                   uint32_t format, VkFilter filter, VkOffset3D offset,
-                   VkExtent3D extents);
+                   uint32_t pitch, uint32_t height, MsaaSamples num_samples,
+                   VkImage image, VkImageLayout image_layout,
+                   bool color_or_depth, uint32_t format, VkFilter filter,
+                   VkOffset3D offset, VkExtent3D extents);

   // Queues commands to clear EDRAM contents with a solid color.
   // The command buffer must not be inside of a render pass when calling this.
   void ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base,
                        ColorRenderTargetFormat format, uint32_t pitch,
-                       uint32_t height, float* color);
+                       uint32_t height, MsaaSamples num_samples, float* color);
   // Queues commands to clear EDRAM contents with depth/stencil values.
   // The command buffer must not be inside of a render pass when calling this.
   void ClearEDRAMDepthStencil(VkCommandBuffer command_buffer,
                               uint32_t edram_base,
                               DepthRenderTargetFormat format, uint32_t pitch,
-                              uint32_t height, float depth, uint32_t stencil);
+                              uint32_t height, MsaaSamples num_samples,
+                              float depth, uint32_t stencil);
+
+  // Queues commands to fill EDRAM contents with a constant value.
+  // The command buffer must not be inside of a render pass when calling this.
+  void FillEDRAM(VkCommandBuffer command_buffer, uint32_t value);

  private:
   // Parses the current state into a configuration object.
@@ -306,6 +314,9 @@ class RenderCache {
   CachedTileView* FindOrCreateTileView(VkCommandBuffer command_buffer,
                                        const TileViewKey& view_key);

+  void UpdateTileView(VkCommandBuffer command_buffer, CachedTileView* view,
+                      bool load, bool insert_barrier = true);
+
   // Gets or creates a render pass and frame buffer for the given configuration.
   // This attempts to reuse as much as possible across render passes and
   // framebuffers.
@@ -335,6 +346,7 @@ class RenderCache {
   struct ShadowRegisters {
     uint32_t rb_modecontrol;
     uint32_t rb_surface_info;
+    uint32_t rb_color_mask;
     uint32_t rb_color_info;
     uint32_t rb_color1_info;
     uint32_t rb_color2_info;

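For reference, the TileViewKey repacking above stays within the existing 16-bit budget: 1 (color_or_depth) + 2 (msaa_samples) + 13 (edram_format) = 16 bits, so the overall key size is unchanged and the static_assert(sizeof(TileViewKey) == 8, ...) continues to hold. A minimal sketch of just the bitfield portion, as an illustration rather than the real struct:

    #include <cstdint>
    struct PackedSurfaceBits {      // illustrative stand-in for the bitfield part of TileViewKey
      uint16_t color_or_depth : 1;  // unchanged
      uint16_t msaa_samples : 2;    // new in this commit
      uint16_t edram_format : 13;   // narrowed from 15 bits
    };
    static_assert(sizeof(PackedSurfaceBits) == 2, "all three fields share one uint16_t");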
File 5 of 6

@@ -152,19 +152,8 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
   // TODO(benvanik): move to CP or to host (trace dump, etc).
   // This only needs to surround a vkQueueSubmit.
-  static uint32_t frame = 0;
-  if (device_->is_renderdoc_attached() &&
-      (FLAGS_vulkan_renderdoc_capture_all ||
-       trace_state_ == TraceState::kSingleFrame)) {
-    if (queue_mutex_) {
-      queue_mutex_->lock();
-    }
-    device_->BeginRenderDocFrameCapture();
-    if (queue_mutex_) {
-      queue_mutex_->unlock();
-    }
+  if (queue_mutex_) {
+    queue_mutex_->lock();
   }

   // TODO(DrChat): If setup buffer is empty, don't bother queueing it up.
@@ -182,45 +171,37 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
   submit_info.signalSemaphoreCount = 0;
   submit_info.pSignalSemaphores = nullptr;
   if (queue_mutex_) {
-    queue_mutex_->lock();
+    // queue_mutex_->lock();
   }
   status = vkQueueSubmit(queue_, 1, &submit_info, *current_batch_fence_);
   if (queue_mutex_) {
-    queue_mutex_->unlock();
+    // queue_mutex_->unlock();
   }
   CheckResult(status, "vkQueueSubmit");

+  // TODO(DrChat): Disable this completely.
   VkFence fences[] = {*current_batch_fence_};
   status = vkWaitForFences(*device_, 1, fences, true, -1);
   CheckResult(status, "vkWaitForFences");

-  if (device_->is_renderdoc_attached() &&
-      (FLAGS_vulkan_renderdoc_capture_all ||
-       trace_state_ == TraceState::kSingleFrame)) {
-    if (queue_mutex_) {
-      queue_mutex_->lock();
-    }
+  if (device_->is_renderdoc_attached() && capturing_) {
     device_->EndRenderDocFrameCapture();
+    capturing_ = false;

     // HACK(DrChat): Used b/c I disabled trace saving code in the CP.
     // Remove later.
     if (!trace_writer_.is_open()) {
       trace_state_ = TraceState::kDisabled;
     }
-
-    if (queue_mutex_) {
-      queue_mutex_->unlock();
-    }
   }
+
+  if (queue_mutex_) {
+    queue_mutex_->unlock();
+  }

   // Scavenging.
   current_command_buffer_ = nullptr;
   current_setup_buffer_ = nullptr;
-  while (command_buffer_pool_->has_pending()) {
-    command_buffer_pool_->Scavenge();
-    xe::threading::MaybeYield();
-  }
+  command_buffer_pool_->Scavenge();

   texture_cache_->Scavenge();

   current_batch_fence_ = nullptr;
@@ -331,6 +312,22 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
     vkBeginCommandBuffer(current_setup_buffer_, &command_buffer_begin_info);
     CheckResult(status, "vkBeginCommandBuffer");

+    static uint32_t frame = 0;
+    if (device_->is_renderdoc_attached() && !capturing_ &&
+        (FLAGS_vulkan_renderdoc_capture_all ||
+         trace_state_ == TraceState::kSingleFrame)) {
+      if (queue_mutex_) {
+        queue_mutex_->lock();
+      }
+
+      capturing_ = true;
+      device_->BeginRenderDocFrameCapture();
+
+      if (queue_mutex_) {
+        queue_mutex_->unlock();
+      }
+    }
+
     started_command_buffer = true;
   }

   auto command_buffer = current_command_buffer_;
@@ -357,6 +354,10 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
     current_render_state_ = render_cache_->BeginRenderPass(
         command_buffer, vertex_shader, pixel_shader);
     if (!current_render_state_) {
+      command_buffer_pool_->CancelBatch();
+      current_command_buffer_ = nullptr;
+      current_setup_buffer_ = nullptr;
+      current_batch_fence_ = nullptr;
       return false;
     }
   }
@@ -378,18 +379,30 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
   // Pass registers to the shaders.
   if (!PopulateConstants(command_buffer, vertex_shader, pixel_shader)) {
     render_cache_->EndRenderPass();
+    command_buffer_pool_->CancelBatch();
+    current_command_buffer_ = nullptr;
+    current_setup_buffer_ = nullptr;
+    current_batch_fence_ = nullptr;
     return false;
   }

   // Upload and bind index buffer data (if we have any).
   if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) {
     render_cache_->EndRenderPass();
+    command_buffer_pool_->CancelBatch();
+    current_command_buffer_ = nullptr;
+    current_setup_buffer_ = nullptr;
+    current_batch_fence_ = nullptr;
     return false;
   }

   // Upload and bind all vertex buffer data.
   if (!PopulateVertexBuffers(command_buffer, vertex_shader)) {
     render_cache_->EndRenderPass();
+    command_buffer_pool_->CancelBatch();
+    current_command_buffer_ = nullptr;
+    current_setup_buffer_ = nullptr;
+    current_batch_fence_ = nullptr;
     return false;
   }
@@ -423,6 +436,10 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
 bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer,
                                                VulkanShader* vertex_shader,
                                                VulkanShader* pixel_shader) {
+#if FINE_GRAINED_DRAW_SCOPES
+  SCOPE_profile_cpu_f("gpu");
+#endif  // FINE_GRAINED_DRAW_SCOPES
+
   // Upload the constants the shaders require.
   // These are optional, and if none are defined 0 will be returned.
   auto constant_offsets = buffer_cache_->UploadConstantRegisters(
@@ -742,7 +759,7 @@ bool VulkanCommandProcessor::IssueCopy() {
   tex_info.size_2d.input_height = dest_block_height;
   tex_info.size_2d.input_pitch = copy_dest_pitch * 4;
   auto texture = texture_cache_->DemandResolveTexture(
-      tex_info, ColorFormatToTextureFormat(copy_dest_format), nullptr, nullptr);
+      tex_info, ColorFormatToTextureFormat(copy_dest_format), nullptr);
   if (texture->image_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
     // Transition the image to a general layout.
     VkImageMemoryBarrier image_barrier;
@@ -810,8 +827,9 @@ bool VulkanCommandProcessor::IssueCopy() {
     case CopyCommand::kConvert:
       render_cache_->BlitToImage(
           command_buffer, edram_base, surface_pitch, resolve_extent.height,
-          texture->image, texture->image_layout, copy_src_select <= 3,
-          src_format, VK_FILTER_LINEAR, resolve_offset, resolve_extent);
+          surface_msaa, texture->image, texture->image_layout,
+          copy_src_select <= 3, src_format, VK_FILTER_LINEAR, resolve_offset,
+          resolve_extent);
       break;

     case CopyCommand::kConstantOne:
@@ -839,7 +857,7 @@ bool VulkanCommandProcessor::IssueCopy() {
       // TODO(DrChat): Do we know the surface height at this point?
       render_cache_->ClearEDRAMColor(command_buffer, color_edram_base,
                                      color_format, surface_pitch,
-                                     resolve_extent.height, color);
+                                     resolve_extent.height, surface_msaa, color);
     }

     if (depth_clear_enabled) {
@@ -850,7 +868,7 @@ bool VulkanCommandProcessor::IssueCopy() {
      // TODO(DrChat): Do we know the surface height at this point?
      render_cache_->ClearEDRAMDepthStencil(
          command_buffer, depth_edram_base, depth_format, surface_pitch,
-         resolve_extent.height, depth, stencil);
+         resolve_extent.height, surface_msaa, depth, stencil);
     }

     return true;

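The RenderDoc changes in this file split the capture bracket across the frame: BeginRenderDocFrameCapture() moves out of PerformSwap() and into IssueDraw(), guarded by the new capturing_ flag, and PerformSwap() only ends a capture it knows was started. Condensed from the hunks above, with the queue_mutex_ locking elided for brevity:

    // In IssueDraw(), when a fresh command buffer is begun:
    if (device_->is_renderdoc_attached() && !capturing_ &&
        (FLAGS_vulkan_renderdoc_capture_all ||
         trace_state_ == TraceState::kSingleFrame)) {
      capturing_ = true;
      device_->BeginRenderDocFrameCapture();
    }

    // In PerformSwap(), after waiting on the submission fence:
    if (device_->is_renderdoc_attached() && capturing_) {
      device_->EndRenderDocFrameCapture();
      capturing_ = false;
    }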
File 6 of 6

@@ -94,6 +94,7 @@ class VulkanCommandProcessor : public CommandProcessor {
   // Last copy base address, for debugging only.
   uint32_t last_copy_base_ = 0;
+  bool capturing_ = false;

   std::unique_ptr<BufferCache> buffer_cache_;
   std::unique_ptr<PipelineCache> pipeline_cache_;