Enable native MSAA
Copy back EDRAM buffers in order by base offset.
parent 2eca3ce9e6
commit 50f72b4e42
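For orientation, a minimal sketch of the two ideas this change implements, using simplified stand-in types. ToVkSampleCount, TileView and CopyBackInOrder are illustrative names, not part of the codebase; the MsaaSamples values and the tile_offset ordering are taken from the diff below.

// Sketch only: simplified stand-ins, not xenia's actual classes.
#include <algorithm>
#include <cstdint>
#include <vector>

#include <vulkan/vulkan.h>

// Guest MSAA mode as encoded in RB_SURFACE_INFO bits 16..17 (see the diff).
enum class MsaaSamples : uint32_t { k1X = 0, k2X = 1, k4X = 2 };

// Map the guest mode to a native Vulkan sample count; the pipeline,
// CachedTileView and CachedRenderPass changes below each perform this mapping.
VkSampleCountFlagBits ToVkSampleCount(MsaaSamples samples) {
  switch (samples) {
    case MsaaSamples::k2X:
      return VK_SAMPLE_COUNT_2_BIT;
    case MsaaSamples::k4X:
      return VK_SAMPLE_COUNT_4_BIT;
    case MsaaSamples::k1X:
    default:
      return VK_SAMPLE_COUNT_1_BIT;
  }
}

// Hypothetical reduction of CachedTileView: only the EDRAM base offset
// matters for ordering the copy-back.
struct TileView {
  uint32_t tile_offset;  // Base offset into EDRAM, in tiles.
};

// Copy render targets back into the EDRAM buffer in ascending base-offset
// order, so an earlier-placed (and possibly taller-than-needed) target cannot
// overwrite the freshly written contents of one placed after it.
void CopyBackInOrder(std::vector<TileView*>* views) {
  std::sort(views->begin(), views->end(),
            [](const TileView* a, const TileView* b) {
              return a->tile_offset < b->tile_offset;
            });
  for (TileView* view : *views) {
    (void)view;  // The real code calls UpdateTileView(..., /*load=*/false) here.
  }
}

The ordering concern is the one described by the comment at the top of EndRenderPass in the diff; sorting by base offset mirrors the operator< added to CachedTileView.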
@@ -187,6 +187,10 @@ PipelineCache::UpdateStatus PipelineCache::ConfigurePipeline(
VkCommandBuffer command_buffer, const RenderState* render_state,
VulkanShader* vertex_shader, VulkanShader* pixel_shader,
PrimitiveType primitive_type, VkPipeline* pipeline_out) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES

assert_not_null(pipeline_out);

// Perform a pass over all registers and state updating our cached structures.

@@ -323,6 +327,10 @@ VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type,

bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
bool full_update) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES

auto& regs = set_dynamic_state_registers_;

bool window_offset_dirty = SetShadowRegister(&regs.pa_sc_window_offset,

@@ -393,20 +401,25 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
auto surface_msaa =
static_cast<MsaaSamples>((regs.rb_surface_info >> 16) & 0x3);
// TODO(benvanik): ??
// FIXME: Some games depend on these for proper clears (e.g. only clearing
// half the size they actually want with 4x MSAA), but others don't.
// Figure out how these games are expecting clears to be done.
float window_width_scalar = 1;
float window_height_scalar = 1;
switch (surface_msaa) {
case MsaaSamples::k1X:
break;
case MsaaSamples::k2X:
window_width_scalar = 2;
// ??
window_width_scalar = window_height_scalar = 1.41421356f;
break;
case MsaaSamples::k4X:
window_width_scalar = 2;
window_height_scalar = 2;
window_width_scalar = window_height_scalar = 2;
break;
}

// window_width_scalar = window_height_scalar = 1;

// Whether each of the viewport settings are enabled.
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
bool vport_xscale_enable = (regs.pa_cl_vte_cntl & (1 << 0)) > 0;

@@ -434,6 +447,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
float voy = vport_yoffset_enable ? regs.pa_cl_vport_yoffset : 0;
float vsx = vport_xscale_enable ? regs.pa_cl_vport_xscale : 1;
float vsy = vport_yscale_enable ? regs.pa_cl_vport_yscale : 1;

window_width_scalar = window_height_scalar = 1;
float vpw = 2 * window_width_scalar * vsx;
float vph = -2 * window_height_scalar * vsy;

@@ -481,25 +495,25 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
vkCmdSetBlendConstants(command_buffer, regs.rb_blend_rgba);
}

// VK_DYNAMIC_STATE_LINE_WIDTH
vkCmdSetLineWidth(command_buffer, 1.0f);
if (full_update) {
// VK_DYNAMIC_STATE_LINE_WIDTH
vkCmdSetLineWidth(command_buffer, 1.0f);

// VK_DYNAMIC_STATE_DEPTH_BIAS
vkCmdSetDepthBias(command_buffer, 0.0f, 0.0f, 0.0f);
// VK_DYNAMIC_STATE_DEPTH_BIAS
vkCmdSetDepthBias(command_buffer, 0.0f, 0.0f, 0.0f);

// VK_DYNAMIC_STATE_DEPTH_BOUNDS
vkCmdSetDepthBounds(command_buffer, 0.0f, 1.0f);
// VK_DYNAMIC_STATE_DEPTH_BOUNDS
vkCmdSetDepthBounds(command_buffer, 0.0f, 1.0f);

// VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK
vkCmdSetStencilCompareMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0);
// VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK
vkCmdSetStencilCompareMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0);

// VK_DYNAMIC_STATE_STENCIL_REFERENCE
vkCmdSetStencilReference(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0);
// VK_DYNAMIC_STATE_STENCIL_REFERENCE
vkCmdSetStencilReference(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0);

// VK_DYNAMIC_STATE_STENCIL_WRITE_MASK
vkCmdSetStencilWriteMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0);

// TODO(benvanik): push constants.
// VK_DYNAMIC_STATE_STENCIL_WRITE_MASK
vkCmdSetStencilWriteMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0);
}

bool push_constants_dirty = full_update || viewport_state_dirty;
push_constants_dirty |=

@@ -530,7 +544,7 @@ bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer,
push_constants.window_scale[1] = -1.0f;
} else {
push_constants.window_scale[0] = 1.0f / 2560.0f;
push_constants.window_scale[1] = -1.0f / 2560.0f;
push_constants.window_scale[1] = 1.0f / 2560.0f;
}

// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf

@@ -756,7 +770,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState(
: VK_FORMAT_A2R10G10B10_UNORM_PACK32;
break;
case VertexFormat::k_10_11_11:
assert_always("unsupported?");
// assert_always("unsupported?");
vertex_attrib_descr.format = VK_FORMAT_B10G11R11_UFLOAT_PACK32;
break;
case VertexFormat::k_11_11_10:

@@ -934,6 +948,7 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState(
XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR);
dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index,
XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX);
dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
regs.primitive_type = primitive_type;
XXH64_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) {

@@ -947,7 +962,13 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState(
// TODO(benvanik): right setting?
state_info.depthClampEnable = VK_FALSE;

// TODO(benvanik): use in depth-only mode?
// Discard rasterizer output in depth-only mode.
// TODO(DrChat): Figure out how to make this work properly.
/*
auto enable_mode = static_cast<xenos::ModeControl>(regs.rb_modecontrol & 0x7);
state_info.rasterizerDiscardEnable =
enable_mode == xenos::ModeControl::kColorDepth ? VK_FALSE : VK_TRUE;
//*/
state_info.rasterizerDiscardEnable = VK_FALSE;

bool poly_mode = ((regs.pa_su_sc_mode_cntl >> 3) & 0x3) != 0;

@@ -1004,20 +1025,49 @@ PipelineCache::UpdateStatus PipelineCache::UpdateMultisampleState() {
auto& regs = update_multisample_state_regs_;
auto& state_info = update_multisample_state_info_;

bool dirty = false;
dirty |= SetShadowRegister(&regs.pa_sc_aa_config, XE_GPU_REG_PA_SC_AA_CONFIG);
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
XXH64_update(&hash_state_, &regs, sizeof(regs));
if (!dirty) {
return UpdateStatus::kCompatible;
}

state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
state_info.pNext = nullptr;
state_info.flags = 0;

// PA_SC_AA_CONFIG MSAA_NUM_SAMPLES
// PA_SU_SC_MODE_CNTL MSAA_ENABLE
state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
// state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
//*
auto msaa_num_samples =
static_cast<MsaaSamples>((regs.rb_surface_info >> 16) & 0x3);
switch (msaa_num_samples) {
case MsaaSamples::k1X:
state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
break;
case MsaaSamples::k2X:
state_info.rasterizationSamples = VK_SAMPLE_COUNT_2_BIT;
break;
case MsaaSamples::k4X:
state_info.rasterizationSamples = VK_SAMPLE_COUNT_4_BIT;
break;
default:
assert_unhandled_case(msaa_num_samples);
break;
}
//*/

state_info.sampleShadingEnable = VK_FALSE;
state_info.minSampleShading = 0;
state_info.pSampleMask = nullptr;
state_info.alphaToCoverageEnable = VK_FALSE;
state_info.alphaToOneEnable = VK_FALSE;

return UpdateStatus::kCompatible;
return UpdateStatus::kMismatch;
}

PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {

@@ -211,6 +211,7 @@ class PipelineCache {
uint32_t pa_sc_screen_scissor_tl;
uint32_t pa_sc_screen_scissor_br;
uint32_t multi_prim_ib_reset_index;
uint32_t rb_modecontrol;

UpdateRasterizationStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }

@@ -218,6 +219,10 @@ class PipelineCache {
VkPipelineRasterizationStateCreateInfo update_rasterization_state_info_;

struct UpdateMultisampleStateeRegisters {
uint32_t pa_sc_aa_config;
uint32_t pa_su_sc_mode_cntl;
uint32_t rb_surface_info;

UpdateMultisampleStateeRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_multisample_state_regs_;

@@ -165,8 +165,23 @@ CachedTileView::CachedTileView(ui::vulkan::VulkanDevice* device,
image_info.extent.depth = 1;
image_info.mipLevels = 1;
image_info.arrayLayers = 1;
image_info.samples =
static_cast<VkSampleCountFlagBits>(VK_SAMPLE_COUNT_1_BIT);
// image_info.samples = VK_SAMPLE_COUNT_1_BIT;
//*
auto msaa_samples = static_cast<MsaaSamples>(key.msaa_samples);
switch (msaa_samples) {
case MsaaSamples::k1X:
image_info.samples = VK_SAMPLE_COUNT_1_BIT;
break;
case MsaaSamples::k2X:
image_info.samples = VK_SAMPLE_COUNT_2_BIT;
break;
case MsaaSamples::k4X:
image_info.samples = VK_SAMPLE_COUNT_4_BIT;
break;
default:
assert_unhandled_case(msaa_samples);
}
//*/
image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
VK_IMAGE_USAGE_TRANSFER_DST_BIT |

@@ -322,13 +337,29 @@ CachedRenderPass::CachedRenderPass(VkDevice device,
: device_(device) {
std::memcpy(&config, &desired_config, sizeof(config));

VkSampleCountFlagBits sample_count;
switch (desired_config.surface_msaa) {
case MsaaSamples::k1X:
sample_count = VK_SAMPLE_COUNT_1_BIT;
break;
case MsaaSamples::k2X:
sample_count = VK_SAMPLE_COUNT_2_BIT;
break;
case MsaaSamples::k4X:
sample_count = VK_SAMPLE_COUNT_4_BIT;
break;
default:
assert_unhandled_case(desired_config.surface_msaa);
break;
}

// Initialize all attachments to default unused.
// As we set layout(location=RT) in shaders we must always provide 4.
VkAttachmentDescription attachments[5];
for (int i = 0; i < 4; ++i) {
attachments[i].flags = 0;
attachments[i].format = VK_FORMAT_UNDEFINED;
attachments[i].samples = VK_SAMPLE_COUNT_1_BIT;
attachments[i].samples = sample_count;
attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;

@@ -339,7 +370,7 @@ CachedRenderPass::CachedRenderPass(VkDevice device,
auto& depth_stencil_attachment = attachments[4];
depth_stencil_attachment.flags = 0;
depth_stencil_attachment.format = VK_FORMAT_UNDEFINED;
depth_stencil_attachment.samples = VK_SAMPLE_COUNT_1_BIT;
depth_stencil_attachment.samples = sample_count;
depth_stencil_attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
depth_stencil_attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
depth_stencil_attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;

@@ -404,6 +435,10 @@ CachedRenderPass::~CachedRenderPass() {

bool CachedRenderPass::IsCompatible(
const RenderConfiguration& desired_config) const {
if (config.surface_msaa != desired_config.surface_msaa) {
return false;
}

for (int i = 0; i < 4; ++i) {
// TODO(benvanik): allow compatible vulkan formats.
if (config.color[i].format != desired_config.color[i].format) {

@@ -503,12 +538,18 @@ bool RenderCache::dirty() const {
regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL].u32;
dirty |= cur_regs.pa_sc_window_scissor_br !=
regs[XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR].u32;
dirty |= (cur_regs.rb_depthcontrol & (0x4 | 0x2)) !=
(regs[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x4 | 0x2));
return dirty;
}

const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
VulkanShader* vertex_shader,
VulkanShader* pixel_shader) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES

assert_null(current_command_buffer_);
current_command_buffer_ = command_buffer;

@@ -520,6 +561,7 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
bool dirty = false;
dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK);
dirty |= SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO);
dirty |= SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO);
dirty |= SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO);

@@ -529,7 +571,11 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL);
dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br,
XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR);
regs.rb_depthcontrol = register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32;
dirty |=
(regs.rb_depthcontrol & (0x4 | 0x2)) !=
(register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x4 | 0x2));
regs.rb_depthcontrol =
register_file_->values[XE_GPU_REG_RB_DEPTHCONTROL].u32 & (0x4 | 0x2);
if (!dirty && current_state_.render_pass) {
// No registers have changed so we can reuse the previous render pass -
// just begin with what we had.

@@ -549,7 +595,10 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,

// Speculatively see if targets are actually used so we can skip copies
for (int i = 0; i < 4; i++) {
config->color[i].used = pixel_shader->writes_color_target(i);
uint32_t color_mask = (regs.rb_color_mask >> (i * 4)) & 0xF;
config->color[i].used =
config->mode_control == xenos::ModeControl::kColorDepth &&
color_mask != 0;
}
config->depth_stencil.used = !!(regs.rb_depthcontrol & (0x4 | 0x2));

@@ -558,66 +607,20 @@ const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
current_state_.framebuffer = framebuffer;
current_state_.framebuffer_handle = framebuffer->handle;

VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barrier.pNext = nullptr;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.buffer = edram_buffer_;
barrier.offset = 0;
barrier.size = 0;

// Copy EDRAM buffer into render targets with tight packing.
VkBufferImageCopy region;
region.bufferRowLength = 0;
region.bufferImageHeight = 0;
region.imageOffset = {0, 0, 0};

// Depth
auto depth_target = current_state_.framebuffer->depth_stencil_attachment;
if (depth_target && current_state_.config.depth_stencil.used) {
region.imageSubresource = {
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1};
region.bufferOffset = depth_target->key.tile_offset * 5120;

// Wait for any potential copies to finish.
barrier.offset = region.bufferOffset;
barrier.size = depth_target->key.tile_width * 80 *
depth_target->key.tile_height * 16 * 4;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
&barrier, 0, nullptr);

region.imageExtent = {depth_target->key.tile_width * 80u,
depth_target->key.tile_height * 16u, 1};
vkCmdCopyBufferToImage(command_buffer, edram_buffer_, depth_target->image,
VK_IMAGE_LAYOUT_GENERAL, 1, &region);
UpdateTileView(command_buffer, depth_target, true);
}

// Color
region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
for (int i = 0; i < 4; i++) {
auto target = current_state_.framebuffer->color_attachments[i];
if (!target || !current_state_.config.color[i].used) {
continue;
}

region.bufferOffset = target->key.tile_offset * 5120;

// Wait for any potential copies to finish.
barrier.offset = region.bufferOffset;
barrier.size =
target->key.tile_width * 80 * target->key.tile_height * 16 * 4;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
&barrier, 0, nullptr);

region.imageExtent = {target->key.tile_width * 80u,
target->key.tile_height * 16u, 1};
vkCmdCopyBufferToImage(command_buffer, edram_buffer_, target->image,
VK_IMAGE_LAYOUT_GENERAL, 1, &region);
UpdateTileView(command_buffer, target, true);
}
}
if (!render_pass) {

@@ -758,6 +761,7 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer,
color_key.tile_width = xe::round_up(config->surface_pitch_px, 80) / 80;
color_key.tile_height = xe::round_up(config->surface_height_px, 16) / 16;
color_key.color_or_depth = 1;
color_key.msaa_samples = static_cast<uint16_t>(config->surface_msaa);
color_key.edram_format = static_cast<uint16_t>(config->color[i].format);
target_color_attachments[i] =
FindOrCreateTileView(command_buffer, color_key);

@@ -774,6 +778,8 @@ bool RenderCache::ConfigureRenderPass(VkCommandBuffer command_buffer,
depth_stencil_key.tile_height =
xe::round_up(config->surface_height_px, 16) / 16;
depth_stencil_key.color_or_depth = 0;
depth_stencil_key.msaa_samples =
static_cast<uint16_t>(config->surface_msaa);
depth_stencil_key.edram_format =
static_cast<uint16_t>(config->depth_stencil.format);
auto target_depth_stencil_attachment =

@@ -810,6 +816,51 @@ CachedTileView* RenderCache::FindOrCreateTileView(
return tile_view;
}

void RenderCache::UpdateTileView(VkCommandBuffer command_buffer,
CachedTileView* view, bool load,
bool insert_barrier) {
if (insert_barrier) {
VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
barrier.pNext = nullptr;
if (load) {
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
} else {
barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
}
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.buffer = edram_buffer_;
barrier.offset = view->key.tile_offset * 5120;
barrier.size = view->key.tile_width * 80 * view->key.tile_height * 16 * 4;
vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, 0, 0, nullptr, 1,
&barrier, 0, nullptr);
}

VkBufferImageCopy region;
region.bufferOffset = view->key.tile_offset * 5120;
region.bufferRowLength = 0;
region.bufferImageHeight = 0;
region.imageSubresource = {0, 0, 0, 1};
region.imageSubresource.aspectMask =
view->key.color_or_depth
? VK_IMAGE_ASPECT_COLOR_BIT
: VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
region.imageOffset = {0, 0, 0};
region.imageExtent = {view->key.tile_width * 80u, view->key.tile_height * 16u,
1};
if (load) {
vkCmdCopyBufferToImage(command_buffer, edram_buffer_, view->image,
VK_IMAGE_LAYOUT_GENERAL, 1, &region);
} else {
vkCmdCopyImageToBuffer(command_buffer, view->image, VK_IMAGE_LAYOUT_GENERAL,
edram_buffer_, 1, &region);
}
}

CachedTileView* RenderCache::FindTileView(const TileViewKey& view_key) const {
// Check the cache.
// TODO(benvanik): better lookup.

@@ -837,35 +888,31 @@ void RenderCache::EndRenderPass() {
// can't get the correct height atm) and we may end up overwriting the valid
// contents of another render target by mistake! Need to reorder copy commands
// to avoid this.
VkBufferImageCopy region;
region.bufferRowLength = 0;
region.bufferImageHeight = 0;
region.imageOffset = {0, 0, 0};
// Depth/stencil

std::vector<CachedTileView*> cached_views;

// Depth
auto depth_target = current_state_.framebuffer->depth_stencil_attachment;
if (depth_target && current_state_.config.depth_stencil.used) {
region.imageSubresource = {
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1};
region.bufferOffset = depth_target->key.tile_offset * 5120;
region.imageExtent = {depth_target->key.tile_width * 80u,
depth_target->key.tile_height * 16u, 1};
vkCmdCopyImageToBuffer(current_command_buffer_, depth_target->image,
VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, &region);
cached_views.push_back(depth_target);
}

// Color
region.imageSubresource = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1};
for (int i = 0; i < 4; i++) {
auto target = current_state_.framebuffer->color_attachments[i];
if (!target || !current_state_.config.color[i].used) {
continue;
}

region.bufferOffset = target->key.tile_offset * 5120;
region.imageExtent = {target->key.tile_width * 80u,
target->key.tile_height * 16u, 1};
vkCmdCopyImageToBuffer(current_command_buffer_, target->image,
VK_IMAGE_LAYOUT_GENERAL, edram_buffer_, 1, &region);
cached_views.push_back(target);
}

std::sort(
cached_views.begin(), cached_views.end(),
[](CachedTileView const* a, CachedTileView const* b) { return *a < *b; });

for (auto view : cached_views) {
UpdateTileView(current_command_buffer_, view, false, false);
}

current_command_buffer_ = nullptr;

@@ -920,6 +967,7 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer,
&buffer_barrier, 0, nullptr);

// Issue the copy command.
// TODO(DrChat): Stencil copies.
VkBufferImageCopy region;
region.bufferOffset = edram_base * 5120;
region.bufferImageHeight = 0;

@@ -928,8 +976,7 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer,
region.imageExtent = extents;
region.imageSubresource = {0, 0, 0, 1};
region.imageSubresource.aspectMask =
color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT
: VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
vkCmdCopyBufferToImage(command_buffer, edram_buffer_, image, image_layout, 1,
&region);

@@ -947,13 +994,15 @@ void RenderCache::RawCopyToImage(VkCommandBuffer command_buffer,

void RenderCache::BlitToImage(VkCommandBuffer command_buffer,
uint32_t edram_base, uint32_t pitch,
uint32_t height, VkImage image,
VkImageLayout image_layout, bool color_or_depth,
uint32_t format, VkFilter filter,
VkOffset3D offset, VkExtent3D extents) {
uint32_t height, MsaaSamples num_samples,
VkImage image, VkImageLayout image_layout,
bool color_or_depth, uint32_t format,
VkFilter filter, VkOffset3D offset,
VkExtent3D extents) {
// Grab a tile view that represents the source image.
TileViewKey key;
key.color_or_depth = color_or_depth ? 1 : 0;
key.msaa_samples = static_cast<uint16_t>(num_samples);
key.edram_format = format;
key.tile_offset = edram_base;
key.tile_width = xe::round_up(pitch, 80) / 80;

@@ -979,14 +1028,14 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer,

// Update the tile view with current EDRAM contents.
// TODO: Heuristics to determine if this copy is avoidable.
// TODO(DrChat): Stencil copies.
VkBufferImageCopy buffer_copy;
buffer_copy.bufferOffset = edram_base * 5120;
buffer_copy.bufferImageHeight = 0;
buffer_copy.bufferRowLength = 0;
buffer_copy.imageSubresource = {0, 0, 0, 1};
buffer_copy.imageSubresource.aspectMask =
color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT
: VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT;
buffer_copy.imageExtent = {key.tile_width * 80u, key.tile_height * 16u, 1u};
buffer_copy.imageOffset = {0, 0, 0};
vkCmdCopyBufferToImage(command_buffer, edram_buffer_, tile_view->image,

@@ -1018,26 +1067,48 @@ void RenderCache::BlitToImage(VkCommandBuffer command_buffer,
assert_true(extents.height <= key.tile_height * 16u);

// Now issue the blit to the destination.
// TODO: Resolve to destination if necessary.
VkImageBlit image_blit;
image_blit.srcSubresource = {0, 0, 0, 1};
image_blit.srcSubresource.aspectMask =
color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT
: VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
image_blit.srcOffsets[0] = {0, 0, 0};
image_blit.srcOffsets[1] = {int32_t(extents.width), int32_t(extents.height),
int32_t(extents.depth)};
if (num_samples == MsaaSamples::k1X) {
VkImageBlit image_blit;
image_blit.srcSubresource = {0, 0, 0, 1};
image_blit.srcSubresource.aspectMask =
color_or_depth
? VK_IMAGE_ASPECT_COLOR_BIT
: VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
image_blit.srcOffsets[0] = {0, 0, 0};
image_blit.srcOffsets[1] = {int32_t(extents.width), int32_t(extents.height),
int32_t(extents.depth)};

image_blit.dstSubresource = {0, 0, 0, 1};
image_blit.dstSubresource.aspectMask =
color_or_depth ? VK_IMAGE_ASPECT_COLOR_BIT
: VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
image_blit.dstOffsets[0] = offset;
image_blit.dstOffsets[1] = {offset.x + int32_t(extents.width),
offset.y + int32_t(extents.height),
offset.z + int32_t(extents.depth)};
vkCmdBlitImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL,
image, image_layout, 1, &image_blit, filter);
image_blit.dstSubresource = {0, 0, 0, 1};
image_blit.dstSubresource.aspectMask =
color_or_depth
? VK_IMAGE_ASPECT_COLOR_BIT
: VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
image_blit.dstOffsets[0] = offset;
image_blit.dstOffsets[1] = {offset.x + int32_t(extents.width),
offset.y + int32_t(extents.height),
offset.z + int32_t(extents.depth)};
vkCmdBlitImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL,
image, image_layout, 1, &image_blit, filter);
} else {
VkImageResolve image_resolve;
image_resolve.srcSubresource = {0, 0, 0, 1};
image_resolve.srcSubresource.aspectMask =
color_or_depth
? VK_IMAGE_ASPECT_COLOR_BIT
: VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
image_resolve.srcOffset = {0, 0, 0};

image_resolve.dstSubresource = {0, 0, 0, 1};
image_resolve.dstSubresource.aspectMask =
color_or_depth
? VK_IMAGE_ASPECT_COLOR_BIT
: VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
image_resolve.dstOffset = offset;

image_resolve.extent = extents;
vkCmdResolveImage(command_buffer, tile_view->image, VK_IMAGE_LAYOUT_GENERAL,
image, image_layout, 1, &image_resolve);
}

// Transition the image back into its previous layout.
image_barrier.srcAccessMask = image_barrier.dstAccessMask;

@@ -1052,13 +1123,14 @@ void RenderCache::ClearEDRAMColor(VkCommandBuffer command_buffer,
uint32_t edram_base,
ColorRenderTargetFormat format,
uint32_t pitch, uint32_t height,
float* color) {
MsaaSamples num_samples, float* color) {
// TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just
// need to detect this and calculate a value.

// Grab a tile view (as we need to clear an image first)
TileViewKey key;
key.color_or_depth = 1;
key.msaa_samples = static_cast<uint16_t>(num_samples);
key.edram_format = static_cast<uint16_t>(format);
key.tile_offset = edram_base;
key.tile_width = xe::round_up(pitch, 80) / 80;

@@ -1091,13 +1163,15 @@ void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer,
uint32_t edram_base,
DepthRenderTargetFormat format,
uint32_t pitch, uint32_t height,
float depth, uint32_t stencil) {
MsaaSamples num_samples, float depth,
uint32_t stencil) {
// TODO: For formats <= 4 bpp, we can directly fill the EDRAM buffer. Just
// need to detect this and calculate a value.

// Grab a tile view (as we need to clear an image first)
TileViewKey key;
key.color_or_depth = 0;
key.msaa_samples = static_cast<uint16_t>(num_samples);
key.edram_format = static_cast<uint16_t>(format);
key.tile_offset = edram_base;
key.tile_width = xe::round_up(pitch, 80) / 80;

@@ -1117,12 +1191,13 @@ void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer,
VK_IMAGE_LAYOUT_GENERAL, &clear_value, 1, &range);

// Copy image back into EDRAM buffer
// TODO(DrChat): Stencil copies.
VkBufferImageCopy copy_range;
copy_range.bufferOffset = edram_base * 5120;
copy_range.bufferImageHeight = 0;
copy_range.bufferRowLength = 0;
copy_range.imageSubresource = {
VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 0, 1,
VK_IMAGE_ASPECT_DEPTH_BIT, 0, 0, 1,
};
copy_range.imageExtent = {key.tile_width * 80u, key.tile_height * 16u, 1u};
copy_range.imageOffset = {0, 0, 0};

@@ -1131,6 +1206,11 @@ void RenderCache::ClearEDRAMDepthStencil(VkCommandBuffer command_buffer,
&copy_range);
}

void RenderCache::FillEDRAM(VkCommandBuffer command_buffer, uint32_t value) {
vkCmdFillBuffer(command_buffer, edram_buffer_, 0, kEdramBufferCapacity,
value);
}

bool RenderCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) {
uint32_t value = register_file_->values[register_name].u32;
if (*dest == value) {

@@ -38,9 +38,9 @@ struct TileViewKey {
// 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat.
uint16_t color_or_depth : 1;
// Surface MSAA samples
// uint16_t msaa_samples : 2;
uint16_t msaa_samples : 2;
// Either ColorRenderTargetFormat or DepthRenderTargetFormat.
uint16_t edram_format : 15; // 13;
uint16_t edram_format : 13;
};
static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed");

@@ -69,6 +69,10 @@ class CachedTileView {
return *a == *b;
}

bool operator<(const CachedTileView& other) const {
return key.tile_offset < other.key.tile_offset;
}

private:
VkDevice device_ = nullptr;
};

@@ -278,22 +282,26 @@ class RenderCache {
// Queues commands to blit EDRAM contents into an image.
// The command buffer must not be inside of a render pass when calling this.
void BlitToImage(VkCommandBuffer command_buffer, uint32_t edram_base,
uint32_t pitch, uint32_t height, VkImage image,
VkImageLayout image_layout, bool color_or_depth,
uint32_t format, VkFilter filter, VkOffset3D offset,
VkExtent3D extents);
uint32_t pitch, uint32_t height, MsaaSamples num_samples,
VkImage image, VkImageLayout image_layout,
bool color_or_depth, uint32_t format, VkFilter filter,
VkOffset3D offset, VkExtent3D extents);

// Queues commands to clear EDRAM contents with a solid color.
// The command buffer must not be inside of a render pass when calling this.
void ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base,
ColorRenderTargetFormat format, uint32_t pitch,
uint32_t height, float* color);
uint32_t height, MsaaSamples num_samples, float* color);
// Queues commands to clear EDRAM contents with depth/stencil values.
// The command buffer must not be inside of a render pass when calling this.
void ClearEDRAMDepthStencil(VkCommandBuffer command_buffer,
uint32_t edram_base,
DepthRenderTargetFormat format, uint32_t pitch,
uint32_t height, float depth, uint32_t stencil);
uint32_t height, MsaaSamples num_samples,
float depth, uint32_t stencil);
// Queues commands to fill EDRAM contents with a constant value.
// The command buffer must not be inside of a render pass when calling this.
void FillEDRAM(VkCommandBuffer command_buffer, uint32_t value);

private:
// Parses the current state into a configuration object.

@@ -306,6 +314,9 @@ class RenderCache {
CachedTileView* FindOrCreateTileView(VkCommandBuffer command_buffer,
const TileViewKey& view_key);

void UpdateTileView(VkCommandBuffer command_buffer, CachedTileView* view,
bool load, bool insert_barrier = true);

// Gets or creates a render pass and frame buffer for the given configuration.
// This attempts to reuse as much as possible across render passes and
// framebuffers.

@@ -335,6 +346,7 @@ class RenderCache {
struct ShadowRegisters {
uint32_t rb_modecontrol;
uint32_t rb_surface_info;
uint32_t rb_color_mask;
uint32_t rb_color_info;
uint32_t rb_color1_info;
uint32_t rb_color2_info;

@@ -152,19 +152,8 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,

// TODO(benvanik): move to CP or to host (trace dump, etc).
// This only needs to surround a vkQueueSubmit.
static uint32_t frame = 0;
if (device_->is_renderdoc_attached() &&
(FLAGS_vulkan_renderdoc_capture_all ||
trace_state_ == TraceState::kSingleFrame)) {
if (queue_mutex_) {
queue_mutex_->lock();
}

device_->BeginRenderDocFrameCapture();

if (queue_mutex_) {
queue_mutex_->unlock();
}
if (queue_mutex_) {
queue_mutex_->lock();
}

// TODO(DrChat): If setup buffer is empty, don't bother queueing it up.

@@ -182,45 +171,37 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
submit_info.signalSemaphoreCount = 0;
submit_info.pSignalSemaphores = nullptr;
if (queue_mutex_) {
queue_mutex_->lock();
// queue_mutex_->lock();
}
status = vkQueueSubmit(queue_, 1, &submit_info, *current_batch_fence_);
if (queue_mutex_) {
queue_mutex_->unlock();
// queue_mutex_->unlock();
}
CheckResult(status, "vkQueueSubmit");

// TODO(DrChat): Disable this completely.
VkFence fences[] = {*current_batch_fence_};
status = vkWaitForFences(*device_, 1, fences, true, -1);
CheckResult(status, "vkWaitForFences");

if (device_->is_renderdoc_attached() &&
(FLAGS_vulkan_renderdoc_capture_all ||
trace_state_ == TraceState::kSingleFrame)) {
if (queue_mutex_) {
queue_mutex_->lock();
}

if (device_->is_renderdoc_attached() && capturing_) {
device_->EndRenderDocFrameCapture();
capturing_ = false;

// HACK(DrChat): Used b/c I disabled trace saving code in the CP.
// Remove later.
if (!trace_writer_.is_open()) {
trace_state_ = TraceState::kDisabled;
}

if (queue_mutex_) {
queue_mutex_->unlock();
}
}
if (queue_mutex_) {
queue_mutex_->unlock();
}

// Scavenging.
current_command_buffer_ = nullptr;
current_setup_buffer_ = nullptr;
while (command_buffer_pool_->has_pending()) {
command_buffer_pool_->Scavenge();
xe::threading::MaybeYield();
}
command_buffer_pool_->Scavenge();

texture_cache_->Scavenge();
current_batch_fence_ = nullptr;

@@ -331,6 +312,22 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
vkBeginCommandBuffer(current_setup_buffer_, &command_buffer_begin_info);
CheckResult(status, "vkBeginCommandBuffer");

static uint32_t frame = 0;
if (device_->is_renderdoc_attached() && !capturing_ &&
(FLAGS_vulkan_renderdoc_capture_all ||
trace_state_ == TraceState::kSingleFrame)) {
if (queue_mutex_) {
queue_mutex_->lock();
}

capturing_ = true;
device_->BeginRenderDocFrameCapture();

if (queue_mutex_) {
queue_mutex_->unlock();
}
}

started_command_buffer = true;
}
auto command_buffer = current_command_buffer_;

@@ -357,6 +354,10 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
current_render_state_ = render_cache_->BeginRenderPass(
command_buffer, vertex_shader, pixel_shader);
if (!current_render_state_) {
command_buffer_pool_->CancelBatch();
current_command_buffer_ = nullptr;
current_setup_buffer_ = nullptr;
current_batch_fence_ = nullptr;
return false;
}
}

@@ -378,18 +379,30 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
// Pass registers to the shaders.
if (!PopulateConstants(command_buffer, vertex_shader, pixel_shader)) {
render_cache_->EndRenderPass();
command_buffer_pool_->CancelBatch();
current_command_buffer_ = nullptr;
current_setup_buffer_ = nullptr;
current_batch_fence_ = nullptr;
return false;
}

// Upload and bind index buffer data (if we have any).
if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) {
render_cache_->EndRenderPass();
command_buffer_pool_->CancelBatch();
current_command_buffer_ = nullptr;
current_setup_buffer_ = nullptr;
current_batch_fence_ = nullptr;
return false;
}

// Upload and bind all vertex buffer data.
if (!PopulateVertexBuffers(command_buffer, vertex_shader)) {
render_cache_->EndRenderPass();
command_buffer_pool_->CancelBatch();
current_command_buffer_ = nullptr;
current_setup_buffer_ = nullptr;
current_batch_fence_ = nullptr;
return false;
}

@@ -423,6 +436,10 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer,
VulkanShader* vertex_shader,
VulkanShader* pixel_shader) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES

// Upload the constants the shaders require.
// These are optional, and if none are defined 0 will be returned.
auto constant_offsets = buffer_cache_->UploadConstantRegisters(

@@ -742,7 +759,7 @@ bool VulkanCommandProcessor::IssueCopy() {
tex_info.size_2d.input_height = dest_block_height;
tex_info.size_2d.input_pitch = copy_dest_pitch * 4;
auto texture = texture_cache_->DemandResolveTexture(
tex_info, ColorFormatToTextureFormat(copy_dest_format), nullptr, nullptr);
tex_info, ColorFormatToTextureFormat(copy_dest_format), nullptr);
if (texture->image_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
// Transition the image to a general layout.
VkImageMemoryBarrier image_barrier;

@@ -810,8 +827,9 @@ bool VulkanCommandProcessor::IssueCopy() {
case CopyCommand::kConvert:
render_cache_->BlitToImage(
command_buffer, edram_base, surface_pitch, resolve_extent.height,
texture->image, texture->image_layout, copy_src_select <= 3,
src_format, VK_FILTER_LINEAR, resolve_offset, resolve_extent);
surface_msaa, texture->image, texture->image_layout,
copy_src_select <= 3, src_format, VK_FILTER_LINEAR, resolve_offset,
resolve_extent);
break;

case CopyCommand::kConstantOne:

@@ -839,7 +857,7 @@ bool VulkanCommandProcessor::IssueCopy() {
// TODO(DrChat): Do we know the surface height at this point?
render_cache_->ClearEDRAMColor(command_buffer, color_edram_base,
color_format, surface_pitch,
resolve_extent.height, color);
resolve_extent.height, surface_msaa, color);
}

if (depth_clear_enabled) {

@@ -850,7 +868,7 @@ bool VulkanCommandProcessor::IssueCopy() {
// TODO(DrChat): Do we know the surface height at this point?
render_cache_->ClearEDRAMDepthStencil(
command_buffer, depth_edram_base, depth_format, surface_pitch,
resolve_extent.height, depth, stencil);
resolve_extent.height, surface_msaa, depth, stencil);
}

return true;

@@ -94,6 +94,7 @@ class VulkanCommandProcessor : public CommandProcessor {

// Last copy base address, for debugging only.
uint32_t last_copy_base_ = 0;
bool capturing_ = false;

std::unique_ptr<BufferCache> buffer_cache_;
std::unique_ptr<PipelineCache> pipeline_cache_;