diff --git a/src/xenia/gpu/vulkan/buffer_cache.cc b/src/xenia/gpu/vulkan/buffer_cache.cc index 5b9f8a182..32c2cef4b 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.cc +++ b/src/xenia/gpu/vulkan/buffer_cache.cc @@ -188,8 +188,9 @@ BufferCache::~BufferCache() { vkDestroyBuffer(device_, transient_vertex_buffer_, nullptr); } -VkDeviceSize BufferCache::UploadConstantRegisters( - const Shader::ConstantRegisterMap& constant_register_map) { +std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters( + const Shader::ConstantRegisterMap& vertex_constant_register_map, + const Shader::ConstantRegisterMap& pixel_constant_register_map) { // Fat struct, including all registers: // struct { // vec4 float[512]; @@ -202,7 +203,7 @@ VkDeviceSize BufferCache::UploadConstantRegisters( auto offset = AllocateTransientData(uniform_buffer_alignment_, total_size); if (offset == VK_WHOLE_SIZE) { // OOM. - return VK_WHOLE_SIZE; + return {VK_WHOLE_SIZE, VK_WHOLE_SIZE}; } // Copy over all the registers. @@ -219,7 +220,7 @@ VkDeviceSize BufferCache::UploadConstantRegisters( 32 * 4); dest_ptr += 32 * 4; - return offset; + return {offset, offset}; // Packed upload code. // This is not currently supported by the shaders, but would be awesome. diff --git a/src/xenia/gpu/vulkan/buffer_cache.h b/src/xenia/gpu/vulkan/buffer_cache.h index 9a264a80b..1c7330e52 100644 --- a/src/xenia/gpu/vulkan/buffer_cache.h +++ b/src/xenia/gpu/vulkan/buffer_cache.h @@ -47,8 +47,10 @@ class BufferCache { // The registers are tightly packed in order as [floats, ints, bools]. // Returns an offset that can be used with the transient_descriptor_set or // VK_WHOLE_SIZE if the constants could not be uploaded (OOM). - VkDeviceSize UploadConstantRegisters( - const Shader::ConstantRegisterMap& constant_register_map); + // The returned offsets may alias. 
+ std::pair<VkDeviceSize, VkDeviceSize> UploadConstantRegisters( + const Shader::ConstantRegisterMap& vertex_constant_register_map, + const Shader::ConstantRegisterMap& pixel_constant_register_map); // Uploads index buffer data from guest memory, possibly eliding with // recently uploaded data or cached copies. diff --git a/src/xenia/gpu/vulkan/pipeline_cache.cc b/src/xenia/gpu/vulkan/pipeline_cache.cc index aca0d72b5..63bad5164 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.cc +++ b/src/xenia/gpu/vulkan/pipeline_cache.cc @@ -141,11 +141,435 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, VulkanShader* vertex_shader, VulkanShader* pixel_shader, PrimitiveType primitive_type) { - // Uh, yeah. This happened. + // Perform a pass over all registers and state updating our cached structures. + // This will tell us if anything has changed that requires us to either build + // a new pipeline or use an existing one. + VkPipeline pipeline = nullptr; + auto update_status = UpdateState(vertex_shader, pixel_shader, primitive_type); + switch (update_status) { + case UpdateStatus::kCompatible: + // Requested pipeline is compatible with our previous one, so use that. + // Note that there still may be dynamic state that needs updating. + pipeline = current_pipeline_; + break; + case UpdateStatus::kMismatch: + // Pipeline state has changed. We need to either create a new one or find + // an old one that matches. + current_pipeline_ = nullptr; + break; + case UpdateStatus::kError: + // Error updating state - bail out. + // We are in an indeterminate state, so reset things for the next attempt. + current_pipeline_ = nullptr; + return false; + } + if (!pipeline) { + pipeline = GetPipeline(render_state); + current_pipeline_ = pipeline; + if (!pipeline) { + // Unable to create pipeline. 
+ return false; + } + } - VkPipelineShaderStageCreateInfo pipeline_stages[3]; - uint32_t pipeline_stage_count = 0; - auto& vertex_pipeline_stage = pipeline_stages[pipeline_stage_count++]; + // Bind the pipeline. + vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); + + // Issue all changed dynamic state information commands. + // TODO(benvanik): dynamic state is kept in the command buffer, so if we + // have issued it before (regardless of pipeline) we don't need to do it now. + // TODO(benvanik): track whether we have issued on the given command buffer. + bool full_dynamic_state = true; + if (!SetDynamicState(command_buffer, full_dynamic_state)) { + // Failed to update state. + return false; + } + + return true; +} + +void PipelineCache::ClearCache() { + // TODO(benvanik): caching. +} + +VkPipeline PipelineCache::GetPipeline(const RenderState* render_state) { + VkPipelineDynamicStateCreateInfo dynamic_state_info; + dynamic_state_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; + dynamic_state_info.pNext = nullptr; + dynamic_state_info.flags = 0; + VkDynamicState dynamic_states[] = { + VK_DYNAMIC_STATE_VIEWPORT, + VK_DYNAMIC_STATE_SCISSOR, + VK_DYNAMIC_STATE_LINE_WIDTH, + VK_DYNAMIC_STATE_DEPTH_BIAS, + VK_DYNAMIC_STATE_BLEND_CONSTANTS, + VK_DYNAMIC_STATE_DEPTH_BOUNDS, + VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, + VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, + VK_DYNAMIC_STATE_STENCIL_REFERENCE, + }; + dynamic_state_info.dynamicStateCount = + static_cast<uint32_t>(xe::countof(dynamic_states)); + dynamic_state_info.pDynamicStates = dynamic_states; + + VkGraphicsPipelineCreateInfo pipeline_info; + pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; + pipeline_info.pNext = nullptr; + pipeline_info.flags = VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT; + pipeline_info.stageCount = update_shader_stages_stage_count_; + pipeline_info.pStages = update_shader_stages_info_; + pipeline_info.pVertexInputState = 
&update_vertex_input_state_info_; + pipeline_info.pInputAssemblyState = &update_input_assembly_state_info_; + pipeline_info.pTessellationState = nullptr; + pipeline_info.pViewportState = &update_viewport_state_info_; + pipeline_info.pRasterizationState = &update_rasterization_state_info_; + pipeline_info.pMultisampleState = &update_multisample_state_info_; + pipeline_info.pDepthStencilState = &update_depth_stencil_state_info_; + pipeline_info.pColorBlendState = &update_color_blend_state_info_; + pipeline_info.pDynamicState = &dynamic_state_info; + pipeline_info.layout = pipeline_layout_; + pipeline_info.renderPass = render_state->render_pass_handle; + pipeline_info.subpass = 0; + pipeline_info.basePipelineHandle = nullptr; + pipeline_info.basePipelineIndex = 0; + + VkPipeline pipeline = nullptr; + auto err = vkCreateGraphicsPipelines(device_, nullptr, 1, &pipeline_info, + nullptr, &pipeline); + CheckResult(err, "vkCreateGraphicsPipelines"); + + // TODO(benvanik): don't leak. + + return pipeline; +} + +VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type) { + switch (primitive_type) { + case PrimitiveType::kLineList: + case PrimitiveType::kLineStrip: + case PrimitiveType::kTriangleList: + case PrimitiveType::kTriangleFan: + case PrimitiveType::kTriangleStrip: + // Supported directly - no need to emulate. + return nullptr; + case PrimitiveType::kPointList: + // TODO(benvanik): point list geometry shader. + return nullptr; + case PrimitiveType::kUnknown0x07: + assert_always("Unknown geometry type"); + return nullptr; + case PrimitiveType::kRectangleList: + // TODO(benvanik): rectangle list geometry shader. + return nullptr; + case PrimitiveType::kLineLoop: + // TODO(benvanik): line loop geometry shader. + return nullptr; + case PrimitiveType::kQuadList: + // TODO(benvanik): quad list geometry shader. + return nullptr; + case PrimitiveType::kQuadStrip: + // TODO(benvanik): quad strip geometry shader. 
+ return nullptr; + default: + assert_unhandled_case(primitive_type); + return nullptr; + } +} + +bool PipelineCache::SetDynamicState(VkCommandBuffer command_buffer, + bool full_update) { + auto& regs = set_dynamic_state_registers_; + + bool window_offset_dirty = SetShadowRegister(&regs.pa_sc_window_offset, + XE_GPU_REG_PA_SC_WINDOW_OFFSET); + + // Window parameters. + // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h + // See r200UpdateWindow: + // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c + int16_t window_offset_x = 0; + int16_t window_offset_y = 0; + if ((regs.pa_su_sc_mode_cntl >> 16) & 1) { + window_offset_x = regs.pa_sc_window_offset & 0x7FFF; + window_offset_y = (regs.pa_sc_window_offset >> 16) & 0x7FFF; + if (window_offset_x & 0x4000) { + window_offset_x |= 0x8000; + } + if (window_offset_y & 0x4000) { + window_offset_y |= 0x8000; + } + } + + // VK_DYNAMIC_STATE_SCISSOR + bool scissor_state_dirty = full_update || window_offset_dirty; + scissor_state_dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); + scissor_state_dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br, + XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); + if (scissor_state_dirty) { + int32_t ws_x = regs.pa_sc_window_scissor_tl & 0x7FFF; + int32_t ws_y = (regs.pa_sc_window_scissor_tl >> 16) & 0x7FFF; + uint32_t ws_w = (regs.pa_sc_window_scissor_br & 0x7FFF) - ws_x; + uint32_t ws_h = ((regs.pa_sc_window_scissor_br >> 16) & 0x7FFF) - ws_y; + ws_x += window_offset_x; + ws_y += window_offset_y; + + VkRect2D scissor_rect; + scissor_rect.offset.x = ws_x; + scissor_rect.offset.y = ws_y; + scissor_rect.extent.width = ws_w; + scissor_rect.extent.height = ws_h; + vkCmdSetScissor(command_buffer, 0, 1, &scissor_rect); + } + + // VK_DYNAMIC_STATE_VIEWPORT + bool viewport_state_dirty = full_update || window_offset_dirty; + viewport_state_dirty |= + 
SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + viewport_state_dirty |= + SetShadowRegister(&regs.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL); + viewport_state_dirty |= SetShadowRegister(&regs.pa_cl_vport_xoffset, + XE_GPU_REG_PA_CL_VPORT_XOFFSET); + viewport_state_dirty |= SetShadowRegister(&regs.pa_cl_vport_yoffset, + XE_GPU_REG_PA_CL_VPORT_YOFFSET); + viewport_state_dirty |= SetShadowRegister(&regs.pa_cl_vport_zoffset, + XE_GPU_REG_PA_CL_VPORT_ZOFFSET); + viewport_state_dirty |= SetShadowRegister(&regs.pa_cl_vport_xscale, + XE_GPU_REG_PA_CL_VPORT_XSCALE); + viewport_state_dirty |= SetShadowRegister(&regs.pa_cl_vport_yscale, + XE_GPU_REG_PA_CL_VPORT_YSCALE); + viewport_state_dirty |= SetShadowRegister(&regs.pa_cl_vport_zscale, + XE_GPU_REG_PA_CL_VPORT_ZSCALE); + if (viewport_state_dirty) { + // HACK: no clue where to get these values. + // RB_SURFACE_INFO + auto surface_msaa = + static_cast<MsaaSamples>((regs.rb_surface_info >> 16) & 0x3); + // TODO(benvanik): ?? + float window_width_scalar = 1; + float window_height_scalar = 1; + switch (surface_msaa) { + case MsaaSamples::k1X: + break; + case MsaaSamples::k2X: + window_width_scalar = 2; + break; + case MsaaSamples::k4X: + window_width_scalar = 2; + window_height_scalar = 2; + break; + } + + // Whether each of the viewport settings are enabled. 
+ // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + bool vport_xscale_enable = (regs.pa_cl_vte_cntl & (1 << 0)) > 0; + bool vport_xoffset_enable = (regs.pa_cl_vte_cntl & (1 << 1)) > 0; + bool vport_yscale_enable = (regs.pa_cl_vte_cntl & (1 << 2)) > 0; + bool vport_yoffset_enable = (regs.pa_cl_vte_cntl & (1 << 3)) > 0; + bool vport_zscale_enable = (regs.pa_cl_vte_cntl & (1 << 4)) > 0; + bool vport_zoffset_enable = (regs.pa_cl_vte_cntl & (1 << 5)) > 0; + assert_true(vport_xscale_enable == vport_yscale_enable == + vport_zscale_enable == vport_xoffset_enable == + vport_yoffset_enable == vport_zoffset_enable); + + VkViewport viewport_rect; + viewport_rect.x = 0; + viewport_rect.y = 0; + viewport_rect.width = 100; + viewport_rect.height = 100; + viewport_rect.minDepth = 0; + viewport_rect.maxDepth = 1; + + if (vport_xscale_enable) { + float texel_offset_x = 0.0f; + float texel_offset_y = 0.0f; + float vox = vport_xoffset_enable ? regs.pa_cl_vport_xoffset : 0; + float voy = vport_yoffset_enable ? regs.pa_cl_vport_yoffset : 0; + float vsx = vport_xscale_enable ? regs.pa_cl_vport_xscale : 1; + float vsy = vport_yscale_enable ? regs.pa_cl_vport_yscale : 1; + window_width_scalar = window_height_scalar = 1; + float vpw = 2 * window_width_scalar * vsx; + float vph = -2 * window_height_scalar * vsy; + float vpx = window_width_scalar * vox - vpw / 2 + window_offset_x; + float vpy = window_height_scalar * voy - vph / 2 + window_offset_y; + viewport_rect.x = vpx + texel_offset_x; + viewport_rect.y = vpy + texel_offset_y; + viewport_rect.width = vpw; + viewport_rect.height = vph; + + // TODO(benvanik): depth range adjustment? + // float voz = vport_zoffset_enable ? regs.pa_cl_vport_zoffset : 0; + // float vsz = vport_zscale_enable ? 
regs.pa_cl_vport_zscale : 1; + } else { + float texel_offset_x = 0.0f; + float texel_offset_y = 0.0f; + float vpw = 2 * 2560.0f * window_width_scalar; + float vph = 2 * 2560.0f * window_height_scalar; + float vpx = -2560.0f * window_width_scalar + window_offset_x; + float vpy = -2560.0f * window_height_scalar + window_offset_y; + viewport_rect.x = vpx + texel_offset_x; + viewport_rect.y = vpy + texel_offset_y; + viewport_rect.width = vpw; + viewport_rect.height = vph; + } + float voz = vport_zoffset_enable ? regs.pa_cl_vport_zoffset : 0; + float vsz = vport_zscale_enable ? regs.pa_cl_vport_zscale : 1; + viewport_rect.minDepth = voz; + viewport_rect.maxDepth = voz + vsz; + + vkCmdSetViewport(command_buffer, 0, 1, &viewport_rect); + } + + // VK_DYNAMIC_STATE_BLEND_CONSTANTS + bool blend_constant_state_dirty = full_update; + blend_constant_state_dirty |= + SetShadowRegister(&regs.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED); + blend_constant_state_dirty |= + SetShadowRegister(&regs.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN); + blend_constant_state_dirty |= + SetShadowRegister(&regs.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE); + blend_constant_state_dirty |= + SetShadowRegister(&regs.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA); + if (blend_constant_state_dirty) { + vkCmdSetBlendConstants(command_buffer, regs.rb_blend_rgba); + } + + // VK_DYNAMIC_STATE_LINE_WIDTH + vkCmdSetLineWidth(command_buffer, 1.0f); + + // VK_DYNAMIC_STATE_DEPTH_BIAS + vkCmdSetDepthBias(command_buffer, 0.0f, 0.0f, 0.0f); + + // VK_DYNAMIC_STATE_DEPTH_BOUNDS + vkCmdSetDepthBounds(command_buffer, 0.0f, 1.0f); + + // VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK + vkCmdSetStencilCompareMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); + + // VK_DYNAMIC_STATE_STENCIL_REFERENCE + vkCmdSetStencilReference(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); + + // VK_DYNAMIC_STATE_STENCIL_WRITE_MASK + vkCmdSetStencilWriteMask(command_buffer, VK_STENCIL_FRONT_AND_BACK, 0); + + // TODO(benvanik): push constants. 
+ + return true; +} + +bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { + uint32_t value = register_file_->values[register_name].u32; + if (*dest == value) { + return false; + } + *dest = value; + return true; +} + +bool PipelineCache::SetShadowRegister(float* dest, uint32_t register_name) { + float value = register_file_->values[register_name].f32; + if (*dest == value) { + return false; + } + *dest = value; + return true; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargets() { + auto& regs = update_render_targets_regs_; + + bool dirty = false; + dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); + dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); + dirty |= SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); + dirty |= SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); + dirty |= SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); + dirty |= SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); + dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); + dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); + dirty |= + SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); + dirty |= SetShadowRegister(&regs.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + SCOPE_profile_cpu_f("gpu"); + + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateState( + VulkanShader* vertex_shader, VulkanShader* pixel_shader, + PrimitiveType primitive_type) { + bool mismatch = false; + +#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \ + { \ + if (status == UpdateStatus::kError) { \ + XELOGE(error_message); \ + return status; \ + } else if (status == UpdateStatus::kMismatch) { \ + mismatch = true; \ + } \ + } + + UpdateStatus status; + status = UpdateShaderStages(vertex_shader, pixel_shader, 
primitive_type); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update shader stages"); + status = UpdateVertexInputState(vertex_shader); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update vertex input state"); + status = UpdateInputAssemblyState(primitive_type); + CHECK_UPDATE_STATUS(status, mismatch, + "Unable to update input assembly state"); + status = UpdateViewportState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state"); + status = UpdateRasterizationState(primitive_type); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterization state"); + status = UpdateMultisampleState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update multisample state"); + status = UpdateDepthStencilState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state"); + status = UpdateColorBlendState(); + CHECK_UPDATE_STATUS(status, mismatch, "Unable to update color blend state"); + + return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateShaderStages( + VulkanShader* vertex_shader, VulkanShader* pixel_shader, + PrimitiveType primitive_type) { + auto& regs = update_shader_stages_regs_; + + // These are the constant base addresses/ranges for shaders. + // We have these hardcoded right now cause nothing seems to differ. 
+ assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == + 0x000FF000 || + register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); + assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == + 0x000FF100 || + register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); + + bool dirty = false; + dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl, + XE_GPU_REG_PA_SU_SC_MODE_CNTL); + dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); + dirty |= SetShadowRegister(&regs.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); + // dirty |= regs.vertex_shader != active_vertex_shader_; + // dirty |= regs.pixel_shader != active_pixel_shader_; + dirty |= regs.prim_type != primitive_type; + if (!dirty) { + return UpdateStatus::kCompatible; + } + // regs.vertex_shader = static_cast(active_vertex_shader_); + // regs.pixel_shader = static_cast(active_pixel_shader_); + regs.prim_type = primitive_type; + + update_shader_stages_stage_count_ = 0; + + auto& vertex_pipeline_stage = + update_shader_stages_info_[update_shader_stages_stage_count_++]; vertex_pipeline_stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; vertex_pipeline_stage.pNext = nullptr; @@ -154,9 +578,11 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, vertex_pipeline_stage.module = vertex_shader->shader_module(); vertex_pipeline_stage.pName = "main"; vertex_pipeline_stage.pSpecializationInfo = nullptr; + auto geometry_shader = GetGeometryShader(primitive_type); if (geometry_shader) { - auto& geometry_pipeline_stage = pipeline_stages[pipeline_stage_count++]; + auto& geometry_pipeline_stage = + update_shader_stages_info_[update_shader_stages_stage_count_++]; geometry_pipeline_stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; geometry_pipeline_stage.pNext = nullptr; @@ -166,7 +592,9 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, geometry_pipeline_stage.pName = "main"; 
geometry_pipeline_stage.pSpecializationInfo = nullptr; } - auto& pixel_pipeline_stage = pipeline_stages[pipeline_stage_count++]; + + auto& pixel_pipeline_stage = + update_shader_stages_info_[update_shader_stages_stage_count_++]; pixel_pipeline_stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; pixel_pipeline_stage.pNext = nullptr; @@ -176,13 +604,28 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, pixel_pipeline_stage.pName = "main"; pixel_pipeline_stage.pSpecializationInfo = nullptr; - VkPipelineVertexInputStateCreateInfo vertex_state_info; - vertex_state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; - vertex_state_info.pNext = nullptr; - VkVertexInputBindingDescription vertex_binding_descrs[64]; + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateVertexInputState( + VulkanShader* vertex_shader) { + auto& regs = update_vertex_input_state_regs_; + auto& state_info = update_vertex_input_state_info_; + + bool dirty = false; + dirty |= vertex_shader != regs.vertex_shader; + if (!dirty) { + return UpdateStatus::kCompatible; + } + regs.vertex_shader = vertex_shader; + + state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO; + state_info.pNext = nullptr; + state_info.flags = 0; + + auto& vertex_binding_descrs = update_vertex_input_state_binding_descrs_; + auto& vertex_attrib_descrs = update_vertex_input_state_attrib_descrs_; uint32_t vertex_binding_count = 0; - VkVertexInputAttributeDescription vertex_attrib_descrs[64]; uint32_t vertex_attrib_count = 0; for (const auto& vertex_binding : vertex_shader->vertex_bindings()) { assert_true(vertex_binding_count < xe::countof(vertex_binding_descrs)); @@ -270,366 +713,114 @@ bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer, } } } - vertex_state_info.vertexBindingDescriptionCount = vertex_binding_count; - vertex_state_info.pVertexBindingDescriptions = vertex_binding_descrs; - 
vertex_state_info.vertexAttributeDescriptionCount = vertex_attrib_count; - vertex_state_info.pVertexAttributeDescriptions = vertex_attrib_descrs; - VkPipelineInputAssemblyStateCreateInfo input_info; - input_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; - input_info.pNext = nullptr; - input_info.flags = 0; - input_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; - input_info.primitiveRestartEnable = VK_FALSE; + state_info.vertexBindingDescriptionCount = vertex_binding_count; + state_info.pVertexBindingDescriptions = vertex_binding_descrs; + state_info.vertexAttributeDescriptionCount = vertex_attrib_count; + state_info.pVertexAttributeDescriptions = vertex_attrib_descrs; - VkPipelineViewportStateCreateInfo viewport_state_info; - viewport_state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; - viewport_state_info.pNext = nullptr; - viewport_state_info.flags = 0; - VkViewport viewport; - viewport.x = 0; - viewport.y = 0; - viewport.width = 100; - viewport.height = 100; - viewport.minDepth = 0; - viewport.maxDepth = 1; - viewport_state_info.viewportCount = 1; - viewport_state_info.pViewports = &viewport; - VkRect2D scissor; - scissor.offset.x = 0; - scissor.offset.y = 0; - scissor.extent.width = 100; - scissor.extent.height = 100; - viewport_state_info.scissorCount = 1; - viewport_state_info.pScissors = &scissor; - - VkPipelineRasterizationStateCreateInfo rasterization_info; - rasterization_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; - rasterization_info.pNext = nullptr; - rasterization_info.flags = 0; - rasterization_info.depthClampEnable = VK_FALSE; - rasterization_info.rasterizerDiscardEnable = VK_FALSE; - rasterization_info.polygonMode = VK_POLYGON_MODE_FILL; - rasterization_info.cullMode = VK_CULL_MODE_BACK_BIT; - rasterization_info.frontFace = VK_FRONT_FACE_CLOCKWISE; - rasterization_info.depthBiasEnable = VK_FALSE; - rasterization_info.depthBiasConstantFactor = 0; - 
rasterization_info.depthBiasClamp = 0; - rasterization_info.depthBiasSlopeFactor = 0; - rasterization_info.lineWidth = 1.0f; - - VkPipelineMultisampleStateCreateInfo multisample_info; - multisample_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; - multisample_info.pNext = nullptr; - multisample_info.flags = 0; - multisample_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; - multisample_info.sampleShadingEnable = VK_FALSE; - multisample_info.minSampleShading = 0; - multisample_info.pSampleMask = nullptr; - multisample_info.alphaToCoverageEnable = VK_FALSE; - multisample_info.alphaToOneEnable = VK_FALSE; - - VkPipelineDepthStencilStateCreateInfo depth_stencil_info; - depth_stencil_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; - depth_stencil_info.pNext = nullptr; - depth_stencil_info.flags = 0; - depth_stencil_info.depthTestEnable = VK_FALSE; - depth_stencil_info.depthWriteEnable = VK_FALSE; - depth_stencil_info.depthCompareOp = VK_COMPARE_OP_ALWAYS; - depth_stencil_info.depthBoundsTestEnable = VK_FALSE; - depth_stencil_info.stencilTestEnable = VK_FALSE; - depth_stencil_info.front.failOp = VK_STENCIL_OP_KEEP; - depth_stencil_info.front.passOp = VK_STENCIL_OP_KEEP; - depth_stencil_info.front.depthFailOp = VK_STENCIL_OP_KEEP; - depth_stencil_info.front.compareOp = VK_COMPARE_OP_ALWAYS; - depth_stencil_info.front.compareMask = 0; - depth_stencil_info.front.writeMask = 0; - depth_stencil_info.front.reference = 0; - depth_stencil_info.back.failOp = VK_STENCIL_OP_KEEP; - depth_stencil_info.back.passOp = VK_STENCIL_OP_KEEP; - depth_stencil_info.back.depthFailOp = VK_STENCIL_OP_KEEP; - depth_stencil_info.back.compareOp = VK_COMPARE_OP_ALWAYS; - depth_stencil_info.back.compareMask = 0; - depth_stencil_info.back.writeMask = 0; - depth_stencil_info.back.reference = 0; - depth_stencil_info.minDepthBounds = 0; - depth_stencil_info.maxDepthBounds = 0; - - VkPipelineColorBlendStateCreateInfo blend_info; - blend_info.sType = 
VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; - blend_info.pNext = nullptr; - blend_info.flags = 0; - blend_info.logicOpEnable = VK_FALSE; - blend_info.logicOp = VK_LOGIC_OP_NO_OP; - - VkPipelineColorBlendAttachmentState blend_attachments[1]; - blend_attachments[0].blendEnable = VK_TRUE; - blend_attachments[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; - blend_attachments[0].dstColorBlendFactor = - VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - blend_attachments[0].colorBlendOp = VK_BLEND_OP_ADD; - blend_attachments[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA; - blend_attachments[0].dstAlphaBlendFactor = - VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA; - blend_attachments[0].alphaBlendOp = VK_BLEND_OP_ADD; - blend_attachments[0].colorWriteMask = 0xF; - blend_info.attachmentCount = - static_cast<uint32_t>(xe::countof(blend_attachments)); - blend_info.pAttachments = blend_attachments; - std::memset(blend_info.blendConstants, 0, sizeof(blend_info.blendConstants)); - - VkPipelineDynamicStateCreateInfo dynamic_state_info; - dynamic_state_info.sType = - VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO; - dynamic_state_info.pNext = nullptr; - dynamic_state_info.flags = 0; - // VkDynamicState dynamic_states[] = { - // VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR, - //}; - // dynamic_state_info.dynamicStateCount = - // static_cast<uint32_t>(xe::countof(dynamic_states)); - // dynamic_state_info.pDynamicStates = dynamic_states; - dynamic_state_info.dynamicStateCount = 0; - dynamic_state_info.pDynamicStates = nullptr; - - VkGraphicsPipelineCreateInfo pipeline_info; - pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO; - pipeline_info.pNext = nullptr; - pipeline_info.flags = VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT; - pipeline_info.stageCount = pipeline_stage_count; - pipeline_info.pStages = pipeline_stages; - pipeline_info.pVertexInputState = &vertex_state_info; - pipeline_info.pInputAssemblyState = &input_info; - pipeline_info.pTessellationState = nullptr; 
- pipeline_info.pViewportState = &viewport_state_info; - pipeline_info.pRasterizationState = &rasterization_info; - pipeline_info.pMultisampleState = &multisample_info; - pipeline_info.pDepthStencilState = &depth_stencil_info; - pipeline_info.pColorBlendState = &blend_info; - pipeline_info.pDynamicState = &dynamic_state_info; - pipeline_info.layout = pipeline_layout_; - pipeline_info.renderPass = render_state->render_pass_handle; - pipeline_info.subpass = 0; - pipeline_info.basePipelineHandle = nullptr; - pipeline_info.basePipelineIndex = 0; - - VkPipeline pipeline = nullptr; - auto err = vkCreateGraphicsPipelines(device_, nullptr, 1, &pipeline_info, - nullptr, &pipeline); - CheckResult(err, "vkCreateGraphicsPipelines"); - - // TODO(benvanik): don't leak pipelines >_> - vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline); - - return true; + return UpdateStatus::kMismatch; } -void PipelineCache::ClearCache() { - // TODO(benvanik): caching. -} - -VkShaderModule PipelineCache::GetGeometryShader(PrimitiveType primitive_type) { - switch (primitive_type) { - case PrimitiveType::kLineList: - case PrimitiveType::kLineStrip: - case PrimitiveType::kTriangleList: - case PrimitiveType::kTriangleFan: - case PrimitiveType::kTriangleStrip: - // Supported directly - no need to emulate. - return nullptr; - case PrimitiveType::kPointList: - // TODO(benvanik): point list geometry shader. - return nullptr; - case PrimitiveType::kUnknown0x07: - assert_always("Unknown geometry type"); - return nullptr; - case PrimitiveType::kRectangleList: - // TODO(benvanik): rectangle list geometry shader. - return nullptr; - case PrimitiveType::kLineLoop: - // TODO(benvanik): line loop geometry shader. - return nullptr; - case PrimitiveType::kQuadList: - // TODO(benvanik): quad list geometry shader. - return nullptr; - case PrimitiveType::kQuadStrip: - // TODO(benvanik): quad strip geometry shader. 
- return nullptr; - default: - assert_unhandled_case(primitive_type); - return nullptr; - } -} - -bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) { - uint32_t value = register_file_->values[register_name].u32; - if (*dest == value) { - return false; - } - *dest = value; - return true; -} - -bool PipelineCache::SetShadowRegister(float* dest, uint32_t register_name) { - float value = register_file_->values[register_name].f32; - if (*dest == value) { - return false; - } - *dest = value; - return true; -} - -PipelineCache::UpdateStatus PipelineCache::UpdateShaders( - PrimitiveType prim_type) { - auto& regs = update_shaders_regs_; - - // These are the constant base addresses/ranges for shaders. - // We have these hardcoded right now cause nothing seems to differ. - assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == - 0x000FF000 || - register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000); - assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == - 0x000FF100 || - register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000); +PipelineCache::UpdateStatus PipelineCache::UpdateInputAssemblyState( + PrimitiveType primitive_type) { + auto& regs = update_input_assembly_state_regs_; + auto& state_info = update_input_assembly_state_info_; bool dirty = false; + dirty |= primitive_type != regs.primitive_type; dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl, XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL); - dirty |= SetShadowRegister(&regs.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC); - // dirty |= regs.vertex_shader != active_vertex_shader_; - // dirty |= regs.pixel_shader != active_pixel_shader_; - dirty |= regs.prim_type != prim_type; + dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index, + XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); if (!dirty) { return UpdateStatus::kCompatible; } - // regs.vertex_shader = static_cast(active_vertex_shader_); - // 
regs.pixel_shader = static_cast(active_pixel_shader_); - regs.prim_type = prim_type; + regs.primitive_type = primitive_type; - SCOPE_profile_cpu_f("gpu"); + state_info.sType = + VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; + state_info.pNext = nullptr; + state_info.flags = 0; - return UpdateStatus::kMismatch; -} - -PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargets() { - auto& regs = update_render_targets_regs_; - - bool dirty = false; - dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL); - dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); - dirty |= SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO); - dirty |= SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO); - dirty |= SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO); - dirty |= SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO); - dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); - dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); - dirty |= - SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); - dirty |= SetShadowRegister(&regs.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO); - if (!dirty) { - return UpdateStatus::kCompatible; + switch (primitive_type) { + case PrimitiveType::kPointList: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + break; + case PrimitiveType::kLineList: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + break; + case PrimitiveType::kLineStrip: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; + break; + case PrimitiveType::kLineLoop: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; + break; + case PrimitiveType::kTriangleList: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + break; + case PrimitiveType::kTriangleStrip: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + break; + case PrimitiveType::kTriangleFan: + state_info.topology = 
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; + break; + case PrimitiveType::kRectangleList: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + break; + case PrimitiveType::kQuadList: + state_info.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; + break; + default: + case PrimitiveType::kUnknown0x07: + XELOGE("unsupported primitive type %d", primitive_type); + assert_unhandled_case(primitive_type); + return UpdateStatus::kError; } - SCOPE_profile_cpu_f("gpu"); + // TODO(benvanik): anything we can do about this? Vulkan seems to only support + // first. + assert_zero(regs.pa_su_sc_mode_cntl & (1 << 19)); + // if (regs.pa_su_sc_mode_cntl & (1 << 19)) { + // glProvokingVertex(GL_LAST_VERTEX_CONVENTION); + // } else { + // glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); + // } + + if (regs.pa_su_sc_mode_cntl & (1 << 21)) { + state_info.primitiveRestartEnable = VK_TRUE; + } else { + state_info.primitiveRestartEnable = VK_FALSE; + } + // TODO(benvanik): no way to specify in Vulkan? + assert_true(regs.multi_prim_ib_reset_index == 0xFFFF || + regs.multi_prim_ib_reset_index == 0xFFFFFFFF); + // glPrimitiveRestartIndex(regs.multi_prim_ib_reset_index); return UpdateStatus::kMismatch; } -PipelineCache::UpdateStatus PipelineCache::UpdateState( - PrimitiveType prim_type) { - bool mismatch = false; - -#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \ - { \ - if (status == UpdateStatus::kError) { \ - XELOGE(error_message); \ - return status; \ - } else if (status == UpdateStatus::kMismatch) { \ - mismatch = true; \ - } \ - } - - UpdateStatus status; - status = UpdateViewportState(); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state"); - status = UpdateRasterizerState(prim_type); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state"); - status = UpdateBlendState(); - CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state"); - status = UpdateDepthStencilState(); - CHECK_UPDATE_STATUS(status, mismatch,
"Unable to update depth/stencil state"); - - return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible; -} - PipelineCache::UpdateStatus PipelineCache::UpdateViewportState() { - auto& regs = update_viewport_state_regs_; + auto& state_info = update_viewport_state_info_; - bool dirty = false; - // dirty |= SetShadowRegister(&state_regs.pa_cl_clip_cntl, - // XE_GPU_REG_PA_CL_CLIP_CNTL); - dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO); - dirty |= SetShadowRegister(&regs.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL); - dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl, - XE_GPU_REG_PA_SU_SC_MODE_CNTL); - dirty |= SetShadowRegister(&regs.pa_sc_window_offset, - XE_GPU_REG_PA_SC_WINDOW_OFFSET); - dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl, - XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL); - dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br, - XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR); - dirty |= SetShadowRegister(&regs.pa_cl_vport_xoffset, - XE_GPU_REG_PA_CL_VPORT_XOFFSET); - dirty |= SetShadowRegister(&regs.pa_cl_vport_yoffset, - XE_GPU_REG_PA_CL_VPORT_YOFFSET); - dirty |= SetShadowRegister(&regs.pa_cl_vport_zoffset, - XE_GPU_REG_PA_CL_VPORT_ZOFFSET); - dirty |= SetShadowRegister(&regs.pa_cl_vport_xscale, - XE_GPU_REG_PA_CL_VPORT_XSCALE); - dirty |= SetShadowRegister(&regs.pa_cl_vport_yscale, - XE_GPU_REG_PA_CL_VPORT_YSCALE); - dirty |= SetShadowRegister(&regs.pa_cl_vport_zscale, - XE_GPU_REG_PA_CL_VPORT_ZSCALE); + state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; + state_info.pNext = nullptr; + state_info.flags = 0; - // Much of this state machine is extracted from: - // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c - // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html - // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf + state_info.viewportCount = 1; + state_info.scissorCount = 1; - // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf - // VTX_XY_FMT = true:
the incoming X, Y have already been multiplied by 1/W0. - // = false: multiply the X, Y coordinates by 1/W0. - // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0. - // = false: multiply the Z coordinate by 1/W0. - // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to - // get 1/W0. - // draw_batcher_.set_vtx_fmt((regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f, - // (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f, - // (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f); + // Ignored; set dynamically. + state_info.pViewports = nullptr; + state_info.pScissors = nullptr; - // Done in VS, no need to flush state. - // if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) { - // draw_batcher_.set_window_scalar(1.0f, 1.0f); - //} else { - // draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f); - //} - - if (!dirty) { - return UpdateStatus::kCompatible; - } - - return UpdateStatus::kMismatch; + return UpdateStatus::kCompatible; } -PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState( - PrimitiveType prim_type) { - auto& regs = update_rasterizer_state_regs_; +PipelineCache::UpdateStatus PipelineCache::UpdateRasterizationState( + PrimitiveType primitive_type) { + auto& regs = update_rasterization_state_regs_; + auto& state_info = update_rasterization_state_info_; bool dirty = false; dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl, @@ -640,21 +831,130 @@ PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState( XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR); dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index, XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX); - dirty |= regs.prim_type != prim_type; if (!dirty) { return UpdateStatus::kCompatible; } - regs.prim_type = prim_type; + state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO; + state_info.pNext = nullptr; + state_info.flags = 0; - SCOPE_profile_cpu_f("gpu"); + // TODO(benvanik): right setting?
+ state_info.depthClampEnable = VK_FALSE; + + // TODO(benvanik): use in depth-only mode? + state_info.rasterizerDiscardEnable = VK_FALSE; + + bool poly_mode = ((regs.pa_su_sc_mode_cntl >> 3) & 0x3) != 0; + if (poly_mode) { + uint32_t front_poly_mode = (regs.pa_su_sc_mode_cntl >> 5) & 0x7; + uint32_t back_poly_mode = (regs.pa_su_sc_mode_cntl >> 8) & 0x7; + // Vulkan only supports both matching. + assert_true(front_poly_mode == back_poly_mode); + static const VkPolygonMode kFillModes[3] = { + VK_POLYGON_MODE_POINT, VK_POLYGON_MODE_LINE, VK_POLYGON_MODE_FILL, + }; + state_info.polygonMode = kFillModes[front_poly_mode]; + } else { + state_info.polygonMode = VK_POLYGON_MODE_FILL; + } + + switch (regs.pa_su_sc_mode_cntl & 0x3) { + case 0: + state_info.cullMode = VK_CULL_MODE_NONE; + break; + case 1: + state_info.cullMode = VK_CULL_MODE_FRONT_BIT; + break; + case 2: + state_info.cullMode = VK_CULL_MODE_BACK_BIT; + break; + } + if (regs.pa_su_sc_mode_cntl & 0x4) { + state_info.frontFace = VK_FRONT_FACE_CLOCKWISE; + } else { + state_info.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE; + } + if (primitive_type == PrimitiveType::kRectangleList) { + // Rectangle lists aren't culled. There may be other things they skip too. 
+ state_info.cullMode = VK_CULL_MODE_NONE; + } + + state_info.depthBiasEnable = VK_FALSE; + + // Ignored; set dynamically: + state_info.depthBiasConstantFactor = 0; + state_info.depthBiasClamp = 0; + state_info.depthBiasSlopeFactor = 0; + state_info.lineWidth = 1.0f; return UpdateStatus::kMismatch; } -PipelineCache::UpdateStatus PipelineCache::UpdateBlendState() { - auto& reg_file = *register_file_; - auto& regs = update_blend_state_regs_; +PipelineCache::UpdateStatus PipelineCache::UpdateMultisampleState() { + auto& regs = update_multisample_state_regs_; + auto& state_info = update_multisample_state_info_; + + state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO; + state_info.pNext = nullptr; + state_info.flags = 0; + + state_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT; + state_info.sampleShadingEnable = VK_FALSE; + state_info.minSampleShading = 0; + state_info.pSampleMask = nullptr; + state_info.alphaToCoverageEnable = VK_FALSE; + state_info.alphaToOneEnable = VK_FALSE; + + return UpdateStatus::kCompatible; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { + auto& regs = update_depth_stencil_state_regs_; + auto& state_info = update_depth_stencil_state_info_; + + bool dirty = false; + dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); + dirty |= + SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); + if (!dirty) { + return UpdateStatus::kCompatible; + } + + state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO; + state_info.pNext = nullptr; + state_info.flags = 0; + + state_info.depthTestEnable = VK_FALSE; + state_info.depthWriteEnable = VK_FALSE; + state_info.depthCompareOp = VK_COMPARE_OP_ALWAYS; + state_info.depthBoundsTestEnable = VK_FALSE; + state_info.stencilTestEnable = VK_FALSE; + state_info.front.failOp = VK_STENCIL_OP_KEEP; + state_info.front.passOp = VK_STENCIL_OP_KEEP; + state_info.front.depthFailOp = VK_STENCIL_OP_KEEP; +
state_info.front.compareOp = VK_COMPARE_OP_ALWAYS; + state_info.back.failOp = VK_STENCIL_OP_KEEP; + state_info.back.passOp = VK_STENCIL_OP_KEEP; + state_info.back.depthFailOp = VK_STENCIL_OP_KEEP; + state_info.back.compareOp = VK_COMPARE_OP_ALWAYS; + + // Ignored; set dynamically. + state_info.minDepthBounds = 0; + state_info.maxDepthBounds = 0; + state_info.front.compareMask = 0; + state_info.front.writeMask = 0; + state_info.front.reference = 0; + state_info.back.compareMask = 0; + state_info.back.writeMask = 0; + state_info.back.reference = 0; + + return UpdateStatus::kMismatch; +} + +PipelineCache::UpdateStatus PipelineCache::UpdateColorBlendState() { + auto& regs = update_color_blend_state_regs_; + auto& state_info = update_color_blend_state_info_; // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE // Deprecated in GL, implemented in shader. @@ -666,6 +966,8 @@ PipelineCache::UpdateStatus PipelineCache::UpdateBlendState() { // reg_file[XE_GPU_REG_RB_ALPHA_REF].f32); bool dirty = false; + dirty |= SetShadowRegister(&regs.rb_colorcontrol, XE_GPU_REG_RB_COLORCONTROL); + dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK); dirty |= SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0); dirty |= @@ -674,31 +976,80 @@ PipelineCache::UpdateStatus PipelineCache::UpdateBlendState() { SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2); dirty |= SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3); - dirty |= SetShadowRegister(&regs.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED); - dirty |= SetShadowRegister(&regs.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN); - dirty |= SetShadowRegister(&regs.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE); - dirty |= SetShadowRegister(&regs.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA); if (!dirty) { return UpdateStatus::kCompatible; } - SCOPE_profile_cpu_f("gpu"); + state_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO; + state_info.pNext = nullptr; +
state_info.flags = 0; - return UpdateStatus::kMismatch; -} + state_info.logicOpEnable = VK_FALSE; + state_info.logicOp = VK_LOGIC_OP_NO_OP; -PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() { - auto& regs = update_depth_stencil_state_regs_; - - bool dirty = false; - dirty |= SetShadowRegister(®s.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL); - dirty |= - SetShadowRegister(®s.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK); - if (!dirty) { - return UpdateStatus::kCompatible; + static const VkBlendFactor kBlendFactorMap[] = { + /* 0 */ VK_BLEND_FACTOR_ZERO, + /* 1 */ VK_BLEND_FACTOR_ONE, + /* 2 */ VK_BLEND_FACTOR_ZERO, // ? + /* 3 */ VK_BLEND_FACTOR_ZERO, // ? + /* 4 */ VK_BLEND_FACTOR_SRC_COLOR, + /* 5 */ VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, + /* 6 */ VK_BLEND_FACTOR_SRC_ALPHA, + /* 7 */ VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + /* 8 */ VK_BLEND_FACTOR_DST_COLOR, + /* 9 */ VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, + /* 10 */ VK_BLEND_FACTOR_DST_ALPHA, + /* 11 */ VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, + /* 12 */ VK_BLEND_FACTOR_CONSTANT_COLOR, + /* 13 */ VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, + /* 14 */ VK_BLEND_FACTOR_CONSTANT_ALPHA, + /* 15 */ VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, + /* 16 */ VK_BLEND_FACTOR_SRC_ALPHA_SATURATE, + }; + static const VkBlendOp kBlendOpMap[] = { + /* 0 */ VK_BLEND_OP_ADD, + /* 1 */ VK_BLEND_OP_SUBTRACT, + /* 2 */ VK_BLEND_OP_MIN, + /* 3 */ VK_BLEND_OP_MAX, + /* 4 */ VK_BLEND_OP_REVERSE_SUBTRACT, + }; + auto& attachment_states = update_color_blend_attachment_states_; + for (int i = 0; i < 4; ++i) { + uint32_t blend_control = regs.rb_blendcontrol[i]; + auto& attachment_state = attachment_states[i]; + attachment_state.blendEnable = !(regs.rb_colorcontrol & 0x20); + // A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND + attachment_state.srcColorBlendFactor = + kBlendFactorMap[(blend_control & 0x0000001F) >> 0]; + // A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND + attachment_state.dstColorBlendFactor = + kBlendFactorMap[(blend_control & 
0x00001F00) >> 8]; + // A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN + attachment_state.colorBlendOp = + kBlendOpMap[(blend_control & 0x000000E0) >> 5]; + // A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND + attachment_state.srcAlphaBlendFactor = + kBlendFactorMap[(blend_control & 0x001F0000) >> 16]; + // A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND + attachment_state.dstAlphaBlendFactor = + kBlendFactorMap[(blend_control & 0x1F000000) >> 24]; + // A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN + attachment_state.alphaBlendOp = + kBlendOpMap[(blend_control & 0x00E00000) >> 21]; + // A2XX_RB_COLOR_MASK_WRITE_* == D3DRS_COLORWRITEENABLE + // Lines up with VkColorComponentFlagBits, where R=bit 1, G=bit 2, etc.. + uint32_t write_mask = (regs.rb_color_mask >> (i * 4)) & 0xF; + attachment_state.colorWriteMask = write_mask; } - SCOPE_profile_cpu_f("gpu"); + state_info.attachmentCount = 4; + state_info.pAttachments = attachment_states; + + // Ignored; set dynamically. + state_info.blendConstants[0] = 0.0f; + state_info.blendConstants[1] = 0.0f; + state_info.blendConstants[2] = 0.0f; + state_info.blendConstants[3] = 0.0f; return UpdateStatus::kMismatch; } diff --git a/src/xenia/gpu/vulkan/pipeline_cache.h b/src/xenia/gpu/vulkan/pipeline_cache.h index aad43ca80..1eb9d75ed 100644 --- a/src/xenia/gpu/vulkan/pipeline_cache.h +++ b/src/xenia/gpu/vulkan/pipeline_cache.h @@ -57,10 +57,19 @@ class PipelineCache { void ClearCache(); private: + // Creates or retrieves an existing pipeline for the currently configured + // state. + VkPipeline GetPipeline(const RenderState* render_state); + // Gets a geometry shader used to emulate the given primitive type. // Returns nullptr if the primitive doesn't need to be emulated. VkShaderModule GetGeometryShader(PrimitiveType primitive_type); + // Sets required dynamic state on the command buffer. + // Only state that has changed since the last call will be set unless + // full_update is true. 
+ bool SetDynamicState(VkCommandBuffer command_buffer, bool full_update); + RegisterFile* register_file_ = nullptr; VkDevice device_ = nullptr; @@ -80,6 +89,11 @@ class PipelineCache { // TODO(benvanik): geometry shader cache. + // Previously used pipeline. This matches our current state settings + // and allows us to quickly(ish) reuse the pipeline if no registers have + // changed. + VkPipeline current_pipeline_ = nullptr; + private: enum class UpdateStatus { kCompatible, @@ -87,13 +101,21 @@ class PipelineCache { kError, }; - UpdateStatus UpdateShaders(PrimitiveType prim_type); UpdateStatus UpdateRenderTargets(); - UpdateStatus UpdateState(PrimitiveType prim_type); + UpdateStatus UpdateState(VulkanShader* vertex_shader, + VulkanShader* pixel_shader, + PrimitiveType primitive_type); + + UpdateStatus UpdateShaderStages(VulkanShader* vertex_shader, + VulkanShader* pixel_shader, + PrimitiveType primitive_type); + UpdateStatus UpdateVertexInputState(VulkanShader* vertex_shader); + UpdateStatus UpdateInputAssemblyState(PrimitiveType primitive_type); UpdateStatus UpdateViewportState(); - UpdateStatus UpdateRasterizerState(PrimitiveType prim_type); - UpdateStatus UpdateBlendState(); + UpdateStatus UpdateRasterizationState(PrimitiveType primitive_type); + UpdateStatus UpdateMultisampleState(); UpdateStatus UpdateDepthStencilState(); + UpdateStatus UpdateColorBlendState(); bool SetShadowRegister(uint32_t* dest, uint32_t register_name); bool SetShadowRegister(float* dest, uint32_t register_name); @@ -113,6 +135,45 @@ class PipelineCache { UpdateRenderTargetsRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } } update_render_targets_regs_; + + struct UpdateShaderStagesRegisters { + PrimitiveType prim_type; + uint32_t pa_su_sc_mode_cntl; + uint32_t sq_program_cntl; + uint32_t sq_context_misc; + VulkanShader* vertex_shader; + VulkanShader* pixel_shader; + + UpdateShaderStagesRegisters() { Reset(); } + void Reset() { + sq_program_cntl = 0; + 
vertex_shader = pixel_shader = nullptr; + } + } update_shader_stages_regs_; + VkPipelineShaderStageCreateInfo update_shader_stages_info_[3]; + uint32_t update_shader_stages_stage_count_ = 0; + + struct UpdateVertexInputStateRegisters { + VulkanShader* vertex_shader; + + UpdateVertexInputStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_vertex_input_state_regs_; + VkPipelineVertexInputStateCreateInfo update_vertex_input_state_info_; + VkVertexInputBindingDescription update_vertex_input_state_binding_descrs_[64]; + VkVertexInputAttributeDescription + update_vertex_input_state_attrib_descrs_[64]; + + struct UpdateInputAssemblyStateRegisters { + PrimitiveType primitive_type; + uint32_t pa_su_sc_mode_cntl; + uint32_t multi_prim_ib_reset_index; + + UpdateInputAssemblyStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_input_assembly_state_regs_; + VkPipelineInputAssemblyStateCreateInfo update_input_assembly_state_info_; + struct UpdateViewportStateRegisters { // uint32_t pa_cl_clip_cntl; uint32_t rb_surface_info; @@ -131,23 +192,26 @@ class PipelineCache { UpdateViewportStateRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } } update_viewport_state_regs_; - struct UpdateRasterizerStateRegisters { + VkPipelineViewportStateCreateInfo update_viewport_state_info_; + + struct UpdateRasterizationStateRegisters { uint32_t pa_su_sc_mode_cntl; uint32_t pa_sc_screen_scissor_tl; uint32_t pa_sc_screen_scissor_br; uint32_t multi_prim_ib_reset_index; PrimitiveType prim_type; - UpdateRasterizerStateRegisters() { Reset(); } + UpdateRasterizationStateRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_rasterizer_state_regs_; - struct UpdateBlendStateRegisters { - uint32_t rb_blendcontrol[4]; - float rb_blend_rgba[4]; + } update_rasterization_state_regs_; + VkPipelineRasterizationStateCreateInfo update_rasterization_state_info_; - 
UpdateBlendStateRegisters() { Reset(); } + struct UpdateMultisampleStateeRegisters { + UpdateMultisampleStateeRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } - } update_blend_state_regs_; + } update_multisample_state_regs_; + VkPipelineMultisampleStateCreateInfo update_multisample_state_info_; + struct UpdateDepthStencilStateRegisters { uint32_t rb_depthcontrol; uint32_t rb_stencilrefmask; @@ -155,20 +219,40 @@ class PipelineCache { UpdateDepthStencilStateRegisters() { Reset(); } void Reset() { std::memset(this, 0, sizeof(*this)); } } update_depth_stencil_state_regs_; - struct UpdateShadersRegisters { - PrimitiveType prim_type; - uint32_t pa_su_sc_mode_cntl; - uint32_t sq_program_cntl; - uint32_t sq_context_misc; - VulkanShader* vertex_shader; - VulkanShader* pixel_shader; + VkPipelineDepthStencilStateCreateInfo update_depth_stencil_state_info_; - UpdateShadersRegisters() { Reset(); } - void Reset() { - sq_program_cntl = 0; - vertex_shader = pixel_shader = nullptr; - } - } update_shaders_regs_; + struct UpdateColorBlendStateRegisters { + uint32_t rb_colorcontrol; + uint32_t rb_color_mask; + uint32_t rb_blendcontrol[4]; + + UpdateColorBlendStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } update_color_blend_state_regs_; + VkPipelineColorBlendStateCreateInfo update_color_blend_state_info_; + VkPipelineColorBlendAttachmentState update_color_blend_attachment_states_[4]; + + struct SetDynamicStateRegisters { + uint32_t pa_sc_window_offset; + + uint32_t pa_su_sc_mode_cntl; + uint32_t pa_sc_window_scissor_tl; + uint32_t pa_sc_window_scissor_br; + + uint32_t rb_surface_info; + uint32_t pa_cl_vte_cntl; + float pa_cl_vport_xoffset; + float pa_cl_vport_yoffset; + float pa_cl_vport_zoffset; + float pa_cl_vport_xscale; + float pa_cl_vport_yscale; + float pa_cl_vport_zscale; + + float rb_blend_rgba[4]; + + SetDynamicStateRegisters() { Reset(); } + void Reset() { std::memset(this, 0, sizeof(*this)); } + } 
set_dynamic_state_registers_; }; } // namespace vulkan diff --git a/src/xenia/gpu/vulkan/vulkan_command_processor.cc b/src/xenia/gpu/vulkan/vulkan_command_processor.cc index 31460be79..646b050fb 100644 --- a/src/xenia/gpu/vulkan/vulkan_command_processor.cc +++ b/src/xenia/gpu/vulkan/vulkan_command_processor.cc @@ -321,12 +321,11 @@ bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer, VulkanShader* pixel_shader) { // Upload the constants the shaders require. // These are optional, and if none are defined 0 will be returned. - VkDeviceSize vertex_constant_offset = buffer_cache_->UploadConstantRegisters( - vertex_shader->constant_register_map()); - VkDeviceSize pixel_constant_offset = buffer_cache_->UploadConstantRegisters( + auto constant_offsets = buffer_cache_->UploadConstantRegisters( + vertex_shader->constant_register_map(), pixel_shader->constant_register_map()); - if (vertex_constant_offset == VK_WHOLE_SIZE || - pixel_constant_offset == VK_WHOLE_SIZE) { + if (constant_offsets.first == VK_WHOLE_SIZE || + constant_offsets.second == VK_WHOLE_SIZE) { // Shader wants constants but we couldn't upload them. return false; } @@ -334,12 +333,14 @@ bool VulkanCommandProcessor::PopulateConstants(VkCommandBuffer command_buffer, // Configure constant uniform access to point at our offsets. 
auto constant_descriptor_set = buffer_cache_->constant_descriptor_set(); auto pipeline_layout = pipeline_cache_->pipeline_layout(); - uint32_t constant_offsets[2] = {static_cast<uint32_t>(vertex_constant_offset), - static_cast<uint32_t>(pixel_constant_offset)}; - vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, - pipeline_layout, 0, 1, &constant_descriptor_set, - static_cast<uint32_t>(xe::countof(constant_offsets)), - constant_offsets); + uint32_t set_constant_offsets[2] = { + static_cast<uint32_t>(constant_offsets.first), + static_cast<uint32_t>(constant_offsets.second)}; + vkCmdBindDescriptorSets( + command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, + &constant_descriptor_set, + static_cast<uint32_t>(xe::countof(set_constant_offsets)), + set_constant_offsets); return true; }