[Vulkan] Basic draw call architecture + [D3D12] Some cleanup

Triang3l 2020-11-14 14:16:04 +03:00
parent 08c50af7b8
commit 65c8d2b28e
17 changed files with 2235 additions and 177 deletions

View File

@ -2005,14 +2005,15 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
}
// Must not call anything that can change the descriptor heap from now on!
// Ensure vertex and index buffers are resident and draw.
// Ensure vertex buffers are resident.
// TODO(Triang3l): Cache residency for ranges in a way similar to how texture
// validity will be tracked.
// validity is tracked.
uint64_t vertex_buffers_resident[2] = {};
for (const auto& vertex_binding : vertex_shader->vertex_bindings()) {
for (const Shader::VertexBinding& vertex_binding :
vertex_shader->vertex_bindings()) {
uint32_t vfetch_index = vertex_binding.fetch_constant;
if (vertex_buffers_resident[vfetch_index >> 6] &
(1ull << (vfetch_index & 63))) {
(uint64_t(1) << (vfetch_index & 63))) {
continue;
}
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
@ -2045,7 +2046,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
vfetch_constant.address << 2, vfetch_constant.size << 2);
return false;
}
vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1)
<< (vfetch_index & 63);
}
// Gather memexport ranges and ensure the heaps for them are resident, and
@ -2745,12 +2747,12 @@ void D3D12CommandProcessor::ClearCommandAllocatorCache() {
}
void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
auto& regs = *register_file_;
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
const RegisterFile& regs = *register_file_;
// Window parameters.
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
// See r200UpdateWindow:
@ -2846,14 +2848,14 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
scissor.right = pa_sc_window_scissor_br.br_x;
scissor.bottom = pa_sc_window_scissor_br.br_y;
if (!pa_sc_window_scissor_tl.window_offset_disable) {
scissor.left =
std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0));
scissor.top =
std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0));
scissor.right =
std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0));
scissor.bottom =
std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0));
scissor.left = std::max(
LONG(scissor.left + pa_sc_window_offset.window_x_offset), LONG(0));
scissor.top = std::max(
LONG(scissor.top + pa_sc_window_offset.window_y_offset), LONG(0));
scissor.right = std::max(
LONG(scissor.right + pa_sc_window_offset.window_x_offset), LONG(0));
scissor.bottom = std::max(
LONG(scissor.bottom + pa_sc_window_offset.window_y_offset), LONG(0));
}
scissor.left *= pixel_size_x;
scissor.top *= pixel_size_y;
@ -2915,12 +2917,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
uint32_t line_loop_closing_index, xenos::Endian index_endian,
uint32_t used_texture_mask, bool early_z, uint32_t color_mask,
const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
auto& regs = *register_file_;
#if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
const RegisterFile& regs = *register_file_;
auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();
@ -3103,14 +3104,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
dirty |= system_constants_.line_loop_closing_index != line_loop_closing_index;
system_constants_.line_loop_closing_index = line_loop_closing_index;
// Vertex index offset.
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
system_constants_.vertex_base_index = vgt_indx_offset;
// Index or tessellation edge factor buffer endianness.
dirty |= system_constants_.vertex_index_endian != index_endian;
system_constants_.vertex_index_endian = index_endian;
// Vertex index offset.
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
system_constants_.vertex_base_index = vgt_indx_offset;
// User clip planes (UCP_ENA_#), when not CLIP_DISABLE.
if (!pa_cl_clip_cntl.clip_disable) {
for (uint32_t i = 0; i < 6; ++i) {
@ -3574,7 +3575,7 @@ bool D3D12CommandProcessor::UpdateBindings(
float_constant_map_vertex.float_bitmap[i];
// If no float constants at all, we can reuse any buffer for them, so not
// invalidating.
if (float_constant_map_vertex.float_count != 0) {
if (float_constant_count_vertex) {
cbuffer_binding_float_vertex_.up_to_date = false;
}
}
@ -3589,7 +3590,7 @@ bool D3D12CommandProcessor::UpdateBindings(
float_constant_map_pixel.float_bitmap[i]) {
current_float_constant_map_pixel_[i] =
float_constant_map_pixel.float_bitmap[i];
if (float_constant_map_pixel.float_count != 0) {
if (float_constant_count_pixel) {
cbuffer_binding_float_pixel_.up_to_date = false;
}
}

View File

@ -223,10 +223,10 @@ void PipelineCache::ClearCache(bool shutting_down) {
}
texture_binding_layout_map_.clear();
texture_binding_layouts_.clear();
for (auto it : shader_map_) {
for (auto it : shaders_) {
delete it.second;
}
shader_map_.clear();
shaders_.clear();
if (reinitialize_shader_storage) {
InitializeShaderStorage(shader_storage_root, shader_storage_title_id,
@ -374,8 +374,7 @@ void PipelineCache::InitializeShaderStorage(
}
size_t ucode_byte_count =
shader_header.ucode_dword_count * sizeof(uint32_t);
if (shader_map_.find(shader_header.ucode_data_hash) !=
shader_map_.end()) {
if (shaders_.find(shader_header.ucode_data_hash) != shaders_.end()) {
// Already added - usually shaders aren't added without the intention of
// translating them imminently, so don't do additional checks to
// actually ensure that translation happens right now (they would cause
@ -402,7 +401,7 @@ void PipelineCache::InitializeShaderStorage(
D3D12Shader* shader =
new D3D12Shader(shader_header.type, ucode_data_hash,
ucode_dwords.data(), shader_header.ucode_dword_count);
shader_map_.insert({ucode_data_hash, shader});
shaders_.insert({ucode_data_hash, shader});
// Create new threads if the currently existing threads can't keep up with
// file reading, but not more than the number of logical processors minus
// one.
@ -439,7 +438,7 @@ void PipelineCache::InitializeShaderStorage(
}
shader_translation_threads.clear();
for (D3D12Shader* shader : shaders_failed_to_translate) {
shader_map_.erase(shader->ucode_data_hash());
shaders_.erase(shader->ucode_data_hash());
delete shader;
}
}
@ -576,8 +575,8 @@ void PipelineCache::InitializeShaderStorage(
PipelineRuntimeDescription pipeline_runtime_description;
auto vertex_shader_it =
shader_map_.find(pipeline_description.vertex_shader_hash);
if (vertex_shader_it == shader_map_.end()) {
shaders_.find(pipeline_description.vertex_shader_hash);
if (vertex_shader_it == shaders_.end()) {
continue;
}
pipeline_runtime_description.vertex_shader = vertex_shader_it->second;
@ -586,8 +585,8 @@ void PipelineCache::InitializeShaderStorage(
}
if (pipeline_description.pixel_shader_hash) {
auto pixel_shader_it =
shader_map_.find(pipeline_description.pixel_shader_hash);
if (pixel_shader_it == shader_map_.end()) {
shaders_.find(pipeline_description.pixel_shader_hash);
if (pixel_shader_it == shaders_.end()) {
continue;
}
pipeline_runtime_description.pixel_shader = pixel_shader_it->second;
@ -779,8 +778,8 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
uint32_t dword_count) {
// Hash the input memory and lookup the shader.
uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
auto it = shader_map_.find(data_hash);
if (it != shader_map_.end()) {
auto it = shaders_.find(data_hash);
if (it != shaders_.end()) {
// Shader has been previously loaded.
return it->second;
}
@ -790,7 +789,7 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
// again.
D3D12Shader* shader =
new D3D12Shader(shader_type, data_hash, host_address, dword_count);
shader_map_.insert({data_hash, shader});
shaders_.insert({data_hash, shader});
return shader;
}

View File

@ -29,6 +29,7 @@
#include "xenia/gpu/dxbc_shader_translator.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/d3d12/d3d12_api.h"
namespace xe {
namespace gpu {
@ -255,9 +256,9 @@ class PipelineCache {
IDxcUtils* dxc_utils_ = nullptr;
IDxcCompiler* dxc_compiler_ = nullptr;
// All loaded shaders mapped by their guest hash key.
// Ucode hash -> shader.
std::unordered_map<uint64_t, D3D12Shader*, xe::hash::IdentityHasher<uint64_t>>
shader_map_;
shaders_;
struct LayoutUID {
size_t uid;

View File

@ -288,7 +288,7 @@ void SpirvShaderTranslator::StartTranslation() {
id_vector_temp_.push_back(builder_->makeRuntimeArray(type_uint_));
// Storage buffers have std430 packing, no padding to 4-component vectors.
builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride,
sizeof(uint32_t) * 4);
sizeof(uint32_t));
spv::Id type_shared_memory =
builder_->makeStructType(id_vector_temp_, "XeSharedMemory");
builder_->addMemberName(type_shared_memory, 0, "shared_memory");
@ -511,7 +511,9 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
? spv::ExecutionModelTessellationEvaluation
: spv::ExecutionModelVertex;
}
if (features_.float_controls) {
// TODO(Triang3l): Re-enable float controls when
// VkPhysicalDeviceFloatControlsPropertiesKHR are handled.
/* if (features_.float_controls) {
// Flush to zero, similar to the real hardware, also for things like Shader
// Model 3 multiplication emulation.
builder_->addCapability(spv::CapabilityDenormFlushToZero);
@ -523,7 +525,7 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
builder_->addCapability(spv::CapabilitySignedZeroInfNanPreserve);
builder_->addExecutionMode(function_main_,
spv::ExecutionModeSignedZeroInfNanPreserve, 32);
}
} */
spv::Instruction* entry_point =
builder_->addEntryPoint(execution_model, function_main_, "main");
for (spv::Id interface_id : main_interface_) {
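The commented-out block above uses the SPIR-V float controls capabilities; as a rough sketch of the prerequisite mentioned in the TODO, the corresponding device support could be queried roughly like this (names from Vulkan 1.2 / VK_KHR_shader_float_controls, physical_device is a placeholder handle, not code from this commit):

#include <vulkan/vulkan.h>

// Sketch: only request DenormFlushToZero and SignedZeroInfNanPreserve when
// the device reports them for 32-bit floats.
struct Float32Controls {
  bool denorm_flush_to_zero;
  bool signed_zero_inf_nan_preserve;
};

Float32Controls QueryFloat32Controls(VkPhysicalDevice physical_device) {
  VkPhysicalDeviceFloatControlsProperties float_controls_properties = {};
  float_controls_properties.sType =
      VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES;
  VkPhysicalDeviceProperties2 properties2 = {};
  properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
  properties2.pNext = &float_controls_properties;
  vkGetPhysicalDeviceProperties2(physical_device, &properties2);
  Float32Controls controls;
  controls.denorm_flush_to_zero =
      float_controls_properties.shaderDenormFlushToZeroFloat32 == VK_TRUE;
  controls.signed_zero_inf_nan_preserve =
      float_controls_properties.shaderSignedZeroInfNanPreserveFloat32 ==
      VK_TRUE;
  return controls;
}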
@ -982,7 +984,19 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
}
}
void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {}
void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {
// Write 1 to point size (using a geometry shader or another kind of fallback
// to expand point sprites - point size support is not guaranteed, and the
// size would also be limited, and can't be controlled independently along two
// axes).
id_vector_temp_.clear();
id_vector_temp_.push_back(
builder_->makeIntConstant(kOutputPerVertexMemberPointSize));
builder_->createStore(
const_float_1_,
builder_->createAccessChain(spv::StorageClassOutput, output_per_vertex_,
id_vector_temp_));
}
void SpirvShaderTranslator::UpdateExecConditionals(
ParsedExecInstruction::Type type, uint32_t bool_constant_index,
@ -1054,9 +1068,8 @@ void SpirvShaderTranslator::UpdateExecConditionals(
return;
}
cf_exec_condition_ = condition;
spv::Function& function = builder_->getBuildPoint()->getParent();
cf_exec_conditional_merge_ =
new spv::Block(builder_->getUniqueId(), function);
cf_exec_conditional_merge_ = new spv::Block(
builder_->getUniqueId(), builder_->getBuildPoint()->getParent());
SpirvCreateSelectionMerge(cf_exec_conditional_merge_->getId());
spv::Block& inner_block = builder_->makeNewBlock();
builder_->createConditionalBranch(
@ -1095,7 +1108,8 @@ void SpirvShaderTranslator::UpdateInstructionPredication(bool predicated,
spv::Id predicate_id =
builder_->createLoad(var_main_predicate_, spv::NoPrecision);
spv::Block& predicated_block = builder_->makeNewBlock();
cf_instruction_predicate_merge_ = &builder_->makeNewBlock();
cf_instruction_predicate_merge_ = new spv::Block(
builder_->getUniqueId(), builder_->getBuildPoint()->getParent());
SpirvCreateSelectionMerge(cf_instruction_predicate_merge_->getId());
builder_->createConditionalBranch(
predicate_id,
@ -1135,12 +1149,23 @@ void SpirvShaderTranslator::CloseExecConditionals() {
}
spv::Id SpirvShaderTranslator::GetStorageAddressingIndex(
InstructionStorageAddressingMode addressing_mode, uint32_t storage_index) {
InstructionStorageAddressingMode addressing_mode, uint32_t storage_index,
bool is_float_constant) {
EnsureBuildPointAvailable();
spv::Id base_pointer = spv::NoResult;
switch (addressing_mode) {
case InstructionStorageAddressingMode::kStatic:
return builder_->makeIntConstant(int(storage_index));
case InstructionStorageAddressingMode::kStatic: {
uint32_t static_storage_index = storage_index;
if (is_float_constant) {
static_storage_index =
constant_register_map().GetPackedFloatConstantIndex(storage_index);
assert_true(static_storage_index != UINT32_MAX);
if (static_storage_index == UINT32_MAX) {
static_storage_index = 0;
}
}
return builder_->makeIntConstant(int(static_storage_index));
}
case InstructionStorageAddressingMode::kAddressAbsolute:
base_pointer = var_main_address_absolute_;
break;
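GetPackedFloatConstantIndex itself is not part of this diff; the idea is that only the float constants a shader actually reads are uploaded, so a static register index has to be remapped to its position within that packed list. A minimal sketch of such a remap, assuming the constant register map keeps a 256-bit usage bitmap in float_bitmap (illustrative, not the actual implementation):

#include <bitset>
#include <cstdint>

// Sketch: index of register `storage_index` within the packed (used-only)
// float constant buffer, or UINT32_MAX if the shader never reads it.
uint32_t GetPackedFloatConstantIndexSketch(const uint64_t float_bitmap[4],
                                           uint32_t storage_index) {
  uint32_t word = storage_index >> 6;
  uint64_t bit = uint64_t(1) << (storage_index & 63);
  if (!(float_bitmap[word] & bit)) {
    return UINT32_MAX;
  }
  // Count the used constants preceding this register.
  uint32_t packed_index = 0;
  for (uint32_t i = 0; i < word; ++i) {
    packed_index += uint32_t(std::bitset<64>(float_bitmap[i]).count());
  }
  packed_index +=
      uint32_t(std::bitset<64>(float_bitmap[word] & (bit - 1)).count());
  return packed_index;
}

This also ties in with the assert below the switch: when float constants are addressed dynamically there is no static bit to remap, so presumably the full register range is uploaded unpacked in that case.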
@ -1153,6 +1178,8 @@ spv::Id SpirvShaderTranslator::GetStorageAddressingIndex(
id_vector_temp_util_);
break;
}
assert_true(!is_float_constant ||
constant_register_map().float_dynamic_addressing);
assert_true(base_pointer != spv::NoResult);
spv::Id index = builder_->createLoad(base_pointer, spv::NoPrecision);
if (storage_index) {
@ -1165,8 +1192,9 @@ spv::Id SpirvShaderTranslator::GetStorageAddressingIndex(
spv::Id SpirvShaderTranslator::LoadOperandStorage(
const InstructionOperand& operand) {
spv::Id index = GetStorageAddressingIndex(operand.storage_addressing_mode,
operand.storage_index);
spv::Id index = GetStorageAddressingIndex(
operand.storage_addressing_mode, operand.storage_index,
operand.storage_source == InstructionStorageSource::kConstantFloat);
EnsureBuildPointAvailable();
spv::Id vec4_pointer = spv::NoResult;
switch (operand.storage_source) {
@ -1592,7 +1620,7 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) {
builder_->makeUintConstant(
static_cast<unsigned int>(xenos::Endian::k8in32)));
spv::Id is_8in16_or_8in32 =
builder_->createBinOp(spv::OpLogicalAnd, type_bool_, is_8in16, is_8in32);
builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in16, is_8in32);
spv::Block& block_pre_8in16 = *builder_->getBuildPoint();
assert_false(block_pre_8in16.isTerminated());
spv::Block& block_8in16 = builder_->makeNewBlock();
@ -1633,7 +1661,7 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) {
builder_->makeUintConstant(
static_cast<unsigned int>(xenos::Endian::k16in32)));
spv::Id is_8in32_or_16in32 =
builder_->createBinOp(spv::OpLogicalAnd, type_bool_, is_8in32, is_16in32);
builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in32, is_16in32);
spv::Block& block_pre_16in32 = *builder_->getBuildPoint();
spv::Block& block_16in32 = builder_->makeNewBlock();
spv::Block& block_16in32_merge = builder_->makeNewBlock();
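The two OpLogicalAnd -> OpLogicalOr replacements above are fixes: the 8-in-32 swap is performed as an 8-in-16 swap followed by a 16-in-32 swap, so the first conditional block must run for k8in16 or k8in32, and the second for k8in32 or k16in32. A CPU-side reference of the per-word swap being built in SPIR-V (illustrative only, with the Endian values redeclared locally):

#include <cstdint>

enum class Endian : uint32_t { kNone = 0, k8in16 = 1, k8in32 = 2, k16in32 = 3 };

uint32_t EndianSwap32(uint32_t value, Endian endian) {
  if (endian == Endian::k8in16 || endian == Endian::k8in32) {
    // Swap the bytes within each 16-bit half.
    value = ((value & 0x00FF00FFu) << 8) | ((value & 0xFF00FF00u) >> 8);
  }
  if (endian == Endian::k8in32 || endian == Endian::k16in32) {
    // Swap the 16-bit halves of the 32-bit word.
    value = (value << 16) | (value >> 16);
  }
  return value;
}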

View File

@ -39,26 +39,49 @@ class SpirvShaderTranslator : public ShaderTranslator {
// therefore SSBOs must only be used for shared memory - all other storage
// resources must be images or texel buffers.
enum DescriptorSet : uint32_t {
// In order of update frequency.
// Very frequently changed, especially for UI draws, and for models drawn in
// multiple parts - contains vertex and texture fetch constants.
kDescriptorSetFetchConstants,
// According to the "Pipeline Layout Compatibility" section of the Vulkan
// specification:
// "Two pipeline layouts are defined to be "compatible for set N" if they
// were created with identically defined descriptor set layouts for sets
// zero through N, and if they were created with identical push constant
// ranges."
// "Place the least frequently changing descriptor sets near the start of
// the pipeline layout, and place the descriptor sets representing the most
// frequently changing resources near the end. When pipelines are switched,
// only the descriptor set bindings that have been invalidated will need to
// be updated and the remainder of the descriptor set bindings will remain
// in place."
// This is partially the reverse of the Direct3D 12's rule of placing the
// most frequently changed descriptor sets in the beginning. Here all
// descriptor sets with an immutable layout are placed first, in reverse
// frequency of changing, and sets that may be different for different
// pipeline states last.
// Always the same descriptor set layouts for all pipeline layouts:
// Never changed.
kDescriptorSetSharedMemoryAndEdram,
// Pretty rarely used and rarely changed - flow control constants.
kDescriptorSetBoolLoopConstants,
// May stay the same across many draws.
kDescriptorSetSystemConstants,
// Less frequently changed (per-material).
kDescriptorSetFloatConstantsPixel,
// Quite frequently changed (for one object drawn multiple times, for
// instance - may contain projection matrices).
kDescriptorSetFloatConstantsVertex,
// Less frequently changed (per-material).
kDescriptorSetFloatConstantsPixel,
// Per-material, combined images and samplers.
kDescriptorSetTexturesPixel,
// Very frequently changed, especially for UI draws, and for models drawn in
// multiple parts - contains vertex and texture fetch constants.
kDescriptorSetFetchConstants,
// Mutable part of the pipeline layout:
kDescriptorSetMutableLayoutsStart,
// Rarely used at all, but may be changed at an unpredictable rate when
// vertex textures are used, combined images and samplers.
kDescriptorSetTexturesVertex,
// May stay the same across many draws.
kDescriptorSetSystemConstants,
// Pretty rarely used and rarely changed - flow control constants.
kDescriptorSetBoolLoopConstants,
// Never changed.
kDescriptorSetSharedMemoryAndEdram,
kDescriptorSetTexturesVertex = kDescriptorSetMutableLayoutsStart,
// Per-material, combined images and samplers.
kDescriptorSetTexturesPixel,
kDescriptorSetCount,
};
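The practical effect of this ordering is that a pipeline layout switch only disturbs descriptor set bindings starting from the first set whose layout differs. A minimal sketch of that compatibility rule (assuming identical push constant ranges and using layout handle equality as a stand-in for "identically defined"; not code from this commit):

#include <cstdint>
#include <vulkan/vulkan.h>

// Sketch: sets [0, result) stay bound when switching from `previous` to
// `current`; only sets at indices >= result need vkCmdBindDescriptorSets
// again.
uint32_t CountLeadingCompatibleSets(const VkDescriptorSetLayout* previous,
                                    const VkDescriptorSetLayout* current,
                                    uint32_t set_count) {
  uint32_t compatible_count = 0;
  while (compatible_count < set_count &&
         previous[compatible_count] == current[compatible_count]) {
    ++compatible_count;
  }
  return compatible_count;
}

With the order above, only the two texture sets at the end can differ between pipeline layouts, so at worst the last two bindings are invalidated by a layout switch.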
@ -162,7 +185,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
void CloseExecConditionals();
spv::Id GetStorageAddressingIndex(
InstructionStorageAddressingMode addressing_mode, uint32_t storage_index);
InstructionStorageAddressingMode addressing_mode, uint32_t storage_index,
bool is_float_constant = false);
// Loads unswizzled operand without sign modifiers as float4.
spv::Id LoadOperandStorage(const InstructionOperand& operand);
spv::Id ApplyOperandModifiers(spv::Id operand_value,

View File

@ -46,15 +46,65 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) {
stream_remaining -= kCommandHeaderSizeElements;
switch (header.command) {
case Command::kVkBeginRenderPass: {
auto& args = *reinterpret_cast<const ArgsVkBeginRenderPass*>(stream);
size_t offset_bytes = sizeof(ArgsVkBeginRenderPass);
VkRenderPassBeginInfo render_pass_begin_info;
render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
render_pass_begin_info.pNext = nullptr;
render_pass_begin_info.renderPass = args.render_pass;
render_pass_begin_info.framebuffer = args.framebuffer;
render_pass_begin_info.renderArea = args.render_area;
render_pass_begin_info.clearValueCount = args.clear_value_count;
if (render_pass_begin_info.clearValueCount) {
offset_bytes = xe::align(offset_bytes, alignof(VkClearValue));
render_pass_begin_info.pClearValues =
reinterpret_cast<const VkClearValue*>(
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
offset_bytes +=
sizeof(VkClearValue) * render_pass_begin_info.clearValueCount;
} else {
render_pass_begin_info.pClearValues = nullptr;
}
dfn.vkCmdBeginRenderPass(command_buffer, &render_pass_begin_info,
args.contents);
} break;
case Command::kVkBindDescriptorSets: {
auto& args = *reinterpret_cast<const ArgsVkBindDescriptorSets*>(stream);
size_t offset_bytes = xe::align(sizeof(ArgsVkBindDescriptorSets),
alignof(VkDescriptorSet));
const VkDescriptorSet* descriptor_sets =
reinterpret_cast<const VkDescriptorSet*>(
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
offset_bytes += sizeof(VkDescriptorSet) * args.descriptor_set_count;
const uint32_t* dynamic_offsets = nullptr;
if (args.dynamic_offset_count) {
offset_bytes = xe::align(offset_bytes, alignof(uint32_t));
dynamic_offsets = reinterpret_cast<const uint32_t*>(
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
offset_bytes += sizeof(uint32_t) * args.dynamic_offset_count;
}
dfn.vkCmdBindDescriptorSets(command_buffer, args.pipeline_bind_point,
args.layout, args.first_set,
args.descriptor_set_count, descriptor_sets,
args.dynamic_offset_count, dynamic_offsets);
} break;
case Command::kVkBindIndexBuffer: {
auto& args = *reinterpret_cast<const ArgsVkBindIndexBuffer*>(stream);
dfn.vkCmdBindIndexBuffer(command_buffer, args.buffer, args.offset,
args.index_type);
} break;
case Command::kVkBindPipeline: {
auto& args = *reinterpret_cast<const ArgsVkBindPipeline*>(stream);
dfn.vkCmdBindPipeline(command_buffer, args.pipeline_bind_point,
args.pipeline);
} break;
case Command::kVkCopyBuffer: {
auto& args = *reinterpret_cast<const ArgsVkCopyBuffer*>(stream);
static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t));
dfn.vkCmdCopyBuffer(
command_buffer, args.src_buffer, args.dst_buffer, args.region_count,
reinterpret_cast<const VkBufferCopy*>(
@ -62,26 +112,37 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) {
xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy))));
} break;
case Command::kVkDraw: {
auto& args = *reinterpret_cast<const ArgsVkDraw*>(stream);
dfn.vkCmdDraw(command_buffer, args.vertex_count, args.instance_count,
args.first_vertex, args.first_instance);
} break;
case Command::kVkDrawIndexed: {
auto& args = *reinterpret_cast<const ArgsVkDrawIndexed*>(stream);
dfn.vkCmdDrawIndexed(command_buffer, args.index_count,
args.instance_count, args.first_index,
args.vertex_offset, args.first_instance);
} break;
case Command::kVkEndRenderPass:
dfn.vkCmdEndRenderPass(command_buffer);
break;
case Command::kVkPipelineBarrier: {
auto& args = *reinterpret_cast<const ArgsVkPipelineBarrier*>(stream);
size_t barrier_offset_bytes = sizeof(ArgsVkPipelineBarrier);
const VkMemoryBarrier* memory_barriers;
const VkMemoryBarrier* memory_barriers = nullptr;
if (args.memory_barrier_count) {
static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t));
barrier_offset_bytes =
xe::align(barrier_offset_bytes, alignof(VkMemoryBarrier));
memory_barriers = reinterpret_cast<const VkMemoryBarrier*>(
reinterpret_cast<const uint8_t*>(stream) + barrier_offset_bytes);
barrier_offset_bytes +=
sizeof(VkMemoryBarrier) * args.memory_barrier_count;
} else {
memory_barriers = nullptr;
}
const VkBufferMemoryBarrier* buffer_memory_barriers;
const VkBufferMemoryBarrier* buffer_memory_barriers = nullptr;
if (args.buffer_memory_barrier_count) {
static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t));
barrier_offset_bytes =
xe::align(barrier_offset_bytes, alignof(VkBufferMemoryBarrier));
buffer_memory_barriers =
@ -90,23 +151,16 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) {
barrier_offset_bytes);
barrier_offset_bytes +=
sizeof(VkBufferMemoryBarrier) * args.buffer_memory_barrier_count;
} else {
buffer_memory_barriers = nullptr;
}
const VkImageMemoryBarrier* image_memory_barriers;
const VkImageMemoryBarrier* image_memory_barriers = nullptr;
if (args.image_memory_barrier_count) {
static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t));
barrier_offset_bytes =
xe::align(barrier_offset_bytes, alignof(VkImageMemoryBarrier));
image_memory_barriers = reinterpret_cast<const VkImageMemoryBarrier*>(
reinterpret_cast<const uint8_t*>(stream) + barrier_offset_bytes);
barrier_offset_bytes +=
sizeof(VkImageMemoryBarrier) * args.image_memory_barrier_count;
} else {
image_memory_barriers = nullptr;
}
dfn.vkCmdPipelineBarrier(
command_buffer, args.src_stage_mask, args.dst_stage_mask,
args.dependency_flags, args.memory_barrier_count, memory_barriers,
@ -114,6 +168,24 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) {
args.image_memory_barrier_count, image_memory_barriers);
} break;
case Command::kVkSetScissor: {
auto& args = *reinterpret_cast<const ArgsVkSetScissor*>(stream);
dfn.vkCmdSetScissor(
command_buffer, args.first_scissor, args.scissor_count,
reinterpret_cast<const VkRect2D*>(
reinterpret_cast<const uint8_t*>(stream) +
xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D))));
} break;
case Command::kVkSetViewport: {
auto& args = *reinterpret_cast<const ArgsVkSetViewport*>(stream);
dfn.vkCmdSetViewport(
command_buffer, args.first_viewport, args.viewport_count,
reinterpret_cast<const VkViewport*>(
reinterpret_cast<const uint8_t*>(stream) +
xe::align(sizeof(ArgsVkSetViewport), alignof(VkViewport))));
} break;
default:
assert_unhandled_case(header.command);
break;
@ -133,38 +205,25 @@ void DeferredCommandBuffer::CmdVkPipelineBarrier(
uint32_t image_memory_barrier_count,
const VkImageMemoryBarrier* image_memory_barriers) {
size_t arguments_size = sizeof(ArgsVkPipelineBarrier);
size_t memory_barriers_offset;
size_t memory_barriers_offset = 0;
if (memory_barrier_count) {
static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t));
arguments_size = xe::align(arguments_size, alignof(VkMemoryBarrier));
memory_barriers_offset = arguments_size;
arguments_size += sizeof(VkMemoryBarrier) * memory_barrier_count;
} else {
memory_barriers_offset = 0;
}
size_t buffer_memory_barriers_offset;
size_t buffer_memory_barriers_offset = 0;
if (buffer_memory_barrier_count) {
static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t));
arguments_size = xe::align(arguments_size, alignof(VkBufferMemoryBarrier));
buffer_memory_barriers_offset = arguments_size;
arguments_size +=
sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count;
} else {
buffer_memory_barriers_offset = 0;
}
size_t image_memory_barriers_offset;
size_t image_memory_barriers_offset = 0;
if (image_memory_barrier_count) {
static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t));
arguments_size = xe::align(arguments_size, alignof(VkImageMemoryBarrier));
image_memory_barriers_offset = arguments_size;
arguments_size += sizeof(VkImageMemoryBarrier) * image_memory_barrier_count;
} else {
image_memory_barriers_offset = 0;
}
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkPipelineBarrier, arguments_size));
auto& args = *reinterpret_cast<ArgsVkPipelineBarrier*>(args_ptr);

View File

@ -14,6 +14,7 @@
#include <cstdint>
#include <cstring>
#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
@ -31,6 +32,65 @@ class DeferredCommandBuffer {
void Reset();
void Execute(VkCommandBuffer command_buffer);
// render_pass_begin->pNext must be null.
void CmdVkBeginRenderPass(const VkRenderPassBeginInfo* render_pass_begin,
VkSubpassContents contents) {
assert_null(render_pass_begin->pNext);
size_t arguments_size = sizeof(ArgsVkBeginRenderPass);
uint32_t clear_value_count = render_pass_begin->clearValueCount;
size_t clear_values_offset = 0;
if (clear_value_count) {
arguments_size = xe::align(arguments_size, alignof(VkClearValue));
clear_values_offset = arguments_size;
arguments_size += sizeof(VkClearValue) * clear_value_count;
}
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkBeginRenderPass, arguments_size));
auto& args = *reinterpret_cast<ArgsVkBeginRenderPass*>(args_ptr);
args.render_pass = render_pass_begin->renderPass;
args.framebuffer = render_pass_begin->framebuffer;
args.render_area = render_pass_begin->renderArea;
args.clear_value_count = clear_value_count;
args.contents = contents;
if (clear_value_count) {
std::memcpy(args_ptr + clear_values_offset,
render_pass_begin->pClearValues,
sizeof(VkClearValue) * clear_value_count);
}
}
void CmdVkBindDescriptorSets(VkPipelineBindPoint pipeline_bind_point,
VkPipelineLayout layout, uint32_t first_set,
uint32_t descriptor_set_count,
const VkDescriptorSet* descriptor_sets,
uint32_t dynamic_offset_count,
const uint32_t* dynamic_offsets) {
size_t arguments_size =
xe::align(sizeof(ArgsVkBindDescriptorSets), alignof(VkDescriptorSet));
size_t descriptor_sets_offset = arguments_size;
arguments_size += sizeof(VkDescriptorSet) * descriptor_set_count;
size_t dynamic_offsets_offset = 0;
if (dynamic_offset_count) {
arguments_size = xe::align(arguments_size, alignof(uint32_t));
dynamic_offsets_offset = arguments_size;
arguments_size += sizeof(uint32_t) * dynamic_offset_count;
}
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkBindDescriptorSets, arguments_size));
auto& args = *reinterpret_cast<ArgsVkBindDescriptorSets*>(args_ptr);
args.pipeline_bind_point = pipeline_bind_point;
args.layout = layout;
args.first_set = first_set;
args.descriptor_set_count = descriptor_set_count;
args.dynamic_offset_count = dynamic_offset_count;
std::memcpy(args_ptr + descriptor_sets_offset, descriptor_sets,
sizeof(VkDescriptorSet) * descriptor_set_count);
if (dynamic_offset_count) {
std::memcpy(args_ptr + dynamic_offsets_offset, dynamic_offsets,
sizeof(uint32_t) * dynamic_offset_count);
}
}
void CmdVkBindIndexBuffer(VkBuffer buffer, VkDeviceSize offset,
VkIndexType index_type) {
auto& args = *reinterpret_cast<ArgsVkBindIndexBuffer*>(WriteCommand(
@ -40,9 +100,16 @@ class DeferredCommandBuffer {
args.index_type = index_type;
}
void CmdVkBindPipeline(VkPipelineBindPoint pipeline_bind_point,
VkPipeline pipeline) {
auto& args = *reinterpret_cast<ArgsVkBindPipeline*>(
WriteCommand(Command::kVkBindPipeline, sizeof(ArgsVkBindPipeline)));
args.pipeline_bind_point = pipeline_bind_point;
args.pipeline = pipeline;
}
VkBufferCopy* CmdCopyBufferEmplace(VkBuffer src_buffer, VkBuffer dst_buffer,
uint32_t region_count) {
static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t));
const size_t header_size =
xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy));
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
@ -60,6 +127,30 @@ class DeferredCommandBuffer {
regions, sizeof(VkBufferCopy) * region_count);
}
void CmdVkDraw(uint32_t vertex_count, uint32_t instance_count,
uint32_t first_vertex, uint32_t first_instance) {
auto& args = *reinterpret_cast<ArgsVkDraw*>(
WriteCommand(Command::kVkDraw, sizeof(ArgsVkDraw)));
args.vertex_count = vertex_count;
args.instance_count = instance_count;
args.first_vertex = first_vertex;
args.first_instance = first_instance;
}
void CmdVkDrawIndexed(uint32_t index_count, uint32_t instance_count,
uint32_t first_index, int32_t vertex_offset,
uint32_t first_instance) {
auto& args = *reinterpret_cast<ArgsVkDrawIndexed*>(
WriteCommand(Command::kVkDrawIndexed, sizeof(ArgsVkDrawIndexed)));
args.index_count = index_count;
args.instance_count = instance_count;
args.first_index = first_index;
args.vertex_offset = vertex_offset;
args.first_instance = first_instance;
}
void CmdVkEndRenderPass() { WriteCommand(Command::kVkEndRenderPass, 0); }
// pNext of all barriers must be null.
void CmdVkPipelineBarrier(VkPipelineStageFlags src_stage_mask,
VkPipelineStageFlags dst_stage_mask,
@ -71,11 +162,47 @@ class DeferredCommandBuffer {
uint32_t image_memory_barrier_count,
const VkImageMemoryBarrier* image_memory_barriers);
void CmdVkSetScissor(uint32_t first_scissor, uint32_t scissor_count,
const VkRect2D* scissors) {
const size_t header_size =
xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D));
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkSetScissor,
header_size + sizeof(VkRect2D) * scissor_count));
auto& args = *reinterpret_cast<ArgsVkSetScissor*>(args_ptr);
args.first_scissor = first_scissor;
args.scissor_count = scissor_count;
std::memcpy(args_ptr + header_size, scissors,
sizeof(VkRect2D) * scissor_count);
}
void CmdVkSetViewport(uint32_t first_viewport, uint32_t viewport_count,
const VkViewport* viewports) {
const size_t header_size =
xe::align(sizeof(ArgsVkSetViewport), alignof(VkViewport));
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkSetViewport,
header_size + sizeof(VkViewport) * viewport_count));
auto& args = *reinterpret_cast<ArgsVkSetViewport*>(args_ptr);
args.first_viewport = first_viewport;
args.viewport_count = viewport_count;
std::memcpy(args_ptr + header_size, viewports,
sizeof(VkViewport) * viewport_count);
}
private:
enum class Command {
kVkBeginRenderPass,
kVkBindDescriptorSets,
kVkBindIndexBuffer,
kVkBindPipeline,
kVkCopyBuffer,
kVkDraw,
kVkDrawIndexed,
kVkEndRenderPass,
kVkPipelineBarrier,
kVkSetScissor,
kVkSetViewport,
};
struct CommandHeader {
@ -85,17 +212,58 @@ class DeferredCommandBuffer {
static constexpr size_t kCommandHeaderSizeElements =
(sizeof(CommandHeader) + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);
struct ArgsVkBeginRenderPass {
VkRenderPass render_pass;
VkFramebuffer framebuffer;
VkRect2D render_area;
uint32_t clear_value_count;
VkSubpassContents contents;
// Followed by aligned optional VkClearValue[].
static_assert(alignof(VkClearValue) <= alignof(uintmax_t));
};
struct ArgsVkBindDescriptorSets {
VkPipelineBindPoint pipeline_bind_point;
VkPipelineLayout layout;
uint32_t first_set;
uint32_t descriptor_set_count;
uint32_t dynamic_offset_count;
// Followed by aligned VkDescriptorSet[], optional uint32_t[].
static_assert(alignof(VkDescriptorSet) <= alignof(uintmax_t));
};
struct ArgsVkBindIndexBuffer {
VkBuffer buffer;
VkDeviceSize offset;
VkIndexType index_type;
};
struct ArgsVkBindPipeline {
VkPipelineBindPoint pipeline_bind_point;
VkPipeline pipeline;
};
struct ArgsVkCopyBuffer {
VkBuffer src_buffer;
VkBuffer dst_buffer;
uint32_t region_count;
// Followed by VkBufferCopy[].
// Followed by aligned VkBufferCopy[].
static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t));
};
struct ArgsVkDraw {
uint32_t vertex_count;
uint32_t instance_count;
uint32_t first_vertex;
uint32_t first_instance;
};
struct ArgsVkDrawIndexed {
uint32_t index_count;
uint32_t instance_count;
uint32_t first_index;
int32_t vertex_offset;
uint32_t first_instance;
};
struct ArgsVkPipelineBarrier {
@ -105,8 +273,25 @@ class DeferredCommandBuffer {
uint32_t memory_barrier_count;
uint32_t buffer_memory_barrier_count;
uint32_t image_memory_barrier_count;
// Followed by aligned VkMemoryBarrier[], VkBufferMemoryBarrier[],
// VkImageMemoryBarrier[].
// Followed by aligned optional VkMemoryBarrier[],
// optional VkBufferMemoryBarrier[], optional VkImageMemoryBarrier[].
static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t));
static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t));
static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t));
};
struct ArgsVkSetScissor {
uint32_t first_scissor;
uint32_t scissor_count;
// Followed by aligned VkRect2D[].
static_assert(alignof(VkRect2D) <= alignof(uintmax_t));
};
struct ArgsVkSetViewport {
uint32_t first_viewport;
uint32_t viewport_count;
// Followed by aligned VkViewport[].
static_assert(alignof(VkViewport) <= alignof(uintmax_t));
};
void* WriteCommand(Command command, size_t arguments_size_bytes);
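WriteCommand is only declared in this header; the trailing-array layouts above rely on its contract that command arguments start on a uintmax_t boundary. A rough sketch of what that could look like, assuming the stream is a std::vector<uintmax_t> and CommandHeader stores the argument size in uintmax_t elements (hypothetical members, not the actual definition from the .cc file; Command, CommandHeader and kCommandHeaderSizeElements come from the class above):

#include <cstdint>
#include <vector>

// Sketch: reserve the header plus `arguments_size_bytes` rounded up to whole
// uintmax_t elements, and return a pointer to the argument area. Since that
// area is uintmax_t-aligned, any trailing array with alignment
// <= alignof(uintmax_t) can be placed at an xe::align'ed offset inside it,
// which is exactly what the static_asserts in the Args structs check.
void* WriteCommandSketch(std::vector<uintmax_t>& command_stream,
                         Command command, size_t arguments_size_bytes) {
  size_t arguments_size_elements =
      (arguments_size_bytes + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);
  size_t offset = command_stream.size();
  command_stream.resize(offset + kCommandHeaderSizeElements +
                        arguments_size_elements);
  auto& header =
      *reinterpret_cast<CommandHeader*>(command_stream.data() + offset);
  header.command = command;
  header.arguments_size_elements = uint32_t(arguments_size_elements);
  return command_stream.data() + offset + kCommandHeaderSizeElements;
}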

View File

@ -9,15 +9,24 @@
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
#include <algorithm>
#include <cstdint>
#include <cstring>
#include <iterator>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h"
#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan_context.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
#include "xenia/ui/vulkan/vulkan_util.h"
@ -54,6 +63,16 @@ bool VulkanCommandProcessor::SetupContext() {
transient_descriptor_pool_uniform_buffers_ =
std::make_unique<ui::vulkan::TransientDescriptorPool>(
provider, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 32768, 32768);
// 16384 is bigger than any single uniform buffer that Xenia needs, but is the
// minimum maxUniformBufferRange, thus the safe minimum amount.
VkDeviceSize uniform_buffer_alignment = std::max(
provider.device_properties().limits.minUniformBufferOffsetAlignment,
VkDeviceSize(1));
uniform_buffer_pool_ = std::make_unique<ui::vulkan::VulkanUploadBufferPool>(
provider, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
xe::align(std::max(ui::GraphicsUploadBufferPool::kDefaultPageSize,
size_t(16384)),
size_t(uniform_buffer_alignment)));
VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info;
descriptor_set_layout_create_info.sType =
@ -162,6 +181,20 @@ bool VulkanCommandProcessor::SetupContext() {
return false;
}
render_target_cache_ =
std::make_unique<VulkanRenderTargetCache>(*this, *register_file_);
if (!render_target_cache_->Initialize()) {
XELOGE("Failed to initialize the render target cache");
return false;
}
pipeline_cache_ = std::make_unique<VulkanPipelineCache>(
*this, *register_file_, *render_target_cache_);
if (!pipeline_cache_->Initialize()) {
XELOGE("Failed to initialize the graphics pipeline cache");
return false;
}
// Shared memory and EDRAM common bindings.
VkDescriptorPoolSize descriptor_pool_sizes[1];
descriptor_pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
@ -229,6 +262,9 @@ bool VulkanCommandProcessor::SetupContext() {
// interlocks case.
dfn.vkUpdateDescriptorSets(device, 1, write_descriptor_sets, 0, nullptr);
// Just not to expose uninitialized memory.
std::memset(&system_constants_, 0, sizeof(system_constants_));
return true;
}
@ -244,6 +280,10 @@ void VulkanCommandProcessor::ShutdownContext() {
dfn.vkDestroyDescriptorPool, device,
shared_memory_and_edram_descriptor_pool_);
pipeline_cache_.reset();
render_target_cache_.reset();
shared_memory_.reset();
for (const auto& pipeline_layout_pair : pipeline_layouts_) {
@ -276,6 +316,7 @@ void VulkanCommandProcessor::ShutdownContext() {
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout,
device, descriptor_set_layout_empty_);
uniform_buffer_pool_.reset();
transient_descriptor_pool_uniform_buffers_.reset();
sparse_bind_wait_stage_mask_ = 0;
@ -325,6 +366,42 @@ void VulkanCommandProcessor::ShutdownContext() {
CommandProcessor::ShutdownContext();
}
void VulkanCommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
CommandProcessor::WriteRegister(index, value);
if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X &&
index <= XE_GPU_REG_SHADER_CONSTANT_511_W) {
if (frame_open_) {
uint32_t float_constant_index =
(index - XE_GPU_REG_SHADER_CONSTANT_000_X) >> 2;
if (float_constant_index >= 256) {
float_constant_index -= 256;
if (current_float_constant_map_pixel_[float_constant_index >> 6] &
(1ull << (float_constant_index & 63))) {
current_graphics_descriptor_set_values_up_to_date_ &=
~(uint32_t(1)
<< SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel);
}
} else {
if (current_float_constant_map_vertex_[float_constant_index >> 6] &
(1ull << (float_constant_index & 63))) {
current_graphics_descriptor_set_values_up_to_date_ &=
~(uint32_t(1)
<< SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex);
}
}
}
} else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 &&
index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) {
current_graphics_descriptor_set_values_up_to_date_ &= ~(
uint32_t(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants);
} else if (index >= XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 &&
index <= XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) {
current_graphics_descriptor_set_values_up_to_date_ &=
~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants);
}
}
void VulkanCommandProcessor::SparseBindBuffer(
VkBuffer buffer, uint32_t bind_count, const VkSparseMemoryBind* binds,
VkPipelineStageFlags wait_stage_mask) {
@ -356,17 +433,25 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
EndSubmission(true);
}
bool VulkanCommandProcessor::GetPipelineLayout(
uint32_t texture_count_pixel, uint32_t texture_count_vertex,
PipelineLayout& pipeline_layout_out) {
void VulkanCommandProcessor::EndRenderPass() {
assert_true(submission_open_);
if (current_render_pass_ == VK_NULL_HANDLE) {
return;
}
deferred_command_buffer_.CmdVkEndRenderPass();
current_render_pass_ = VK_NULL_HANDLE;
}
const VulkanPipelineCache::PipelineLayoutProvider*
VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel,
uint32_t texture_count_vertex) {
PipelineLayoutKey pipeline_layout_key;
pipeline_layout_key.texture_count_pixel = texture_count_pixel;
pipeline_layout_key.texture_count_vertex = texture_count_vertex;
{
auto it = pipeline_layouts_.find(pipeline_layout_key.key);
if (it != pipeline_layouts_.end()) {
pipeline_layout_out = it->second;
return true;
return &it->second;
}
}
@ -462,26 +547,28 @@ bool VulkanCommandProcessor::GetPipelineLayout(
VkDescriptorSetLayout
descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetCount];
descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetFetchConstants] =
descriptor_set_layout_fetch_bool_loop_constants_;
descriptor_set_layouts
[SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] =
descriptor_set_layout_float_constants_vertex_;
descriptor_set_layouts
[SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] =
descriptor_set_layout_float_constants_pixel_;
descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesPixel] =
descriptor_set_layout_textures_pixel;
descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesVertex] =
descriptor_set_layout_textures_vertex;
descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSystemConstants] =
descriptor_set_layout_system_constants_;
descriptor_set_layouts
[SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] =
descriptor_set_layout_fetch_bool_loop_constants_;
// Immutable layouts.
descriptor_set_layouts
[SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] =
descriptor_set_layout_shared_memory_and_edram_;
descriptor_set_layouts
[SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] =
descriptor_set_layout_fetch_bool_loop_constants_;
descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSystemConstants] =
descriptor_set_layout_system_constants_;
descriptor_set_layouts
[SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] =
descriptor_set_layout_float_constants_pixel_;
descriptor_set_layouts
[SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] =
descriptor_set_layout_float_constants_vertex_;
descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetFetchConstants] =
descriptor_set_layout_fetch_bool_loop_constants_;
// Mutable layouts.
descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesVertex] =
descriptor_set_layout_textures_vertex;
descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesPixel] =
descriptor_set_layout_textures_pixel;
VkPipelineLayoutCreateInfo pipeline_layout_create_info;
pipeline_layout_create_info.sType =
@ -508,16 +595,18 @@ bool VulkanCommandProcessor::GetPipelineLayout(
descriptor_set_layout_textures_pixel;
pipeline_layout_entry.descriptor_set_layout_textures_vertex_ref =
descriptor_set_layout_textures_vertex;
pipeline_layouts_.emplace(pipeline_layout_key.key, pipeline_layout_entry);
pipeline_layout_out = pipeline_layout_entry;
return true;
auto emplaced_pair =
pipeline_layouts_.emplace(pipeline_layout_key.key, pipeline_layout_entry);
// unordered_map insertion doesn't invalidate element references.
return &emplaced_pair.first->second;
}
Shader* VulkanCommandProcessor::LoadShader(xenos::ShaderType shader_type,
uint32_t guest_address,
const uint32_t* host_address,
uint32_t dword_count) {
return nullptr;
return pipeline_cache_->LoadShader(shader_type, guest_address, host_address,
dword_count);
}
bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
@ -530,9 +619,135 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
BeginSubmission(true);
auto vertex_shader = static_cast<VulkanShader*>(active_vertex_shader());
if (!vertex_shader) {
// Always need a vertex shader.
return false;
}
// TODO(Triang3l): Get a pixel shader.
VulkanShader* pixel_shader = nullptr;
VulkanRenderTargetCache::FramebufferKey framebuffer_key;
if (!render_target_cache_->UpdateRenderTargets(framebuffer_key)) {
return false;
}
VkFramebuffer framebuffer =
render_target_cache_->GetFramebuffer(framebuffer_key);
if (framebuffer == VK_NULL_HANDLE) {
return false;
}
VkRenderPass render_pass =
render_target_cache_->GetRenderPass(framebuffer_key.render_pass_key);
if (render_pass == VK_NULL_HANDLE) {
return false;
}
// Update the graphics pipeline, and if the new graphics pipeline has a
// different layout, invalidate incompatible descriptor sets before updating
// current_graphics_pipeline_layout_.
VkPipeline pipeline;
const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider;
if (!pipeline_cache_->ConfigurePipeline(vertex_shader, pixel_shader,
framebuffer_key.render_pass_key,
pipeline, pipeline_layout_provider)) {
return false;
}
deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS,
pipeline);
auto pipeline_layout =
static_cast<const PipelineLayout*>(pipeline_layout_provider);
if (current_graphics_pipeline_layout_ != pipeline_layout) {
if (current_graphics_pipeline_layout_) {
// Keep descriptor set layouts for which the new pipeline layout is
// compatible with the previous one (pipeline layouts are compatible for
// set N if set layouts 0 through N are compatible).
uint32_t descriptor_sets_kept =
uint32_t(SpirvShaderTranslator::kDescriptorSetCount);
if (current_graphics_pipeline_layout_
->descriptor_set_layout_textures_vertex_ref !=
pipeline_layout->descriptor_set_layout_textures_vertex_ref) {
descriptor_sets_kept = std::min(
descriptor_sets_kept,
uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesVertex));
}
if (current_graphics_pipeline_layout_
->descriptor_set_layout_textures_pixel_ref !=
pipeline_layout->descriptor_set_layout_textures_pixel_ref) {
descriptor_sets_kept = std::min(
descriptor_sets_kept,
uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesPixel));
}
} else {
// No or unknown pipeline layout previously bound - all bindings are in an
// indeterminate state.
current_graphics_descriptor_sets_bound_up_to_date_ = 0;
}
current_graphics_pipeline_layout_ = pipeline_layout;
}
// Update fixed-function dynamic state.
UpdateFixedFunctionState();
bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base;
// Actually draw.
// Update system constants before uploading them.
UpdateSystemConstantValues(indexed ? index_buffer_info->endianness
: xenos::Endian::kNone);
// Update uniform buffers and descriptor sets after binding the pipeline with
// the new layout.
if (!UpdateBindings(vertex_shader, pixel_shader)) {
return false;
}
const RegisterFile& regs = *register_file_;
// Ensure vertex buffers are resident.
// TODO(Triang3l): Cache residency for ranges in a way similar to how texture
// validity is tracked.
uint64_t vertex_buffers_resident[2] = {};
for (const Shader::VertexBinding& vertex_binding :
vertex_shader->vertex_bindings()) {
uint32_t vfetch_index = vertex_binding.fetch_constant;
if (vertex_buffers_resident[vfetch_index >> 6] &
(uint64_t(1) << (vfetch_index & 63))) {
continue;
}
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2);
switch (vfetch_constant.type) {
case xenos::FetchConstantType::kVertex:
break;
case xenos::FetchConstantType::kInvalidVertex:
if (cvars::gpu_allow_invalid_fetch_constants) {
break;
}
XELOGW(
"Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! "
"This "
"is incorrect behavior, but you can try bypassing this by "
"launching Xenia with --gpu_allow_invalid_fetch_constants=true.",
vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
return false;
default:
XELOGW(
"Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!",
vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
return false;
}
if (!shared_memory_->RequestRange(vfetch_constant.address << 2,
vfetch_constant.size << 2)) {
XELOGE(
"Failed to request vertex buffer at 0x{:08X} (size {}) in the shared "
"memory",
vfetch_constant.address << 2, vfetch_constant.size << 2);
return false;
}
vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1)
<< (vfetch_index & 63);
}
// Set up the geometry.
if (indexed) {
uint32_t index_size =
index_buffer_info->format == xenos::IndexFormat::kInt32
@ -557,6 +772,37 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
}
shared_memory_->Use(VulkanSharedMemory::Usage::kRead);
// After all commands that may dispatch or copy, enter the render pass before
// drawing.
if (current_render_pass_ != render_pass ||
current_framebuffer_ != framebuffer) {
if (current_render_pass_ != VK_NULL_HANDLE) {
deferred_command_buffer_.CmdVkEndRenderPass();
}
current_render_pass_ = render_pass;
current_framebuffer_ = framebuffer;
VkRenderPassBeginInfo render_pass_begin_info;
render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
render_pass_begin_info.pNext = nullptr;
render_pass_begin_info.renderPass = render_pass;
render_pass_begin_info.framebuffer = framebuffer;
render_pass_begin_info.renderArea.offset.x = 0;
render_pass_begin_info.renderArea.offset.y = 0;
render_pass_begin_info.renderArea.extent.width = 1280;
render_pass_begin_info.renderArea.extent.height = 720;
render_pass_begin_info.clearValueCount = 0;
render_pass_begin_info.pClearValues = nullptr;
deferred_command_buffer_.CmdVkBeginRenderPass(&render_pass_begin_info,
VK_SUBPASS_CONTENTS_INLINE);
}
// Draw.
if (indexed) {
deferred_command_buffer_.CmdVkDrawIndexed(index_count, 1, 0, 0, 0);
} else {
deferred_command_buffer_.CmdVkDraw(index_count, 1, 0, 0);
}
return true;
}
@ -659,9 +905,6 @@ void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) {
command_buffers_submitted_.pop_front();
}
// Reclaim descriptor pools.
transient_descriptor_pool_uniform_buffers_->Reclaim(submission_completed_);
shared_memory_->CompletedSubmissionUpdated();
}
@ -705,13 +948,41 @@ void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) {
submission_open_ = true;
// Start a new deferred command buffer - will submit it to the real one in
// the end of the submission (when async pipeline state object creation
// requests are fulfilled).
// the end of the submission (when async pipeline object creation requests
// are fulfilled).
deferred_command_buffer_.Reset();
// Reset cached state of the command buffer.
ff_viewport_update_needed_ = true;
ff_scissor_update_needed_ = true;
current_render_pass_ = VK_NULL_HANDLE;
current_framebuffer_ = VK_NULL_HANDLE;
current_graphics_pipeline_ = VK_NULL_HANDLE;
current_graphics_pipeline_layout_ = nullptr;
current_graphics_descriptor_sets_bound_up_to_date_ = 0;
}
if (is_opening_frame) {
frame_open_ = true;
// Reset bindings that depend on transient data.
std::memset(current_float_constant_map_vertex_, 0,
sizeof(current_float_constant_map_vertex_));
std::memset(current_float_constant_map_pixel_, 0,
sizeof(current_float_constant_map_pixel_));
std::memset(current_graphics_descriptor_sets_, 0,
sizeof(current_graphics_descriptor_sets_));
current_graphics_descriptor_sets_
[SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] =
shared_memory_and_edram_descriptor_set_;
current_graphics_descriptor_set_values_up_to_date_ =
uint32_t(1)
<< SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram;
// Reclaim pool pages - no need to do this every small submission since some
// may be reused.
transient_descriptor_pool_uniform_buffers_->Reclaim(frame_completed_);
uniform_buffer_pool_->Reclaim(frame_completed_);
}
}
@ -784,8 +1055,12 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
bool is_closing_frame = is_swap && frame_open_;
if (submission_open_) {
EndRenderPass();
shared_memory_->EndSubmission();
uniform_buffer_pool_->FlushWrites();
// Submit sparse binds earlier, before executing the deferred command
// buffer, to reduce latency.
if (!sparse_memory_binds_.empty()) {
@ -910,13 +1185,30 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
if (cache_clear_requested_ && AwaitAllQueueOperationsCompletion()) {
cache_clear_requested_ = false;
transient_descriptor_pool_uniform_buffers_->ClearCache();
assert_true(command_buffers_submitted_.empty());
for (const CommandBuffer& command_buffer : command_buffers_writable_) {
dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr);
}
command_buffers_writable_.clear();
uniform_buffer_pool_->ClearCache();
transient_descriptor_pool_uniform_buffers_->ClearCache();
pipeline_cache_->ClearCache();
render_target_cache_->ClearCache();
for (const auto& pipeline_layout_pair : pipeline_layouts_) {
dfn.vkDestroyPipelineLayout(
device, pipeline_layout_pair.second.pipeline_layout, nullptr);
}
pipeline_layouts_.clear();
for (const auto& descriptor_set_layout_pair :
descriptor_set_layouts_textures_) {
dfn.vkDestroyDescriptorSetLayout(
device, descriptor_set_layout_pair.second, nullptr);
}
descriptor_set_layouts_textures_.clear();
}
}
@ -936,6 +1228,441 @@ VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags()
return stages;
}
void VulkanCommandProcessor::UpdateFixedFunctionState() {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
const RegisterFile& regs = *register_file_;
// Window parameters.
// http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
// See r200UpdateWindow:
// https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
auto pa_sc_window_offset = regs.Get<reg::PA_SC_WINDOW_OFFSET>();
uint32_t pixel_size_x = 1, pixel_size_y = 1;
// Viewport.
// PA_CL_VTE_CNTL contains whether offsets and scales are enabled.
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
// In games, either all are enabled (for regular drawing) or none are (for
// rectangle lists usually).
//
// If scale/offset is enabled, the Xenos shader is writing (neglecting W
// division) position in the NDC (-1, -1, dx_clip_space_def - 1) -> (1, 1, 1)
// box. If it's not, the position is in screen space. Since we can only use
// the NDC in PC APIs, we use a viewport of the largest possible size, and
// divide the position by it in translated shaders.
//
// TODO(Triang3l): Move all of this to draw_util.
// TODO(Triang3l): Limit the viewport if exceeding the device limit; move to
// NDC scale/offset constants.
auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
float viewport_scale_x =
pa_cl_vte_cntl.vport_x_scale_ena
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32)
: 4096.0f;
float viewport_scale_y =
pa_cl_vte_cntl.vport_y_scale_ena
? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32)
: 4096.0f;
float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena
? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32
: 1.0f;
float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
: std::abs(viewport_scale_x);
float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
: std::abs(viewport_scale_y);
float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena
? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32
: 0.0f;
if (regs.Get<reg::PA_SU_SC_MODE_CNTL>().vtx_window_offset_enable) {
viewport_offset_x += float(pa_sc_window_offset.window_x_offset);
viewport_offset_y += float(pa_sc_window_offset.window_y_offset);
}
VkViewport viewport;
viewport.x = (viewport_offset_x - viewport_scale_x) * float(pixel_size_x);
viewport.y = (viewport_offset_y - viewport_scale_y) * float(pixel_size_y);
viewport.width = viewport_scale_x * 2.0f * float(pixel_size_x);
viewport.height = viewport_scale_y * 2.0f * float(pixel_size_y);
viewport.minDepth = std::min(std::max(viewport_offset_z, 0.0f), 1.0f);
viewport.maxDepth =
std::min(std::max(viewport_offset_z + viewport_scale_z, 0.0f), 1.0f);
ff_viewport_update_needed_ |= ff_viewport_.x != viewport.x;
ff_viewport_update_needed_ |= ff_viewport_.y != viewport.y;
ff_viewport_update_needed_ |= ff_viewport_.width != viewport.width;
ff_viewport_update_needed_ |= ff_viewport_.height != viewport.height;
ff_viewport_update_needed_ |= ff_viewport_.minDepth != viewport.minDepth;
ff_viewport_update_needed_ |= ff_viewport_.maxDepth != viewport.maxDepth;
if (ff_viewport_update_needed_) {
ff_viewport_ = viewport;
deferred_command_buffer_.CmdVkSetViewport(0, 1, &viewport);
ff_viewport_update_needed_ = false;
}
// Scissor.
// TODO(Triang3l): Move all of this to draw_util.
// TODO(Triang3l): Limit the scissor if exceeding the device limit.
auto pa_sc_window_scissor_tl = regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>();
auto pa_sc_window_scissor_br = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>();
VkRect2D scissor;
scissor.offset.x = int32_t(pa_sc_window_scissor_tl.tl_x);
scissor.offset.y = int32_t(pa_sc_window_scissor_tl.tl_y);
int32_t scissor_br_x =
std::max(int32_t(pa_sc_window_scissor_br.br_x), scissor.offset.x);
int32_t scissor_br_y =
std::max(int32_t(pa_sc_window_scissor_br.br_y), scissor.offset.y);
if (!pa_sc_window_scissor_tl.window_offset_disable) {
scissor.offset.x = std::max(
scissor.offset.x + pa_sc_window_offset.window_x_offset, int32_t(0));
scissor.offset.y = std::max(
scissor.offset.y + pa_sc_window_offset.window_y_offset, int32_t(0));
scissor_br_x = std::max(scissor_br_x + pa_sc_window_offset.window_x_offset,
int32_t(0));
scissor_br_y = std::max(scissor_br_y + pa_sc_window_offset.window_y_offset,
int32_t(0));
}
scissor.extent.width = uint32_t(scissor_br_x - scissor.offset.x);
scissor.extent.height = uint32_t(scissor_br_y - scissor.offset.y);
scissor.offset.x *= pixel_size_x;
scissor.offset.y *= pixel_size_y;
scissor.extent.width *= pixel_size_x;
scissor.extent.height *= pixel_size_y;
ff_scissor_update_needed_ |= ff_scissor_.offset.x != scissor.offset.x;
ff_scissor_update_needed_ |= ff_scissor_.offset.y != scissor.offset.y;
ff_scissor_update_needed_ |= ff_scissor_.extent.width != scissor.extent.width;
ff_scissor_update_needed_ |=
ff_scissor_.extent.height != scissor.extent.height;
if (ff_scissor_update_needed_) {
ff_scissor_ = scissor;
deferred_command_buffer_.CmdVkSetScissor(0, 1, &scissor);
ff_scissor_update_needed_ = false;
}
}
void VulkanCommandProcessor::UpdateSystemConstantValues(
xenos::Endian index_endian) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
const RegisterFile& regs = *register_file_;
int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32);
bool dirty = false;
// Index or tessellation edge factor buffer endianness.
dirty |= system_constants_.vertex_index_endian != index_endian;
system_constants_.vertex_index_endian = index_endian;
// Vertex index offset.
dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
system_constants_.vertex_base_index = vgt_indx_offset;
if (dirty) {
current_graphics_descriptor_set_values_up_to_date_ &=
~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants);
}
}
bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
const VulkanShader* pixel_shader) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
const RegisterFile& regs = *register_file_;
// Invalidate descriptors for changed data.
// These are the constant base addresses/ranges for shaders.
  // We have these hardcoded right now because nothing seems to differ on the
  // Xbox 360 (however, OpenGL ES on Adreno 200 on Android has different
  // ranges).
assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
// Check if the float constant layout is still the same and get the counts.
const Shader::ConstantRegisterMap& float_constant_map_vertex =
vertex_shader->constant_register_map();
uint32_t float_constant_count_vertex = float_constant_map_vertex.float_count;
for (uint32_t i = 0; i < 4; ++i) {
if (current_float_constant_map_vertex_[i] !=
float_constant_map_vertex.float_bitmap[i]) {
current_float_constant_map_vertex_[i] =
float_constant_map_vertex.float_bitmap[i];
      // If the shader uses no float constants at all, any buffer can be
      // reused for them, so no invalidation is needed.
if (float_constant_count_vertex) {
current_graphics_descriptor_set_values_up_to_date_ &=
~(
uint32_t(1)
<< SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex);
}
}
}
uint32_t float_constant_count_pixel = 0;
if (pixel_shader != nullptr) {
const Shader::ConstantRegisterMap& float_constant_map_pixel =
pixel_shader->constant_register_map();
float_constant_count_pixel = float_constant_map_pixel.float_count;
for (uint32_t i = 0; i < 4; ++i) {
if (current_float_constant_map_pixel_[i] !=
float_constant_map_pixel.float_bitmap[i]) {
current_float_constant_map_pixel_[i] =
float_constant_map_pixel.float_bitmap[i];
if (float_constant_count_pixel) {
current_graphics_descriptor_set_values_up_to_date_ &=
~(uint32_t(1)
<< SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel);
}
}
}
} else {
std::memset(current_float_constant_map_pixel_, 0,
sizeof(current_float_constant_map_pixel_));
}
// Make sure new descriptor sets are bound to the command buffer.
current_graphics_descriptor_sets_bound_up_to_date_ &=
current_graphics_descriptor_set_values_up_to_date_;
// Write the new descriptor sets.
VkWriteDescriptorSet
write_descriptor_sets[SpirvShaderTranslator::kDescriptorSetCount];
uint32_t write_descriptor_set_count = 0;
uint32_t write_descriptor_set_bits = 0;
assert_not_zero(
current_graphics_descriptor_set_values_up_to_date_ &
(uint32_t(1)
<< SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram));
VkDescriptorBufferInfo buffer_info_bool_loop_constants;
if (!(current_graphics_descriptor_set_values_up_to_date_ &
(uint32_t(1)
<< SpirvShaderTranslator::kDescriptorSetBoolLoopConstants))) {
VkWriteDescriptorSet& write_bool_loop_constants =
write_descriptor_sets[write_descriptor_set_count++];
constexpr size_t kBoolLoopConstantsSize = sizeof(uint32_t) * (8 + 32);
uint8_t* mapping_bool_loop_constants = WriteUniformBufferBinding(
kBoolLoopConstantsSize,
descriptor_set_layout_fetch_bool_loop_constants_,
buffer_info_bool_loop_constants, write_bool_loop_constants);
if (!mapping_bool_loop_constants) {
return false;
}
std::memcpy(mapping_bool_loop_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
kBoolLoopConstantsSize);
write_descriptor_set_bits |=
uint32_t(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants;
current_graphics_descriptor_sets_
[SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] =
write_bool_loop_constants.dstSet;
}
VkDescriptorBufferInfo buffer_info_system_constants;
if (!(current_graphics_descriptor_set_values_up_to_date_ &
(uint32_t(1)
<< SpirvShaderTranslator::kDescriptorSetSystemConstants))) {
VkWriteDescriptorSet& write_system_constants =
write_descriptor_sets[write_descriptor_set_count++];
uint8_t* mapping_system_constants = WriteUniformBufferBinding(
sizeof(SpirvShaderTranslator::SystemConstants),
descriptor_set_layout_system_constants_, buffer_info_system_constants,
write_system_constants);
if (!mapping_system_constants) {
return false;
}
std::memcpy(mapping_system_constants, &system_constants_,
sizeof(SpirvShaderTranslator::SystemConstants));
write_descriptor_set_bits |=
uint32_t(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants;
current_graphics_descriptor_sets_
[SpirvShaderTranslator::kDescriptorSetSystemConstants] =
write_system_constants.dstSet;
}
VkDescriptorBufferInfo buffer_info_float_constant_pixel;
if (!(current_graphics_descriptor_set_values_up_to_date_ &
(uint32_t(1)
<< SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel))) {
// Even if the shader doesn't need any float constants, a valid binding must
// still be provided (the pipeline layout always has float constants, for
// both the vertex shader and the pixel shader), so if the first draw in the
// frame doesn't have float constants at all, still allocate an empty
// buffer.
VkWriteDescriptorSet& write_float_constants_pixel =
write_descriptor_sets[write_descriptor_set_count++];
uint8_t* mapping_float_constants_pixel = WriteUniformBufferBinding(
sizeof(float) * 4 * std::max(float_constant_count_pixel, uint32_t(1)),
descriptor_set_layout_float_constants_pixel_,
buffer_info_float_constant_pixel, write_float_constants_pixel);
if (!mapping_float_constants_pixel) {
return false;
}
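    // The constants are tightly packed in bitmap order rather than stored at
    // their absolute register indices - illustratively, if the pixel shader
    // uses only c3 and c10, just two float4 values are written, c3 at offset
    // 0 and c10 at offset 16; the translated shader is expected to index the
    // buffer using the same constant usage bitmap.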
for (uint32_t i = 0; i < 4; ++i) {
uint64_t float_constant_map_entry = current_float_constant_map_pixel_[i];
uint32_t float_constant_index;
while (xe::bit_scan_forward(float_constant_map_entry,
&float_constant_index)) {
float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(mapping_float_constants_pixel,
&regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
(float_constant_index << 2)]
.f32,
sizeof(float) * 4);
mapping_float_constants_pixel += sizeof(float) * 4;
}
}
write_descriptor_set_bits |=
uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel;
current_graphics_descriptor_sets_
[SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] =
write_float_constants_pixel.dstSet;
}
VkDescriptorBufferInfo buffer_info_float_constant_vertex;
if (!(current_graphics_descriptor_set_values_up_to_date_ &
(uint32_t(1)
<< SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex))) {
VkWriteDescriptorSet& write_float_constants_vertex =
write_descriptor_sets[write_descriptor_set_count++];
uint8_t* mapping_float_constants_vertex = WriteUniformBufferBinding(
sizeof(float) * 4 * std::max(float_constant_count_vertex, uint32_t(1)),
descriptor_set_layout_float_constants_vertex_,
buffer_info_float_constant_vertex, write_float_constants_vertex);
if (!mapping_float_constants_vertex) {
return false;
}
for (uint32_t i = 0; i < 4; ++i) {
uint64_t float_constant_map_entry = current_float_constant_map_vertex_[i];
uint32_t float_constant_index;
while (xe::bit_scan_forward(float_constant_map_entry,
&float_constant_index)) {
float_constant_map_entry &= ~(1ull << float_constant_index);
std::memcpy(mapping_float_constants_vertex,
&regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
(float_constant_index << 2)]
.f32,
sizeof(float) * 4);
mapping_float_constants_vertex += sizeof(float) * 4;
}
}
write_descriptor_set_bits |=
uint32_t(1)
<< SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex;
current_graphics_descriptor_sets_
[SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] =
write_float_constants_vertex.dstSet;
}
VkDescriptorBufferInfo buffer_info_fetch_constants;
if (!(current_graphics_descriptor_set_values_up_to_date_ &
(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants))) {
VkWriteDescriptorSet& write_fetch_constants =
write_descriptor_sets[write_descriptor_set_count++];
constexpr size_t kFetchConstantsSize = sizeof(uint32_t) * 6 * 32;
uint8_t* mapping_fetch_constants = WriteUniformBufferBinding(
kFetchConstantsSize, descriptor_set_layout_fetch_bool_loop_constants_,
buffer_info_fetch_constants, write_fetch_constants);
if (!mapping_fetch_constants) {
return false;
}
std::memcpy(mapping_fetch_constants,
&regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
kFetchConstantsSize);
write_descriptor_set_bits |=
uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants;
current_graphics_descriptor_sets_
[SpirvShaderTranslator::kDescriptorSetFetchConstants] =
write_fetch_constants.dstSet;
}
if (write_descriptor_set_count) {
const ui::vulkan::VulkanProvider& provider =
GetVulkanContext().GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
dfn.vkUpdateDescriptorSets(device, write_descriptor_set_count,
write_descriptor_sets, 0, nullptr);
}
// Only make valid if written successfully.
current_graphics_descriptor_set_values_up_to_date_ |=
write_descriptor_set_bits;
// Bind the new descriptor sets.
uint32_t descriptor_sets_needed =
(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetCount) - 1;
if (current_graphics_pipeline_layout_
->descriptor_set_layout_textures_vertex_ref ==
descriptor_set_layout_empty_) {
descriptor_sets_needed &=
~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex);
}
if (current_graphics_pipeline_layout_
->descriptor_set_layout_textures_pixel_ref ==
descriptor_set_layout_empty_) {
descriptor_sets_needed &=
~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel);
}
uint32_t descriptor_sets_remaining =
descriptor_sets_needed &
~current_graphics_descriptor_sets_bound_up_to_date_;
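  // Bind contiguous runs of out-of-date descriptor sets with a single call
  // each - illustratively, if sets 2, 3 and 5 need rebinding, the loop below
  // finds the start of a run with a forward bit scan and its end with tzcnt
  // of the inverted mask, issuing one CmdVkBindDescriptorSets call for [2, 3]
  // and another for [5].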
uint32_t descriptor_set_index;
while (
xe::bit_scan_forward(descriptor_sets_remaining, &descriptor_set_index)) {
uint32_t descriptor_set_mask_tzcnt =
xe::tzcnt(~(descriptor_sets_remaining |
((uint32_t(1) << descriptor_set_index) - 1)));
// TODO(Triang3l): Bind to compute for rectangle list emulation without
// geometry shaders.
deferred_command_buffer_.CmdVkBindDescriptorSets(
VK_PIPELINE_BIND_POINT_GRAPHICS,
current_graphics_pipeline_layout_->pipeline_layout,
descriptor_set_index, descriptor_set_mask_tzcnt - descriptor_set_index,
current_graphics_descriptor_sets_ + descriptor_set_index, 0, nullptr);
if (descriptor_set_mask_tzcnt >= 32) {
break;
}
descriptor_sets_remaining &=
~((uint32_t(1) << descriptor_set_mask_tzcnt) - 1);
}
current_graphics_descriptor_sets_bound_up_to_date_ |= descriptor_sets_needed;
return true;
}
uint8_t* VulkanCommandProcessor::WriteUniformBufferBinding(
size_t size, VkDescriptorSetLayout descriptor_set_layout,
VkDescriptorBufferInfo& descriptor_buffer_info_out,
VkWriteDescriptorSet& write_descriptor_set_out) {
VkDescriptorSet descriptor_set =
transient_descriptor_pool_uniform_buffers_->Request(
frame_current_, descriptor_set_layout, 1);
if (descriptor_set == VK_NULL_HANDLE) {
return nullptr;
}
const ui::vulkan::VulkanProvider& provider =
GetVulkanContext().GetVulkanProvider();
uint8_t* mapping = uniform_buffer_pool_->Request(
frame_current_, size,
size_t(
provider.device_properties().limits.minUniformBufferOffsetAlignment),
descriptor_buffer_info_out.buffer, descriptor_buffer_info_out.offset);
if (!mapping) {
    return nullptr;
}
descriptor_buffer_info_out.range = VkDeviceSize(size);
write_descriptor_set_out.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
write_descriptor_set_out.pNext = nullptr;
write_descriptor_set_out.dstSet = descriptor_set;
write_descriptor_set_out.dstBinding = 0;
write_descriptor_set_out.dstArrayElement = 0;
write_descriptor_set_out.descriptorCount = 1;
write_descriptor_set_out.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
write_descriptor_set_out.pImageInfo = nullptr;
write_descriptor_set_out.pBufferInfo = &descriptor_buffer_info_out;
write_descriptor_set_out.pTexelBufferView = nullptr;
return mapping;
}
} // namespace vulkan
} // namespace gpu
} // namespace xe

View File

@@ -10,6 +10,7 @@
#ifndef XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_
#define XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_
#include <climits>
#include <cstdint>
#include <deque>
#include <memory>
@@ -18,13 +19,18 @@
#include <vector>
#include "xenia/gpu/command_processor.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
#include "xenia/gpu/vulkan/vulkan_graphics_system.h"
#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h"
#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
#include "xenia/gpu/xenos.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/ui/vulkan/transient_descriptor_pool.h"
#include "xenia/ui/vulkan/vulkan_context.h"
#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
namespace xe {
namespace gpu {
@@ -67,19 +73,21 @@ class VulkanCommandProcessor : public CommandProcessor {
const VkSparseMemoryBind* binds,
VkPipelineStageFlags wait_stage_mask);
struct PipelineLayout {
VkPipelineLayout pipeline_layout;
VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref;
VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref;
};
bool GetPipelineLayout(uint32_t texture_count_pixel,
uint32_t texture_count_vertex,
PipelineLayout& pipeline_layout_out);
// Must be called before doing anything outside the render pass scope,
// including adding pipeline barriers that are not a part of the render pass
// scope. Submission must be open.
void EndRenderPass();
// The returned reference is valid until a cache clear.
const VulkanPipelineCache::PipelineLayoutProvider* GetPipelineLayout(
uint32_t texture_count_pixel, uint32_t texture_count_vertex);
protected:
bool SetupContext() override;
void ShutdownContext() override;
void WriteRegister(uint32_t index, uint32_t value) override;
void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width,
uint32_t frontbuffer_height) override;
@@ -95,6 +103,49 @@ class VulkanCommandProcessor : public CommandProcessor {
void InitializeTrace() override;
private:
struct CommandBuffer {
VkCommandPool pool;
VkCommandBuffer buffer;
};
struct SparseBufferBind {
VkBuffer buffer;
size_t bind_offset;
uint32_t bind_count;
};
union TextureDescriptorSetLayoutKey {
struct {
uint32_t is_vertex : 1;
// For 0, use descriptor_set_layout_empty_ instead as these are owning
// references.
uint32_t texture_count : 31;
};
uint32_t key = 0;
};
static_assert(sizeof(TextureDescriptorSetLayoutKey) == sizeof(uint32_t));
union PipelineLayoutKey {
struct {
// Pixel textures in the low bits since those are varied much more
// commonly.
uint32_t texture_count_pixel : 16;
uint32_t texture_count_vertex : 16;
};
uint32_t key = 0;
};
static_assert(sizeof(PipelineLayoutKey) == sizeof(uint32_t));
class PipelineLayout : public VulkanPipelineCache::PipelineLayoutProvider {
public:
VkPipelineLayout GetPipelineLayout() const override {
return pipeline_layout;
}
VkPipelineLayout pipeline_layout;
VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref;
VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref;
};
// BeginSubmission and EndSubmission may be called at any time. If there's an
// open non-frame submission, BeginSubmission(true) will promote it to a
// frame. EndSubmission(true) will close the frame no matter whether the
@@ -119,6 +170,18 @@ class VulkanCommandProcessor : public CommandProcessor {
VkShaderStageFlags GetGuestVertexShaderStageFlags() const;
void UpdateFixedFunctionState();
void UpdateSystemConstantValues(xenos::Endian index_endian);
bool UpdateBindings(const VulkanShader* vertex_shader,
const VulkanShader* pixel_shader);
// Allocates a descriptor, space in the uniform buffer pool, and fills the
// VkWriteDescriptorSet structure and VkDescriptorBufferInfo referenced by it.
// Returns null in case of failure.
uint8_t* WriteUniformBufferBinding(
size_t size, VkDescriptorSetLayout descriptor_set_layout,
VkDescriptorBufferInfo& descriptor_buffer_info_out,
VkWriteDescriptorSet& write_descriptor_set_out);
bool cache_clear_requested_ = false;
std::vector<VkFence> fences_free_;
@@ -143,20 +206,11 @@
// Submission indices of frames that have already been submitted.
uint64_t closed_frame_submissions_[kMaxFramesInFlight] = {};
struct CommandBuffer {
VkCommandPool pool;
VkCommandBuffer buffer;
};
std::vector<CommandBuffer> command_buffers_writable_;
std::deque<std::pair<CommandBuffer, uint64_t>> command_buffers_submitted_;
DeferredCommandBuffer deferred_command_buffer_;
std::vector<VkSparseMemoryBind> sparse_memory_binds_;
struct SparseBufferBind {
VkBuffer buffer;
size_t bind_offset;
uint32_t bind_count;
};
std::vector<SparseBufferBind> sparse_buffer_binds_;
// SparseBufferBind converted to VkSparseBufferMemoryBindInfo to this buffer
// on submission (because pBinds should point to a place in std::vector, but
@@ -166,6 +220,7 @@ class VulkanCommandProcessor : public CommandProcessor {
std::unique_ptr<ui::vulkan::TransientDescriptorPool>
transient_descriptor_pool_uniform_buffers_;
std::unique_ptr<ui::vulkan::VulkanUploadBufferPool> uniform_buffer_pool_;
// Descriptor set layouts used by different shaders.
VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE;
@@ -180,34 +235,66 @@ class VulkanCommandProcessor : public CommandProcessor {
VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ =
VK_NULL_HANDLE;
union TextureDescriptorSetLayoutKey {
struct {
uint32_t is_vertex : 1;
// For 0, use descriptor_set_layout_empty_ instead as these are owning
// references.
uint32_t texture_count : 31;
};
uint32_t key = 0;
};
// TextureDescriptorSetLayoutKey::key -> VkDescriptorSetLayout.
// Layouts are referenced by pipeline_layouts_.
std::unordered_map<uint32_t, VkDescriptorSetLayout>
descriptor_set_layouts_textures_;
union PipelineLayoutKey {
struct {
// Pixel textures in the low bits since those are varied much more
// commonly.
uint32_t texture_count_pixel : 16;
uint32_t texture_count_vertex : 16;
};
uint32_t key = 0;
};
// PipelineLayoutKey::key -> PipelineLayout.
// Layouts are referenced by VulkanPipelineCache.
std::unordered_map<uint32_t, PipelineLayout> pipeline_layouts_;
std::unique_ptr<VulkanSharedMemory> shared_memory_;
std::unique_ptr<VulkanPipelineCache> pipeline_cache_;
std::unique_ptr<VulkanRenderTargetCache> render_target_cache_;
VkDescriptorPool shared_memory_and_edram_descriptor_pool_ = VK_NULL_HANDLE;
VkDescriptorSet shared_memory_and_edram_descriptor_set_;
// The current fixed-function drawing state.
VkViewport ff_viewport_;
VkRect2D ff_scissor_;
bool ff_viewport_update_needed_;
bool ff_scissor_update_needed_;
// Cache render pass currently started in the command buffer with framebuffer.
VkRenderPass current_render_pass_;
VkFramebuffer current_framebuffer_;
// Cache graphics pipeline currently bound to the command buffer.
VkPipeline current_graphics_pipeline_;
// Pipeline layout of the current graphics pipeline.
const PipelineLayout* current_graphics_pipeline_layout_;
VkDescriptorSet current_graphics_descriptor_sets_
[SpirvShaderTranslator::kDescriptorSetCount];
// Whether descriptor sets in current_graphics_descriptor_sets_ point to
// up-to-date data.
uint32_t current_graphics_descriptor_set_values_up_to_date_;
  // Whether the descriptor sets currently bound to the command buffer are up
  // to date - only the low bits for the descriptor set layouts that remained
  // the same are kept when changing the pipeline layout. May be out of sync
  // with current_graphics_descriptor_set_values_up_to_date_, but must be made
  // a subset of it once that becomes important; bits for non-existent
  // descriptor set layouts may also be set, but need to be ignored when they
  // start to matter.
uint32_t current_graphics_descriptor_sets_bound_up_to_date_;
static_assert(
SpirvShaderTranslator::kDescriptorSetCount <=
sizeof(current_graphics_descriptor_set_values_up_to_date_) * CHAR_BIT,
"Bit fields storing descriptor set validity must be large enough");
static_assert(
SpirvShaderTranslator::kDescriptorSetCount <=
sizeof(current_graphics_descriptor_sets_bound_up_to_date_) * CHAR_BIT,
"Bit fields storing descriptor set validity must be large enough");
// Float constant usage masks of the last draw call.
uint64_t current_float_constant_map_vertex_[4];
uint64_t current_float_constant_map_pixel_[4];
// System shader constants.
SpirvShaderTranslator::SystemConstants system_constants_;
};
} // namespace vulkan

View File

@@ -0,0 +1,443 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h"
#include <cstring>
#include <memory>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/xenos.h"
namespace xe {
namespace gpu {
namespace vulkan {
VulkanPipelineCache::VulkanPipelineCache(
VulkanCommandProcessor& command_processor,
const RegisterFile& register_file,
VulkanRenderTargetCache& render_target_cache)
: command_processor_(command_processor),
register_file_(register_file),
render_target_cache_(render_target_cache) {}
VulkanPipelineCache::~VulkanPipelineCache() { Shutdown(); }
bool VulkanPipelineCache::Initialize() {
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanContext().GetVulkanProvider();
device_pipeline_features_.features = 0;
// TODO(Triang3l): Support the portability subset.
device_pipeline_features_.triangle_fans = 1;
shader_translator_ = std::make_unique<SpirvShaderTranslator>(
SpirvShaderTranslator::Features(provider));
return true;
}
void VulkanPipelineCache::Shutdown() {
ClearCache();
shader_translator_.reset();
}
void VulkanPipelineCache::ClearCache() {
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanContext().GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
last_pipeline_ = nullptr;
for (const auto& pipeline_pair : pipelines_) {
if (pipeline_pair.second.pipeline != VK_NULL_HANDLE) {
dfn.vkDestroyPipeline(device, pipeline_pair.second.pipeline, nullptr);
}
}
pipelines_.clear();
for (auto it : shaders_) {
delete it.second;
}
shaders_.clear();
}
VulkanShader* VulkanPipelineCache::LoadShader(xenos::ShaderType shader_type,
uint32_t guest_address,
const uint32_t* host_address,
uint32_t dword_count) {
  // Hash the input memory and look up the shader.
uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
auto it = shaders_.find(data_hash);
if (it != shaders_.end()) {
// Shader has been previously loaded.
return it->second;
}
// Always create the shader and stash it away.
// We need to track it even if it fails translation so we know not to try
// again.
VulkanShader* shader =
new VulkanShader(shader_type, data_hash, host_address, dword_count);
shaders_.emplace(data_hash, shader);
return shader;
}
bool VulkanPipelineCache::EnsureShadersTranslated(
VulkanShader* vertex_shader, VulkanShader* pixel_shader,
Shader::HostVertexShaderType host_vertex_shader_type) {
const RegisterFile& regs = register_file_;
auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();
// Edge flags are not supported yet (because polygon primitives are not).
assert_true(sq_program_cntl.vs_export_mode !=
xenos::VertexShaderExportMode::kPosition2VectorsEdge &&
sq_program_cntl.vs_export_mode !=
xenos::VertexShaderExportMode::kPosition2VectorsEdgeKill);
assert_false(sq_program_cntl.gen_index_vtx);
if (!vertex_shader->is_translated()) {
if (!TranslateShader(*shader_translator_, *vertex_shader,
sq_program_cntl)) {
XELOGE("Failed to translate the vertex shader!");
return false;
}
}
if (pixel_shader != nullptr && !pixel_shader->is_translated()) {
if (!TranslateShader(*shader_translator_, *pixel_shader, sq_program_cntl)) {
XELOGE("Failed to translate the pixel shader!");
return false;
}
}
return true;
}
bool VulkanPipelineCache::ConfigurePipeline(
VulkanShader* vertex_shader, VulkanShader* pixel_shader,
VulkanRenderTargetCache::RenderPassKey render_pass_key,
VkPipeline& pipeline_out,
const PipelineLayoutProvider*& pipeline_layout_out) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
PipelineDescription description;
if (!GetCurrentStateDescription(vertex_shader, pixel_shader, render_pass_key,
description)) {
return false;
}
if (last_pipeline_ && last_pipeline_->first == description) {
pipeline_out = last_pipeline_->second.pipeline;
pipeline_layout_out = last_pipeline_->second.pipeline_layout;
return true;
}
auto it = pipelines_.find(description);
if (it != pipelines_.end()) {
last_pipeline_ = &*it;
pipeline_out = it->second.pipeline;
pipeline_layout_out = it->second.pipeline_layout;
return true;
}
  // Create the pipeline if it's neither the most recently used one nor
  // already in the cache.
if (!EnsureShadersTranslated(vertex_shader, pixel_shader,
Shader::HostVertexShaderType::kVertex)) {
return false;
}
const PipelineLayoutProvider* pipeline_layout =
command_processor_.GetPipelineLayout(0, 0);
if (!pipeline_layout) {
return false;
}
VkRenderPass render_pass =
render_target_cache_.GetRenderPass(render_pass_key);
if (render_pass == VK_NULL_HANDLE) {
return false;
}
PipelineCreationArguments creation_arguments;
auto& pipeline =
*pipelines_.emplace(description, Pipeline(pipeline_layout)).first;
creation_arguments.pipeline = &pipeline;
creation_arguments.vertex_shader = vertex_shader;
creation_arguments.pixel_shader = pixel_shader;
creation_arguments.render_pass = render_pass;
if (!EnsurePipelineCreated(creation_arguments)) {
return false;
}
pipeline_out = pipeline.second.pipeline;
pipeline_layout_out = pipeline_layout;
return true;
}
bool VulkanPipelineCache::TranslateShader(SpirvShaderTranslator& translator,
VulkanShader& shader,
reg::SQ_PROGRAM_CNTL cntl) {
// Perform translation.
// If this fails the shader will be marked as invalid and ignored later.
// TODO(Triang3l): Host vertex shader type.
if (!translator.Translate(&shader, cntl,
Shader::HostVertexShaderType::kVertex)) {
XELOGE("Shader {:016X} translation failed; marking as ignored",
shader.ucode_data_hash());
return false;
}
return shader.InitializeShaderModule(
command_processor_.GetVulkanContext().GetVulkanProvider());
}
bool VulkanPipelineCache::GetCurrentStateDescription(
const VulkanShader* vertex_shader, const VulkanShader* pixel_shader,
VulkanRenderTargetCache::RenderPassKey render_pass_key,
PipelineDescription& description_out) const {
description_out.Reset();
const RegisterFile& regs = register_file_;
description_out.vertex_shader_hash = vertex_shader->ucode_data_hash();
description_out.pixel_shader_hash =
pixel_shader ? pixel_shader->ucode_data_hash() : 0;
description_out.render_pass_key = render_pass_key;
auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
PipelinePrimitiveTopology primitive_topology;
switch (vgt_draw_initiator.prim_type) {
case xenos::PrimitiveType::kPointList:
primitive_topology = PipelinePrimitiveTopology::kPointList;
break;
case xenos::PrimitiveType::kLineList:
primitive_topology = PipelinePrimitiveTopology::kLineList;
break;
case xenos::PrimitiveType::kLineStrip:
primitive_topology = PipelinePrimitiveTopology::kLineStrip;
break;
case xenos::PrimitiveType::kTriangleList:
primitive_topology = PipelinePrimitiveTopology::kTriangleList;
break;
case xenos::PrimitiveType::kTriangleFan:
primitive_topology = device_pipeline_features_.triangle_fans
? PipelinePrimitiveTopology::kTriangleFan
: PipelinePrimitiveTopology::kTriangleList;
break;
case xenos::PrimitiveType::kTriangleStrip:
primitive_topology = PipelinePrimitiveTopology::kTriangleStrip;
break;
default:
// TODO(Triang3l): All primitive types and tessellation.
return false;
}
description_out.primitive_topology = primitive_topology;
// TODO(Triang3l): Primitive restart.
return true;
}
bool VulkanPipelineCache::EnsurePipelineCreated(
const PipelineCreationArguments& creation_arguments) {
if (creation_arguments.pipeline->second.pipeline != VK_NULL_HANDLE) {
return true;
}
  // This function should preferably validate the description to prevent
  // unsupported behavior that may be dangerous or crash, because pipelines
  // can be created from the disk storage.
if (creation_arguments.pixel_shader) {
XELOGGPU("Creating graphics pipeline state with VS {:016X}, PS {:016X}",
creation_arguments.vertex_shader->ucode_data_hash(),
creation_arguments.pixel_shader->ucode_data_hash());
} else {
XELOGGPU("Creating graphics pipeline state with VS {:016X}",
creation_arguments.vertex_shader->ucode_data_hash());
}
const PipelineDescription& description = creation_arguments.pipeline->first;
VkPipelineShaderStageCreateInfo shader_stages[2];
uint32_t shader_stage_count = 0;
assert_true(creation_arguments.vertex_shader->is_translated());
if (!creation_arguments.vertex_shader->is_valid()) {
return false;
}
assert_true(shader_stage_count < xe::countof(shader_stages));
VkPipelineShaderStageCreateInfo& shader_stage_vertex =
shader_stages[shader_stage_count++];
shader_stage_vertex.sType =
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
shader_stage_vertex.pNext = nullptr;
shader_stage_vertex.flags = 0;
shader_stage_vertex.stage = VK_SHADER_STAGE_VERTEX_BIT;
shader_stage_vertex.module =
creation_arguments.vertex_shader->shader_module();
assert_true(shader_stage_vertex.module != VK_NULL_HANDLE);
shader_stage_vertex.pName = "main";
shader_stage_vertex.pSpecializationInfo = nullptr;
if (creation_arguments.pixel_shader) {
assert_true(creation_arguments.pixel_shader->is_translated());
if (!creation_arguments.pixel_shader->is_valid()) {
return false;
}
assert_true(shader_stage_count < xe::countof(shader_stages));
VkPipelineShaderStageCreateInfo& shader_stage_fragment =
shader_stages[shader_stage_count++];
shader_stage_fragment.sType =
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
shader_stage_fragment.pNext = nullptr;
shader_stage_fragment.flags = 0;
shader_stage_fragment.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
shader_stage_fragment.module =
creation_arguments.pixel_shader->shader_module();
assert_true(shader_stage_fragment.module != VK_NULL_HANDLE);
shader_stage_fragment.pName = "main";
shader_stage_fragment.pSpecializationInfo = nullptr;
}
VkPipelineVertexInputStateCreateInfo vertex_input_state;
vertex_input_state.sType =
VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
vertex_input_state.pNext = nullptr;
vertex_input_state.flags = 0;
vertex_input_state.vertexBindingDescriptionCount = 0;
vertex_input_state.pVertexBindingDescriptions = nullptr;
vertex_input_state.vertexAttributeDescriptionCount = 0;
vertex_input_state.pVertexAttributeDescriptions = nullptr;
VkPipelineInputAssemblyStateCreateInfo input_assembly_state;
input_assembly_state.sType =
VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
input_assembly_state.pNext = nullptr;
input_assembly_state.flags = 0;
switch (description.primitive_topology) {
case PipelinePrimitiveTopology::kPointList:
input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
break;
case PipelinePrimitiveTopology::kLineList:
input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
break;
case PipelinePrimitiveTopology::kLineStrip:
input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
break;
case PipelinePrimitiveTopology::kTriangleList:
input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
break;
case PipelinePrimitiveTopology::kTriangleStrip:
input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
break;
case PipelinePrimitiveTopology::kTriangleFan:
assert_true(device_pipeline_features_.triangle_fans);
if (!device_pipeline_features_.triangle_fans) {
return false;
}
input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
break;
case PipelinePrimitiveTopology::kLineListWithAdjacency:
input_assembly_state.topology =
VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
break;
case PipelinePrimitiveTopology::kPatchList:
input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
break;
default:
assert_unhandled_case(description.primitive_topology);
return false;
}
input_assembly_state.primitiveRestartEnable =
description.primitive_restart ? VK_TRUE : VK_FALSE;
VkPipelineViewportStateCreateInfo viewport_state;
viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
viewport_state.pNext = nullptr;
viewport_state.flags = 0;
viewport_state.viewportCount = 1;
viewport_state.pViewports = nullptr;
viewport_state.scissorCount = 1;
viewport_state.pScissors = nullptr;
VkPipelineRasterizationStateCreateInfo rasterization_state = {};
rasterization_state.sType =
VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
rasterization_state.lineWidth = 1.0f;
VkPipelineMultisampleStateCreateInfo multisample_state = {};
multisample_state.sType =
VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
multisample_state.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
static const VkDynamicState dynamic_states[] = {
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
};
VkPipelineDynamicStateCreateInfo dynamic_state;
dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
dynamic_state.pNext = nullptr;
dynamic_state.flags = 0;
dynamic_state.dynamicStateCount = uint32_t(xe::countof(dynamic_states));
dynamic_state.pDynamicStates = dynamic_states;
VkGraphicsPipelineCreateInfo pipeline_create_info;
pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
pipeline_create_info.pNext = nullptr;
pipeline_create_info.flags = 0;
pipeline_create_info.stageCount = shader_stage_count;
pipeline_create_info.pStages = shader_stages;
pipeline_create_info.pVertexInputState = &vertex_input_state;
pipeline_create_info.pInputAssemblyState = &input_assembly_state;
pipeline_create_info.pTessellationState = nullptr;
pipeline_create_info.pViewportState = &viewport_state;
pipeline_create_info.pRasterizationState = &rasterization_state;
pipeline_create_info.pMultisampleState = &multisample_state;
pipeline_create_info.pDepthStencilState = nullptr;
pipeline_create_info.pColorBlendState = nullptr;
pipeline_create_info.pDynamicState = &dynamic_state;
pipeline_create_info.layout =
creation_arguments.pipeline->second.pipeline_layout->GetPipelineLayout();
pipeline_create_info.renderPass = creation_arguments.render_pass;
pipeline_create_info.subpass = 0;
pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE;
pipeline_create_info.basePipelineIndex = 0;
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanContext().GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
VkPipeline pipeline;
if (dfn.vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1,
&pipeline_create_info, nullptr,
&pipeline) != VK_SUCCESS) {
// TODO(Triang3l): Move these error messages outside.
/* if (creation_arguments.pixel_shader) {
XELOGE(
"Failed to create graphics pipeline with VS {:016X}, PS {:016X}",
creation_arguments.vertex_shader->ucode_data_hash(),
creation_arguments.pixel_shader->ucode_data_hash());
} else {
XELOGE("Failed to create graphics pipeline with VS {:016X}",
creation_arguments.vertex_shader->ucode_data_hash());
} */
return false;
}
creation_arguments.pipeline->second.pipeline = pipeline;
return true;
}
} // namespace vulkan
} // namespace gpu
} // namespace xe

View File

@@ -0,0 +1,183 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_
#define XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_
#include <cstddef>
#include <cstring>
#include <memory>
#include <unordered_map>
#include <utility>
#include "third_party/xxhash/xxhash.h"
#include "xenia/base/hash.h"
#include "xenia/base/platform.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace gpu {
namespace vulkan {
class VulkanCommandProcessor;
// TODO(Triang3l): Create a common base for both the Vulkan and the Direct3D
// implementations.
class VulkanPipelineCache {
public:
class PipelineLayoutProvider {
public:
virtual ~PipelineLayoutProvider() {}
virtual VkPipelineLayout GetPipelineLayout() const = 0;
};
VulkanPipelineCache(VulkanCommandProcessor& command_processor,
const RegisterFile& register_file,
VulkanRenderTargetCache& render_target_cache);
~VulkanPipelineCache();
bool Initialize();
void Shutdown();
void ClearCache();
VulkanShader* LoadShader(xenos::ShaderType shader_type,
uint32_t guest_address, const uint32_t* host_address,
uint32_t dword_count);
// Translates shaders if needed, also making shader info up to date.
bool EnsureShadersTranslated(
VulkanShader* vertex_shader, VulkanShader* pixel_shader,
Shader::HostVertexShaderType host_vertex_shader_type);
// TODO(Triang3l): Return a deferred creation handle.
bool ConfigurePipeline(VulkanShader* vertex_shader,
VulkanShader* pixel_shader,
VulkanRenderTargetCache::RenderPassKey render_pass_key,
VkPipeline& pipeline_out,
const PipelineLayoutProvider*& pipeline_layout_out);
private:
// Can only load pipeline storage if features of the device it was created on
  // and the current device match because descriptions may require features not
// supported on the device. Very radical differences (such as RB emulation
// method) should result in a different storage file being used.
union DevicePipelineFeatures {
struct {
uint32_t triangle_fans : 1;
};
uint32_t features = 0;
};
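  // As an illustrative example of the above: a stored pipeline whose
  // description uses PipelinePrimitiveTopology::kTriangleFan couldn't be
  // recreated on a device without triangle fan support (such as one exposing
  // only the portability subset), so storage written with different
  // DevicePipelineFeatures values shouldn't be loaded.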
enum class PipelinePrimitiveTopology : uint32_t {
kPointList,
kLineList,
kLineStrip,
kTriangleList,
kTriangleStrip,
// Requires DevicePipelineFeatures::triangle_fans.
kTriangleFan,
kLineListWithAdjacency,
kPatchList,
};
XEPACKEDSTRUCT(PipelineDescription, {
uint64_t vertex_shader_hash;
// 0 if no pixel shader.
uint64_t pixel_shader_hash;
VulkanRenderTargetCache::RenderPassKey render_pass_key;
// Input assembly.
PipelinePrimitiveTopology primitive_topology : 3;
uint32_t primitive_restart : 1;
// Including all the padding, for a stable hash.
PipelineDescription() { Reset(); }
PipelineDescription(const PipelineDescription& description) {
std::memcpy(this, &description, sizeof(*this));
}
PipelineDescription& operator=(const PipelineDescription& description) {
std::memcpy(this, &description, sizeof(*this));
return *this;
}
bool operator==(const PipelineDescription& description) const {
return std::memcmp(this, &description, sizeof(*this)) == 0;
}
void Reset() { std::memset(this, 0, sizeof(*this)); }
uint64_t GetHash() const { return XXH64(this, sizeof(*this), 0); }
struct Hasher {
size_t operator()(const PipelineDescription& description) const {
return size_t(description.GetHash());
}
};
});
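  // The description above is hashed and compared as raw bytes, which is why
  // copying uses memcpy, equality uses memcmp, and Reset() zero-fills the
  // whole structure - any uninitialized padding would make XXH64 return
  // different hashes for logically identical descriptions.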
struct Pipeline {
VkPipeline pipeline = VK_NULL_HANDLE;
// Owned by VulkanCommandProcessor, valid until ClearCache.
const PipelineLayoutProvider* pipeline_layout;
Pipeline(const PipelineLayoutProvider* pipeline_layout_provider)
: pipeline_layout(pipeline_layout_provider) {}
};
// Description that can be passed from the command processor thread to the
// creation threads, with everything needed from caches pre-looked-up.
struct PipelineCreationArguments {
std::pair<const PipelineDescription, Pipeline>* pipeline;
const VulkanShader* vertex_shader;
const VulkanShader* pixel_shader;
VkRenderPass render_pass;
};
// Can be called from multiple threads.
bool TranslateShader(SpirvShaderTranslator& translator, VulkanShader& shader,
reg::SQ_PROGRAM_CNTL cntl);
bool GetCurrentStateDescription(
const VulkanShader* vertex_shader, const VulkanShader* pixel_shader,
VulkanRenderTargetCache::RenderPassKey render_pass_key,
PipelineDescription& description_out) const;
// Can be called from creation threads - all needed data must be fully set up
// at the point of the call: shaders must be translated, pipeline layout and
// render pass objects must be available.
bool EnsurePipelineCreated(
const PipelineCreationArguments& creation_arguments);
VulkanCommandProcessor& command_processor_;
const RegisterFile& register_file_;
VulkanRenderTargetCache& render_target_cache_;
DevicePipelineFeatures device_pipeline_features_;
// Reusable shader translator on the command processor thread.
std::unique_ptr<SpirvShaderTranslator> shader_translator_;
// Ucode hash -> shader.
std::unordered_map<uint64_t, VulkanShader*,
xe::hash::IdentityHasher<uint64_t>>
shaders_;
std::unordered_map<PipelineDescription, Pipeline, PipelineDescription::Hasher>
pipelines_;
// Previously used pipeline, to avoid lookups if the state wasn't changed.
const std::pair<const PipelineDescription, Pipeline>* last_pipeline_ =
nullptr;
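  // Note: pointers to pipelines_ elements remain valid across insertions
  // (std::unordered_map doesn't invalidate references to existing elements),
  // and ClearCache() resets this pointer to nullptr before destroying the map.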
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_

View File

@@ -0,0 +1,136 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
#include "xenia/base/logging.h"
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
namespace xe {
namespace gpu {
namespace vulkan {
VulkanRenderTargetCache::VulkanRenderTargetCache(
VulkanCommandProcessor& command_processor,
const RegisterFile& register_file)
: command_processor_(command_processor), register_file_(register_file) {}
VulkanRenderTargetCache::~VulkanRenderTargetCache() { Shutdown(); }
bool VulkanRenderTargetCache::Initialize() { return true; }
void VulkanRenderTargetCache::Shutdown() { ClearCache(); }
void VulkanRenderTargetCache::ClearCache() {
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanContext().GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
for (const auto& framebuffer_pair : framebuffers_) {
dfn.vkDestroyFramebuffer(device, framebuffer_pair.second, nullptr);
}
framebuffers_.clear();
for (const auto& render_pass_pair : render_passes_) {
dfn.vkDestroyRenderPass(device, render_pass_pair.second, nullptr);
}
render_passes_.clear();
}
VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) {
auto it = render_passes_.find(key.key);
if (it != render_passes_.end()) {
return it->second;
}
// TODO(Triang3l): Attachments and dependencies.
VkSubpassDescription subpass_description;
subpass_description.flags = 0;
subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
subpass_description.inputAttachmentCount = 0;
subpass_description.pInputAttachments = nullptr;
subpass_description.colorAttachmentCount = 0;
subpass_description.pColorAttachments = nullptr;
subpass_description.pResolveAttachments = nullptr;
subpass_description.pDepthStencilAttachment = nullptr;
subpass_description.preserveAttachmentCount = 0;
subpass_description.pPreserveAttachments = nullptr;
VkRenderPassCreateInfo render_pass_create_info;
render_pass_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
render_pass_create_info.pNext = nullptr;
render_pass_create_info.flags = 0;
render_pass_create_info.attachmentCount = 0;
render_pass_create_info.pAttachments = nullptr;
render_pass_create_info.subpassCount = 1;
render_pass_create_info.pSubpasses = &subpass_description;
render_pass_create_info.dependencyCount = 0;
render_pass_create_info.pDependencies = nullptr;
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanContext().GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
VkRenderPass render_pass;
if (dfn.vkCreateRenderPass(device, &render_pass_create_info, nullptr,
&render_pass) != VK_SUCCESS) {
XELOGE("Failed to create a Vulkan render pass");
return VK_NULL_HANDLE;
}
render_passes_.emplace(key.key, render_pass);
return render_pass;
}
VkFramebuffer VulkanRenderTargetCache::GetFramebuffer(FramebufferKey key) {
auto it = framebuffers_.find(key);
if (it != framebuffers_.end()) {
return it->second;
}
VkRenderPass render_pass = GetRenderPass(key.render_pass_key);
if (render_pass == VK_NULL_HANDLE) {
return VK_NULL_HANDLE;
}
VkFramebufferCreateInfo framebuffer_create_info;
framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
framebuffer_create_info.pNext = nullptr;
framebuffer_create_info.flags = 0;
framebuffer_create_info.renderPass = render_pass;
framebuffer_create_info.attachmentCount = 0;
framebuffer_create_info.pAttachments = nullptr;
framebuffer_create_info.width = 1280;
framebuffer_create_info.height = 720;
framebuffer_create_info.layers = 1;
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanContext().GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
VkFramebuffer framebuffer;
if (dfn.vkCreateFramebuffer(device, &framebuffer_create_info, nullptr,
&framebuffer) != VK_SUCCESS) {
XELOGE("Failed to create a Vulkan framebuffer");
return VK_NULL_HANDLE;
}
framebuffers_.emplace(key, framebuffer);
return framebuffer;
}
bool VulkanRenderTargetCache::UpdateRenderTargets(
FramebufferKey& framebuffer_key_out) {
framebuffer_key_out = FramebufferKey();
return true;
}
} // namespace vulkan
} // namespace gpu
} // namespace xe

View File

@@ -0,0 +1,95 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_
#define XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_
#include <cstdint>
#include <cstring>
#include <unordered_map>
#include "third_party/xxhash/xxhash.h"
#include "xenia/gpu/register_file.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace gpu {
namespace vulkan {
class VulkanCommandProcessor;
// TODO(Triang3l): Create a common base for both the Vulkan and the Direct3D
// implementations.
class VulkanRenderTargetCache {
public:
union RenderPassKey {
uint32_t key = 0;
};
static_assert(sizeof(RenderPassKey) == sizeof(uint32_t));
struct FramebufferKey {
RenderPassKey render_pass_key;
// Including all the padding, for a stable hash.
FramebufferKey() { Reset(); }
FramebufferKey(const FramebufferKey& key) {
std::memcpy(this, &key, sizeof(*this));
}
FramebufferKey& operator=(const FramebufferKey& key) {
std::memcpy(this, &key, sizeof(*this));
return *this;
}
bool operator==(const FramebufferKey& key) const {
return std::memcmp(this, &key, sizeof(*this)) == 0;
}
void Reset() { std::memset(this, 0, sizeof(*this)); }
uint64_t GetHash() const { return XXH64(this, sizeof(*this), 0); }
struct Hasher {
size_t operator()(const FramebufferKey& description) const {
return size_t(description.GetHash());
}
};
};
static_assert(sizeof(FramebufferKey) == sizeof(uint32_t));
VulkanRenderTargetCache(VulkanCommandProcessor& command_processor,
const RegisterFile& register_file);
~VulkanRenderTargetCache();
bool Initialize();
void Shutdown();
void ClearCache();
// Returns the render pass object, or VK_NULL_HANDLE if failed to create.
// A render pass managed by the render target cache may be ended and resumed
// at any time (to allow for things like copying and texture loading).
VkRenderPass GetRenderPass(RenderPassKey key);
// Returns the framebuffer object, or VK_NULL_HANDLE if failed to create.
VkFramebuffer GetFramebuffer(FramebufferKey key);
// May dispatch computations.
bool UpdateRenderTargets(FramebufferKey& framebuffer_key_out);
private:
VulkanCommandProcessor& command_processor_;
const RegisterFile& register_file_;
// RenderPassKey::key -> VkRenderPass.
std::unordered_map<uint32_t, VkRenderPass> render_passes_;
std::unordered_map<FramebufferKey, VkFramebuffer, FramebufferKey::Hasher>
framebuffers_;
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_

View File

@@ -0,0 +1,48 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include <cstdint>
namespace xe {
namespace gpu {
namespace vulkan {
VulkanShader::VulkanShader(xenos::ShaderType shader_type, uint64_t data_hash,
const uint32_t* dword_ptr, uint32_t dword_count)
: Shader(shader_type, data_hash, dword_ptr, dword_count) {}
bool VulkanShader::InitializeShaderModule(
const ui::vulkan::VulkanProvider& provider) {
if (!is_valid()) {
return false;
}
if (shader_module_ != VK_NULL_HANDLE) {
return true;
}
VkShaderModuleCreateInfo shader_module_create_info;
shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
shader_module_create_info.pNext = nullptr;
shader_module_create_info.flags = 0;
shader_module_create_info.codeSize = translated_binary().size();
shader_module_create_info.pCode =
reinterpret_cast<const uint32_t*>(translated_binary().data());
if (provider.dfn().vkCreateShaderModule(provider.device(),
&shader_module_create_info, nullptr,
&shader_module_) != VK_SUCCESS) {
is_valid_ = false;
return false;
}
return true;
}
} // namespace vulkan
} // namespace gpu
} // namespace xe

View File

@@ -0,0 +1,39 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_VULKAN_SHADER_H_
#define XENIA_GPU_VULKAN_VULKAN_SHADER_H_
#include <cstdint>
#include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace gpu {
namespace vulkan {
class VulkanShader : public Shader {
public:
VulkanShader(xenos::ShaderType shader_type, uint64_t data_hash,
const uint32_t* dword_ptr, uint32_t dword_count);
bool InitializeShaderModule(const ui::vulkan::VulkanProvider& provider);
VkShaderModule shader_module() const { return shader_module_; }
private:
VkShaderModule shader_module_ = VK_NULL_HANDLE;
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_VULKAN_SHADER_H_

View File

@@ -241,6 +241,7 @@ void VulkanSharedMemory::Use(Usage usage,
buffer_memory_barrier.size = VK_WHOLE_SIZE;
last_usage_ = usage;
}
command_processor_.EndRenderPass();
command_processor_.deferred_command_buffer().CmdVkPipelineBarrier(
stage_mask_src, stage_mask_dst, 0, 0, nullptr, 1,
&buffer_memory_barrier, 0, nullptr);
@@ -271,7 +272,7 @@ bool VulkanSharedMemory::InitializeTraceSubmitDownloads() {
return false;
}
// TODO(Triang3l): End the render pass.
command_processor_.EndRenderPass();
Use(Usage::kRead);
DeferredCommandBuffer& command_buffer =
command_processor_.deferred_command_buffer();
@@ -384,7 +385,7 @@ bool VulkanSharedMemory::UploadRanges(
if (upload_page_ranges.empty()) {
return true;
}
// TODO(Triang3l): End the render pass.
command_processor_.EndRenderPass();
// upload_page_ranges are sorted, use them to determine the range for the
// ordering barrier.
Use(Usage::kTransferDestination,

View File

@@ -80,6 +80,8 @@ VkDescriptorSet TransientDescriptorPool::Request(
VkDescriptorSet descriptor_set;
// Try to allocate as normal.
// TODO(Triang3l): Investigate the possibility of reuse of descriptor sets, as
// vkAllocateDescriptorSets may be implemented suboptimally.
if (!pages_writable_.empty()) {
if (page_current_descriptor_sets_used_ < page_descriptor_set_count_ &&
page_current_descriptors_used_ + layout_descriptor_count <=