[Vulkan] Basic draw call architecture + [D3D12] Some cleanup
parent 08c50af7b8
commit 65c8d2b28e
@@ -2005,14 +2005,15 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
   }
   // Must not call anything that can change the descriptor heap from now on!

-  // Ensure vertex and index buffers are resident and draw.
+  // Ensure vertex buffers are resident.
   // TODO(Triang3l): Cache residency for ranges in a way similar to how texture
-  // validity will be tracked.
+  // validity is tracked.
   uint64_t vertex_buffers_resident[2] = {};
-  for (const auto& vertex_binding : vertex_shader->vertex_bindings()) {
+  for (const Shader::VertexBinding& vertex_binding :
+       vertex_shader->vertex_bindings()) {
     uint32_t vfetch_index = vertex_binding.fetch_constant;
     if (vertex_buffers_resident[vfetch_index >> 6] &
-        (1ull << (vfetch_index & 63))) {
+        (uint64_t(1) << (vfetch_index & 63))) {
       continue;
     }
     const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(

@@ -2045,7 +2046,8 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
           vfetch_constant.address << 2, vfetch_constant.size << 2);
       return false;
     }
-    vertex_buffers_resident[vfetch_index >> 6] |= 1ull << (vfetch_index & 63);
+    vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1)
+                                                  << (vfetch_index & 63);
   }

   // Gather memexport ranges and ensure the heaps for them are resident, and
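The residency bitmask in these hunks is a plain two-word bit set over the 96 vertex fetch constant slots: `index >> 6` selects the word and `index & 63` the bit. Spelling the one as `uint64_t(1)` (or `1ull`) matters because a bare `1` is a 32-bit `int`, so shifting it by 32 or more is undefined behavior. A standalone sketch of the idiom (names illustrative, not from the tree):

```cpp
#include <cstdint>

// Two 64-bit words give 128 bits - enough for the 96 vfetch slots.
struct ResidencyBits {
  uint64_t words[2] = {};

  bool Test(uint32_t index) const {
    return (words[index >> 6] & (uint64_t(1) << (index & 63))) != 0;
  }
  void Set(uint32_t index) {
    words[index >> 6] |= uint64_t(1) << (index & 63);
  }
};
```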
@@ -2745,12 +2747,12 @@ void D3D12CommandProcessor::ClearCommandAllocatorCache() {
 }

 void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
-  auto& regs = *register_file_;
-
 #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
   SCOPE_profile_cpu_f("gpu");
 #endif  // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES

+  const RegisterFile& regs = *register_file_;
+
   // Window parameters.
   // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
   // See r200UpdateWindow:

@@ -2846,14 +2848,14 @@ void D3D12CommandProcessor::UpdateFixedFunctionState(bool primitive_two_faced) {
   scissor.right = pa_sc_window_scissor_br.br_x;
   scissor.bottom = pa_sc_window_scissor_br.br_y;
   if (!pa_sc_window_scissor_tl.window_offset_disable) {
-    scissor.left =
-        std::max(scissor.left + pa_sc_window_offset.window_x_offset, LONG(0));
-    scissor.top =
-        std::max(scissor.top + pa_sc_window_offset.window_y_offset, LONG(0));
-    scissor.right =
-        std::max(scissor.right + pa_sc_window_offset.window_x_offset, LONG(0));
-    scissor.bottom =
-        std::max(scissor.bottom + pa_sc_window_offset.window_y_offset, LONG(0));
+    scissor.left = std::max(
+        LONG(scissor.left + pa_sc_window_offset.window_x_offset), LONG(0));
+    scissor.top = std::max(
+        LONG(scissor.top + pa_sc_window_offset.window_y_offset), LONG(0));
+    scissor.right = std::max(
+        LONG(scissor.right + pa_sc_window_offset.window_x_offset), LONG(0));
+    scissor.bottom = std::max(
+        LONG(scissor.bottom + pa_sc_window_offset.window_y_offset), LONG(0));
   }
   scissor.left *= pixel_size_x;
   scissor.top *= pixel_size_y;
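The scissor change presumably exists because `std::max` deduces a single template argument, so both operands must already have the same type: the window offset is a signed bitfield that promotes to `int` in the addition, and whether the sum ends up as `int` or `LONG` depends on the platform's `LONG` width. Casting the whole sum keeps the comparison unambiguously `std::max<LONG>`. A minimal sketch under that assumption (the `LONG` alias stands in for the Windows typedef):

```cpp
#include <algorithm>
#include <cstdint>

using LONG = long;  // Windows-style typedef; width differs across platforms.

LONG ClampedScissorEdge(LONG coordinate, int32_t window_offset) {
  // coordinate + window_offset may promote to a type other than LONG;
  // wrapping the sum in LONG(...) makes both std::max operands identical.
  return std::max(LONG(coordinate + window_offset), LONG(0));
}
```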
@@ -2915,12 +2917,11 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
     uint32_t line_loop_closing_index, xenos::Endian index_endian,
     uint32_t used_texture_mask, bool early_z, uint32_t color_mask,
     const RenderTargetCache::PipelineRenderTarget render_targets[4]) {
-  auto& regs = *register_file_;
-
 #if XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES
   SCOPE_profile_cpu_f("gpu");
 #endif  // XE_UI_D3D12_FINE_GRAINED_DRAW_SCOPES

+  const RegisterFile& regs = *register_file_;
   auto pa_cl_clip_cntl = regs.Get<reg::PA_CL_CLIP_CNTL>();
   auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
   auto pa_su_point_minmax = regs.Get<reg::PA_SU_POINT_MINMAX>();

@@ -3103,14 +3104,14 @@ void D3D12CommandProcessor::UpdateSystemConstantValues(
   dirty |= system_constants_.line_loop_closing_index != line_loop_closing_index;
   system_constants_.line_loop_closing_index = line_loop_closing_index;

-  // Vertex index offset.
-  dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
-  system_constants_.vertex_base_index = vgt_indx_offset;
-
   // Index or tessellation edge factor buffer endianness.
   dirty |= system_constants_.vertex_index_endian != index_endian;
   system_constants_.vertex_index_endian = index_endian;

+  // Vertex index offset.
+  dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
+  system_constants_.vertex_base_index = vgt_indx_offset;
+
   // User clip planes (UCP_ENA_#), when not CLIP_DISABLE.
   if (!pa_cl_clip_cntl.clip_disable) {
     for (uint32_t i = 0; i < 6; ++i) {

@@ -3574,7 +3575,7 @@ bool D3D12CommandProcessor::UpdateBindings(
             float_constant_map_vertex.float_bitmap[i];
         // If no float constants at all, we can reuse any buffer for them, so not
         // invalidating.
-        if (float_constant_map_vertex.float_count != 0) {
+        if (float_constant_count_vertex) {
          cbuffer_binding_float_vertex_.up_to_date = false;
        }
      }

@@ -3589,7 +3590,7 @@ bool D3D12CommandProcessor::UpdateBindings(
            float_constant_map_pixel.float_bitmap[i]) {
        current_float_constant_map_pixel_[i] =
            float_constant_map_pixel.float_bitmap[i];
-        if (float_constant_map_pixel.float_count != 0) {
+        if (float_constant_count_pixel) {
          cbuffer_binding_float_pixel_.up_to_date = false;
        }
      }
@@ -223,10 +223,10 @@ void PipelineCache::ClearCache(bool shutting_down) {
   }
   texture_binding_layout_map_.clear();
   texture_binding_layouts_.clear();
-  for (auto it : shader_map_) {
+  for (auto it : shaders_) {
     delete it.second;
   }
-  shader_map_.clear();
+  shaders_.clear();

   if (reinitialize_shader_storage) {
     InitializeShaderStorage(shader_storage_root, shader_storage_title_id,

@@ -374,8 +374,7 @@ void PipelineCache::InitializeShaderStorage(
        }
        size_t ucode_byte_count =
            shader_header.ucode_dword_count * sizeof(uint32_t);
-        if (shader_map_.find(shader_header.ucode_data_hash) !=
-            shader_map_.end()) {
+        if (shaders_.find(shader_header.ucode_data_hash) != shaders_.end()) {
          // Already added - usually shaders aren't added without the intention of
          // translating them imminently, so don't do additional checks to
          // actually ensure that translation happens right now (they would cause

@@ -402,7 +401,7 @@ void PipelineCache::InitializeShaderStorage(
        D3D12Shader* shader =
            new D3D12Shader(shader_header.type, ucode_data_hash,
                            ucode_dwords.data(), shader_header.ucode_dword_count);
-        shader_map_.insert({ucode_data_hash, shader});
+        shaders_.insert({ucode_data_hash, shader});
        // Create new threads if the currently existing threads can't keep up with
        // file reading, but not more than the number of logical processors minus
        // one.

@@ -439,7 +438,7 @@ void PipelineCache::InitializeShaderStorage(
        }
        shader_translation_threads.clear();
        for (D3D12Shader* shader : shaders_failed_to_translate) {
-          shader_map_.erase(shader->ucode_data_hash());
+          shaders_.erase(shader->ucode_data_hash());
          delete shader;
        }
      }

@@ -576,8 +575,8 @@ void PipelineCache::InitializeShaderStorage(

      PipelineRuntimeDescription pipeline_runtime_description;
      auto vertex_shader_it =
-          shader_map_.find(pipeline_description.vertex_shader_hash);
-      if (vertex_shader_it == shader_map_.end()) {
+          shaders_.find(pipeline_description.vertex_shader_hash);
+      if (vertex_shader_it == shaders_.end()) {
        continue;
      }
      pipeline_runtime_description.vertex_shader = vertex_shader_it->second;

@@ -586,8 +585,8 @@ void PipelineCache::InitializeShaderStorage(
      }
      if (pipeline_description.pixel_shader_hash) {
        auto pixel_shader_it =
-            shader_map_.find(pipeline_description.pixel_shader_hash);
-        if (pixel_shader_it == shader_map_.end()) {
+            shaders_.find(pipeline_description.pixel_shader_hash);
+        if (pixel_shader_it == shaders_.end()) {
          continue;
        }
        pipeline_runtime_description.pixel_shader = pixel_shader_it->second;

@@ -779,8 +778,8 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
                                        uint32_t dword_count) {
   // Hash the input memory and lookup the shader.
   uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
-  auto it = shader_map_.find(data_hash);
-  if (it != shader_map_.end()) {
+  auto it = shaders_.find(data_hash);
+  if (it != shaders_.end()) {
    // Shader has been previously loaded.
    return it->second;
   }

@@ -790,7 +789,7 @@ D3D12Shader* PipelineCache::LoadShader(xenos::ShaderType shader_type,
   // again.
   D3D12Shader* shader =
       new D3D12Shader(shader_type, data_hash, host_address, dword_count);
-  shader_map_.insert({data_hash, shader});
+  shaders_.insert({data_hash, shader});

   return shader;
 }
@@ -29,6 +29,7 @@
 #include "xenia/gpu/dxbc_shader_translator.h"
 #include "xenia/gpu/register_file.h"
 #include "xenia/gpu/xenos.h"
+#include "xenia/ui/d3d12/d3d12_api.h"

 namespace xe {
 namespace gpu {

@@ -255,9 +256,9 @@ class PipelineCache {
   IDxcUtils* dxc_utils_ = nullptr;
   IDxcCompiler* dxc_compiler_ = nullptr;

-  // All loaded shaders mapped by their guest hash key.
+  // Ucode hash -> shader.
   std::unordered_map<uint64_t, D3D12Shader*, xe::hash::IdentityHasher<uint64_t>>
-      shader_map_;
+      shaders_;

   struct LayoutUID {
     size_t uid;
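The `shaders_` map is keyed by the XXH64 digest of the shader microcode (see `LoadShader` above), so the key is already uniformly distributed; `xe::hash::IdentityHasher` presumably just returns the key instead of hashing a hash. A minimal sketch of that idea (illustrative, not the tree's exact definition):

```cpp
#include <cstdint>
#include <unordered_map>

// When the key is itself a good hash (here: XXH64 of the ucode), running it
// through std::hash again is wasted work - an identity hasher returns the
// key unchanged and the map's bucket distribution stays just as good.
struct IdentityHasher64 {
  size_t operator()(uint64_t key) const { return size_t(key); }
};

class D3D12Shader;
using ShaderMap = std::unordered_map<uint64_t, D3D12Shader*, IdentityHasher64>;
```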
@@ -288,7 +288,7 @@ void SpirvShaderTranslator::StartTranslation() {
   id_vector_temp_.push_back(builder_->makeRuntimeArray(type_uint_));
   // Storage buffers have std430 packing, no padding to 4-component vectors.
   builder_->addDecoration(id_vector_temp_.back(), spv::DecorationArrayStride,
-                          sizeof(uint32_t) * 4);
+                          sizeof(uint32_t));
   spv::Id type_shared_memory =
       builder_->makeStructType(id_vector_temp_, "XeSharedMemory");
   builder_->addMemberName(type_shared_memory, 0, "shared_memory");
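This stride fix matters because the shared memory SSBO is a runtime array of scalar `uint`: with std430 packing each element occupies exactly 4 bytes, while a stride of 16 (the std140-style rounding to a vec4 slot) would have made `shared_memory[i]` address guest byte `i * 16` and skip three of every four dwords. A host-side sketch of the implied address mapping (names hypothetical):

```cpp
#include <cstdint>

// std430: one array element per 32-bit guest dword, so the shader can index
// the array with (guest_byte_address >> 2). That only lines up if
// ArrayStride == sizeof(uint32_t) == 4.
constexpr uint32_t kSharedMemoryArrayStride = sizeof(uint32_t);
static_assert(kSharedMemoryArrayStride == 4, "element i must be guest byte i * 4");

uint32_t SharedMemoryElementIndex(uint32_t guest_byte_address) {
  return guest_byte_address >> 2;
}
```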
@@ -511,7 +511,9 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
                          ? spv::ExecutionModelTessellationEvaluation
                          : spv::ExecutionModelVertex;
   }
-  if (features_.float_controls) {
+  // TODO(Triang3l): Re-enable float controls when
+  // VkPhysicalDeviceFloatControlsPropertiesKHR are handled.
+  /* if (features_.float_controls) {
     // Flush to zero, similar to the real hardware, also for things like Shader
     // Model 3 multiplication emulation.
     builder_->addCapability(spv::CapabilityDenormFlushToZero);

@@ -523,7 +525,7 @@ std::vector<uint8_t> SpirvShaderTranslator::CompleteTranslation() {
       builder_->addCapability(spv::CapabilitySignedZeroInfNanPreserve);
       builder_->addExecutionMode(function_main_,
                                  spv::ExecutionModeSignedZeroInfNanPreserve, 32);
     }
-  }
+  } */
   spv::Instruction* entry_point =
       builder_->addEntryPoint(execution_model, function_main_, "main");
   for (spv::Id interface_id : main_interface_) {

@@ -982,7 +984,19 @@ void SpirvShaderTranslator::StartVertexOrTessEvalShaderInMain() {
   }
 }

-void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {}
+void SpirvShaderTranslator::CompleteVertexOrTessEvalShaderInMain() {
+  // Write 1 to point size (using a geometry shader or another kind of fallback
+  // to expand point sprites - point size support is not guaranteed, and the
+  // size would also be limited, and can't be controlled independently along two
+  // axes).
+  id_vector_temp_.clear();
+  id_vector_temp_.push_back(
+      builder_->makeIntConstant(kOutputPerVertexMemberPointSize));
+  builder_->createStore(
+      const_float_1_,
+      builder_->createAccessChain(spv::StorageClassOutput, output_per_vertex_,
+                                  id_vector_temp_));
+}

 void SpirvShaderTranslator::UpdateExecConditionals(
     ParsedExecInstruction::Type type, uint32_t bool_constant_index,

@@ -1054,9 +1068,8 @@ void SpirvShaderTranslator::UpdateExecConditionals(
     return;
   }
   cf_exec_condition_ = condition;
-  spv::Function& function = builder_->getBuildPoint()->getParent();
-  cf_exec_conditional_merge_ =
-      new spv::Block(builder_->getUniqueId(), function);
+  cf_exec_conditional_merge_ = new spv::Block(
+      builder_->getUniqueId(), builder_->getBuildPoint()->getParent());
   SpirvCreateSelectionMerge(cf_exec_conditional_merge_->getId());
   spv::Block& inner_block = builder_->makeNewBlock();
   builder_->createConditionalBranch(

@@ -1095,7 +1108,8 @@ void SpirvShaderTranslator::UpdateInstructionPredication(bool predicated,
     spv::Id predicate_id =
         builder_->createLoad(var_main_predicate_, spv::NoPrecision);
     spv::Block& predicated_block = builder_->makeNewBlock();
-    cf_instruction_predicate_merge_ = &builder_->makeNewBlock();
+    cf_instruction_predicate_merge_ = new spv::Block(
+        builder_->getUniqueId(), builder_->getBuildPoint()->getParent());
     SpirvCreateSelectionMerge(cf_instruction_predicate_merge_->getId());
     builder_->createConditionalBranch(
         predicate_id,

@@ -1135,12 +1149,23 @@ void SpirvShaderTranslator::CloseExecConditionals() {
 }

 spv::Id SpirvShaderTranslator::GetStorageAddressingIndex(
-    InstructionStorageAddressingMode addressing_mode, uint32_t storage_index) {
+    InstructionStorageAddressingMode addressing_mode, uint32_t storage_index,
+    bool is_float_constant) {
   EnsureBuildPointAvailable();
   spv::Id base_pointer = spv::NoResult;
   switch (addressing_mode) {
-    case InstructionStorageAddressingMode::kStatic:
-      return builder_->makeIntConstant(int(storage_index));
+    case InstructionStorageAddressingMode::kStatic: {
+      uint32_t static_storage_index = storage_index;
+      if (is_float_constant) {
+        static_storage_index =
+            constant_register_map().GetPackedFloatConstantIndex(storage_index);
+        assert_true(static_storage_index != UINT32_MAX);
+        if (static_storage_index == UINT32_MAX) {
+          static_storage_index = 0;
+        }
+      }
+      return builder_->makeIntConstant(int(static_storage_index));
+    }
     case InstructionStorageAddressingMode::kAddressAbsolute:
       base_pointer = var_main_address_absolute_;
       break;

@@ -1153,6 +1178,8 @@ spv::Id SpirvShaderTranslator::GetStorageAddressingIndex(
                        id_vector_temp_util_);
       break;
   }
+  assert_true(!is_float_constant ||
+              constant_register_map().float_dynamic_addressing);
   assert_true(base_pointer != spv::NoResult);
   spv::Id index = builder_->createLoad(base_pointer, spv::NoPrecision);
   if (storage_index) {

@@ -1165,8 +1192,9 @@ spv::Id SpirvShaderTranslator::GetStorageAddressingIndex(

 spv::Id SpirvShaderTranslator::LoadOperandStorage(
     const InstructionOperand& operand) {
-  spv::Id index = GetStorageAddressingIndex(operand.storage_addressing_mode,
-                                            operand.storage_index);
+  spv::Id index = GetStorageAddressingIndex(
+      operand.storage_addressing_mode, operand.storage_index,
+      operand.storage_source == InstructionStorageSource::kConstantFloat);
   EnsureBuildPointAvailable();
   spv::Id vec4_pointer = spv::NoResult;
   switch (operand.storage_source) {

@@ -1592,7 +1620,7 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) {
                        builder_->makeUintConstant(
                            static_cast<unsigned int>(xenos::Endian::k8in32)));
   spv::Id is_8in16_or_8in32 =
-      builder_->createBinOp(spv::OpLogicalAnd, type_bool_, is_8in16, is_8in32);
+      builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in16, is_8in32);
   spv::Block& block_pre_8in16 = *builder_->getBuildPoint();
   assert_false(block_pre_8in16.isTerminated());
   spv::Block& block_8in16 = builder_->makeNewBlock();

@@ -1633,7 +1661,7 @@ spv::Id SpirvShaderTranslator::EndianSwap32Uint(spv::Id value, spv::Id endian) {
                        builder_->makeUintConstant(
                            static_cast<unsigned int>(xenos::Endian::k16in32)));
   spv::Id is_8in32_or_16in32 =
-      builder_->createBinOp(spv::OpLogicalAnd, type_bool_, is_8in32, is_16in32);
+      builder_->createBinOp(spv::OpLogicalOr, type_bool_, is_8in32, is_16in32);
   spv::Block& block_pre_16in32 = *builder_->getBuildPoint();
   spv::Block& block_16in32 = builder_->makeNewBlock();
   spv::Block& block_16in32_merge = builder_->makeNewBlock();
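The `OpLogicalAnd` to `OpLogicalOr` fixes in `EndianSwap32Uint` are real logic bugs: the 8-in-32 swap is implemented as an 8-in-16 byte swap followed by a 16-in-32 halfword swap, so the first pass must run when the endianness is k8in16 *or* k8in32, and the second when it is k8in32 *or* k16in32. With And, each pass would only fire in the impossible "both modes at once" case. A scalar C++ model of what the SPIR-V implements (assuming the Endian values from xenos.h):

```cpp
#include <cstdint>

enum class Endian : uint32_t { kNone = 0, k8in16 = 1, k8in32 = 2, k16in32 = 3 };

uint32_t EndianSwap32(uint32_t value, Endian endian) {
  // k8in32 decomposes into both passes - hence the logical OR guards.
  if (endian == Endian::k8in16 || endian == Endian::k8in32) {
    value = ((value & 0x00FF00FFu) << 8) | ((value & 0xFF00FF00u) >> 8);
  }
  if (endian == Endian::k8in32 || endian == Endian::k16in32) {
    value = (value << 16) | (value >> 16);
  }
  return value;
}
```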
@@ -39,26 +39,49 @@ class SpirvShaderTranslator : public ShaderTranslator {
   // therefore SSBOs must only be used for shared memory - all other storage
   // resources must be images or texel buffers.
   enum DescriptorSet : uint32_t {
-    // In order of update frequency.
-    // Very frequently changed, especially for UI draws, and for models drawn in
-    // multiple parts - contains vertex and texture fetch constants.
-    kDescriptorSetFetchConstants,
+    // According to the "Pipeline Layout Compatibility" section of the Vulkan
+    // specification:
+    // "Two pipeline layouts are defined to be "compatible for set N" if they
+    //  were created with identically defined descriptor set layouts for sets
+    //  zero through N, and if they were created with identical push constant
+    //  ranges."
+    // "Place the least frequently changing descriptor sets near the start of
+    //  the pipeline layout, and place the descriptor sets representing the most
+    //  frequently changing resources near the end. When pipelines are switched,
+    //  only the descriptor set bindings that have been invalidated will need to
+    //  be updated and the remainder of the descriptor set bindings will remain
+    //  in place."
+    // This is partially the reverse of the Direct3D 12's rule of placing the
+    // most frequently changed descriptor sets in the beginning. Here all
+    // descriptor sets with an immutable layout are placed first, in reverse
+    // frequency of changing, and sets that may be different for different
+    // pipeline states last.
+
+    // Always the same descriptor set layouts for all pipeline layouts:
+
+    // Never changed.
+    kDescriptorSetSharedMemoryAndEdram,
+    // Pretty rarely used and rarely changed - flow control constants.
+    kDescriptorSetBoolLoopConstants,
+    // May stay the same across many draws.
+    kDescriptorSetSystemConstants,
+    // Less frequently changed (per-material).
+    kDescriptorSetFloatConstantsPixel,
     // Quite frequently changed (for one object drawn multiple times, for
     // instance - may contain projection matrices).
     kDescriptorSetFloatConstantsVertex,
-    // Less frequently changed (per-material).
-    kDescriptorSetFloatConstantsPixel,
-    // Per-material, combined images and samplers.
-    kDescriptorSetTexturesPixel,
+    // Very frequently changed, especially for UI draws, and for models drawn in
+    // multiple parts - contains vertex and texture fetch constants.
+    kDescriptorSetFetchConstants,
+
+    // Mutable part of the pipeline layout:
+    kDescriptorSetMutableLayoutsStart,
+
     // Rarely used at all, but may be changed at an unpredictable rate when
     // vertex textures are used, combined images and samplers.
-    kDescriptorSetTexturesVertex,
-    // May stay the same across many draws.
-    kDescriptorSetSystemConstants,
-    // Pretty rarely used and rarely changed - flow control constants.
-    kDescriptorSetBoolLoopConstants,
-    // Never changed.
-    kDescriptorSetSharedMemoryAndEdram,
+    kDescriptorSetTexturesVertex = kDescriptorSetMutableLayoutsStart,
+    // Per-material, combined images and samplers.
+    kDescriptorSetTexturesPixel,
     kDescriptorSetCount,
   };

@@ -162,7 +185,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
   void CloseExecConditionals();

   spv::Id GetStorageAddressingIndex(
-      InstructionStorageAddressingMode addressing_mode, uint32_t storage_index);
+      InstructionStorageAddressingMode addressing_mode, uint32_t storage_index,
+      bool is_float_constant = false);
   // Loads unswizzled operand without sign modifiers as float4.
   spv::Id LoadOperandStorage(const InstructionOperand& operand);
   spv::Id ApplyOperandModifiers(spv::Id operand_value,
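The payoff of this set ordering shows up at draw time: with compatible layout prefixes, binding a pipeline whose layout differs only in the trailing (texture) sets leaves all earlier sets bound. A sketch of that rule, mirroring the logic that appears in `VulkanCommandProcessor::IssueDraw` further down (names simplified, not the tree's API):

```cpp
#include <algorithm>
#include <cstdint>

// Vulkan keeps set N bound across a pipeline switch if set layouts 0..N are
// identical in the old and new pipeline layouts. Only the two texture sets
// can differ here, so comparing their layout handles finds the first
// incompatible set; sets >= the returned value must be rebound.
uint32_t CountCompatibleDescriptorSets(uint32_t set_count,
                                       uint32_t textures_vertex_set,
                                       uint32_t textures_pixel_set,
                                       bool textures_vertex_layout_same,
                                       bool textures_pixel_layout_same) {
  uint32_t kept = set_count;
  if (!textures_vertex_layout_same) {
    kept = std::min(kept, textures_vertex_set);
  }
  if (!textures_pixel_layout_same) {
    kept = std::min(kept, textures_pixel_set);
  }
  return kept;
}
```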
@@ -46,15 +46,65 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) {
     stream_remaining -= kCommandHeaderSizeElements;

     switch (header.command) {
+      case Command::kVkBeginRenderPass: {
+        auto& args = *reinterpret_cast<const ArgsVkBeginRenderPass*>(stream);
+        size_t offset_bytes = sizeof(ArgsVkBeginRenderPass);
+        VkRenderPassBeginInfo render_pass_begin_info;
+        render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
+        render_pass_begin_info.pNext = nullptr;
+        render_pass_begin_info.renderPass = args.render_pass;
+        render_pass_begin_info.framebuffer = args.framebuffer;
+        render_pass_begin_info.renderArea = args.render_area;
+        render_pass_begin_info.clearValueCount = args.clear_value_count;
+        if (render_pass_begin_info.clearValueCount) {
+          offset_bytes = xe::align(offset_bytes, alignof(VkClearValue));
+          render_pass_begin_info.pClearValues =
+              reinterpret_cast<const VkClearValue*>(
+                  reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
+          offset_bytes +=
+              sizeof(VkClearValue) * render_pass_begin_info.clearValueCount;
+        } else {
+          render_pass_begin_info.pClearValues = nullptr;
+        }
+        dfn.vkCmdBeginRenderPass(command_buffer, &render_pass_begin_info,
+                                 args.contents);
+      } break;
+
+      case Command::kVkBindDescriptorSets: {
+        auto& args = *reinterpret_cast<const ArgsVkBindDescriptorSets*>(stream);
+        size_t offset_bytes = xe::align(sizeof(ArgsVkBindDescriptorSets),
+                                        alignof(VkDescriptorSet));
+        const VkDescriptorSet* descriptor_sets =
+            reinterpret_cast<const VkDescriptorSet*>(
+                reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
+        offset_bytes += sizeof(VkDescriptorSet) * args.descriptor_set_count;
+        const uint32_t* dynamic_offsets = nullptr;
+        if (args.dynamic_offset_count) {
+          offset_bytes = xe::align(offset_bytes, alignof(uint32_t));
+          dynamic_offsets = reinterpret_cast<const uint32_t*>(
+              reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
+          offset_bytes += sizeof(uint32_t) * args.dynamic_offset_count;
+        }
+        dfn.vkCmdBindDescriptorSets(command_buffer, args.pipeline_bind_point,
+                                    args.layout, args.first_set,
+                                    args.descriptor_set_count, descriptor_sets,
+                                    args.dynamic_offset_count, dynamic_offsets);
+      } break;
+
       case Command::kVkBindIndexBuffer: {
         auto& args = *reinterpret_cast<const ArgsVkBindIndexBuffer*>(stream);
         dfn.vkCmdBindIndexBuffer(command_buffer, args.buffer, args.offset,
                                  args.index_type);
       } break;

+      case Command::kVkBindPipeline: {
+        auto& args = *reinterpret_cast<const ArgsVkBindPipeline*>(stream);
+        dfn.vkCmdBindPipeline(command_buffer, args.pipeline_bind_point,
+                              args.pipeline);
+      } break;
+
       case Command::kVkCopyBuffer: {
         auto& args = *reinterpret_cast<const ArgsVkCopyBuffer*>(stream);
-        static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t));
         dfn.vkCmdCopyBuffer(
             command_buffer, args.src_buffer, args.dst_buffer, args.region_count,
             reinterpret_cast<const VkBufferCopy*>(

@@ -62,26 +112,37 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) {
                 xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy))));
       } break;

+      case Command::kVkDraw: {
+        auto& args = *reinterpret_cast<const ArgsVkDraw*>(stream);
+        dfn.vkCmdDraw(command_buffer, args.vertex_count, args.instance_count,
+                      args.first_vertex, args.first_instance);
+      } break;
+
+      case Command::kVkDrawIndexed: {
+        auto& args = *reinterpret_cast<const ArgsVkDrawIndexed*>(stream);
+        dfn.vkCmdDrawIndexed(command_buffer, args.index_count,
+                             args.instance_count, args.first_index,
+                             args.vertex_offset, args.first_instance);
+      } break;
+
+      case Command::kVkEndRenderPass:
+        dfn.vkCmdEndRenderPass(command_buffer);
+        break;
+
       case Command::kVkPipelineBarrier: {
         auto& args = *reinterpret_cast<const ArgsVkPipelineBarrier*>(stream);
         size_t barrier_offset_bytes = sizeof(ArgsVkPipelineBarrier);

-        const VkMemoryBarrier* memory_barriers;
+        const VkMemoryBarrier* memory_barriers = nullptr;
         if (args.memory_barrier_count) {
-          static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t));
           barrier_offset_bytes =
               xe::align(barrier_offset_bytes, alignof(VkMemoryBarrier));
           memory_barriers = reinterpret_cast<const VkMemoryBarrier*>(
               reinterpret_cast<const uint8_t*>(stream) + barrier_offset_bytes);
           barrier_offset_bytes +=
               sizeof(VkMemoryBarrier) * args.memory_barrier_count;
-        } else {
-          memory_barriers = nullptr;
         }

-        const VkBufferMemoryBarrier* buffer_memory_barriers;
+        const VkBufferMemoryBarrier* buffer_memory_barriers = nullptr;
         if (args.buffer_memory_barrier_count) {
-          static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t));
           barrier_offset_bytes =
               xe::align(barrier_offset_bytes, alignof(VkBufferMemoryBarrier));
           buffer_memory_barriers =

@@ -90,23 +151,16 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) {
                   barrier_offset_bytes);
           barrier_offset_bytes +=
               sizeof(VkBufferMemoryBarrier) * args.buffer_memory_barrier_count;
-        } else {
-          buffer_memory_barriers = nullptr;
         }

-        const VkImageMemoryBarrier* image_memory_barriers;
+        const VkImageMemoryBarrier* image_memory_barriers = nullptr;
         if (args.image_memory_barrier_count) {
-          static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t));
           barrier_offset_bytes =
               xe::align(barrier_offset_bytes, alignof(VkImageMemoryBarrier));
           image_memory_barriers = reinterpret_cast<const VkImageMemoryBarrier*>(
               reinterpret_cast<const uint8_t*>(stream) + barrier_offset_bytes);
           barrier_offset_bytes +=
               sizeof(VkImageMemoryBarrier) * args.image_memory_barrier_count;
-        } else {
-          image_memory_barriers = nullptr;
         }

         dfn.vkCmdPipelineBarrier(
             command_buffer, args.src_stage_mask, args.dst_stage_mask,
             args.dependency_flags, args.memory_barrier_count, memory_barriers,

@@ -114,6 +168,24 @@ void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) {
             args.image_memory_barrier_count, image_memory_barriers);
       } break;

+      case Command::kVkSetScissor: {
+        auto& args = *reinterpret_cast<const ArgsVkSetScissor*>(stream);
+        dfn.vkCmdSetScissor(
+            command_buffer, args.first_scissor, args.scissor_count,
+            reinterpret_cast<const VkRect2D*>(
+                reinterpret_cast<const uint8_t*>(stream) +
+                xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D))));
+      } break;
+
+      case Command::kVkSetViewport: {
+        auto& args = *reinterpret_cast<const ArgsVkSetViewport*>(stream);
+        dfn.vkCmdSetViewport(
+            command_buffer, args.first_viewport, args.viewport_count,
+            reinterpret_cast<const VkViewport*>(
+                reinterpret_cast<const uint8_t*>(stream) +
+                xe::align(sizeof(ArgsVkSetViewport), alignof(VkViewport))));
+      } break;
+
       default:
         assert_unhandled_case(header.command);
         break;

@@ -133,38 +205,25 @@ void DeferredCommandBuffer::CmdVkPipelineBarrier(
     uint32_t image_memory_barrier_count,
     const VkImageMemoryBarrier* image_memory_barriers) {
   size_t arguments_size = sizeof(ArgsVkPipelineBarrier);

-  size_t memory_barriers_offset;
+  size_t memory_barriers_offset = 0;
   if (memory_barrier_count) {
-    static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t));
     arguments_size = xe::align(arguments_size, alignof(VkMemoryBarrier));
     memory_barriers_offset = arguments_size;
     arguments_size += sizeof(VkMemoryBarrier) * memory_barrier_count;
-  } else {
-    memory_barriers_offset = 0;
   }

-  size_t buffer_memory_barriers_offset;
+  size_t buffer_memory_barriers_offset = 0;
   if (buffer_memory_barrier_count) {
-    static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t));
     arguments_size = xe::align(arguments_size, alignof(VkBufferMemoryBarrier));
     buffer_memory_barriers_offset = arguments_size;
     arguments_size +=
         sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count;
-  } else {
-    buffer_memory_barriers_offset = 0;
   }

-  size_t image_memory_barriers_offset;
+  size_t image_memory_barriers_offset = 0;
   if (image_memory_barrier_count) {
-    static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t));
     arguments_size = xe::align(arguments_size, alignof(VkImageMemoryBarrier));
     image_memory_barriers_offset = arguments_size;
     arguments_size += sizeof(VkImageMemoryBarrier) * image_memory_barrier_count;
-  } else {
-    image_memory_barriers_offset = 0;
   }

   uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
       WriteCommand(Command::kVkPipelineBarrier, arguments_size));
   auto& args = *reinterpret_cast<ArgsVkPipelineBarrier*>(args_ptr);
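Throughout this file, each recorded command is a uintmax_t-aligned header plus an Args struct, followed by optional trailing arrays placed at `xe::align(offset, alignof(T))`. The writer and the reader in `Execute` recompute the same offsets independently, which only works if every payload type's alignment fits within the stream's base alignment - exactly what the `static_assert`s (moved into the Args structs by this commit) enforce. A simplified model of the convention (helper names hypothetical):

```cpp
#include <cstddef>
#include <cstdint>

// Round up to a power-of-two alignment, as xe::align does.
constexpr size_t AlignUp(size_t value, size_t alignment) {
  return (value + alignment - 1) & ~(alignment - 1);
}

// Offset of a trailing T[] after an Args struct in the command stream. The
// stream itself is only guaranteed to be aligned to alignof(uintmax_t), so
// any T with stricter alignment could not be stored in place.
template <typename Args, typename T>
constexpr size_t TrailingArrayOffset() {
  static_assert(alignof(T) <= alignof(uintmax_t),
                "payload array would be misaligned in the stream");
  return AlignUp(sizeof(Args), alignof(T));
}
```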
@@ -14,6 +14,7 @@
 #include <cstdint>
 #include <cstring>

+#include "xenia/base/assert.h"
 #include "xenia/base/math.h"
 #include "xenia/ui/vulkan/vulkan_provider.h"

@@ -31,6 +32,65 @@ class DeferredCommandBuffer {
   void Reset();
   void Execute(VkCommandBuffer command_buffer);

+  // render_pass_begin->pNext of all barriers must be null.
+  void CmdVkBeginRenderPass(const VkRenderPassBeginInfo* render_pass_begin,
+                            VkSubpassContents contents) {
+    assert_null(render_pass_begin->pNext);
+    size_t arguments_size = sizeof(ArgsVkBeginRenderPass);
+    uint32_t clear_value_count = render_pass_begin->clearValueCount;
+    size_t clear_values_offset = 0;
+    if (clear_value_count) {
+      arguments_size = xe::align(arguments_size, alignof(VkClearValue));
+      clear_values_offset = arguments_size;
+      arguments_size += sizeof(VkClearValue) * clear_value_count;
+    }
+    uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
+        WriteCommand(Command::kVkBeginRenderPass, arguments_size));
+    auto& args = *reinterpret_cast<ArgsVkBeginRenderPass*>(args_ptr);
+    args.render_pass = render_pass_begin->renderPass;
+    args.framebuffer = render_pass_begin->framebuffer;
+    args.render_area = render_pass_begin->renderArea;
+    args.clear_value_count = clear_value_count;
+    args.contents = contents;
+    if (clear_value_count) {
+      std::memcpy(args_ptr + clear_values_offset,
+                  render_pass_begin->pClearValues,
+                  sizeof(VkClearValue) * clear_value_count);
+    }
+  }
+
+  void CmdVkBindDescriptorSets(VkPipelineBindPoint pipeline_bind_point,
+                               VkPipelineLayout layout, uint32_t first_set,
+                               uint32_t descriptor_set_count,
+                               const VkDescriptorSet* descriptor_sets,
+                               uint32_t dynamic_offset_count,
+                               const uint32_t* dynamic_offsets) {
+    size_t arguments_size =
+        xe::align(sizeof(ArgsVkBindDescriptorSets), alignof(VkDescriptorSet));
+    size_t descriptor_sets_offset = arguments_size;
+    arguments_size += sizeof(VkDescriptorSet) * descriptor_set_count;
+    size_t dynamic_offsets_offset = 0;
+    if (dynamic_offset_count) {
+      arguments_size = xe::align(arguments_size, alignof(uint32_t));
+      dynamic_offsets_offset = arguments_size;
+      arguments_size += sizeof(uint32_t) * dynamic_offset_count;
+    }
+    uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
+        WriteCommand(Command::kVkBindDescriptorSets, arguments_size));
+    auto& args = *reinterpret_cast<ArgsVkBindDescriptorSets*>(args_ptr);
+    args.pipeline_bind_point = pipeline_bind_point;
+    args.layout = layout;
+    args.first_set = first_set;
+    args.descriptor_set_count = descriptor_set_count;
+    args.dynamic_offset_count = dynamic_offset_count;
+    std::memcpy(args_ptr + descriptor_sets_offset, descriptor_sets,
+                sizeof(VkDescriptorSet) * descriptor_set_count);
+    if (dynamic_offset_count) {
+      std::memcpy(args_ptr + dynamic_offsets_offset, dynamic_offsets,
+                  sizeof(uint32_t) * dynamic_offset_count);
+    }
+  }
+
   void CmdVkBindIndexBuffer(VkBuffer buffer, VkDeviceSize offset,
                             VkIndexType index_type) {
     auto& args = *reinterpret_cast<ArgsVkBindIndexBuffer*>(WriteCommand(

@@ -40,9 +100,16 @@ class DeferredCommandBuffer {
     args.index_type = index_type;
   }

+  void CmdVkBindPipeline(VkPipelineBindPoint pipeline_bind_point,
+                         VkPipeline pipeline) {
+    auto& args = *reinterpret_cast<ArgsVkBindPipeline*>(
+        WriteCommand(Command::kVkBindPipeline, sizeof(ArgsVkBindPipeline)));
+    args.pipeline_bind_point = pipeline_bind_point;
+    args.pipeline = pipeline;
+  }
+
   VkBufferCopy* CmdCopyBufferEmplace(VkBuffer src_buffer, VkBuffer dst_buffer,
                                      uint32_t region_count) {
-    static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t));
     const size_t header_size =
         xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy));
     uint8_t* args_ptr = reinterpret_cast<uint8_t*>(

@@ -60,6 +127,30 @@ class DeferredCommandBuffer {
                 regions, sizeof(VkBufferCopy) * region_count);
   }

+  void CmdVkDraw(uint32_t vertex_count, uint32_t instance_count,
+                 uint32_t first_vertex, uint32_t first_instance) {
+    auto& args = *reinterpret_cast<ArgsVkDraw*>(
+        WriteCommand(Command::kVkDraw, sizeof(ArgsVkDraw)));
+    args.vertex_count = vertex_count;
+    args.instance_count = instance_count;
+    args.first_vertex = first_vertex;
+    args.first_instance = first_instance;
+  }
+
+  void CmdVkDrawIndexed(uint32_t index_count, uint32_t instance_count,
+                        uint32_t first_index, int32_t vertex_offset,
+                        uint32_t first_instance) {
+    auto& args = *reinterpret_cast<ArgsVkDrawIndexed*>(
+        WriteCommand(Command::kVkDrawIndexed, sizeof(ArgsVkDrawIndexed)));
+    args.index_count = index_count;
+    args.instance_count = instance_count;
+    args.first_index = first_index;
+    args.vertex_offset = vertex_offset;
+    args.first_instance = first_instance;
+  }
+
+  void CmdVkEndRenderPass() { WriteCommand(Command::kVkEndRenderPass, 0); }
+
   // pNext of all barriers must be null.
   void CmdVkPipelineBarrier(VkPipelineStageFlags src_stage_mask,
                             VkPipelineStageFlags dst_stage_mask,

@@ -71,11 +162,47 @@ class DeferredCommandBuffer {
                             uint32_t image_memory_barrier_count,
                             const VkImageMemoryBarrier* image_memory_barriers);

+  void CmdVkSetScissor(uint32_t first_scissor, uint32_t scissor_count,
+                       const VkRect2D* scissors) {
+    const size_t header_size =
+        xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D));
+    uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
+        WriteCommand(Command::kVkSetScissor,
+                     header_size + sizeof(VkRect2D) * scissor_count));
+    auto& args = *reinterpret_cast<ArgsVkSetScissor*>(args_ptr);
+    args.first_scissor = first_scissor;
+    args.scissor_count = scissor_count;
+    std::memcpy(args_ptr + header_size, scissors,
+                sizeof(VkRect2D) * scissor_count);
+  }
+
+  void CmdVkSetViewport(uint32_t first_viewport, uint32_t viewport_count,
+                        const VkViewport* viewports) {
+    const size_t header_size =
+        xe::align(sizeof(ArgsVkSetViewport), alignof(VkViewport));
+    uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
+        WriteCommand(Command::kVkSetViewport,
+                     header_size + sizeof(VkViewport) * viewport_count));
+    auto& args = *reinterpret_cast<ArgsVkSetViewport*>(args_ptr);
+    args.first_viewport = first_viewport;
+    args.viewport_count = viewport_count;
+    std::memcpy(args_ptr + header_size, viewports,
+                sizeof(VkViewport) * viewport_count);
+  }
+
  private:
   enum class Command {
+    kVkBeginRenderPass,
+    kVkBindDescriptorSets,
     kVkBindIndexBuffer,
+    kVkBindPipeline,
     kVkCopyBuffer,
+    kVkDraw,
+    kVkDrawIndexed,
+    kVkEndRenderPass,
     kVkPipelineBarrier,
+    kVkSetScissor,
+    kVkSetViewport,
   };

   struct CommandHeader {

@@ -85,17 +212,58 @@ class DeferredCommandBuffer {
   static constexpr size_t kCommandHeaderSizeElements =
       (sizeof(CommandHeader) + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);

+  struct ArgsVkBeginRenderPass {
+    VkRenderPass render_pass;
+    VkFramebuffer framebuffer;
+    VkRect2D render_area;
+    uint32_t clear_value_count;
+    VkSubpassContents contents;
+    // Followed by aligned optional VkClearValue[].
+    static_assert(alignof(VkClearValue) <= alignof(uintmax_t));
+  };
+
+  struct ArgsVkBindDescriptorSets {
+    VkPipelineBindPoint pipeline_bind_point;
+    VkPipelineLayout layout;
+    uint32_t first_set;
+    uint32_t descriptor_set_count;
+    uint32_t dynamic_offset_count;
+    // Followed by aligned VkDescriptorSet[], optional uint32_t[].
+    static_assert(alignof(VkDescriptorSet) <= alignof(uintmax_t));
+  };
+
   struct ArgsVkBindIndexBuffer {
     VkBuffer buffer;
     VkDeviceSize offset;
     VkIndexType index_type;
   };

+  struct ArgsVkBindPipeline {
+    VkPipelineBindPoint pipeline_bind_point;
+    VkPipeline pipeline;
+  };
+
   struct ArgsVkCopyBuffer {
     VkBuffer src_buffer;
     VkBuffer dst_buffer;
     uint32_t region_count;
-    // Followed by VkBufferCopy[].
+    // Followed by aligned VkBufferCopy[].
+    static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t));
   };

+  struct ArgsVkDraw {
+    uint32_t vertex_count;
+    uint32_t instance_count;
+    uint32_t first_vertex;
+    uint32_t first_instance;
+  };
+
+  struct ArgsVkDrawIndexed {
+    uint32_t index_count;
+    uint32_t instance_count;
+    uint32_t first_index;
+    int32_t vertex_offset;
+    uint32_t first_instance;
+  };
+
   struct ArgsVkPipelineBarrier {

@@ -105,8 +273,25 @@ class DeferredCommandBuffer {
     uint32_t memory_barrier_count;
     uint32_t buffer_memory_barrier_count;
     uint32_t image_memory_barrier_count;
-    // Followed by aligned VkMemoryBarrier[], VkBufferMemoryBarrier[],
-    // VkImageMemoryBarrier[].
+    // Followed by aligned optional VkMemoryBarrier[],
+    // optional VkBufferMemoryBarrier[], optional VkImageMemoryBarrier[].
+    static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t));
+    static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t));
+    static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t));
   };

+  struct ArgsVkSetScissor {
+    uint32_t first_scissor;
+    uint32_t scissor_count;
+    // Followed by aligned VkRect2D[].
+    static_assert(alignof(VkRect2D) <= alignof(uintmax_t));
+  };
+
+  struct ArgsVkSetViewport {
+    uint32_t first_viewport;
+    uint32_t viewport_count;
+    // Followed by aligned VkViewport[].
+    static_assert(alignof(VkViewport) <= alignof(uintmax_t));
+  };
+
   void* WriteCommand(Command command, size_t arguments_size_bytes);
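Usage-wise, the deferred API mirrors the Vulkan calls one-to-one: commands are recorded now into the byte stream and replayed later on a real VkCommandBuffer by `Execute`. A hypothetical recording sequence (all handles are assumed to be valid Vulkan objects created elsewhere):

```cpp
// Sketch only - illustrates call order, not code from the tree.
void RecordTriangle(DeferredCommandBuffer& cmd, VkPipeline pipeline,
                    VkPipelineLayout layout, VkDescriptorSet set,
                    VkBuffer index_buffer) {
  cmd.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
  cmd.CmdVkBindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, layout,
                              /*first_set=*/0, /*descriptor_set_count=*/1,
                              &set, /*dynamic_offset_count=*/0, nullptr);
  cmd.CmdVkBindIndexBuffer(index_buffer, 0, VK_INDEX_TYPE_UINT16);
  cmd.CmdVkDrawIndexed(/*index_count=*/3, 1, 0, 0, 0);
  // Later, once async pipeline creation has completed:
  // cmd.Execute(real_vk_command_buffer);
}
```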
@@ -9,15 +9,24 @@

 #include "xenia/gpu/vulkan/vulkan_command_processor.h"

+#include <algorithm>
 #include <cstdint>
 #include <cstring>
+#include <iterator>

 #include "xenia/base/assert.h"
 #include "xenia/base/logging.h"
 #include "xenia/base/math.h"
 #include "xenia/base/profiling.h"
+#include "xenia/gpu/gpu_flags.h"
+#include "xenia/gpu/registers.h"
+#include "xenia/gpu/shader.h"
+#include "xenia/gpu/spirv_shader_translator.h"
+#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h"
+#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
+#include "xenia/gpu/vulkan/vulkan_shader.h"
 #include "xenia/gpu/vulkan/vulkan_shared_memory.h"
 #include "xenia/gpu/xenos.h"
 #include "xenia/ui/vulkan/vulkan_context.h"
 #include "xenia/ui/vulkan/vulkan_provider.h"
 #include "xenia/ui/vulkan/vulkan_util.h"

@@ -54,6 +63,16 @@ bool VulkanCommandProcessor::SetupContext() {
   transient_descriptor_pool_uniform_buffers_ =
       std::make_unique<ui::vulkan::TransientDescriptorPool>(
          provider, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 32768, 32768);
+  // 16384 is bigger than any single uniform buffer that Xenia needs, but is the
+  // minimum maxUniformBufferRange, thus the safe minimum amount.
+  VkDeviceSize uniform_buffer_alignment = std::max(
+      provider.device_properties().limits.minUniformBufferOffsetAlignment,
+      VkDeviceSize(1));
+  uniform_buffer_pool_ = std::make_unique<ui::vulkan::VulkanUploadBufferPool>(
+      provider, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
+      xe::align(std::max(ui::GraphicsUploadBufferPool::kDefaultPageSize,
+                         size_t(16384)),
+                size_t(uniform_buffer_alignment)));

   VkDescriptorSetLayoutCreateInfo descriptor_set_layout_create_info;
   descriptor_set_layout_create_info.sType =
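The page size computation above guarantees that every pool page can hold at least one worst-case uniform buffer: 16384 bytes is the smallest `maxUniformBufferRange` a conformant Vulkan implementation may report, suballocation offsets must be multiples of `minUniformBufferOffsetAlignment`, and the `std::max(..., VkDeviceSize(1))` guards against a (non-conformant) zero alignment. The same arithmetic in isolation (function name hypothetical):

```cpp
#include <algorithm>
#include <cstddef>

size_t UniformPoolPageSize(size_t default_page_size, size_t min_ubo_alignment) {
  size_t alignment = std::max(min_ubo_alignment, size_t(1));  // Guard zero.
  // 16384 = guaranteed minimum of maxUniformBufferRange.
  size_t page = std::max(default_page_size, size_t(16384));
  // Round up to the offset alignment (what xe::align does).
  return (page + alignment - 1) / alignment * alignment;
}
```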
@@ -162,6 +181,20 @@ bool VulkanCommandProcessor::SetupContext() {
     return false;
   }

+  render_target_cache_ =
+      std::make_unique<VulkanRenderTargetCache>(*this, *register_file_);
+  if (!render_target_cache_->Initialize()) {
+    XELOGE("Failed to initialize the render target cache");
+    return false;
+  }
+
+  pipeline_cache_ = std::make_unique<VulkanPipelineCache>(
+      *this, *register_file_, *render_target_cache_);
+  if (!pipeline_cache_->Initialize()) {
+    XELOGE("Failed to initialize the graphics pipeline cache");
+    return false;
+  }
+
   // Shared memory and EDRAM common bindings.
   VkDescriptorPoolSize descriptor_pool_sizes[1];
   descriptor_pool_sizes[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;

@@ -229,6 +262,9 @@ bool VulkanCommandProcessor::SetupContext() {
   // interlocks case.
   dfn.vkUpdateDescriptorSets(device, 1, write_descriptor_sets, 0, nullptr);

+  // Just not to expose uninitialized memory.
+  std::memset(&system_constants_, 0, sizeof(system_constants_));
+
   return true;
 }

@@ -244,6 +280,10 @@ void VulkanCommandProcessor::ShutdownContext() {
                                      dfn.vkDestroyDescriptorPool, device,
                                      shared_memory_and_edram_descriptor_pool_);

+  pipeline_cache_.reset();
+
+  render_target_cache_.reset();
+
   shared_memory_.reset();

   for (const auto& pipeline_layout_pair : pipeline_layouts_) {

@@ -276,6 +316,7 @@ void VulkanCommandProcessor::ShutdownContext() {
   ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout,
                                          device, descriptor_set_layout_empty_);

+  uniform_buffer_pool_.reset();
   transient_descriptor_pool_uniform_buffers_.reset();

   sparse_bind_wait_stage_mask_ = 0;

@@ -325,6 +366,42 @@ void VulkanCommandProcessor::ShutdownContext() {
   CommandProcessor::ShutdownContext();
 }

+void VulkanCommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
+  CommandProcessor::WriteRegister(index, value);
+
+  if (index >= XE_GPU_REG_SHADER_CONSTANT_000_X &&
+      index <= XE_GPU_REG_SHADER_CONSTANT_511_W) {
+    if (frame_open_) {
+      uint32_t float_constant_index =
+          (index - XE_GPU_REG_SHADER_CONSTANT_000_X) >> 2;
+      if (float_constant_index >= 256) {
+        float_constant_index -= 256;
+        if (current_float_constant_map_pixel_[float_constant_index >> 6] &
+            (1ull << (float_constant_index & 63))) {
+          current_graphics_descriptor_set_values_up_to_date_ &=
+              ~(uint32_t(1)
+                << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel);
+        }
+      } else {
+        if (current_float_constant_map_vertex_[float_constant_index >> 6] &
+            (1ull << (float_constant_index & 63))) {
+          current_graphics_descriptor_set_values_up_to_date_ &=
+              ~(uint32_t(1)
+                << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex);
+        }
+      }
+    }
+  } else if (index >= XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 &&
+             index <= XE_GPU_REG_SHADER_CONSTANT_LOOP_31) {
+    current_graphics_descriptor_set_values_up_to_date_ &= ~(
+        uint32_t(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants);
+  } else if (index >= XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 &&
+             index <= XE_GPU_REG_SHADER_CONSTANT_FETCH_31_5) {
+    current_graphics_descriptor_set_values_up_to_date_ &=
+        ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants);
+  }
+}
+
 void VulkanCommandProcessor::SparseBindBuffer(
     VkBuffer buffer, uint32_t bind_count, const VkSparseMemoryBind* binds,
     VkPipelineStageFlags wait_stage_mask) {
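The `WriteRegister` override is the dirty-tracking half of the constant upload path: each float constant register covers one vec4 (4 dwords, hence the `>> 2`), constants 0-255 feed the vertex shader and 256-511 the pixel shader, and a write only invalidates the corresponding descriptor set if the currently bound shader actually reads that constant (checked against the current 256-bit usage map). The decision in isolation (a sketch, names illustrative):

```cpp
#include <cstdint>

// Returns true if a write to the given float-constant register dword should
// dirty the shader's float constant buffer; is_pixel_out tells which stage.
bool FloatConstantWriteDirties(uint32_t register_dword_offset,
                               const uint64_t used_constants[4],
                               bool& is_pixel_out) {
  uint32_t constant_index = register_dword_offset >> 2;  // dword -> vec4.
  is_pixel_out = constant_index >= 256;
  if (is_pixel_out) {
    constant_index -= 256;
  }
  return (used_constants[constant_index >> 6] &
          (uint64_t(1) << (constant_index & 63))) != 0;
}
```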
@@ -356,17 +433,25 @@ void VulkanCommandProcessor::PerformSwap(uint32_t frontbuffer_ptr,
     EndSubmission(true);
 }

-bool VulkanCommandProcessor::GetPipelineLayout(
-    uint32_t texture_count_pixel, uint32_t texture_count_vertex,
-    PipelineLayout& pipeline_layout_out) {
+void VulkanCommandProcessor::EndRenderPass() {
+  assert_true(submission_open_);
+  if (current_render_pass_ == VK_NULL_HANDLE) {
+    return;
+  }
+  deferred_command_buffer_.CmdVkEndRenderPass();
+  current_render_pass_ = VK_NULL_HANDLE;
+}
+
+const VulkanPipelineCache::PipelineLayoutProvider*
+VulkanCommandProcessor::GetPipelineLayout(uint32_t texture_count_pixel,
+                                          uint32_t texture_count_vertex) {
   PipelineLayoutKey pipeline_layout_key;
   pipeline_layout_key.texture_count_pixel = texture_count_pixel;
   pipeline_layout_key.texture_count_vertex = texture_count_vertex;
   {
     auto it = pipeline_layouts_.find(pipeline_layout_key.key);
     if (it != pipeline_layouts_.end()) {
-      pipeline_layout_out = it->second;
-      return true;
+      return &it->second;
     }
   }

@@ -462,26 +547,28 @@ bool VulkanCommandProcessor::GetPipelineLayout(

   VkDescriptorSetLayout
       descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetCount];
-  descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetFetchConstants] =
-      descriptor_set_layout_fetch_bool_loop_constants_;
-  descriptor_set_layouts
-      [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] =
-          descriptor_set_layout_float_constants_vertex_;
-  descriptor_set_layouts
-      [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] =
-          descriptor_set_layout_float_constants_pixel_;
-  descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesPixel] =
-      descriptor_set_layout_textures_pixel;
-  descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesVertex] =
-      descriptor_set_layout_textures_vertex;
-  descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSystemConstants] =
-      descriptor_set_layout_system_constants_;
-  descriptor_set_layouts
-      [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] =
-          descriptor_set_layout_fetch_bool_loop_constants_;
+  // Immutable layouts.
   descriptor_set_layouts
       [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] =
           descriptor_set_layout_shared_memory_and_edram_;
+  descriptor_set_layouts
+      [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] =
+          descriptor_set_layout_fetch_bool_loop_constants_;
+  descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetSystemConstants] =
+      descriptor_set_layout_system_constants_;
+  descriptor_set_layouts
+      [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] =
+          descriptor_set_layout_float_constants_pixel_;
+  descriptor_set_layouts
+      [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] =
+          descriptor_set_layout_float_constants_vertex_;
+  descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetFetchConstants] =
+      descriptor_set_layout_fetch_bool_loop_constants_;
+  // Mutable layouts.
+  descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesVertex] =
+      descriptor_set_layout_textures_vertex;
+  descriptor_set_layouts[SpirvShaderTranslator::kDescriptorSetTexturesPixel] =
+      descriptor_set_layout_textures_pixel;

   VkPipelineLayoutCreateInfo pipeline_layout_create_info;
   pipeline_layout_create_info.sType =

@@ -508,16 +595,18 @@ bool VulkanCommandProcessor::GetPipelineLayout(
          descriptor_set_layout_textures_pixel;
   pipeline_layout_entry.descriptor_set_layout_textures_vertex_ref =
       descriptor_set_layout_textures_vertex;
-  pipeline_layouts_.emplace(pipeline_layout_key.key, pipeline_layout_entry);
-  pipeline_layout_out = pipeline_layout_entry;
-  return true;
+  auto emplaced_pair =
+      pipeline_layouts_.emplace(pipeline_layout_key.key, pipeline_layout_entry);
+  // unordered_map insertion doesn't invalidate element references.
+  return &emplaced_pair.first->second;
 }

 Shader* VulkanCommandProcessor::LoadShader(xenos::ShaderType shader_type,
                                            uint32_t guest_address,
                                            const uint32_t* host_address,
                                            uint32_t dword_count) {
-  return nullptr;
+  return pipeline_cache_->LoadShader(shader_type, guest_address, host_address,
+                                     dword_count);
 }

 bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
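Returning a pointer into `pipeline_layouts_` is safe because, per the C++ standard's unordered container requirements, rehashing invalidates iterators but never pointers or references to the elements themselves (nodes are individually allocated); the comment in the hunk above relies on exactly that. A small illustration of the guarantee (types simplified):

```cpp
#include <cstdint>
#include <unordered_map>

struct PipelineLayoutEntry { /* ... */ };

const PipelineLayoutEntry* GetOrCreate(
    std::unordered_map<uint64_t, PipelineLayoutEntry>& layouts, uint64_t key) {
  auto it = layouts.find(key);
  if (it != layouts.end()) {
    return &it->second;
  }
  auto emplaced = layouts.emplace(key, PipelineLayoutEntry{});
  // Rehashing may invalidate iterators, but pointers/references to mapped
  // values stay valid until erasure, so callers may hold this pointer across
  // later insertions.
  return &emplaced.first->second;
}
```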
@ -530,9 +619,135 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
|
|||
|
||||
BeginSubmission(true);
|
||||
|
||||
auto vertex_shader = static_cast<VulkanShader*>(active_vertex_shader());
|
||||
if (!vertex_shader) {
|
||||
// Always need a vertex shader.
|
||||
return false;
|
||||
}
|
||||
// TODO(Triang3l): Get a pixel shader.
|
||||
VulkanShader* pixel_shader = nullptr;
|
||||
|
||||
VulkanRenderTargetCache::FramebufferKey framebuffer_key;
|
||||
if (!render_target_cache_->UpdateRenderTargets(framebuffer_key)) {
|
||||
return false;
|
||||
}
|
||||
VkFramebuffer framebuffer =
|
||||
render_target_cache_->GetFramebuffer(framebuffer_key);
|
||||
if (framebuffer == VK_NULL_HANDLE) {
|
||||
return false;
|
||||
}
|
||||
VkRenderPass render_pass =
|
||||
render_target_cache_->GetRenderPass(framebuffer_key.render_pass_key);
|
||||
if (render_pass == VK_NULL_HANDLE) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Update the graphics pipeline, and if the new graphics pipeline has a
|
||||
// different layout, invalidate incompatible descriptor sets before updating
|
||||
// current_graphics_pipeline_layout_.
|
||||
VkPipeline pipeline;
|
||||
const VulkanPipelineCache::PipelineLayoutProvider* pipeline_layout_provider;
|
||||
if (!pipeline_cache_->ConfigurePipeline(vertex_shader, pixel_shader,
|
||||
framebuffer_key.render_pass_key,
|
||||
pipeline, pipeline_layout_provider)) {
|
||||
return false;
|
||||
}
|
||||
deferred_command_buffer_.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
pipeline);
|
||||
auto pipeline_layout =
|
||||
static_cast<const PipelineLayout*>(pipeline_layout_provider);
|
||||
  if (current_graphics_pipeline_layout_ != pipeline_layout) {
    if (current_graphics_pipeline_layout_) {
      // Keep descriptor set layouts for which the new pipeline layout is
      // compatible with the previous one (pipeline layouts are compatible for
      // set N if set layouts 0 through N are compatible).
      uint32_t descriptor_sets_kept =
          uint32_t(SpirvShaderTranslator::kDescriptorSetCount);
      if (current_graphics_pipeline_layout_
              ->descriptor_set_layout_textures_vertex_ref !=
          pipeline_layout->descriptor_set_layout_textures_vertex_ref) {
        descriptor_sets_kept = std::min(
            descriptor_sets_kept,
            uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesVertex));
      }
      if (current_graphics_pipeline_layout_
              ->descriptor_set_layout_textures_pixel_ref !=
          pipeline_layout->descriptor_set_layout_textures_pixel_ref) {
        descriptor_sets_kept = std::min(
            descriptor_sets_kept,
            uint32_t(SpirvShaderTranslator::kDescriptorSetTexturesPixel));
      }
      // Drop the "bound" bits above the kept prefix so those sets are rebound
      // with the new layout.
      current_graphics_descriptor_sets_bound_up_to_date_ &=
          (uint32_t(1) << descriptor_sets_kept) - 1;
    } else {
      // No or unknown pipeline layout previously bound - all bindings are in
      // an indeterminate state.
      current_graphics_descriptor_sets_bound_up_to_date_ = 0;
    }
    current_graphics_pipeline_layout_ = pipeline_layout;
  }

  // Update fixed-function dynamic state.
  UpdateFixedFunctionState();

  bool indexed = index_buffer_info != nullptr && index_buffer_info->guest_base;

  // Actually draw.
  // Update system constants before uploading them.
  UpdateSystemConstantValues(indexed ? index_buffer_info->endianness
                                     : xenos::Endian::kNone);

  // Update uniform buffers and descriptor sets after binding the pipeline with
  // the new layout.
  if (!UpdateBindings(vertex_shader, pixel_shader)) {
    return false;
  }

  const RegisterFile& regs = *register_file_;

  // Ensure vertex buffers are resident.
  // TODO(Triang3l): Cache residency for ranges in a way similar to how texture
  // validity is tracked.
  uint64_t vertex_buffers_resident[2] = {};
  for (const Shader::VertexBinding& vertex_binding :
       vertex_shader->vertex_bindings()) {
    uint32_t vfetch_index = vertex_binding.fetch_constant;
    if (vertex_buffers_resident[vfetch_index >> 6] &
        (uint64_t(1) << (vfetch_index & 63))) {
      continue;
    }
    const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
        XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2);
    switch (vfetch_constant.type) {
      case xenos::FetchConstantType::kVertex:
        break;
      case xenos::FetchConstantType::kInvalidVertex:
        if (cvars::gpu_allow_invalid_fetch_constants) {
          break;
        }
        XELOGW(
            "Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! "
            "This is incorrect behavior, but you can try bypassing this by "
            "launching Xenia with --gpu_allow_invalid_fetch_constants=true.",
            vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
        return false;
      default:
        XELOGW(
            "Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!",
            vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
        return false;
    }
    if (!shared_memory_->RequestRange(vfetch_constant.address << 2,
                                      vfetch_constant.size << 2)) {
      XELOGE(
          "Failed to request vertex buffer at 0x{:08X} (size {}) in the shared "
          "memory",
          vfetch_constant.address << 2, vfetch_constant.size << 2);
      return false;
    }
    vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1)
                                                  << (vfetch_index & 63);
  }

  // Set up the geometry.
  if (indexed) {
    uint32_t index_size =
        index_buffer_info->format == xenos::IndexFormat::kInt32
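A note on the set-compatibility logic at the top of this hunk: the Vulkan specification defines two pipeline layouts as compatible for descriptor set N when they were created with identical set layouts for sets 0 through N (and matching push constant ranges), so the first mismatching set layout invalidates itself and everything bound above it. Only the two texture set layouts are compared here because all lower, constant-related sets use layouts shared by every pipeline layout in this backend. A minimal standalone sketch of that prefix rule, with hypothetical names:

#include <cstdint>
#include <vulkan/vulkan.h>

uint32_t CountCompatibleSetPrefix(const VkDescriptorSetLayout* old_layouts,
                                  const VkDescriptorSetLayout* new_layouts,
                                  uint32_t set_count) {
  uint32_t kept = 0;
  // Sets before the first mismatch remain validly bound; the mismatching set
  // and everything after it must be rebound.
  while (kept < set_count && old_layouts[kept] == new_layouts[kept]) {
    ++kept;
  }
  return kept;
}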
@ -557,6 +772,37 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
  }
  shared_memory_->Use(VulkanSharedMemory::Usage::kRead);

  // After all commands that may dispatch or copy, enter the render pass before
  // drawing.
  if (current_render_pass_ != render_pass ||
      current_framebuffer_ != framebuffer) {
    if (current_render_pass_ != VK_NULL_HANDLE) {
      deferred_command_buffer_.CmdVkEndRenderPass();
    }
    current_render_pass_ = render_pass;
    current_framebuffer_ = framebuffer;
    VkRenderPassBeginInfo render_pass_begin_info;
    render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
    render_pass_begin_info.pNext = nullptr;
    render_pass_begin_info.renderPass = render_pass;
    render_pass_begin_info.framebuffer = framebuffer;
    render_pass_begin_info.renderArea.offset.x = 0;
    render_pass_begin_info.renderArea.offset.y = 0;
    render_pass_begin_info.renderArea.extent.width = 1280;
    render_pass_begin_info.renderArea.extent.height = 720;
    render_pass_begin_info.clearValueCount = 0;
    render_pass_begin_info.pClearValues = nullptr;
    deferred_command_buffer_.CmdVkBeginRenderPass(&render_pass_begin_info,
                                                  VK_SUBPASS_CONTENTS_INLINE);
  }

  // Draw.
  if (indexed) {
    deferred_command_buffer_.CmdVkDrawIndexed(index_count, 1, 0, 0, 0);
  } else {
    deferred_command_buffer_.CmdVkDraw(index_count, 1, 0, 0);
  }

  return true;
}
@ -659,9 +905,6 @@ void VulkanCommandProcessor::CheckSubmissionFence(uint64_t await_submission) {
    command_buffers_submitted_.pop_front();
  }

  // Reclaim descriptor pools.
  transient_descriptor_pool_uniform_buffers_->Reclaim(submission_completed_);

  shared_memory_->CompletedSubmissionUpdated();
}
@ -705,13 +948,41 @@ void VulkanCommandProcessor::BeginSubmission(bool is_guest_command) {
    submission_open_ = true;

    // Start a new deferred command buffer - will submit it to the real one in
    // the end of the submission (when async pipeline state object creation
    // requests are fulfilled).
    // the end of the submission (when async pipeline object creation requests
    // are fulfilled).
    deferred_command_buffer_.Reset();

    // Reset cached state of the command buffer.
    ff_viewport_update_needed_ = true;
    ff_scissor_update_needed_ = true;
    current_render_pass_ = VK_NULL_HANDLE;
    current_framebuffer_ = VK_NULL_HANDLE;
    current_graphics_pipeline_ = VK_NULL_HANDLE;
    current_graphics_pipeline_layout_ = nullptr;
    current_graphics_descriptor_sets_bound_up_to_date_ = 0;
  }

  if (is_opening_frame) {
    frame_open_ = true;

    // Reset bindings that depend on transient data.
    std::memset(current_float_constant_map_vertex_, 0,
                sizeof(current_float_constant_map_vertex_));
    std::memset(current_float_constant_map_pixel_, 0,
                sizeof(current_float_constant_map_pixel_));
    std::memset(current_graphics_descriptor_sets_, 0,
                sizeof(current_graphics_descriptor_sets_));
    current_graphics_descriptor_sets_
        [SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram] =
            shared_memory_and_edram_descriptor_set_;
    current_graphics_descriptor_set_values_up_to_date_ =
        uint32_t(1)
        << SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram;

    // Reclaim pool pages - no need to do this every small submission since some
    // may be reused.
    transient_descriptor_pool_uniform_buffers_->Reclaim(frame_completed_);
    uniform_buffer_pool_->Reclaim(frame_completed_);
  }
}
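The frame-open path above seeds the value-validity mask with only the shared memory and EDRAM bit: that descriptor set is allocated once during setup rather than from per-frame transient pools, so it is the only binding that survives the transient-data reset. The relationship between the two masks used throughout this file, as a standalone sketch with illustrative names (the emulator enforces the subset property lazily, in UpdateBindings):

#include <cassert>
#include <cstdint>

struct DescriptorSetValidity {
  // Bit i set: descriptor set i points at current data.
  uint32_t values_up_to_date = 0;
  // Bit i set: descriptor set i is also bound on the command buffer.
  uint32_t bound_up_to_date = 0;

  void InvalidateValues(uint32_t bits) {
    values_up_to_date &= ~bits;
    // Anything whose value went stale must be rewritten and rebound.
    bound_up_to_date &= values_up_to_date;
  }
  void CheckInvariant() const {
    // Bound sets must be a subset of sets with up-to-date values.
    assert((bound_up_to_date & ~values_up_to_date) == 0);
  }
};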
@ -784,8 +1055,12 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
  bool is_closing_frame = is_swap && frame_open_;

  if (submission_open_) {
    EndRenderPass();

    shared_memory_->EndSubmission();

    uniform_buffer_pool_->FlushWrites();

    // Submit sparse binds earlier, before executing the deferred command
    // buffer, to reduce latency.
    if (!sparse_memory_binds_.empty()) {
@ -910,13 +1185,30 @@ bool VulkanCommandProcessor::EndSubmission(bool is_swap) {
  if (cache_clear_requested_ && AwaitAllQueueOperationsCompletion()) {
    cache_clear_requested_ = false;

    transient_descriptor_pool_uniform_buffers_->ClearCache();

    assert_true(command_buffers_submitted_.empty());
    for (const CommandBuffer& command_buffer : command_buffers_writable_) {
      dfn.vkDestroyCommandPool(device, command_buffer.pool, nullptr);
    }
    command_buffers_writable_.clear();

    uniform_buffer_pool_->ClearCache();
    transient_descriptor_pool_uniform_buffers_->ClearCache();

    pipeline_cache_->ClearCache();

    render_target_cache_->ClearCache();

    for (const auto& pipeline_layout_pair : pipeline_layouts_) {
      dfn.vkDestroyPipelineLayout(
          device, pipeline_layout_pair.second.pipeline_layout, nullptr);
    }
    pipeline_layouts_.clear();
    for (const auto& descriptor_set_layout_pair :
         descriptor_set_layouts_textures_) {
      dfn.vkDestroyDescriptorSetLayout(
          device, descriptor_set_layout_pair.second, nullptr);
    }
    descriptor_set_layouts_textures_.clear();
  }
}
@ -936,6 +1228,441 @@ VkShaderStageFlags VulkanCommandProcessor::GetGuestVertexShaderStageFlags()
  return stages;
}

void VulkanCommandProcessor::UpdateFixedFunctionState() {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES

  const RegisterFile& regs = *register_file_;

  // Window parameters.
  // http://ftp.tku.edu.tw/NetBSD/NetBSD-current/xsrc/external/mit/xf86-video-ati/dist/src/r600_reg_auto_r6xx.h
  // See r200UpdateWindow:
  // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
  auto pa_sc_window_offset = regs.Get<reg::PA_SC_WINDOW_OFFSET>();

  uint32_t pixel_size_x = 1, pixel_size_y = 1;

  // Viewport.
  // PA_CL_VTE_CNTL contains whether offsets and scales are enabled.
  // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
  // In games, either all are enabled (for regular drawing) or none are (for
  // rectangle lists usually).
  //
  // If scale/offset is enabled, the Xenos shader is writing (neglecting W
  // division) position in the NDC (-1, -1, dx_clip_space_def - 1) -> (1, 1, 1)
  // box. If it's not, the position is in screen space. Since we can only use
  // the NDC in PC APIs, we use a viewport of the largest possible size, and
  // divide the position by it in translated shaders.
  //
  // TODO(Triang3l): Move all of this to draw_util.
  // TODO(Triang3l): Limit the viewport if exceeding the device limit; move to
  // NDC scale/offset constants.
  auto pa_cl_vte_cntl = regs.Get<reg::PA_CL_VTE_CNTL>();
  float viewport_scale_x =
      pa_cl_vte_cntl.vport_x_scale_ena
          ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_XSCALE].f32)
          : 4096.0f;
  float viewport_scale_y =
      pa_cl_vte_cntl.vport_y_scale_ena
          ? std::abs(regs[XE_GPU_REG_PA_CL_VPORT_YSCALE].f32)
          : 4096.0f;
  float viewport_scale_z = pa_cl_vte_cntl.vport_z_scale_ena
                               ? regs[XE_GPU_REG_PA_CL_VPORT_ZSCALE].f32
                               : 1.0f;
  float viewport_offset_x = pa_cl_vte_cntl.vport_x_offset_ena
                                ? regs[XE_GPU_REG_PA_CL_VPORT_XOFFSET].f32
                                : std::abs(viewport_scale_x);
  float viewport_offset_y = pa_cl_vte_cntl.vport_y_offset_ena
                                ? regs[XE_GPU_REG_PA_CL_VPORT_YOFFSET].f32
                                : std::abs(viewport_scale_y);
  float viewport_offset_z = pa_cl_vte_cntl.vport_z_offset_ena
                                ? regs[XE_GPU_REG_PA_CL_VPORT_ZOFFSET].f32
                                : 0.0f;
  if (regs.Get<reg::PA_SU_SC_MODE_CNTL>().vtx_window_offset_enable) {
    viewport_offset_x += float(pa_sc_window_offset.window_x_offset);
    viewport_offset_y += float(pa_sc_window_offset.window_y_offset);
  }
  VkViewport viewport;
  viewport.x = (viewport_offset_x - viewport_scale_x) * float(pixel_size_x);
  viewport.y = (viewport_offset_y - viewport_scale_y) * float(pixel_size_y);
  viewport.width = viewport_scale_x * 2.0f * float(pixel_size_x);
  viewport.height = viewport_scale_y * 2.0f * float(pixel_size_y);
  viewport.minDepth = std::min(std::max(viewport_offset_z, 0.0f), 1.0f);
  viewport.maxDepth =
      std::min(std::max(viewport_offset_z + viewport_scale_z, 0.0f), 1.0f);
  ff_viewport_update_needed_ |= ff_viewport_.x != viewport.x;
  ff_viewport_update_needed_ |= ff_viewport_.y != viewport.y;
  ff_viewport_update_needed_ |= ff_viewport_.width != viewport.width;
  ff_viewport_update_needed_ |= ff_viewport_.height != viewport.height;
  ff_viewport_update_needed_ |= ff_viewport_.minDepth != viewport.minDepth;
  ff_viewport_update_needed_ |= ff_viewport_.maxDepth != viewport.maxDepth;
  if (ff_viewport_update_needed_) {
    ff_viewport_ = viewport;
    deferred_command_buffer_.CmdVkSetViewport(0, 1, &viewport);
    ff_viewport_update_needed_ = false;
  }

  // Scissor.
  // TODO(Triang3l): Move all of this to draw_util.
  // TODO(Triang3l): Limit the scissor if exceeding the device limit.
  auto pa_sc_window_scissor_tl = regs.Get<reg::PA_SC_WINDOW_SCISSOR_TL>();
  auto pa_sc_window_scissor_br = regs.Get<reg::PA_SC_WINDOW_SCISSOR_BR>();
  VkRect2D scissor;
  scissor.offset.x = int32_t(pa_sc_window_scissor_tl.tl_x);
  scissor.offset.y = int32_t(pa_sc_window_scissor_tl.tl_y);
  int32_t scissor_br_x =
      std::max(int32_t(pa_sc_window_scissor_br.br_x), scissor.offset.x);
  int32_t scissor_br_y =
      std::max(int32_t(pa_sc_window_scissor_br.br_y), scissor.offset.y);
  if (!pa_sc_window_scissor_tl.window_offset_disable) {
    scissor.offset.x = std::max(
        scissor.offset.x + pa_sc_window_offset.window_x_offset, int32_t(0));
    scissor.offset.y = std::max(
        scissor.offset.y + pa_sc_window_offset.window_y_offset, int32_t(0));
    scissor_br_x = std::max(scissor_br_x + pa_sc_window_offset.window_x_offset,
                            int32_t(0));
    scissor_br_y = std::max(scissor_br_y + pa_sc_window_offset.window_y_offset,
                            int32_t(0));
  }
  scissor.extent.width = uint32_t(scissor_br_x - scissor.offset.x);
  scissor.extent.height = uint32_t(scissor_br_y - scissor.offset.y);
  scissor.offset.x *= pixel_size_x;
  scissor.offset.y *= pixel_size_y;
  scissor.extent.width *= pixel_size_x;
  scissor.extent.height *= pixel_size_y;
  ff_scissor_update_needed_ |= ff_scissor_.offset.x != scissor.offset.x;
  ff_scissor_update_needed_ |= ff_scissor_.offset.y != scissor.offset.y;
  ff_scissor_update_needed_ |= ff_scissor_.extent.width != scissor.extent.width;
  ff_scissor_update_needed_ |=
      ff_scissor_.extent.height != scissor.extent.height;
  if (ff_scissor_update_needed_) {
    ff_scissor_ = scissor;
    deferred_command_buffer_.CmdVkSetScissor(0, 1, &scissor);
    ff_scissor_update_needed_ = false;
  }
}
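A worked example of the fallback viewport path above (not emulator code): with scale/offset disabled, the scale defaults to 4096 and the offset to |scale|, so the host viewport spans x in [0, 8192]. The standard viewport transform is x_window = (x + width / 2) + (width / 2) * x_ndc = 4096 + 4096 * x_ndc, so a translated shader that wants to emit the screen-space coordinate p must write x_ndc = p / 4096 - 1 - exactly the "divide the position by it in translated shaders" described in the comment.

float GuestScreenToNdc(float p, float viewport_half_size = 4096.0f) {
  // Maps a guest screen-space coordinate into the NDC of the oversized
  // host viewport.
  return p / viewport_half_size - 1.0f;
}

// 640 pixels from the left edge: GuestScreenToNdc(640.0f) == -0.84375f,
// which the 8192-wide host viewport maps back to
// x_window = 4096 + 4096 * -0.84375 = 640.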
void VulkanCommandProcessor::UpdateSystemConstantValues(
    xenos::Endian index_endian) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES

  const RegisterFile& regs = *register_file_;
  int32_t vgt_indx_offset = int32_t(regs[XE_GPU_REG_VGT_INDX_OFFSET].u32);

  bool dirty = false;

  // Index or tessellation edge factor buffer endianness.
  dirty |= system_constants_.vertex_index_endian != index_endian;
  system_constants_.vertex_index_endian = index_endian;

  // Vertex index offset.
  dirty |= system_constants_.vertex_base_index != vgt_indx_offset;
  system_constants_.vertex_base_index = vgt_indx_offset;

  if (dirty) {
    current_graphics_descriptor_set_values_up_to_date_ &=
        ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants);
  }
}
bool VulkanCommandProcessor::UpdateBindings(const VulkanShader* vertex_shader,
                                            const VulkanShader* pixel_shader) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES

  const RegisterFile& regs = *register_file_;

  // Invalidate descriptors for changed data.
  // These are the constant base addresses/ranges for shaders.
  // We have these hardcoded right now because nothing seems to differ on the
  // Xbox 360 (however, OpenGL ES on Adreno 200 on Android has different
  // ranges).
  assert_true(regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x000FF000 ||
              regs[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
  assert_true(regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x000FF100 ||
              regs[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
  // Check if the float constant layout is still the same and get the counts.
  const Shader::ConstantRegisterMap& float_constant_map_vertex =
      vertex_shader->constant_register_map();
  uint32_t float_constant_count_vertex = float_constant_map_vertex.float_count;
  for (uint32_t i = 0; i < 4; ++i) {
    if (current_float_constant_map_vertex_[i] !=
        float_constant_map_vertex.float_bitmap[i]) {
      current_float_constant_map_vertex_[i] =
          float_constant_map_vertex.float_bitmap[i];
      // If no float constants at all, any buffer can be reused for them, so not
      // invalidating.
      if (float_constant_count_vertex) {
        current_graphics_descriptor_set_values_up_to_date_ &=
            ~(uint32_t(1)
              << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex);
      }
    }
  }
  uint32_t float_constant_count_pixel = 0;
  if (pixel_shader != nullptr) {
    const Shader::ConstantRegisterMap& float_constant_map_pixel =
        pixel_shader->constant_register_map();
    float_constant_count_pixel = float_constant_map_pixel.float_count;
    for (uint32_t i = 0; i < 4; ++i) {
      if (current_float_constant_map_pixel_[i] !=
          float_constant_map_pixel.float_bitmap[i]) {
        current_float_constant_map_pixel_[i] =
            float_constant_map_pixel.float_bitmap[i];
        if (float_constant_count_pixel) {
          current_graphics_descriptor_set_values_up_to_date_ &=
              ~(uint32_t(1)
                << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel);
        }
      }
    }
  } else {
    std::memset(current_float_constant_map_pixel_, 0,
                sizeof(current_float_constant_map_pixel_));
  }
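The bitmap comparisons above pair with the packed upload loops later in this function: only registers whose bit is set in float_bitmap are copied into the uniform buffer, back to back, so the buffer holds exactly float_count vec4s and the translated shader uses the same bitmap to find each register's packed slot. A standalone sketch of the packing, with illustrative names and GCC/Clang's __builtin_ctzll standing in for xe::bit_scan_forward:

#include <cstdint>
#include <cstring>

// Packs the used vec4 registers from |all_constants| (one 64-register window
// per bitmap word) into |out|, returning the number of vec4s written.
uint32_t PackUsedVec4s(const float (*all_constants)[4], uint64_t usage_bitmap,
                       float (*out)[4]) {
  uint32_t written = 0;
  while (usage_bitmap) {
    // Index of the lowest set bit.
    uint32_t index = uint32_t(__builtin_ctzll(usage_bitmap));
    usage_bitmap &= usage_bitmap - 1;  // Clear the lowest set bit.
    std::memcpy(out[written++], all_constants[index], sizeof(float) * 4);
  }
  return written;
}

// With usage_bitmap = 0b1011, registers c0, c1 and c3 are uploaded as three
// consecutive vec4s, and c3 lands at packed slot 2.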
  // Make sure new descriptor sets are bound to the command buffer.
  current_graphics_descriptor_sets_bound_up_to_date_ &=
      current_graphics_descriptor_set_values_up_to_date_;

  // Write the new descriptor sets.
  VkWriteDescriptorSet
      write_descriptor_sets[SpirvShaderTranslator::kDescriptorSetCount];
  uint32_t write_descriptor_set_count = 0;
  uint32_t write_descriptor_set_bits = 0;
  assert_not_zero(
      current_graphics_descriptor_set_values_up_to_date_ &
      (uint32_t(1)
       << SpirvShaderTranslator::kDescriptorSetSharedMemoryAndEdram));
  VkDescriptorBufferInfo buffer_info_bool_loop_constants;
  if (!(current_graphics_descriptor_set_values_up_to_date_ &
        (uint32_t(1)
         << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants))) {
    VkWriteDescriptorSet& write_bool_loop_constants =
        write_descriptor_sets[write_descriptor_set_count++];
    constexpr size_t kBoolLoopConstantsSize = sizeof(uint32_t) * (8 + 32);
    uint8_t* mapping_bool_loop_constants = WriteUniformBufferBinding(
        kBoolLoopConstantsSize,
        descriptor_set_layout_fetch_bool_loop_constants_,
        buffer_info_bool_loop_constants, write_bool_loop_constants);
    if (!mapping_bool_loop_constants) {
      return false;
    }
    std::memcpy(mapping_bool_loop_constants,
                &regs[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
                kBoolLoopConstantsSize);
    write_descriptor_set_bits |=
        uint32_t(1) << SpirvShaderTranslator::kDescriptorSetBoolLoopConstants;
    current_graphics_descriptor_sets_
        [SpirvShaderTranslator::kDescriptorSetBoolLoopConstants] =
            write_bool_loop_constants.dstSet;
  }
  VkDescriptorBufferInfo buffer_info_system_constants;
  if (!(current_graphics_descriptor_set_values_up_to_date_ &
        (uint32_t(1)
         << SpirvShaderTranslator::kDescriptorSetSystemConstants))) {
    VkWriteDescriptorSet& write_system_constants =
        write_descriptor_sets[write_descriptor_set_count++];
    uint8_t* mapping_system_constants = WriteUniformBufferBinding(
        sizeof(SpirvShaderTranslator::SystemConstants),
        descriptor_set_layout_system_constants_, buffer_info_system_constants,
        write_system_constants);
    if (!mapping_system_constants) {
      return false;
    }
    std::memcpy(mapping_system_constants, &system_constants_,
                sizeof(SpirvShaderTranslator::SystemConstants));
    write_descriptor_set_bits |=
        uint32_t(1) << SpirvShaderTranslator::kDescriptorSetSystemConstants;
    current_graphics_descriptor_sets_
        [SpirvShaderTranslator::kDescriptorSetSystemConstants] =
            write_system_constants.dstSet;
  }
  VkDescriptorBufferInfo buffer_info_float_constant_pixel;
  if (!(current_graphics_descriptor_set_values_up_to_date_ &
        (uint32_t(1)
         << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel))) {
    // Even if the shader doesn't need any float constants, a valid binding must
    // still be provided (the pipeline layout always has float constants, for
    // both the vertex shader and the pixel shader), so if the first draw in the
    // frame doesn't have float constants at all, still allocate an empty
    // buffer.
    VkWriteDescriptorSet& write_float_constants_pixel =
        write_descriptor_sets[write_descriptor_set_count++];
    uint8_t* mapping_float_constants_pixel = WriteUniformBufferBinding(
        sizeof(float) * 4 * std::max(float_constant_count_pixel, uint32_t(1)),
        descriptor_set_layout_float_constants_pixel_,
        buffer_info_float_constant_pixel, write_float_constants_pixel);
    if (!mapping_float_constants_pixel) {
      return false;
    }
    for (uint32_t i = 0; i < 4; ++i) {
      uint64_t float_constant_map_entry = current_float_constant_map_pixel_[i];
      uint32_t float_constant_index;
      while (xe::bit_scan_forward(float_constant_map_entry,
                                  &float_constant_index)) {
        float_constant_map_entry &= ~(1ull << float_constant_index);
        std::memcpy(mapping_float_constants_pixel,
                    &regs[XE_GPU_REG_SHADER_CONSTANT_256_X + (i << 8) +
                          (float_constant_index << 2)]
                         .f32,
                    sizeof(float) * 4);
        mapping_float_constants_pixel += sizeof(float) * 4;
      }
    }
    write_descriptor_set_bits |=
        uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel;
    current_graphics_descriptor_sets_
        [SpirvShaderTranslator::kDescriptorSetFloatConstantsPixel] =
            write_float_constants_pixel.dstSet;
  }
  VkDescriptorBufferInfo buffer_info_float_constant_vertex;
  if (!(current_graphics_descriptor_set_values_up_to_date_ &
        (uint32_t(1)
         << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex))) {
    VkWriteDescriptorSet& write_float_constants_vertex =
        write_descriptor_sets[write_descriptor_set_count++];
    uint8_t* mapping_float_constants_vertex = WriteUniformBufferBinding(
        sizeof(float) * 4 * std::max(float_constant_count_vertex, uint32_t(1)),
        descriptor_set_layout_float_constants_vertex_,
        buffer_info_float_constant_vertex, write_float_constants_vertex);
    if (!mapping_float_constants_vertex) {
      return false;
    }
    for (uint32_t i = 0; i < 4; ++i) {
      uint64_t float_constant_map_entry = current_float_constant_map_vertex_[i];
      uint32_t float_constant_index;
      while (xe::bit_scan_forward(float_constant_map_entry,
                                  &float_constant_index)) {
        float_constant_map_entry &= ~(1ull << float_constant_index);
        std::memcpy(mapping_float_constants_vertex,
                    &regs[XE_GPU_REG_SHADER_CONSTANT_000_X + (i << 8) +
                          (float_constant_index << 2)]
                         .f32,
                    sizeof(float) * 4);
        mapping_float_constants_vertex += sizeof(float) * 4;
      }
    }
    write_descriptor_set_bits |=
        uint32_t(1)
        << SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex;
    current_graphics_descriptor_sets_
        [SpirvShaderTranslator::kDescriptorSetFloatConstantsVertex] =
            write_float_constants_vertex.dstSet;
  }
  VkDescriptorBufferInfo buffer_info_fetch_constants;
  if (!(current_graphics_descriptor_set_values_up_to_date_ &
        (uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants))) {
    VkWriteDescriptorSet& write_fetch_constants =
        write_descriptor_sets[write_descriptor_set_count++];
    constexpr size_t kFetchConstantsSize = sizeof(uint32_t) * 6 * 32;
    uint8_t* mapping_fetch_constants = WriteUniformBufferBinding(
        kFetchConstantsSize, descriptor_set_layout_fetch_bool_loop_constants_,
        buffer_info_fetch_constants, write_fetch_constants);
    if (!mapping_fetch_constants) {
      return false;
    }
    std::memcpy(mapping_fetch_constants,
                &regs[XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0].u32,
                kFetchConstantsSize);
    write_descriptor_set_bits |=
        uint32_t(1) << SpirvShaderTranslator::kDescriptorSetFetchConstants;
    current_graphics_descriptor_sets_
        [SpirvShaderTranslator::kDescriptorSetFetchConstants] =
            write_fetch_constants.dstSet;
  }
  if (write_descriptor_set_count) {
    const ui::vulkan::VulkanProvider& provider =
        GetVulkanContext().GetVulkanProvider();
    const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
    VkDevice device = provider.device();
    dfn.vkUpdateDescriptorSets(device, write_descriptor_set_count,
                               write_descriptor_sets, 0, nullptr);
  }
  // Only make valid if written successfully.
  current_graphics_descriptor_set_values_up_to_date_ |=
      write_descriptor_set_bits;

  // Bind the new descriptor sets.
  uint32_t descriptor_sets_needed =
      (uint32_t(1) << SpirvShaderTranslator::kDescriptorSetCount) - 1;
  if (current_graphics_pipeline_layout_
          ->descriptor_set_layout_textures_vertex_ref ==
      descriptor_set_layout_empty_) {
    descriptor_sets_needed &=
        ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesVertex);
  }
  if (current_graphics_pipeline_layout_
          ->descriptor_set_layout_textures_pixel_ref ==
      descriptor_set_layout_empty_) {
    descriptor_sets_needed &=
        ~(uint32_t(1) << SpirvShaderTranslator::kDescriptorSetTexturesPixel);
  }
  uint32_t descriptor_sets_remaining =
      descriptor_sets_needed &
      ~current_graphics_descriptor_sets_bound_up_to_date_;
  uint32_t descriptor_set_index;
  while (
      xe::bit_scan_forward(descriptor_sets_remaining, &descriptor_set_index)) {
    uint32_t descriptor_set_mask_tzcnt =
        xe::tzcnt(~(descriptor_sets_remaining |
                    ((uint32_t(1) << descriptor_set_index) - 1)));
    // TODO(Triang3l): Bind to compute for rectangle list emulation without
    // geometry shaders.
    deferred_command_buffer_.CmdVkBindDescriptorSets(
        VK_PIPELINE_BIND_POINT_GRAPHICS,
        current_graphics_pipeline_layout_->pipeline_layout,
        descriptor_set_index, descriptor_set_mask_tzcnt - descriptor_set_index,
        current_graphics_descriptor_sets_ + descriptor_set_index, 0, nullptr);
    if (descriptor_set_mask_tzcnt >= 32) {
      break;
    }
    descriptor_sets_remaining &=
        ~((uint32_t(1) << descriptor_set_mask_tzcnt) - 1);
  }
  current_graphics_descriptor_sets_bound_up_to_date_ |= descriptor_sets_needed;

  return true;
}
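The bind loop above batches each run of consecutive stale sets into a single CmdVkBindDescriptorSets call. A standalone trace and sketch, with std::countr_zero (C++20) standing in for xe::tzcnt: for a stale-set mask of 0b0101100, the first iteration finds index 2, the trailing-zero count of ~(0b0101100 | 0b011) is 4, so sets [2, 4) are bound in one call; the second iteration binds set [5, 6) - two calls instead of three single-set ones.

#include <bit>
#include <cstdint>

void BindStaleRanges(uint32_t stale_mask) {
  while (stale_mask) {
    // First stale set in the next run.
    uint32_t first = uint32_t(std::countr_zero(stale_mask));
    // First zero bit at or above |first|: one past the end of the run.
    uint32_t end = uint32_t(
        std::countr_zero(~(stale_mask | ((uint32_t(1) << first) - 1))));
    // Bind descriptor sets [first, end) with a single call here.
    if (end >= 32) {
      break;  // The run reaches the top bit; shifting by 32 would be UB.
    }
    stale_mask &= ~((uint32_t(1) << end) - 1);
  }
}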
uint8_t* VulkanCommandProcessor::WriteUniformBufferBinding(
    size_t size, VkDescriptorSetLayout descriptor_set_layout,
    VkDescriptorBufferInfo& descriptor_buffer_info_out,
    VkWriteDescriptorSet& write_descriptor_set_out) {
  VkDescriptorSet descriptor_set =
      transient_descriptor_pool_uniform_buffers_->Request(
          frame_current_, descriptor_set_layout, 1);
  if (descriptor_set == VK_NULL_HANDLE) {
    return nullptr;
  }
  const ui::vulkan::VulkanProvider& provider =
      GetVulkanContext().GetVulkanProvider();
  uint8_t* mapping = uniform_buffer_pool_->Request(
      frame_current_, size,
      size_t(
          provider.device_properties().limits.minUniformBufferOffsetAlignment),
      descriptor_buffer_info_out.buffer, descriptor_buffer_info_out.offset);
  if (!mapping) {
    return nullptr;
  }
  descriptor_buffer_info_out.range = VkDeviceSize(size);
  write_descriptor_set_out.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
  write_descriptor_set_out.pNext = nullptr;
  write_descriptor_set_out.dstSet = descriptor_set;
  write_descriptor_set_out.dstBinding = 0;
  write_descriptor_set_out.dstArrayElement = 0;
  write_descriptor_set_out.descriptorCount = 1;
  write_descriptor_set_out.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
  write_descriptor_set_out.pImageInfo = nullptr;
  write_descriptor_set_out.pBufferInfo = &descriptor_buffer_info_out;
  write_descriptor_set_out.pTexelBufferView = nullptr;
  return mapping;
}

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe
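The pool request above passes minUniformBufferOffsetAlignment because all of these bindings are suballocated from shared VkBuffer pages, and VkDescriptorBufferInfo::offset must be a multiple of that device limit. A sketch of the aligned bump allocation this implies (illustrative names; the actual upload pool class is not part of this commit):

#include <cstddef>
#include <cstdint>

size_t AlignUp(size_t value, size_t alignment) {
  // Vulkan guarantees this limit is a power of two.
  return (value + alignment - 1) & ~(alignment - 1);
}

struct UniformBumpAllocator {
  size_t cursor = 0;
  size_t capacity = 0;

  // Returns the aligned offset for |size| bytes, or SIZE_MAX when the page is
  // full and a new buffer page would be needed.
  size_t Allocate(size_t size, size_t min_ubo_offset_alignment) {
    size_t offset = AlignUp(cursor, min_ubo_offset_alignment);
    if (offset + size > capacity) {
      return SIZE_MAX;
    }
    cursor = offset + size;
    return offset;
  }
};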
@ -10,6 +10,7 @@
#ifndef XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_
#define XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_

#include <climits>
#include <cstdint>
#include <deque>
#include <memory>

@ -18,13 +19,18 @@
#include <vector>

#include "xenia/gpu/command_processor.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
#include "xenia/gpu/vulkan/vulkan_graphics_system.h"
#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h"
#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
#include "xenia/gpu/xenos.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/ui/vulkan/transient_descriptor_pool.h"
#include "xenia/ui/vulkan/vulkan_context.h"
#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"

namespace xe {
namespace gpu {
@ -67,19 +73,21 @@ class VulkanCommandProcessor : public CommandProcessor {
                        const VkSparseMemoryBind* binds,
                        VkPipelineStageFlags wait_stage_mask);

  struct PipelineLayout {
    VkPipelineLayout pipeline_layout;
    VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref;
    VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref;
  };
  bool GetPipelineLayout(uint32_t texture_count_pixel,
                         uint32_t texture_count_vertex,
                         PipelineLayout& pipeline_layout_out);
  // Must be called before doing anything outside the render pass scope,
  // including adding pipeline barriers that are not a part of the render pass
  // scope. Submission must be open.
  void EndRenderPass();

  // The returned reference is valid until a cache clear.
  const VulkanPipelineCache::PipelineLayoutProvider* GetPipelineLayout(
      uint32_t texture_count_pixel, uint32_t texture_count_vertex);

 protected:
  bool SetupContext() override;
  void ShutdownContext() override;

  void WriteRegister(uint32_t index, uint32_t value) override;

  void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width,
                   uint32_t frontbuffer_height) override;

@ -95,6 +103,49 @@ class VulkanCommandProcessor : public CommandProcessor {
  void InitializeTrace() override;

 private:
  struct CommandBuffer {
    VkCommandPool pool;
    VkCommandBuffer buffer;
  };

  struct SparseBufferBind {
    VkBuffer buffer;
    size_t bind_offset;
    uint32_t bind_count;
  };

  union TextureDescriptorSetLayoutKey {
    struct {
      uint32_t is_vertex : 1;
      // For 0, use descriptor_set_layout_empty_ instead as these are owning
      // references.
      uint32_t texture_count : 31;
    };
    uint32_t key = 0;
  };
  static_assert(sizeof(TextureDescriptorSetLayoutKey) == sizeof(uint32_t));

  union PipelineLayoutKey {
    struct {
      // Pixel textures in the low bits since those are varied much more
      // commonly.
      uint32_t texture_count_pixel : 16;
      uint32_t texture_count_vertex : 16;
    };
    uint32_t key = 0;
  };
  static_assert(sizeof(PipelineLayoutKey) == sizeof(uint32_t));

  class PipelineLayout : public VulkanPipelineCache::PipelineLayoutProvider {
   public:
    VkPipelineLayout GetPipelineLayout() const override {
      return pipeline_layout;
    }
    VkPipelineLayout pipeline_layout;
    VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref;
    VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref;
  };

  // BeginSubmission and EndSubmission may be called at any time. If there's an
  // open non-frame submission, BeginSubmission(true) will promote it to a
  // frame. EndSubmission(true) will close the frame no matter whether the

@ -119,6 +170,18 @@ class VulkanCommandProcessor : public CommandProcessor {

  VkShaderStageFlags GetGuestVertexShaderStageFlags() const;

  void UpdateFixedFunctionState();
  void UpdateSystemConstantValues(xenos::Endian index_endian);
  bool UpdateBindings(const VulkanShader* vertex_shader,
                      const VulkanShader* pixel_shader);
  // Allocates a descriptor, space in the uniform buffer pool, and fills the
  // VkWriteDescriptorSet structure and VkDescriptorBufferInfo referenced by it.
  // Returns null in case of failure.
  uint8_t* WriteUniformBufferBinding(
      size_t size, VkDescriptorSetLayout descriptor_set_layout,
      VkDescriptorBufferInfo& descriptor_buffer_info_out,
      VkWriteDescriptorSet& write_descriptor_set_out);

  bool cache_clear_requested_ = false;

  std::vector<VkFence> fences_free_;

@ -143,20 +206,11 @@ class VulkanCommandProcessor : public CommandProcessor {
  // Submission indices of frames that have already been submitted.
  uint64_t closed_frame_submissions_[kMaxFramesInFlight] = {};

  struct CommandBuffer {
    VkCommandPool pool;
    VkCommandBuffer buffer;
  };
  std::vector<CommandBuffer> command_buffers_writable_;
  std::deque<std::pair<CommandBuffer, uint64_t>> command_buffers_submitted_;
  DeferredCommandBuffer deferred_command_buffer_;

  std::vector<VkSparseMemoryBind> sparse_memory_binds_;
  struct SparseBufferBind {
    VkBuffer buffer;
    size_t bind_offset;
    uint32_t bind_count;
  };
  std::vector<SparseBufferBind> sparse_buffer_binds_;
  // SparseBufferBind converted to VkSparseBufferMemoryBindInfo to this buffer
  // on submission (because pBinds should point to a place in std::vector, but

@ -166,6 +220,7 @@ class VulkanCommandProcessor : public CommandProcessor {

  std::unique_ptr<ui::vulkan::TransientDescriptorPool>
      transient_descriptor_pool_uniform_buffers_;
  std::unique_ptr<ui::vulkan::VulkanUploadBufferPool> uniform_buffer_pool_;

  // Descriptor set layouts used by different shaders.
  VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE;

@ -180,34 +235,66 @@ class VulkanCommandProcessor : public CommandProcessor {
  VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ =
      VK_NULL_HANDLE;

  union TextureDescriptorSetLayoutKey {
    struct {
      uint32_t is_vertex : 1;
      // For 0, use descriptor_set_layout_empty_ instead as these are owning
      // references.
      uint32_t texture_count : 31;
    };
    uint32_t key = 0;
  };
  // TextureDescriptorSetLayoutKey::key -> VkDescriptorSetLayout.
  // Layouts are referenced by pipeline_layouts_.
  std::unordered_map<uint32_t, VkDescriptorSetLayout>
      descriptor_set_layouts_textures_;
  union PipelineLayoutKey {
    struct {
      // Pixel textures in the low bits since those are varied much more
      // commonly.
      uint32_t texture_count_pixel : 16;
      uint32_t texture_count_vertex : 16;
    };
    uint32_t key = 0;
  };
  // PipelineLayoutKey::key -> PipelineLayout.
  // Layouts are referenced by VulkanPipelineCache.
  std::unordered_map<uint32_t, PipelineLayout> pipeline_layouts_;

  std::unique_ptr<VulkanSharedMemory> shared_memory_;

  std::unique_ptr<VulkanPipelineCache> pipeline_cache_;

  std::unique_ptr<VulkanRenderTargetCache> render_target_cache_;

  VkDescriptorPool shared_memory_and_edram_descriptor_pool_ = VK_NULL_HANDLE;
  VkDescriptorSet shared_memory_and_edram_descriptor_set_;

  // The current fixed-function drawing state.
  VkViewport ff_viewport_;
  VkRect2D ff_scissor_;
  bool ff_viewport_update_needed_;
  bool ff_scissor_update_needed_;

  // Cache render pass currently started in the command buffer with framebuffer.
  VkRenderPass current_render_pass_;
  VkFramebuffer current_framebuffer_;

  // Cache graphics pipeline currently bound to the command buffer.
  VkPipeline current_graphics_pipeline_;

  // Pipeline layout of the current graphics pipeline.
  const PipelineLayout* current_graphics_pipeline_layout_;
  VkDescriptorSet current_graphics_descriptor_sets_
      [SpirvShaderTranslator::kDescriptorSetCount];
  // Whether descriptor sets in current_graphics_descriptor_sets_ point to
  // up-to-date data.
  uint32_t current_graphics_descriptor_set_values_up_to_date_;
  // Whether the descriptor sets currently bound to the command buffer are up
  // to date - only low bits for the descriptor set layouts that remained the
  // same are kept when changing the pipeline layout. May be out of sync with
  // current_graphics_descriptor_set_values_up_to_date_, but should be ensured
  // to be a subset of it at some point when it becomes important; bits for
  // non-existent descriptor set layouts may also be set, but need to be ignored
  // when they start to matter.
  uint32_t current_graphics_descriptor_sets_bound_up_to_date_;
  static_assert(
      SpirvShaderTranslator::kDescriptorSetCount <=
          sizeof(current_graphics_descriptor_set_values_up_to_date_) * CHAR_BIT,
      "Bit fields storing descriptor set validity must be large enough");
  static_assert(
      SpirvShaderTranslator::kDescriptorSetCount <=
          sizeof(current_graphics_descriptor_sets_bound_up_to_date_) * CHAR_BIT,
      "Bit fields storing descriptor set validity must be large enough");

  // Float constant usage masks of the last draw call.
  uint64_t current_float_constant_map_vertex_[4];
  uint64_t current_float_constant_map_pixel_[4];

  // System shader constants.
  SpirvShaderTranslator::SystemConstants system_constants_;
};

}  // namespace vulkan
@ -0,0 +1,443 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2020 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h"

#include <cstring>
#include <memory>

#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/xenos.h"

namespace xe {
namespace gpu {
namespace vulkan {

VulkanPipelineCache::VulkanPipelineCache(
    VulkanCommandProcessor& command_processor,
    const RegisterFile& register_file,
    VulkanRenderTargetCache& render_target_cache)
    : command_processor_(command_processor),
      register_file_(register_file),
      render_target_cache_(render_target_cache) {}

VulkanPipelineCache::~VulkanPipelineCache() { Shutdown(); }

bool VulkanPipelineCache::Initialize() {
  const ui::vulkan::VulkanProvider& provider =
      command_processor_.GetVulkanContext().GetVulkanProvider();

  device_pipeline_features_.features = 0;
  // TODO(Triang3l): Support the portability subset.
  device_pipeline_features_.triangle_fans = 1;

  shader_translator_ = std::make_unique<SpirvShaderTranslator>(
      SpirvShaderTranslator::Features(provider));

  return true;
}

void VulkanPipelineCache::Shutdown() {
  ClearCache();

  shader_translator_.reset();
}

void VulkanPipelineCache::ClearCache() {
  const ui::vulkan::VulkanProvider& provider =
      command_processor_.GetVulkanContext().GetVulkanProvider();
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
  VkDevice device = provider.device();

  last_pipeline_ = nullptr;
  for (const auto& pipeline_pair : pipelines_) {
    if (pipeline_pair.second.pipeline != VK_NULL_HANDLE) {
      dfn.vkDestroyPipeline(device, pipeline_pair.second.pipeline, nullptr);
    }
  }
  pipelines_.clear();

  for (auto it : shaders_) {
    delete it.second;
  }
  shaders_.clear();
}

VulkanShader* VulkanPipelineCache::LoadShader(xenos::ShaderType shader_type,
                                              uint32_t guest_address,
                                              const uint32_t* host_address,
                                              uint32_t dword_count) {
  // Hash the input memory and lookup the shader.
  uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
  auto it = shaders_.find(data_hash);
  if (it != shaders_.end()) {
    // Shader has been previously loaded.
    return it->second;
  }

  // Always create the shader and stash it away.
  // We need to track it even if it fails translation so we know not to try
  // again.
  VulkanShader* shader =
      new VulkanShader(shader_type, data_hash, host_address, dword_count);
  shaders_.emplace(data_hash, shader);

  return shader;
}
bool VulkanPipelineCache::EnsureShadersTranslated(
    VulkanShader* vertex_shader, VulkanShader* pixel_shader,
    Shader::HostVertexShaderType host_vertex_shader_type) {
  const RegisterFile& regs = register_file_;
  auto sq_program_cntl = regs.Get<reg::SQ_PROGRAM_CNTL>();

  // Edge flags are not supported yet (because polygon primitives are not).
  assert_true(sq_program_cntl.vs_export_mode !=
                  xenos::VertexShaderExportMode::kPosition2VectorsEdge &&
              sq_program_cntl.vs_export_mode !=
                  xenos::VertexShaderExportMode::kPosition2VectorsEdgeKill);
  assert_false(sq_program_cntl.gen_index_vtx);

  if (!vertex_shader->is_translated()) {
    if (!TranslateShader(*shader_translator_, *vertex_shader,
                         sq_program_cntl)) {
      XELOGE("Failed to translate the vertex shader!");
      return false;
    }
  }

  if (pixel_shader != nullptr && !pixel_shader->is_translated()) {
    if (!TranslateShader(*shader_translator_, *pixel_shader, sq_program_cntl)) {
      XELOGE("Failed to translate the pixel shader!");
      return false;
    }
  }

  return true;
}

bool VulkanPipelineCache::ConfigurePipeline(
    VulkanShader* vertex_shader, VulkanShader* pixel_shader,
    VulkanRenderTargetCache::RenderPassKey render_pass_key,
    VkPipeline& pipeline_out,
    const PipelineLayoutProvider*& pipeline_layout_out) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES

  PipelineDescription description;
  if (!GetCurrentStateDescription(vertex_shader, pixel_shader, render_pass_key,
                                  description)) {
    return false;
  }
  if (last_pipeline_ && last_pipeline_->first == description) {
    pipeline_out = last_pipeline_->second.pipeline;
    pipeline_layout_out = last_pipeline_->second.pipeline_layout;
    return true;
  }
  auto it = pipelines_.find(description);
  if (it != pipelines_.end()) {
    last_pipeline_ = &*it;
    pipeline_out = it->second.pipeline;
    pipeline_layout_out = it->second.pipeline_layout;
    return true;
  }

  // Create the pipeline if not the latest and not already existing.
  if (!EnsureShadersTranslated(vertex_shader, pixel_shader,
                               Shader::HostVertexShaderType::kVertex)) {
    return false;
  }
  const PipelineLayoutProvider* pipeline_layout =
      command_processor_.GetPipelineLayout(0, 0);
  if (!pipeline_layout) {
    return false;
  }
  VkRenderPass render_pass =
      render_target_cache_.GetRenderPass(render_pass_key);
  if (render_pass == VK_NULL_HANDLE) {
    return false;
  }
  PipelineCreationArguments creation_arguments;
  auto& pipeline =
      *pipelines_.emplace(description, Pipeline(pipeline_layout)).first;
  creation_arguments.pipeline = &pipeline;
  creation_arguments.vertex_shader = vertex_shader;
  creation_arguments.pixel_shader = pixel_shader;
  creation_arguments.render_pass = render_pass;
  if (!EnsurePipelineCreated(creation_arguments)) {
    return false;
  }
  pipeline_out = pipeline.second.pipeline;
  pipeline_layout_out = pipeline_layout;
  return true;
}
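ConfigurePipeline's lookup order above - one-entry most-recently-used pointer, then the hash map, then creation - keeps the per-draw cost near zero when state doesn't change between draws. The idiom reduced to a standalone sketch with illustrative types; storing a raw pointer to the map element is safe because unordered_map rehashing invalidates iterators but not pointers or references to elements:

#include <unordered_map>
#include <utility>

template <typename Key, typename Value, typename Hasher>
class MruCachedMap {
 public:
  // Returns nullptr if |key| is not present; never inserts. Key needs
  // operator== (here, cheap byte comparison) for the fast path.
  Value* Find(const Key& key) {
    if (last_ && last_->first == key) {
      return &last_->second;  // Hit without hashing or bucket probing.
    }
    auto it = map_.find(key);
    if (it == map_.end()) {
      return nullptr;
    }
    last_ = &*it;
    return &it->second;
  }

 private:
  std::unordered_map<Key, Value, Hasher> map_;
  std::pair<const Key, Value>* last_ = nullptr;
};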
bool VulkanPipelineCache::TranslateShader(SpirvShaderTranslator& translator,
                                          VulkanShader& shader,
                                          reg::SQ_PROGRAM_CNTL cntl) {
  // Perform translation.
  // If this fails the shader will be marked as invalid and ignored later.
  // TODO(Triang3l): Host vertex shader type.
  if (!translator.Translate(&shader, cntl,
                            Shader::HostVertexShaderType::kVertex)) {
    XELOGE("Shader {:016X} translation failed; marking as ignored",
           shader.ucode_data_hash());
    return false;
  }
  return shader.InitializeShaderModule(
      command_processor_.GetVulkanContext().GetVulkanProvider());
}

bool VulkanPipelineCache::GetCurrentStateDescription(
    const VulkanShader* vertex_shader, const VulkanShader* pixel_shader,
    VulkanRenderTargetCache::RenderPassKey render_pass_key,
    PipelineDescription& description_out) const {
  description_out.Reset();

  const RegisterFile& regs = register_file_;

  description_out.vertex_shader_hash = vertex_shader->ucode_data_hash();
  description_out.pixel_shader_hash =
      pixel_shader ? pixel_shader->ucode_data_hash() : 0;
  description_out.render_pass_key = render_pass_key;

  auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();
  PipelinePrimitiveTopology primitive_topology;
  switch (vgt_draw_initiator.prim_type) {
    case xenos::PrimitiveType::kPointList:
      primitive_topology = PipelinePrimitiveTopology::kPointList;
      break;
    case xenos::PrimitiveType::kLineList:
      primitive_topology = PipelinePrimitiveTopology::kLineList;
      break;
    case xenos::PrimitiveType::kLineStrip:
      primitive_topology = PipelinePrimitiveTopology::kLineStrip;
      break;
    case xenos::PrimitiveType::kTriangleList:
      primitive_topology = PipelinePrimitiveTopology::kTriangleList;
      break;
    case xenos::PrimitiveType::kTriangleFan:
      primitive_topology = device_pipeline_features_.triangle_fans
                               ? PipelinePrimitiveTopology::kTriangleFan
                               : PipelinePrimitiveTopology::kTriangleList;
      break;
    case xenos::PrimitiveType::kTriangleStrip:
      primitive_topology = PipelinePrimitiveTopology::kTriangleStrip;
      break;
    default:
      // TODO(Triang3l): All primitive types and tessellation.
      return false;
  }
  description_out.primitive_topology = primitive_topology;
  // TODO(Triang3l): Primitive restart.

  return true;
}

bool VulkanPipelineCache::EnsurePipelineCreated(
    const PipelineCreationArguments& creation_arguments) {
  if (creation_arguments.pipeline->second.pipeline != VK_NULL_HANDLE) {
    return true;
  }

  // This function preferably should validate the description to prevent
  // unsupported behavior that may be dangerous/crashing because pipelines can
  // be created from the disk storage.

  if (creation_arguments.pixel_shader) {
    XELOGGPU("Creating graphics pipeline state with VS {:016X}, PS {:016X}",
             creation_arguments.vertex_shader->ucode_data_hash(),
             creation_arguments.pixel_shader->ucode_data_hash());
  } else {
    XELOGGPU("Creating graphics pipeline state with VS {:016X}",
             creation_arguments.vertex_shader->ucode_data_hash());
  }

  const PipelineDescription& description = creation_arguments.pipeline->first;

  VkPipelineShaderStageCreateInfo shader_stages[2];
  uint32_t shader_stage_count = 0;

  assert_true(creation_arguments.vertex_shader->is_translated());
  if (!creation_arguments.vertex_shader->is_valid()) {
    return false;
  }
  assert_true(shader_stage_count < xe::countof(shader_stages));
  VkPipelineShaderStageCreateInfo& shader_stage_vertex =
      shader_stages[shader_stage_count++];
  shader_stage_vertex.sType =
      VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
  shader_stage_vertex.pNext = nullptr;
  shader_stage_vertex.flags = 0;
  shader_stage_vertex.stage = VK_SHADER_STAGE_VERTEX_BIT;
  shader_stage_vertex.module =
      creation_arguments.vertex_shader->shader_module();
  assert_true(shader_stage_vertex.module != VK_NULL_HANDLE);
  shader_stage_vertex.pName = "main";
  shader_stage_vertex.pSpecializationInfo = nullptr;
  if (creation_arguments.pixel_shader) {
    assert_true(creation_arguments.pixel_shader->is_translated());
    if (!creation_arguments.pixel_shader->is_valid()) {
      return false;
    }
    assert_true(shader_stage_count < xe::countof(shader_stages));
    VkPipelineShaderStageCreateInfo& shader_stage_fragment =
        shader_stages[shader_stage_count++];
    shader_stage_fragment.sType =
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
    shader_stage_fragment.pNext = nullptr;
    shader_stage_fragment.flags = 0;
    shader_stage_fragment.stage = VK_SHADER_STAGE_FRAGMENT_BIT;
    shader_stage_fragment.module =
        creation_arguments.pixel_shader->shader_module();
    assert_true(shader_stage_fragment.module != VK_NULL_HANDLE);
    shader_stage_fragment.pName = "main";
    shader_stage_fragment.pSpecializationInfo = nullptr;
  }

  VkPipelineVertexInputStateCreateInfo vertex_input_state;
  vertex_input_state.sType =
      VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
  vertex_input_state.pNext = nullptr;
  vertex_input_state.flags = 0;
  vertex_input_state.vertexBindingDescriptionCount = 0;
  vertex_input_state.pVertexBindingDescriptions = nullptr;
  vertex_input_state.vertexAttributeDescriptionCount = 0;
  vertex_input_state.pVertexAttributeDescriptions = nullptr;

  VkPipelineInputAssemblyStateCreateInfo input_assembly_state;
  input_assembly_state.sType =
      VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
  input_assembly_state.pNext = nullptr;
  input_assembly_state.flags = 0;
  switch (description.primitive_topology) {
    case PipelinePrimitiveTopology::kPointList:
      input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_POINT_LIST;
      break;
    case PipelinePrimitiveTopology::kLineList:
      input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_LIST;
      break;
    case PipelinePrimitiveTopology::kLineStrip:
      input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_LINE_STRIP;
      break;
    case PipelinePrimitiveTopology::kTriangleList:
      input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
      break;
    case PipelinePrimitiveTopology::kTriangleStrip:
      input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
      break;
    case PipelinePrimitiveTopology::kTriangleFan:
      assert_true(device_pipeline_features_.triangle_fans);
      if (!device_pipeline_features_.triangle_fans) {
        return false;
      }
      input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN;
      break;
    case PipelinePrimitiveTopology::kLineListWithAdjacency:
      input_assembly_state.topology =
          VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY;
      break;
    case PipelinePrimitiveTopology::kPatchList:
      input_assembly_state.topology = VK_PRIMITIVE_TOPOLOGY_PATCH_LIST;
      break;
    default:
      assert_unhandled_case(description.primitive_topology);
      return false;
  }
  input_assembly_state.primitiveRestartEnable =
      description.primitive_restart ? VK_TRUE : VK_FALSE;

  VkPipelineViewportStateCreateInfo viewport_state;
  viewport_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
  viewport_state.pNext = nullptr;
  viewport_state.flags = 0;
  viewport_state.viewportCount = 1;
  viewport_state.pViewports = nullptr;
  viewport_state.scissorCount = 1;
  viewport_state.pScissors = nullptr;

  VkPipelineRasterizationStateCreateInfo rasterization_state = {};
  rasterization_state.sType =
      VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
  rasterization_state.lineWidth = 1.0f;

  VkPipelineMultisampleStateCreateInfo multisample_state = {};
  multisample_state.sType =
      VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
  multisample_state.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;

  static const VkDynamicState dynamic_states[] = {
      VK_DYNAMIC_STATE_VIEWPORT,
      VK_DYNAMIC_STATE_SCISSOR,
  };
  VkPipelineDynamicStateCreateInfo dynamic_state;
  dynamic_state.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
  dynamic_state.pNext = nullptr;
  dynamic_state.flags = 0;
  dynamic_state.dynamicStateCount = uint32_t(xe::countof(dynamic_states));
  dynamic_state.pDynamicStates = dynamic_states;

  VkGraphicsPipelineCreateInfo pipeline_create_info;
  pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
  pipeline_create_info.pNext = nullptr;
  pipeline_create_info.flags = 0;
  pipeline_create_info.stageCount = shader_stage_count;
  pipeline_create_info.pStages = shader_stages;
  pipeline_create_info.pVertexInputState = &vertex_input_state;
  pipeline_create_info.pInputAssemblyState = &input_assembly_state;
  pipeline_create_info.pTessellationState = nullptr;
  pipeline_create_info.pViewportState = &viewport_state;
  pipeline_create_info.pRasterizationState = &rasterization_state;
  pipeline_create_info.pMultisampleState = &multisample_state;
  pipeline_create_info.pDepthStencilState = nullptr;
  pipeline_create_info.pColorBlendState = nullptr;
  pipeline_create_info.pDynamicState = &dynamic_state;
  pipeline_create_info.layout =
      creation_arguments.pipeline->second.pipeline_layout->GetPipelineLayout();
  pipeline_create_info.renderPass = creation_arguments.render_pass;
  pipeline_create_info.subpass = 0;
  pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE;
  pipeline_create_info.basePipelineIndex = 0;

  const ui::vulkan::VulkanProvider& provider =
      command_processor_.GetVulkanContext().GetVulkanProvider();
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
  VkDevice device = provider.device();
  VkPipeline pipeline;
  if (dfn.vkCreateGraphicsPipelines(device, VK_NULL_HANDLE, 1,
                                    &pipeline_create_info, nullptr,
                                    &pipeline) != VK_SUCCESS) {
    // TODO(Triang3l): Move these error messages outside.
    /* if (creation_arguments.pixel_shader) {
      XELOGE(
          "Failed to create graphics pipeline with VS {:016X}, PS {:016X}",
          creation_arguments.vertex_shader->ucode_data_hash(),
          creation_arguments.pixel_shader->ucode_data_hash());
    } else {
      XELOGE("Failed to create graphics pipeline with VS {:016X}",
             creation_arguments.vertex_shader->ucode_data_hash());
    } */
    return false;
  }
  creation_arguments.pipeline->second.pipeline = pipeline;
  return true;
}

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe
@ -0,0 +1,183 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2020 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_
#define XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_

#include <cstddef>
#include <cstring>
#include <memory>
#include <unordered_map>
#include <utility>

#include "third_party/xxhash/xxhash.h"
#include "xenia/base/hash.h"
#include "xenia/base/platform.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan_provider.h"

namespace xe {
namespace gpu {
namespace vulkan {

class VulkanCommandProcessor;

// TODO(Triang3l): Create a common base for both the Vulkan and the Direct3D
// implementations.
class VulkanPipelineCache {
 public:
  class PipelineLayoutProvider {
   public:
    virtual ~PipelineLayoutProvider() {}
    virtual VkPipelineLayout GetPipelineLayout() const = 0;
  };

  VulkanPipelineCache(VulkanCommandProcessor& command_processor,
                      const RegisterFile& register_file,
                      VulkanRenderTargetCache& render_target_cache);
  ~VulkanPipelineCache();

  bool Initialize();
  void Shutdown();
  void ClearCache();

  VulkanShader* LoadShader(xenos::ShaderType shader_type,
                           uint32_t guest_address, const uint32_t* host_address,
                           uint32_t dword_count);

  // Translates shaders if needed, also making shader info up to date.
  bool EnsureShadersTranslated(
      VulkanShader* vertex_shader, VulkanShader* pixel_shader,
      Shader::HostVertexShaderType host_vertex_shader_type);

  // TODO(Triang3l): Return a deferred creation handle.
  bool ConfigurePipeline(VulkanShader* vertex_shader,
                         VulkanShader* pixel_shader,
                         VulkanRenderTargetCache::RenderPassKey render_pass_key,
                         VkPipeline& pipeline_out,
                         const PipelineLayoutProvider*& pipeline_layout_out);

 private:
  // Pipeline storage can only be loaded if the features of the device it was
  // created on and those of the current device match, because descriptions
  // may require features that are not supported on the device. Very radical
  // differences (such as the RB emulation method) should result in a
  // different storage file being used.
  union DevicePipelineFeatures {
    struct {
      uint32_t triangle_fans : 1;
    };
    uint32_t features = 0;
  };

  enum class PipelinePrimitiveTopology : uint32_t {
    kPointList,
    kLineList,
    kLineStrip,
    kTriangleList,
    kTriangleStrip,
    // Requires DevicePipelineFeatures::triangle_fans.
    kTriangleFan,
    kLineListWithAdjacency,
    kPatchList,
  };

  XEPACKEDSTRUCT(PipelineDescription, {
    uint64_t vertex_shader_hash;
    // 0 if no pixel shader.
    uint64_t pixel_shader_hash;
    VulkanRenderTargetCache::RenderPassKey render_pass_key;

    // Input assembly.
    PipelinePrimitiveTopology primitive_topology : 3;
    uint32_t primitive_restart : 1;

    // Including all the padding, for a stable hash.
    PipelineDescription() { Reset(); }
    PipelineDescription(const PipelineDescription& description) {
      std::memcpy(this, &description, sizeof(*this));
    }
    PipelineDescription& operator=(const PipelineDescription& description) {
      std::memcpy(this, &description, sizeof(*this));
      return *this;
    }
    bool operator==(const PipelineDescription& description) const {
      return std::memcmp(this, &description, sizeof(*this)) == 0;
    }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
    uint64_t GetHash() const { return XXH64(this, sizeof(*this), 0); }
    struct Hasher {
      size_t operator()(const PipelineDescription& description) const {
        return size_t(description.GetHash());
      }
    };
  });

  struct Pipeline {
    VkPipeline pipeline = VK_NULL_HANDLE;
    // Owned by VulkanCommandProcessor, valid until ClearCache.
    const PipelineLayoutProvider* pipeline_layout;
    Pipeline(const PipelineLayoutProvider* pipeline_layout_provider)
        : pipeline_layout(pipeline_layout_provider) {}
  };

  // Description that can be passed from the command processor thread to the
  // creation threads, with everything needed from caches pre-looked-up.
  struct PipelineCreationArguments {
    std::pair<const PipelineDescription, Pipeline>* pipeline;
    const VulkanShader* vertex_shader;
    const VulkanShader* pixel_shader;
    VkRenderPass render_pass;
  };

  // Can be called from multiple threads.
  bool TranslateShader(SpirvShaderTranslator& translator, VulkanShader& shader,
                       reg::SQ_PROGRAM_CNTL cntl);

  bool GetCurrentStateDescription(
      const VulkanShader* vertex_shader, const VulkanShader* pixel_shader,
      VulkanRenderTargetCache::RenderPassKey render_pass_key,
      PipelineDescription& description_out) const;

  // Can be called from creation threads - all needed data must be fully set up
  // at the point of the call: shaders must be translated, pipeline layout and
  // render pass objects must be available.
  bool EnsurePipelineCreated(
      const PipelineCreationArguments& creation_arguments);

  VulkanCommandProcessor& command_processor_;
  const RegisterFile& register_file_;
  VulkanRenderTargetCache& render_target_cache_;

  DevicePipelineFeatures device_pipeline_features_;

  // Reusable shader translator on the command processor thread.
  std::unique_ptr<SpirvShaderTranslator> shader_translator_;

  // Ucode hash -> shader.
  std::unordered_map<uint64_t, VulkanShader*,
                     xe::hash::IdentityHasher<uint64_t>>
      shaders_;

  std::unordered_map<PipelineDescription, Pipeline, PipelineDescription::Hasher>
      pipelines_;

  // Previously used pipeline, to avoid lookups if the state wasn't changed.
  const std::pair<const PipelineDescription, Pipeline>* last_pipeline_ =
      nullptr;
};

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe

#endif  // XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_
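One detail worth calling out in the header above: PipelineDescription works as an unordered_map key through raw memcmp and XXH64 only because Reset() zero-fills the whole object, padding included, before any field is assigned. A self-contained sketch of the same pattern, with std::hash standing in for XXH64 purely so it builds without the third-party dependency:

#include <cstdint>
#include <cstring>
#include <functional>
#include <string_view>
#include <unordered_map>

struct Key {
  uint64_t shader_hash;
  uint32_t topology : 3;
  uint32_t primitive_restart : 1;

  // Zero every byte, padding included, so byte-wise compare/hash are stable.
  Key() { std::memset(this, 0, sizeof(*this)); }
  bool operator==(const Key& other) const {
    return std::memcmp(this, &other, sizeof(*this)) == 0;
  }
  struct Hasher {
    size_t operator()(const Key& key) const {
      // Hashing raw bytes is only valid because padding is always zeroed.
      return std::hash<std::string_view>()(std::string_view(
          reinterpret_cast<const char*>(&key), sizeof(key)));
    }
  };
};

int main() {
  std::unordered_map<Key, int, Key::Hasher> pipelines;
  Key key;
  key.shader_hash = 0x123456789ABCDEF0ull;
  key.topology = 4;  // Fits in the 3-bit field.
  pipelines.emplace(key, 1);
  return pipelines.count(key) == 1 ? 0 : 1;
}

Without that memset, two logically equal keys could differ in their padding bytes and land in different buckets.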
@@ -0,0 +1,136 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2020 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"

#include "xenia/base/logging.h"
#include "xenia/gpu/vulkan/vulkan_command_processor.h"

namespace xe {
namespace gpu {
namespace vulkan {

VulkanRenderTargetCache::VulkanRenderTargetCache(
    VulkanCommandProcessor& command_processor,
    const RegisterFile& register_file)
    : command_processor_(command_processor), register_file_(register_file) {}

VulkanRenderTargetCache::~VulkanRenderTargetCache() { Shutdown(); }

bool VulkanRenderTargetCache::Initialize() { return true; }

void VulkanRenderTargetCache::Shutdown() { ClearCache(); }

void VulkanRenderTargetCache::ClearCache() {
  const ui::vulkan::VulkanProvider& provider =
      command_processor_.GetVulkanContext().GetVulkanProvider();
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
  VkDevice device = provider.device();

  for (const auto& framebuffer_pair : framebuffers_) {
    dfn.vkDestroyFramebuffer(device, framebuffer_pair.second, nullptr);
  }
  framebuffers_.clear();

  for (const auto& render_pass_pair : render_passes_) {
    dfn.vkDestroyRenderPass(device, render_pass_pair.second, nullptr);
  }
  render_passes_.clear();
}

VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) {
  auto it = render_passes_.find(key.key);
  if (it != render_passes_.end()) {
    return it->second;
  }

  // TODO(Triang3l): Attachments and dependencies.

  VkSubpassDescription subpass_description;
  subpass_description.flags = 0;
  subpass_description.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
  subpass_description.inputAttachmentCount = 0;
  subpass_description.pInputAttachments = nullptr;
  subpass_description.colorAttachmentCount = 0;
  subpass_description.pColorAttachments = nullptr;
  subpass_description.pResolveAttachments = nullptr;
  subpass_description.pDepthStencilAttachment = nullptr;
  subpass_description.preserveAttachmentCount = 0;
  subpass_description.pPreserveAttachments = nullptr;

  VkRenderPassCreateInfo render_pass_create_info;
  render_pass_create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
  render_pass_create_info.pNext = nullptr;
  render_pass_create_info.flags = 0;
  render_pass_create_info.attachmentCount = 0;
  render_pass_create_info.pAttachments = nullptr;
  render_pass_create_info.subpassCount = 1;
  render_pass_create_info.pSubpasses = &subpass_description;
  render_pass_create_info.dependencyCount = 0;
  render_pass_create_info.pDependencies = nullptr;

  const ui::vulkan::VulkanProvider& provider =
      command_processor_.GetVulkanContext().GetVulkanProvider();
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
  VkDevice device = provider.device();
  VkRenderPass render_pass;
  if (dfn.vkCreateRenderPass(device, &render_pass_create_info, nullptr,
                             &render_pass) != VK_SUCCESS) {
    XELOGE("Failed to create a Vulkan render pass");
    return VK_NULL_HANDLE;
  }
  render_passes_.emplace(key.key, render_pass);
  return render_pass;
}

VkFramebuffer VulkanRenderTargetCache::GetFramebuffer(FramebufferKey key) {
  auto it = framebuffers_.find(key);
  if (it != framebuffers_.end()) {
    return it->second;
  }

  VkRenderPass render_pass = GetRenderPass(key.render_pass_key);
  if (render_pass == VK_NULL_HANDLE) {
    return VK_NULL_HANDLE;
  }

  VkFramebufferCreateInfo framebuffer_create_info;
  framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
  framebuffer_create_info.pNext = nullptr;
  framebuffer_create_info.flags = 0;
  framebuffer_create_info.renderPass = render_pass;
  framebuffer_create_info.attachmentCount = 0;
  framebuffer_create_info.pAttachments = nullptr;
  framebuffer_create_info.width = 1280;
  framebuffer_create_info.height = 720;
  framebuffer_create_info.layers = 1;

  const ui::vulkan::VulkanProvider& provider =
      command_processor_.GetVulkanContext().GetVulkanProvider();
  const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
  VkDevice device = provider.device();
  VkFramebuffer framebuffer;
  if (dfn.vkCreateFramebuffer(device, &framebuffer_create_info, nullptr,
                              &framebuffer) != VK_SUCCESS) {
    XELOGE("Failed to create a Vulkan framebuffer");
    return VK_NULL_HANDLE;
  }
  framebuffers_.emplace(key, framebuffer);
  return framebuffer;
}

bool VulkanRenderTargetCache::UpdateRenderTargets(
    FramebufferKey& framebuffer_key_out) {
  framebuffer_key_out = FramebufferKey();
  return true;
}

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe
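As a usage note for the two accessors above, a minimal hedged sketch of beginning a pass with the cached handles; the variable names and the recording step are assumptions, and the 1280x720 extent just mirrors the placeholder in GetFramebuffer:

// Hypothetical usage sketch of GetRenderPass/GetFramebuffer.
VkRenderPass render_pass = render_target_cache.GetRenderPass(render_pass_key);
VkFramebuffer framebuffer = render_target_cache.GetFramebuffer(framebuffer_key);
if (render_pass == VK_NULL_HANDLE || framebuffer == VK_NULL_HANDLE) {
  return false;
}
VkRenderPassBeginInfo render_pass_begin_info;
render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
render_pass_begin_info.pNext = nullptr;
render_pass_begin_info.renderPass = render_pass;
render_pass_begin_info.framebuffer = framebuffer;
render_pass_begin_info.renderArea.offset = {0, 0};
render_pass_begin_info.renderArea.extent = {1280, 720};
render_pass_begin_info.clearValueCount = 0;
render_pass_begin_info.pClearValues = nullptr;
// The begin info would then be recorded through the deferred command buffer.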
@@ -0,0 +1,95 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2020 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_
#define XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_

#include <cstdint>
#include <cstring>
#include <unordered_map>

#include "third_party/xxhash/xxhash.h"
#include "xenia/gpu/register_file.h"
#include "xenia/ui/vulkan/vulkan_provider.h"

namespace xe {
namespace gpu {
namespace vulkan {

class VulkanCommandProcessor;

// TODO(Triang3l): Create a common base for both the Vulkan and the Direct3D
// implementations.
class VulkanRenderTargetCache {
 public:
  union RenderPassKey {
    uint32_t key = 0;
  };
  static_assert(sizeof(RenderPassKey) == sizeof(uint32_t));

  struct FramebufferKey {
    RenderPassKey render_pass_key;

    // Including all the padding, for a stable hash.
    FramebufferKey() { Reset(); }
    FramebufferKey(const FramebufferKey& key) {
      std::memcpy(this, &key, sizeof(*this));
    }
    FramebufferKey& operator=(const FramebufferKey& key) {
      std::memcpy(this, &key, sizeof(*this));
      return *this;
    }
    bool operator==(const FramebufferKey& key) const {
      return std::memcmp(this, &key, sizeof(*this)) == 0;
    }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
    uint64_t GetHash() const { return XXH64(this, sizeof(*this), 0); }
    struct Hasher {
      size_t operator()(const FramebufferKey& description) const {
        return size_t(description.GetHash());
      }
    };
  };
  static_assert(sizeof(FramebufferKey) == sizeof(uint32_t));

  VulkanRenderTargetCache(VulkanCommandProcessor& command_processor,
                          const RegisterFile& register_file);
  ~VulkanRenderTargetCache();

  bool Initialize();
  void Shutdown();
  void ClearCache();
  // Returns the render pass object, or VK_NULL_HANDLE if creation failed.
  // A render pass managed by the render target cache may be ended and resumed
  // at any time (to allow for things like copying and texture loading).
  VkRenderPass GetRenderPass(RenderPassKey key);

  // Returns the framebuffer object, or VK_NULL_HANDLE if creation failed.
  VkFramebuffer GetFramebuffer(FramebufferKey key);

  // May dispatch computations.
  bool UpdateRenderTargets(FramebufferKey& framebuffer_key_out);

 private:
  VulkanCommandProcessor& command_processor_;
  const RegisterFile& register_file_;

  // RenderPassKey::key -> VkRenderPass.
  std::unordered_map<uint32_t, VkRenderPass> render_passes_;

  std::unordered_map<FramebufferKey, VkFramebuffer, FramebufferKey::Hasher>
      framebuffers_;
};

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe

#endif  // XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_
@@ -0,0 +1,48 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2020 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/vulkan/vulkan_shader.h"

#include <cstdint>

namespace xe {
namespace gpu {
namespace vulkan {

VulkanShader::VulkanShader(xenos::ShaderType shader_type, uint64_t data_hash,
                           const uint32_t* dword_ptr, uint32_t dword_count)
    : Shader(shader_type, data_hash, dword_ptr, dword_count) {}

bool VulkanShader::InitializeShaderModule(
    const ui::vulkan::VulkanProvider& provider) {
  if (!is_valid()) {
    return false;
  }
  if (shader_module_ != VK_NULL_HANDLE) {
    return true;
  }
  VkShaderModuleCreateInfo shader_module_create_info;
  shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
  shader_module_create_info.pNext = nullptr;
  shader_module_create_info.flags = 0;
  shader_module_create_info.codeSize = translated_binary().size();
  shader_module_create_info.pCode =
      reinterpret_cast<const uint32_t*>(translated_binary().data());
  if (provider.dfn().vkCreateShaderModule(provider.device(),
                                          &shader_module_create_info, nullptr,
                                          &shader_module_) != VK_SUCCESS) {
    is_valid_ = false;
    return false;
  }
  return true;
}

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe
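The module that InitializeShaderModule creates is what a pipeline's stage array would reference; a small sketch of filling one stage description from it (the "main" entry point name is an assumption about the SPIR-V the translator emits):

// Hypothetical sketch: one entry of a pipeline's shader stage array.
VkPipelineShaderStageCreateInfo stage;
stage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stage.pNext = nullptr;
stage.flags = 0;
stage.stage = VK_SHADER_STAGE_VERTEX_BIT;
stage.module = vertex_shader->shader_module();
stage.pName = "main";  // Assumed SPIR-V entry point name.
stage.pSpecializationInfo = nullptr;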
@@ -0,0 +1,39 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2020 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_GPU_VULKAN_VULKAN_SHADER_H_
#define XENIA_GPU_VULKAN_VULKAN_SHADER_H_

#include <cstdint>

#include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan_provider.h"

namespace xe {
namespace gpu {
namespace vulkan {

class VulkanShader : public Shader {
 public:
  VulkanShader(xenos::ShaderType shader_type, uint64_t data_hash,
               const uint32_t* dword_ptr, uint32_t dword_count);

  bool InitializeShaderModule(const ui::vulkan::VulkanProvider& provider);
  VkShaderModule shader_module() const { return shader_module_; }

 private:
  VkShaderModule shader_module_ = VK_NULL_HANDLE;
};

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe

#endif  // XENIA_GPU_VULKAN_VULKAN_SHADER_H_
@@ -241,6 +241,7 @@ void VulkanSharedMemory::Use(Usage usage,
    buffer_memory_barrier.size = VK_WHOLE_SIZE;
    last_usage_ = usage;
  }
  command_processor_.EndRenderPass();
  command_processor_.deferred_command_buffer().CmdVkPipelineBarrier(
      stage_mask_src, stage_mask_dst, 0, 0, nullptr, 1,
      &buffer_memory_barrier, 0, nullptr);

@@ -271,7 +272,7 @@ bool VulkanSharedMemory::InitializeTraceSubmitDownloads() {
    return false;
  }

  // TODO(Triang3l): End the render pass.
  command_processor_.EndRenderPass();
  Use(Usage::kRead);
  DeferredCommandBuffer& command_buffer =
      command_processor_.deferred_command_buffer();

@@ -384,7 +385,7 @@ bool VulkanSharedMemory::UploadRanges(
  if (upload_page_ranges.empty()) {
    return true;
  }
  // TODO(Triang3l): End the render pass.
  command_processor_.EndRenderPass();
  // upload_page_ranges are sorted, use them to determine the range for the
  // ordering barrier.
  Use(Usage::kTransferDestination,
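For reference alongside these hunks, the full shape of the VkBufferMemoryBarrier that CmdVkPipelineBarrier receives; the access masks and the buffer handle below are placeholders for whatever the Usage transition actually implies, not the emulator's exact values:

// Hypothetical sketch of the barrier feeding CmdVkPipelineBarrier.
VkBufferMemoryBarrier buffer_memory_barrier;
buffer_memory_barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
buffer_memory_barrier.pNext = nullptr;
buffer_memory_barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT;     // Assumed.
buffer_memory_barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;  // Assumed.
buffer_memory_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
buffer_memory_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
buffer_memory_barrier.buffer = buffer;  // The shared memory VkBuffer (assumed name).
buffer_memory_barrier.offset = 0;
buffer_memory_barrier.size = VK_WHOLE_SIZE;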
@@ -80,6 +80,8 @@ VkDescriptorSet TransientDescriptorPool::Request(
  VkDescriptorSet descriptor_set;

  // Try to allocate as normal.
  // TODO(Triang3l): Investigate the possibility of reuse of descriptor sets, as
  // vkAllocateDescriptorSets may be implemented suboptimally.
  if (!pages_writable_.empty()) {
    if (page_current_descriptor_sets_used_ < page_descriptor_set_count_ &&
        page_current_descriptors_used_ + layout_descriptor_count <=