WIP rough sketch of vulkan backend structure.

parent 35e08d9428
commit 4c4a641096
src/xenia/base/memory.cc
@@ -18,6 +18,10 @@ namespace xe {

// http://gnuradio.org/redmine/projects/gnuradio/repository/revisions/f2bc76cc65ffba51a141950f98e75364e49df874/entry/volk/kernels/volk/volk_32u_byteswap.h
// http://gnuradio.org/redmine/projects/gnuradio/repository/revisions/2c4c371885c31222362f70a1cd714415d1398021/entry/volk/kernels/volk/volk_64u_byteswap.h

void copy_128_aligned(void* dest, const void* src, size_t count) {
  std::memcpy(dest, src, count * 16);
}

void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src,
                              size_t count) {
  return copy_and_swap_16_unaligned(dest, src, count);
}
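// A minimal sketch (not part of this commit) of a dedicated SIMD path for
// copy_and_swap_16_aligned, in the spirit of the volk kernels linked above.
// Assumes x86 with SSSE3: the shuffle mask swaps the two bytes of every
// 16-bit lane, eight lanes per iteration, with a scalar tail.
#include <tmmintrin.h>  // SSSE3

void copy_and_swap_16_aligned_sketch(uint16_t* dest, const uint16_t* src,
                                     size_t count) {
  const __m128i swap_mask = _mm_set_epi8(14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4,
                                         5, 2, 3, 0, 1);
  size_t i = 0;
  for (; i + 8 <= count; i += 8) {
    __m128i value = _mm_load_si128(reinterpret_cast<const __m128i*>(src + i));
    _mm_store_si128(reinterpret_cast<__m128i*>(dest + i),
                    _mm_shuffle_epi8(value, swap_mask));
  }
  for (; i < count; ++i) {
    dest[i] = static_cast<uint16_t>((src[i] << 8) | (src[i] >> 8));
  }
}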
src/xenia/base/memory.h
@@ -121,6 +121,8 @@ inline void* low_address(void* address) {
  return reinterpret_cast<void*>(uint64_t(address) & 0xFFFFFFFF);
}

void copy_128_aligned(void* dest, const void* src, size_t count);

void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src,
                              size_t count);
void copy_and_swap_16_unaligned(uint16_t* dest, const uint16_t* src,
src/xenia/gpu/vulkan/buffer_cache.cc
@@ -0,0 +1,330 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/vulkan/buffer_cache.h"

#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"

namespace xe {
namespace gpu {
namespace vulkan {

using xe::ui::vulkan::CheckResult;

BufferCache::BufferCache(RegisterFile* register_file,
                         ui::vulkan::VulkanDevice* device, size_t capacity)
    : register_file_(register_file),
      device_(*device),
      transient_capacity_(capacity) {
  // Uniform buffer.
  VkBufferCreateInfo uniform_buffer_info;
  uniform_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  uniform_buffer_info.pNext = nullptr;
  uniform_buffer_info.flags = 0;
  uniform_buffer_info.size = transient_capacity_;
  uniform_buffer_info.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
  uniform_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  uniform_buffer_info.queueFamilyIndexCount = 0;
  uniform_buffer_info.pQueueFamilyIndices = nullptr;
  auto err = vkCreateBuffer(device_, &uniform_buffer_info, nullptr,
                            &transient_uniform_buffer_);
  CheckResult(err, "vkCreateBuffer");

  // Index buffer.
  VkBufferCreateInfo index_buffer_info;
  index_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  index_buffer_info.pNext = nullptr;
  index_buffer_info.flags = 0;
  index_buffer_info.size = transient_capacity_;
  index_buffer_info.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
  index_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  index_buffer_info.queueFamilyIndexCount = 0;
  index_buffer_info.pQueueFamilyIndices = nullptr;
  err = vkCreateBuffer(device_, &index_buffer_info, nullptr,
                       &transient_index_buffer_);
  CheckResult(err, "vkCreateBuffer");

  // Vertex buffer.
  VkBufferCreateInfo vertex_buffer_info;
  vertex_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  vertex_buffer_info.pNext = nullptr;
  vertex_buffer_info.flags = 0;
  vertex_buffer_info.size = transient_capacity_;
  vertex_buffer_info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
  vertex_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  vertex_buffer_info.queueFamilyIndexCount = 0;
  vertex_buffer_info.pQueueFamilyIndices = nullptr;
  err = vkCreateBuffer(device_, &vertex_buffer_info, nullptr,
                       &transient_vertex_buffer_);
  CheckResult(err, "vkCreateBuffer");

  // Allocate the underlying buffer we use for all storage.
  // We query all types and take the max alignment.
  VkMemoryRequirements uniform_buffer_requirements;
  VkMemoryRequirements index_buffer_requirements;
  VkMemoryRequirements vertex_buffer_requirements;
  vkGetBufferMemoryRequirements(device_, transient_uniform_buffer_,
                                &uniform_buffer_requirements);
  vkGetBufferMemoryRequirements(device_, transient_index_buffer_,
                                &index_buffer_requirements);
  vkGetBufferMemoryRequirements(device_, transient_vertex_buffer_,
                                &vertex_buffer_requirements);
  uniform_buffer_alignment_ = uniform_buffer_requirements.alignment;
  index_buffer_alignment_ = index_buffer_requirements.alignment;
  vertex_buffer_alignment_ = vertex_buffer_requirements.alignment;
  VkMemoryRequirements buffer_requirements;
  buffer_requirements.size = transient_capacity_;
  buffer_requirements.alignment =
      std::max(uniform_buffer_requirements.alignment,
               std::max(index_buffer_requirements.alignment,
                        vertex_buffer_requirements.alignment));
  buffer_requirements.memoryTypeBits =
      uniform_buffer_requirements.memoryTypeBits |
      index_buffer_requirements.memoryTypeBits |
      vertex_buffer_requirements.memoryTypeBits;
  transient_buffer_memory_ = device->AllocateMemory(
      buffer_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);

  // Alias all buffers to our memory.
  vkBindBufferMemory(device_, transient_uniform_buffer_,
                     transient_buffer_memory_, 0);
  vkBindBufferMemory(device_, transient_index_buffer_,
                     transient_buffer_memory_, 0);
  vkBindBufferMemory(device_, transient_vertex_buffer_,
                     transient_buffer_memory_, 0);

  // Map memory and keep it mapped while we use it.
  err = vkMapMemory(device_, transient_buffer_memory_, 0, VK_WHOLE_SIZE, 0,
                    &transient_buffer_data_);
  CheckResult(err, "vkMapMemory");

  // Descriptor pool used for all of our cached descriptors.
  // In the steady state we don't allocate anything, so these are all manually
  // managed.
  VkDescriptorPoolCreateInfo descriptor_pool_info;
  descriptor_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
  descriptor_pool_info.pNext = nullptr;
  descriptor_pool_info.flags =
      VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
  descriptor_pool_info.maxSets = 1;
  VkDescriptorPoolSize pool_sizes[1];
  pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
  pool_sizes[0].descriptorCount = 2;
  descriptor_pool_info.poolSizeCount = 1;
  descriptor_pool_info.pPoolSizes = pool_sizes;
  err = vkCreateDescriptorPool(device_, &descriptor_pool_info, nullptr,
                               &descriptor_pool_);
  CheckResult(err, "vkCreateDescriptorPool");

  // Create the descriptor set layout used for our uniform buffer.
  // As it is a static binding that uses dynamic offsets during draws we can
  // create this once and reuse it forever.
  VkDescriptorSetLayoutBinding vertex_uniform_binding;
  vertex_uniform_binding.binding = 0;
  vertex_uniform_binding.descriptorType =
      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
  vertex_uniform_binding.descriptorCount = 1;
  vertex_uniform_binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
  vertex_uniform_binding.pImmutableSamplers = nullptr;
  VkDescriptorSetLayoutBinding fragment_uniform_binding;
  fragment_uniform_binding.binding = 1;
  fragment_uniform_binding.descriptorType =
      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
  fragment_uniform_binding.descriptorCount = 1;
  fragment_uniform_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
  fragment_uniform_binding.pImmutableSamplers = nullptr;
  VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info;
  descriptor_set_layout_info.sType =
      VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
  descriptor_set_layout_info.pNext = nullptr;
  descriptor_set_layout_info.flags = 0;
  VkDescriptorSetLayoutBinding uniform_bindings[] = {
      vertex_uniform_binding, fragment_uniform_binding,
  };
  descriptor_set_layout_info.bindingCount =
      static_cast<uint32_t>(xe::countof(uniform_bindings));
  descriptor_set_layout_info.pBindings = uniform_bindings;
  err = vkCreateDescriptorSetLayout(device_, &descriptor_set_layout_info,
                                    nullptr, &descriptor_set_layout_);
  CheckResult(err, "vkCreateDescriptorSetLayout");

  // Create the descriptor we'll use for the uniform buffer.
  // This is what we hand out to everyone (who then also needs to use our
  // offsets).
  VkDescriptorSetAllocateInfo set_alloc_info;
  set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
  set_alloc_info.pNext = nullptr;
  set_alloc_info.descriptorPool = descriptor_pool_;
  set_alloc_info.descriptorSetCount = 1;
  set_alloc_info.pSetLayouts = &descriptor_set_layout_;
  err = vkAllocateDescriptorSets(device_, &set_alloc_info,
                                 &transient_descriptor_set_);
  CheckResult(err, "vkAllocateDescriptorSets");
}

BufferCache::~BufferCache() {
  vkFreeDescriptorSets(device_, descriptor_pool_, 1,
                       &transient_descriptor_set_);
  vkDestroyDescriptorSetLayout(device_, descriptor_set_layout_, nullptr);
  vkDestroyDescriptorPool(device_, descriptor_pool_, nullptr);
  vkUnmapMemory(device_, transient_buffer_memory_);
  vkFreeMemory(device_, transient_buffer_memory_, nullptr);
  vkDestroyBuffer(device_, transient_uniform_buffer_, nullptr);
  vkDestroyBuffer(device_, transient_index_buffer_, nullptr);
  vkDestroyBuffer(device_, transient_vertex_buffer_, nullptr);
}

VkDeviceSize BufferCache::UploadConstantRegisters(
    const Shader::ConstantRegisterMap& constant_register_map) {
  // Allocate space in the buffer for our data.
  auto offset = AllocateTransientData(
      uniform_buffer_alignment_, constant_register_map.packed_byte_length);
  if (offset == VK_WHOLE_SIZE) {
    // OOM.
    return VK_WHOLE_SIZE;
  }

  // Run through registers and copy them into the buffer.
  // TODO(benvanik): optimize this - it's hit twice every call.
  const auto& values = register_file_->values;
  uint8_t* dest_ptr =
      reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset;
  for (int i = 0; i < 4; ++i) {
    auto piece = constant_register_map.float_bitmap[i];
    if (!piece) {
      continue;
    }
    for (int j = 0; j < 64; ++j) {
      if (piece & (1ull << j)) {
        xe::copy_128_aligned(
            dest_ptr,
            &values[XE_GPU_REG_SHADER_CONSTANT_000_X + i * 64 + j].f32, 1);
        dest_ptr += 16;
      }
    }
  }
  for (int i = 0; i < 32; ++i) {
    if (constant_register_map.int_bitmap & (1 << i)) {
      xe::store<uint32_t>(dest_ptr,
                          values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + i].u32);
      dest_ptr += 4;
    }
  }
  for (int i = 0; i < 8; ++i) {
    if (constant_register_map.bool_bitmap[i]) {
      xe::store<uint32_t>(
          dest_ptr, values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 + i].u32);
      dest_ptr += 4;
    }
  }

  return offset;
}

std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
    const void* source_ptr, size_t source_length, IndexFormat format) {
  // TODO(benvanik): check cache.

  // Allocate space in the buffer for our data.
  auto offset = AllocateTransientData(index_buffer_alignment_, source_length);
  if (offset == VK_WHOLE_SIZE) {
    // OOM.
    return {nullptr, VK_WHOLE_SIZE};
  }

  // Copy data into the buffer; note that offset is in bytes.
  // TODO(benvanik): get min/max indices and pass back?
  // TODO(benvanik): memcpy then use compute shaders to swap?
  if (format == IndexFormat::kInt16) {
    // Endian::k8in16, swap half-words.
    xe::copy_and_swap_16_aligned(
        reinterpret_cast<uint16_t*>(
            reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset),
        reinterpret_cast<const uint16_t*>(source_ptr), source_length / 2);
  } else if (format == IndexFormat::kInt32) {
    // Endian::k8in32, swap words.
    xe::copy_and_swap_32_aligned(
        reinterpret_cast<uint32_t*>(
            reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset),
        reinterpret_cast<const uint32_t*>(source_ptr), source_length / 4);
  }

  return {transient_index_buffer_, offset};
}

std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
    const void* source_ptr, size_t source_length) {
  // TODO(benvanik): check cache.

  // Allocate space in the buffer for our data.
  auto offset = AllocateTransientData(vertex_buffer_alignment_, source_length);
  if (offset == VK_WHOLE_SIZE) {
    // OOM.
    return {nullptr, VK_WHOLE_SIZE};
  }

  // Copy data into the buffer; note that offset is in bytes.
  // TODO(benvanik): memcpy then use compute shaders to swap?
  // Endian::k8in32, swap words.
  xe::copy_and_swap_32_aligned(
      reinterpret_cast<uint32_t*>(
          reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset),
      reinterpret_cast<const uint32_t*>(source_ptr), source_length / 4);

  return {transient_vertex_buffer_, offset};
}

VkDeviceSize BufferCache::AllocateTransientData(size_t alignment,
                                                size_t length) {
  // Try to add to the end, wrapping if required.

  // Check to ensure there is space.
  if (false) {
    // Consume all fences.
  }

  // Slice off our bit.

  return VK_WHOLE_SIZE;
}
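// A minimal sketch (not in this commit) of the ring-buffer scheme the stub
// above describes: bump-allocate from a tail cursor, wrap at capacity, and
// report OOM when the slice would overlap data the GPU may still be reading.
// The head/tail cursors are hypothetical; fence consumption would advance
// *head. Assumes power-of-two alignments, as Vulkan reports in practice.
static VkDeviceSize AllocateFromRing(VkDeviceSize* head, VkDeviceSize* tail,
                                     VkDeviceSize capacity, size_t alignment,
                                     size_t length) {
  VkDeviceSize offset =
      (*tail + alignment - 1) & ~static_cast<VkDeviceSize>(alignment - 1);
  if (offset + length > capacity) {
    offset = 0;  // Wrap to the start of the ring.
  }
  if (offset < *head && offset + length > *head) {
    return VK_WHOLE_SIZE;  // Would clobber in-flight data; wait on fences.
  }
  *tail = offset + length;
  return offset;
}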

void BufferCache::Flush(VkCommandBuffer command_buffer) {
  // If we are flushing a big enough chunk, queue up an event.
  // We don't want to do this for everything, but often enough so that we
  // won't run out of space.
  if (true) {
    // VkEvent finish_event;
    // vkCmdSetEvent(cmd_buffer, finish_event,
    //               VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
  }

  // Flush memory.
  // TODO(benvanik): subrange.
  VkMappedMemoryRange dirty_range;
  dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  dirty_range.pNext = nullptr;
  dirty_range.memory = transient_buffer_memory_;
  dirty_range.offset = 0;
  dirty_range.size = transient_capacity_;
  vkFlushMappedMemoryRanges(device_, 1, &dirty_range);
}
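// A minimal sketch (not in this commit) of the subrange flush the TODO above
// refers to. Flushed mapped ranges must be aligned to the device's
// nonCoherentAtomSize (from VkPhysicalDeviceLimits), so a hypothetical dirty
// window [dirty_start, dirty_end) is expanded to that granularity; assumes
// the atom size is a power of two, as it is in practice.
static VkMappedMemoryRange MakeDirtyRange(VkDeviceMemory memory,
                                          VkDeviceSize dirty_start,
                                          VkDeviceSize dirty_end,
                                          VkDeviceSize atom_size) {
  VkMappedMemoryRange range;
  range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  range.pNext = nullptr;
  range.memory = memory;
  range.offset = dirty_start & ~(atom_size - 1);  // Round down to atom.
  range.size =
      ((dirty_end + atom_size - 1) & ~(atom_size - 1)) - range.offset;
  return range;
}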

void BufferCache::InvalidateCache() {
  // TODO(benvanik): caching.
}

void BufferCache::ClearCache() {
  // TODO(benvanik): caching.
}

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe
src/xenia/gpu/vulkan/buffer_cache.h
@@ -0,0 +1,111 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_GPU_VULKAN_BUFFER_CACHE_H_
#define XENIA_GPU_VULKAN_BUFFER_CACHE_H_

#include <utility>

#include "xenia/gpu/register_file.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h"

namespace xe {
namespace gpu {
namespace vulkan {

// Efficiently manages buffers of various kinds.
// Used primarily for uploading index and vertex data from guest memory and
// transient data like shader constants.
class BufferCache {
 public:
  BufferCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device,
              size_t capacity);
  ~BufferCache();

  // Descriptor set containing the dynamic uniform buffer used for constant
  // uploads. Used in conjunction with a dynamic offset returned by
  // UploadConstantRegisters.
  // The set contains two bindings:
  //   binding = 0: for use in vertex shaders
  //   binding = 1: for use in fragment shaders
  VkDescriptorSet constant_descriptor_set() const {
    return transient_descriptor_set_;
  }

  // Uploads the constants specified in the register maps to the transient
  // uniform storage buffer.
  // The registers are tightly packed in order as [floats, ints, bools].
  // Returns an offset that can be used with the constant_descriptor_set, or
  // VK_WHOLE_SIZE if the constants could not be uploaded (OOM).
  VkDeviceSize UploadConstantRegisters(
      const Shader::ConstantRegisterMap& constant_register_map);

  // Uploads index buffer data from guest memory, possibly eliding with
  // recently uploaded data or cached copies.
  // Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
  // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
  std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(const void* source_ptr,
                                                      size_t source_length,
                                                      IndexFormat format);

  // Uploads vertex buffer data from guest memory, possibly eliding with
  // recently uploaded data or cached copies.
  // Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
  // Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
  std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(const void* source_ptr,
                                                       size_t source_length);

  // Flushes all pending data to the GPU.
  // Until this is called the GPU is not guaranteed to see any data.
  // The given command buffer will be used to queue up events so that the
  // cache can determine when data has been consumed.
  void Flush(VkCommandBuffer command_buffer);

  // Marks the cache as potentially invalid.
  // This is not as strong as ClearCache and is a hint that any and all data
  // should be verified before being reused.
  void InvalidateCache();

  // Clears all cached content and prevents future elision with pending data.
  void ClearCache();

 private:
  // Allocates a block of memory in the transient buffer.
  // Returns VK_WHOLE_SIZE if the requested amount of memory is not available.
  VkDeviceSize AllocateTransientData(size_t alignment, size_t length);

  RegisterFile* register_file_ = nullptr;
  VkDevice device_ = nullptr;

  // Staging ringbuffer we cycle through fast. Used for data we don't
  // plan on keeping past the current frame.
  size_t transient_capacity_ = 0;
  VkBuffer transient_uniform_buffer_ = nullptr;
  VkBuffer transient_index_buffer_ = nullptr;
  VkBuffer transient_vertex_buffer_ = nullptr;
  VkDeviceMemory transient_buffer_memory_ = nullptr;
  void* transient_buffer_data_ = nullptr;

  // Required alignments for our various types.
  // All allocations must start at the appropriate alignment.
  VkDeviceSize uniform_buffer_alignment_ = 0;
  VkDeviceSize index_buffer_alignment_ = 0;
  VkDeviceSize vertex_buffer_alignment_ = 0;

  VkDescriptorPool descriptor_pool_ = nullptr;
  VkDescriptorSetLayout descriptor_set_layout_ = nullptr;
  VkDescriptorSet transient_descriptor_set_ = nullptr;
};

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe

#endif  // XENIA_GPU_VULKAN_BUFFER_CACHE_H_
src/xenia/gpu/vulkan/pipeline_cache.cc
@@ -0,0 +1,335 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/vulkan/pipeline_cache.h"

#include "third_party/xxhash/xxhash.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"

namespace xe {
namespace gpu {
namespace vulkan {

using xe::ui::vulkan::CheckResult;

PipelineCache::PipelineCache(RegisterFile* register_file,
                             ui::vulkan::VulkanDevice* device)
    : register_file_(register_file), device_(*device) {}

PipelineCache::~PipelineCache() {
  // Destroy all shaders.
  for (auto it : shader_map_) {
    delete it.second;
  }
  shader_map_.clear();
}

VulkanShader* PipelineCache::LoadShader(ShaderType shader_type,
                                        uint32_t guest_address,
                                        const uint32_t* host_address,
                                        uint32_t dword_count) {
  // Hash the input memory and look up the shader.
  uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
  auto it = shader_map_.find(data_hash);
  if (it != shader_map_.end()) {
    // Shader has been previously loaded.
    return it->second;
  }

  // Always create the shader and stash it away.
  // We need to track it even if it fails translation so we know not to try
  // again.
  VulkanShader* shader =
      new VulkanShader(shader_type, data_hash, host_address, dword_count);
  shader_map_.insert({data_hash, shader});

  // Perform translation.
  // If this fails the shader will be marked as invalid and ignored later.
  if (!shader_translator_.Translate(shader)) {
    XELOGE("Shader translation failed; marking shader as ignored");
    return shader;
  }

  // Prepare the shader for use (creates our VkShaderModule).
  // It could still fail at this point.
  if (!shader->Prepare()) {
    XELOGE("Shader preparation failed; marking shader as ignored");
    return shader;
  }

  if (shader->is_valid()) {
    XELOGGPU("Generated %s shader at 0x%.8X (%db):\n%s",
             shader_type == ShaderType::kVertex ? "vertex" : "pixel",
             guest_address, dword_count * 4,
             shader->ucode_disassembly().c_str());
  }

  // Dump shader files if desired.
  if (!FLAGS_dump_shaders.empty()) {
    shader->Dump(FLAGS_dump_shaders, "vk");
  }

  return shader;
}

bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer,
                                      VkRenderPass render_pass,
                                      PrimitiveType primitive_type) {
  // TODO(benvanik): create or look up a cached pipeline and bind it here.
  return false;
}
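// A minimal sketch (not in this commit) of the state-keyed lookup that
// ConfigurePipeline could grow into: pack the shader hashes and the shadowed
// registers that affect fixed-function state into a key, hash it, and reuse
// a cached VkPipeline on a hit. PipelineKey, the cache map, and the miss
// handling are all hypothetical.
struct PipelineKey {
  uint64_t vertex_shader_hash;
  uint64_t pixel_shader_hash;
  uint32_t prim_type;
  uint32_t rb_depthcontrol;
  uint32_t rb_blendcontrol[4];
  // ... any other registers that feed pipeline state ...
};

static VkPipeline LookupPipeline(
    const std::unordered_map<uint64_t, VkPipeline>& pipelines,
    const PipelineKey& key) {
  // Key must be zero-initialized before filling so padding hashes stably.
  uint64_t hash = XXH64(&key, sizeof(key), 0);
  auto it = pipelines.find(hash);
  return it != pipelines.end() ? it->second : nullptr;  // nullptr = build new.
}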

void PipelineCache::ClearCache() {
  // TODO(benvanik): caching.
}

bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) {
  uint32_t value = register_file_->values[register_name].u32;
  if (*dest == value) {
    return false;
  }
  *dest = value;
  return true;
}

bool PipelineCache::SetShadowRegister(float* dest, uint32_t register_name) {
  float value = register_file_->values[register_name].f32;
  if (*dest == value) {
    return false;
  }
  *dest = value;
  return true;
}

PipelineCache::UpdateStatus PipelineCache::UpdateShaders(
    PrimitiveType prim_type) {
  auto& regs = update_shaders_regs_;

  // These are the constant base addresses/ranges for shaders.
  // We have these hardcoded right now because nothing seems to differ.
  assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 ==
                  0x000FF000 ||
              register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
  assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 ==
                  0x000FF100 ||
              register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
  dirty |= SetShadowRegister(&regs.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC);
  // dirty |= regs.vertex_shader != active_vertex_shader_;
  // dirty |= regs.pixel_shader != active_pixel_shader_;
  dirty |= regs.prim_type != prim_type;
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }
  // regs.vertex_shader = static_cast<VulkanShader*>(active_vertex_shader_);
  // regs.pixel_shader = static_cast<VulkanShader*>(active_pixel_shader_);
  regs.prim_type = prim_type;

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargets() {
  auto& regs = update_render_targets_regs_;

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
  dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
  dirty |= SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO);
  dirty |= SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO);
  dirty |= SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO);
  dirty |= SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO);
  dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK);
  dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
  dirty |=
      SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
  dirty |= SetShadowRegister(&regs.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO);
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

PipelineCache::UpdateStatus PipelineCache::UpdateState(
    PrimitiveType prim_type) {
  bool mismatch = false;

#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \
  {                                                          \
    if (status == UpdateStatus::kError) {                    \
      XELOGE(error_message);                                 \
      return status;                                         \
    } else if (status == UpdateStatus::kMismatch) {          \
      mismatch = true;                                       \
    }                                                        \
  }

  UpdateStatus status;
  status = UpdateViewportState();
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state");
  status = UpdateRasterizerState(prim_type);
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state");
  status = UpdateBlendState();
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state");
  status = UpdateDepthStencilState();
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state");

  return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
}

PipelineCache::UpdateStatus PipelineCache::UpdateViewportState() {
  auto& regs = update_viewport_state_regs_;

  bool dirty = false;
  // dirty |= SetShadowRegister(&state_regs.pa_cl_clip_cntl,
  //                            XE_GPU_REG_PA_CL_CLIP_CNTL);
  dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
  dirty |= SetShadowRegister(&regs.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL);
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.pa_sc_window_offset,
                             XE_GPU_REG_PA_SC_WINDOW_OFFSET);
  dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl,
                             XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL);
  dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br,
                             XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_xoffset,
                             XE_GPU_REG_PA_CL_VPORT_XOFFSET);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_yoffset,
                             XE_GPU_REG_PA_CL_VPORT_YOFFSET);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_zoffset,
                             XE_GPU_REG_PA_CL_VPORT_ZOFFSET);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_xscale,
                             XE_GPU_REG_PA_CL_VPORT_XSCALE);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_yscale,
                             XE_GPU_REG_PA_CL_VPORT_YSCALE);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_zscale,
                             XE_GPU_REG_PA_CL_VPORT_ZSCALE);

  // Much of this state machine is extracted from:
  // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
  // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
  // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf

  // VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
  //            = false: multiply the X, Y coordinates by 1/W0.
  // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
  //           = false: multiply the Z coordinate by 1/W0.
  // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to
  //                    get 1/W0.
  // draw_batcher_.set_vtx_fmt((regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f,
  //                           (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f,
  //                           (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f);

  // Done in VS, no need to flush state.
  // if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) {
  //   draw_batcher_.set_window_scalar(1.0f, 1.0f);
  // } else {
  //   draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f);
  // }

  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  return UpdateStatus::kMismatch;
}

PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState(
    PrimitiveType prim_type) {
  auto& regs = update_rasterizer_state_regs_;

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_tl,
                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL);
  dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_br,
                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR);
  dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index,
                             XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX);
  dirty |= regs.prim_type != prim_type;
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  regs.prim_type = prim_type;

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

PipelineCache::UpdateStatus PipelineCache::UpdateBlendState() {
  auto& reg_file = *register_file_;
  auto& regs = update_blend_state_regs_;

  // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
  // Deprecated in GL, implemented in shader.
  // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
  // uint32_t color_control = reg_file[XE_GPU_REG_RB_COLORCONTROL].u32;
  // draw_batcher_.set_alpha_test((color_control & 0x4) != 0,  // ALPHATESTENABLE
  //                              color_control & 0x7,         // ALPHAFUNC
  //                              reg_file[XE_GPU_REG_RB_ALPHA_REF].f32);

  bool dirty = false;
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0);
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1);
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2);
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA);
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {
  auto& regs = update_depth_stencil_state_regs_;

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
  dirty |=
      SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe
src/xenia/gpu/vulkan/pipeline_cache.h
@@ -0,0 +1,167 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_GPU_VULKAN_PIPELINE_CACHE_H_
#define XENIA_GPU_VULKAN_PIPELINE_CACHE_H_

#include <cstring>
#include <unordered_map>

#include "xenia/gpu/register_file.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/spirv/spirv_disassembler.h"
#include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h"

namespace xe {
namespace gpu {
namespace vulkan {

// Configures and caches pipelines based on render state.
// This is responsible for properly setting all state required for a draw
// including shaders, various blend/etc options, and input configuration.
class PipelineCache {
 public:
  PipelineCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device);
  ~PipelineCache();

  // Loads a shader from the cache, possibly translating it.
  VulkanShader* LoadShader(ShaderType shader_type, uint32_t guest_address,
                           const uint32_t* host_address, uint32_t dword_count);

  // Configures a pipeline using the current render state and the given render
  // pass. If a matching pipeline is already available it will be used,
  // otherwise a new one may be created. Any state that can be set dynamically
  // in the command buffer is issued at this time.
  // Returns whether the pipeline could be successfully created.
  bool ConfigurePipeline(VkCommandBuffer command_buffer,
                         VkRenderPass render_pass,
                         PrimitiveType primitive_type);

  // Currently configured pipeline layout, if any.
  VkPipelineLayout current_pipeline_layout() const { return nullptr; }
  // Currently configured vertex shader, if any.
  VulkanShader* current_vertex_shader() const { return nullptr; }
  // Currently configured pixel shader, if any.
  VulkanShader* current_pixel_shader() const { return nullptr; }

  // Clears all cached content.
  void ClearCache();

 private:
  // TODO(benvanik): geometry shader cache.
  // TODO(benvanik): translated shader cache.
  // TODO(benvanik): pipeline layouts.
  // TODO(benvanik): pipeline cache.

  RegisterFile* register_file_ = nullptr;
  VkDevice device_ = nullptr;

  SpirvShaderTranslator shader_translator_;
  xe::ui::spirv::SpirvDisassembler disassembler_;
  // All loaded shaders mapped by their data hash key.
  std::unordered_map<uint64_t, VulkanShader*> shader_map_;

 private:
  enum class UpdateStatus {
    kCompatible,
    kMismatch,
    kError,
  };

  UpdateStatus UpdateShaders(PrimitiveType prim_type);
  UpdateStatus UpdateRenderTargets();
  UpdateStatus UpdateState(PrimitiveType prim_type);
  UpdateStatus UpdateViewportState();
  UpdateStatus UpdateRasterizerState(PrimitiveType prim_type);
  UpdateStatus UpdateBlendState();
  UpdateStatus UpdateDepthStencilState();

  bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
  bool SetShadowRegister(float* dest, uint32_t register_name);

  struct UpdateRenderTargetsRegisters {
    uint32_t rb_modecontrol;
    uint32_t rb_surface_info;
    uint32_t rb_color_info;
    uint32_t rb_color1_info;
    uint32_t rb_color2_info;
    uint32_t rb_color3_info;
    uint32_t rb_color_mask;
    uint32_t rb_depthcontrol;
    uint32_t rb_stencilrefmask;
    uint32_t rb_depth_info;

    UpdateRenderTargetsRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_render_targets_regs_;
  struct UpdateViewportStateRegisters {
    // uint32_t pa_cl_clip_cntl;
    uint32_t rb_surface_info;
    uint32_t pa_cl_vte_cntl;
    uint32_t pa_su_sc_mode_cntl;
    uint32_t pa_sc_window_offset;
    uint32_t pa_sc_window_scissor_tl;
    uint32_t pa_sc_window_scissor_br;
    float pa_cl_vport_xoffset;
    float pa_cl_vport_yoffset;
    float pa_cl_vport_zoffset;
    float pa_cl_vport_xscale;
    float pa_cl_vport_yscale;
    float pa_cl_vport_zscale;

    UpdateViewportStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_viewport_state_regs_;
  struct UpdateRasterizerStateRegisters {
    uint32_t pa_su_sc_mode_cntl;
    uint32_t pa_sc_screen_scissor_tl;
    uint32_t pa_sc_screen_scissor_br;
    uint32_t multi_prim_ib_reset_index;
    PrimitiveType prim_type;

    UpdateRasterizerStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_rasterizer_state_regs_;
  struct UpdateBlendStateRegisters {
    uint32_t rb_blendcontrol[4];
    float rb_blend_rgba[4];

    UpdateBlendStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_blend_state_regs_;
  struct UpdateDepthStencilStateRegisters {
    uint32_t rb_depthcontrol;
    uint32_t rb_stencilrefmask;

    UpdateDepthStencilStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_depth_stencil_state_regs_;
  struct UpdateShadersRegisters {
    PrimitiveType prim_type;
    uint32_t pa_su_sc_mode_cntl;
    uint32_t sq_program_cntl;
    uint32_t sq_context_misc;
    VulkanShader* vertex_shader;
    VulkanShader* pixel_shader;

    UpdateShadersRegisters() { Reset(); }
    void Reset() {
      sq_program_cntl = 0;
      vertex_shader = pixel_shader = nullptr;
    }
  } update_shaders_regs_;
};

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe

#endif  // XENIA_GPU_VULKAN_PIPELINE_CACHE_H_
src/xenia/gpu/vulkan/render_cache.cc
@@ -0,0 +1,45 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/vulkan/render_cache.h"

#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"

namespace xe {
namespace gpu {
namespace vulkan {

using xe::ui::vulkan::CheckResult;

RenderCache::RenderCache(RegisterFile* register_file,
                         ui::vulkan::VulkanDevice* device)
    : register_file_(register_file), device_(*device) {}

RenderCache::~RenderCache() = default;

VkRenderPass RenderCache::BeginRenderPass(VkCommandBuffer command_buffer) {
  // TODO(benvanik): create a render pass/framebuffer and begin it here.
  return nullptr;
}
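// A minimal sketch (not in this commit) of what BeginRenderPass will
// eventually record: beginning a render pass on the command buffer with a
// framebuffer matching the current surface configuration. The render_pass
// and framebuffer handles and the render area extent are hypothetical here.
static void BeginRenderPassSketch(VkCommandBuffer command_buffer,
                                  VkRenderPass render_pass,
                                  VkFramebuffer framebuffer,
                                  VkExtent2D surface_extent) {
  VkRenderPassBeginInfo begin_info;
  begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
  begin_info.pNext = nullptr;
  begin_info.renderPass = render_pass;
  begin_info.framebuffer = framebuffer;
  begin_info.renderArea.offset = {0, 0};
  begin_info.renderArea.extent = surface_extent;
  begin_info.clearValueCount = 0;  // Attachments are loaded, not cleared.
  begin_info.pClearValues = nullptr;
  vkCmdBeginRenderPass(command_buffer, &begin_info,
                       VK_SUBPASS_CONTENTS_INLINE);
}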

void RenderCache::EndRenderPass() {
  //
}

void RenderCache::ClearCache() {
  // TODO(benvanik): caching.
}

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe
src/xenia/gpu/vulkan/render_cache.h
@@ -0,0 +1,46 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_GPU_VULKAN_RENDER_CACHE_H_
#define XENIA_GPU_VULKAN_RENDER_CACHE_H_

#include "xenia/gpu/register_file.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h"

namespace xe {
namespace gpu {
namespace vulkan {

// Configures and caches render passes and framebuffers based on the current
// render target state, and begins/ends the passes used for each draw.
class RenderCache {
 public:
  RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device);
  ~RenderCache();

  VkRenderPass BeginRenderPass(VkCommandBuffer command_buffer);
  void EndRenderPass();

  // Clears all cached content.
  void ClearCache();

 private:
  RegisterFile* register_file_ = nullptr;
  VkDevice device_ = nullptr;
};

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe

#endif  // XENIA_GPU_VULKAN_RENDER_CACHE_H_
src/xenia/gpu/vulkan/texture_cache.cc
@@ -0,0 +1,37 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/gpu/vulkan/texture_cache.h"

#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"

namespace xe {
namespace gpu {
namespace vulkan {

using xe::ui::vulkan::CheckResult;

TextureCache::TextureCache(RegisterFile* register_file,
                           ui::vulkan::VulkanDevice* device)
    : register_file_(register_file), device_(*device) {}

TextureCache::~TextureCache() = default;

void TextureCache::ClearCache() {
  // TODO(benvanik): caching.
}

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe
src/xenia/gpu/vulkan/texture_cache.h
@@ -0,0 +1,47 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_GPU_VULKAN_TEXTURE_CACHE_H_
#define XENIA_GPU_VULKAN_TEXTURE_CACHE_H_

#include "xenia/gpu/register_file.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h"

namespace xe {
namespace gpu {
namespace vulkan {

// Uploads and caches textures from guest memory.
class TextureCache {
 public:
  TextureCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device);
  ~TextureCache();

  // TODO(benvanik): UploadTexture.
  // TODO(benvanik): Resolve.
  // TODO(benvanik): ReadTexture.

  // Clears all cached content.
  void ClearCache();

 private:
  RegisterFile* register_file_ = nullptr;
  VkDevice device_ = nullptr;
};

}  // namespace vulkan
}  // namespace gpu
}  // namespace xe

#endif  // XENIA_GPU_VULKAN_TEXTURE_CACHE_H_
src/xenia/gpu/vulkan/vulkan_command_processor.cc
@@ -20,12 +20,16 @@
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_graphics_system.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan_util.h"

namespace xe {
namespace gpu {
namespace vulkan {

using namespace xe::gpu::xenos;
using xe::ui::vulkan::CheckResult;

constexpr size_t kDefaultBufferCacheCapacity = 256 * 1024 * 1024;

VulkanCommandProcessor::VulkanCommandProcessor(
    VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state)
@@ -33,7 +37,14 @@ VulkanCommandProcessor::VulkanCommandProcessor(

VulkanCommandProcessor::~VulkanCommandProcessor() = default;

void VulkanCommandProcessor::ClearCaches() { CommandProcessor::ClearCaches(); }
void VulkanCommandProcessor::ClearCaches() {
  CommandProcessor::ClearCaches();

  buffer_cache_->ClearCache();
  pipeline_cache_->ClearCache();
  render_cache_->ClearCache();
  texture_cache_->ClearCache();
}

bool VulkanCommandProcessor::SetupContext() {
  if (!CommandProcessor::SetupContext()) {
@@ -41,10 +52,47 @@ bool VulkanCommandProcessor::SetupContext() {
    return false;
  }

  // Acquire our device and queue.
  auto context = static_cast<xe::ui::vulkan::VulkanContext*>(context_.get());
  device_ = context->device();
  queue_ = device_->AcquireQueue();
  if (!queue_) {
    // Need to reuse primary queue (with locks).
    queue_ = device_->primary_queue();
    queue_mutex_ = &device_->primary_queue_mutex();
  }

  // Setup fenced pools used for all our per-frame/per-draw resources.
  command_buffer_pool_ = std::make_unique<ui::vulkan::CommandBufferPool>(
      *device_, device_->queue_family_index(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);

  // Initialize the state machine caches.
  buffer_cache_ = std::make_unique<BufferCache>(register_file_, device_,
                                                kDefaultBufferCacheCapacity);
  pipeline_cache_ = std::make_unique<PipelineCache>(register_file_, device_);
  render_cache_ = std::make_unique<RenderCache>(register_file_, device_);
  texture_cache_ = std::make_unique<TextureCache>(register_file_, device_);

  return true;
}

void VulkanCommandProcessor::ShutdownContext() {
  // TODO(benvanik): wait until idle.

  buffer_cache_.reset();
  pipeline_cache_.reset();
  render_cache_.reset();
  texture_cache_.reset();

  // Free all pools. This must come after all of our caches clean up.
  command_buffer_pool_.reset();

  // Release the queue, if we were using an acquired one.
  if (!queue_mutex_) {
    device_->ReleaseQueue(queue_);
    queue_ = nullptr;
  }

  CommandProcessor::ShutdownContext();
}
@@ -55,7 +103,8 @@ void VulkanCommandProcessor::MakeCoherent() {
  CommandProcessor::MakeCoherent();

  if (status_host & 0x80000000ul) {
    // scratch_buffer_.ClearCache();
    // TODO(benvanik): less-fine-grained clearing.
    buffer_cache_->InvalidateCache();
  }
}
@ -103,346 +152,167 @@ Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type,
|
|||
uint32_t guest_address,
|
||||
const uint32_t* host_address,
|
||||
uint32_t dword_count) {
|
||||
// return shader_cache_.LookupOrInsertShader(shader_type, host_address,
|
||||
// dword_count);
|
||||
return nullptr;
|
||||
return pipeline_cache_->LoadShader(shader_type, guest_address, host_address,
|
||||
dword_count);
|
||||
}
|
||||
|
||||
bool VulkanCommandProcessor::IssueDraw(PrimitiveType prim_type,
|
||||
bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||
uint32_t index_count,
|
||||
IndexBufferInfo* index_buffer_info) {
|
||||
auto& regs = *register_file_;
|
||||
|
||||
#if FINE_GRAINED_DRAW_SCOPES
|
||||
SCOPE_profile_cpu_f("gpu");
|
||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||
|
||||
// Skip all drawing for now - what did you expect? :)
|
||||
return true;
|
||||
|
||||
bool draw_valid = false;
|
||||
// if (index_buffer_info) {
|
||||
// draw_valid = draw_batcher_.BeginDrawElements(prim_type, index_count,
|
||||
// index_buffer_info->format);
|
||||
//} else {
|
||||
// draw_valid = draw_batcher_.BeginDrawArrays(prim_type, index_count);
|
||||
//}
|
||||
if (!draw_valid) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto& regs = *register_file_;
|
||||
|
||||
auto enable_mode =
|
||||
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
|
||||
if (enable_mode == ModeControl::kIgnore) {
|
||||
// Ignored.
|
||||
// draw_batcher_.DiscardDraw();
|
||||
return true;
|
||||
} else if (enable_mode == ModeControl::kCopy) {
|
||||
// Special copy handling.
|
||||
// draw_batcher_.DiscardDraw();
|
||||
return IssueCopy();
|
||||
}
|
||||
|
||||
#define CHECK_ISSUE_UPDATE_STATUS(status, mismatch, error_message) \
|
||||
{ \
|
||||
if (status == UpdateStatus::kError) { \
|
||||
XELOGE(error_message); \
|
||||
/*draw_batcher_.DiscardDraw(); */ \
|
||||
return false; \
|
||||
} else if (status == UpdateStatus::kMismatch) { \
|
||||
mismatch = true; \
|
||||
} \
|
||||
}
|
||||
// TODO(benvanik): bigger batches.
|
||||
command_buffer_pool_->BeginBatch();
|
||||
VkCommandBuffer command_buffer = command_buffer_pool_->AcquireEntry();
|
||||
VkCommandBufferBeginInfo command_buffer_begin_info;
|
||||
command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
command_buffer_begin_info.pNext = nullptr;
|
||||
command_buffer_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||
command_buffer_begin_info.pInheritanceInfo = nullptr;
|
||||
auto err = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info);
|
||||
CheckResult(err, "vkBeginCommandBuffer");
|
||||
|
||||
UpdateStatus status;
|
||||
bool mismatch = false;
|
||||
status = UpdateShaders(prim_type);
|
||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to prepare draw shaders");
|
||||
status = UpdateRenderTargets();
|
||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render targets");
|
||||
// if (!active_framebuffer_) {
|
||||
// // No framebuffer, so nothing we do will actually have an effect.
|
||||
// // Treat it as a no-op.
|
||||
// // TODO(benvanik): if we have a vs export, still allow it to go.
|
||||
// draw_batcher_.DiscardDraw();
|
||||
// return true;
|
||||
//}
|
||||
|
||||
status = UpdateState(prim_type);
|
||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render state");
|
||||
status = PopulateSamplers();
|
||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch,
|
||||
"Unable to prepare draw samplers");
|
||||
|
||||
status = PopulateIndexBuffer(index_buffer_info);
|
||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup index buffer");
|
||||
status = PopulateVertexBuffers();
|
||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup vertex buffers");
|
||||
|
||||
// if (!draw_batcher_.CommitDraw()) {
|
||||
// return false;
|
||||
//}
|
||||
|
||||
// draw_batcher_.Flush(DrawBatcher::FlushMode::kMakeCoherent);
|
||||
if (context_->WasLost()) {
|
||||
// This draw lost us the context. This typically isn't hit.
|
||||
assert_always();
|
||||
// Begin the render pass.
|
||||
// This will setup our framebuffer and begin the pass in the command buffer.
|
||||
VkRenderPass render_pass = render_cache_->BeginRenderPass(command_buffer);
|
||||
if (!render_pass) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Configure the pipeline for drawing.
|
||||
// This encodes all render state (blend, depth, etc), our shader stages,
|
||||
// and our vertex input layout.
|
||||
if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_pass,
|
||||
primitive_type)) {
|
||||
render_cache_->EndRenderPass();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Upload the constants the shaders require.
|
||||
auto vertex_shader = pipeline_cache_->current_vertex_shader();
|
||||
auto pixel_shader = pipeline_cache_->current_pixel_shader();
|
||||
auto vertex_constant_offset = buffer_cache_->UploadConstantRegisters(
|
||||
vertex_shader->constant_register_map());
|
||||
auto pixel_constant_offset = buffer_cache_->UploadConstantRegisters(
|
||||
pixel_shader->constant_register_map());
|
||||
if (vertex_constant_offset == VK_WHOLE_SIZE ||
|
||||
pixel_constant_offset == VK_WHOLE_SIZE) {
|
||||
render_cache_->EndRenderPass();
|
||||
return false;
|
||||
}
|
||||
|
||||
// Configure constant uniform access to point at our offsets.
|
||||
auto constant_descriptor_set = buffer_cache_->constant_descriptor_set();
|
||||
auto pipeline_layout = pipeline_cache_->current_pipeline_layout();
|
||||
uint32_t constant_offsets[2] = {static_cast<uint32_t>(vertex_constant_offset),
|
||||
static_cast<uint32_t>(pixel_constant_offset)};
|
||||
vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
pipeline_layout, 0, 1, &constant_descriptor_set,
|
||||
static_cast<uint32_t>(xe::countof(constant_offsets)),
|
||||
constant_offsets);

  // Upload and bind index buffer data (if we have any).
  if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) {
    render_cache_->EndRenderPass();
    return false;
  }

  // Upload and bind all vertex buffer data.
  if (!PopulateVertexBuffers(command_buffer, vertex_shader)) {
    render_cache_->EndRenderPass();
    return false;
  }

  // Upload and set descriptors for all textures.
  if (!PopulateSamplers(command_buffer, vertex_shader, pixel_shader)) {
    render_cache_->EndRenderPass();
    return false;
  }

#if 0
  // Actually issue the draw.
  if (!index_buffer_info) {
    // Auto-indexed draw.
    uint32_t instance_count = 1;
    uint32_t first_vertex = 0;
    uint32_t first_instance = 0;
    vkCmdDraw(command_buffer, index_count, instance_count, first_vertex,
              first_instance);
  } else {
    // Index buffer draw.
    uint32_t instance_count = 1;
    uint32_t first_index =
        register_file_->values[XE_GPU_REG_VGT_INDX_OFFSET].u32;
    uint32_t vertex_offset = 0;
    uint32_t first_instance = 0;
    vkCmdDrawIndexed(command_buffer, index_count, instance_count, first_index,
                     vertex_offset, first_instance);
  }
#endif

  // End the rendering pass.
  render_cache_->EndRenderPass();

  // TODO(benvanik): bigger batches.
  err = vkEndCommandBuffer(command_buffer);
  CheckResult(err, "vkEndCommandBuffer");
  VkFence fence;
  VkFenceCreateInfo fence_info;
  fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
  fence_info.pNext = nullptr;
  fence_info.flags = 0;
  vkCreateFence(*device_, &fence_info, nullptr, &fence);
  command_buffer_pool_->EndBatch(fence);
  VkSubmitInfo submit_info;
  submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  submit_info.pNext = nullptr;
  submit_info.waitSemaphoreCount = 0;
  submit_info.pWaitSemaphores = nullptr;
  submit_info.commandBufferCount = 1;
  submit_info.pCommandBuffers = &command_buffer;
  submit_info.signalSemaphoreCount = 0;
  submit_info.pSignalSemaphores = nullptr;
  if (queue_mutex_) {
    queue_mutex_->lock();
  }
  err = vkQueueSubmit(queue_, 1, &submit_info, fence);
  if (queue_mutex_) {
    queue_mutex_->unlock();
  }
  CheckResult(err, "vkQueueSubmit");
  if (queue_mutex_) {
    queue_mutex_->lock();
  }
  vkQueueWaitIdle(queue_);
  if (queue_mutex_) {
    queue_mutex_->unlock();
  }
  command_buffer_pool_->Scavenge();
  vkDestroyFence(*device_, fence, nullptr);

  return true;
}

bool VulkanCommandProcessor::SetShadowRegister(uint32_t* dest,
                                               uint32_t register_name) {
  uint32_t value = register_file_->values[register_name].u32;
  if (*dest == value) {
    return false;
  }
  *dest = value;
  return true;
}

bool VulkanCommandProcessor::SetShadowRegister(float* dest,
                                               uint32_t register_name) {
  float value = register_file_->values[register_name].f32;
  if (*dest == value) {
    return false;
  }
  *dest = value;
  return true;
}

VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::UpdateShaders(
    PrimitiveType prim_type) {
  auto& regs = update_shaders_regs_;

  // These are the constant base addresses/ranges for shaders.
  // We have these hardcoded right now because nothing seems to differ.
  assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 ==
                  0x000FF000 ||
              register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
  assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 ==
                  0x000FF100 ||
              register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
  dirty |= SetShadowRegister(&regs.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC);
  dirty |= regs.vertex_shader != active_vertex_shader_;
  dirty |= regs.pixel_shader != active_pixel_shader_;
  dirty |= regs.prim_type != prim_type;
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }
  regs.vertex_shader = static_cast<VulkanShader*>(active_vertex_shader_);
  regs.pixel_shader = static_cast<VulkanShader*>(active_pixel_shader_);
  regs.prim_type = prim_type;

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::UpdateRenderTargets() {
  auto& regs = update_render_targets_regs_;

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
  dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
  dirty |= SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO);
  dirty |= SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO);
  dirty |= SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO);
  dirty |= SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO);
  dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK);
  dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
  dirty |=
      SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
  dirty |= SetShadowRegister(&regs.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO);
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::UpdateState(
    PrimitiveType prim_type) {
  bool mismatch = false;

#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \
  {                                                          \
    if (status == UpdateStatus::kError) {                    \
      XELOGE(error_message);                                 \
      return status;                                         \
    } else if (status == UpdateStatus::kMismatch) {          \
      mismatch = true;                                       \
    }                                                        \
  }

  UpdateStatus status;
  status = UpdateViewportState();
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state");
  status = UpdateRasterizerState(prim_type);
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state");
  status = UpdateBlendState();
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state");
  status = UpdateDepthStencilState();
  CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state");

  return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::UpdateViewportState() {
  auto& regs = update_viewport_state_regs_;

  bool dirty = false;
  // dirty |= SetShadowRegister(&state_regs.pa_cl_clip_cntl,
  //                            XE_GPU_REG_PA_CL_CLIP_CNTL);
  dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
  dirty |= SetShadowRegister(&regs.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL);
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.pa_sc_window_offset,
                             XE_GPU_REG_PA_SC_WINDOW_OFFSET);
  dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl,
                             XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL);
  dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br,
                             XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_xoffset,
                             XE_GPU_REG_PA_CL_VPORT_XOFFSET);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_yoffset,
                             XE_GPU_REG_PA_CL_VPORT_YOFFSET);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_zoffset,
                             XE_GPU_REG_PA_CL_VPORT_ZOFFSET);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_xscale,
                             XE_GPU_REG_PA_CL_VPORT_XSCALE);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_yscale,
                             XE_GPU_REG_PA_CL_VPORT_YSCALE);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_zscale,
                             XE_GPU_REG_PA_CL_VPORT_ZSCALE);

  // Much of this state machine is extracted from:
  // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
  // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
  // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf

  // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
  // VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
  //            = false: multiply the X, Y coordinates by 1/W0.
  // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
  //           = false: multiply the Z coordinate by 1/W0.
  // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to
  //              get 1/W0.
  // draw_batcher_.set_vtx_fmt((regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f,
  //                           (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f,
  //                           (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f);

  // Done in VS, no need to flush state.
  // if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) {
  //   draw_batcher_.set_window_scalar(1.0f, 1.0f);
  //} else {
  //   draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f);
  //}
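
  // A rough sketch (assumptions, not wired up yet) of how the shadowed
  // viewport registers could map to a Vulkan viewport, using the usual
  // NDC-to-window derivation (left = xoffset - xscale, width = 2 * xscale);
  // window offset and half-pixel handling are omitted:
  // VkViewport viewport;
  // viewport.x = regs.pa_cl_vport_xoffset - regs.pa_cl_vport_xscale;
  // viewport.y = regs.pa_cl_vport_yoffset - regs.pa_cl_vport_yscale;
  // viewport.width = regs.pa_cl_vport_xscale * 2.0f;
  // viewport.height = regs.pa_cl_vport_yscale * 2.0f;
  // viewport.minDepth = regs.pa_cl_vport_zoffset;
  // viewport.maxDepth = regs.pa_cl_vport_zoffset + regs.pa_cl_vport_zscale;
  // vkCmdSetViewport(command_buffer, 0, 1, &viewport);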

  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  return UpdateStatus::kMismatch;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::UpdateRasterizerState(PrimitiveType prim_type) {
  auto& regs = update_rasterizer_state_regs_;

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_tl,
                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL);
  dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_br,
                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR);
  dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index,
                             XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX);
  dirty |= regs.prim_type != prim_type;
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  regs.prim_type = prim_type;

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::UpdateBlendState() {
  auto& reg_file = *register_file_;
  auto& regs = update_blend_state_regs_;

  // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
  // Deprecated in GL, implemented in shader.
  // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
  // uint32_t color_control = reg_file[XE_GPU_REG_RB_COLORCONTROL].u32;
  // draw_batcher_.set_alpha_test((color_control & 0x4) != 0,  // ALPHATESTENABLE
  //                              color_control & 0x7,         // ALPHAFUNC
  //                              reg_file[XE_GPU_REG_RB_ALPHA_REF].f32);

  bool dirty = false;
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0);
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1);
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2);
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA);
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::UpdateDepthStencilState() {
  auto& regs = update_depth_stencil_state_regs_;

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
  dirty |=
      SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::PopulateIndexBuffer(
    IndexBufferInfo* index_buffer_info) {
bool VulkanCommandProcessor::PopulateIndexBuffer(
    VkCommandBuffer command_buffer, IndexBufferInfo* index_buffer_info) {
  auto& regs = *register_file_;
  if (!index_buffer_info || !index_buffer_info->guest_base) {
    // No index buffer or auto draw.
    return UpdateStatus::kCompatible;
    return true;
  }
  auto& info = *index_buffer_info;

@@ -462,19 +332,44 @@ VulkanCommandProcessor::PopulateIndexBuffer(

  trace_writer_.WriteMemoryRead(info.guest_base, info.length);

  return UpdateStatus::kCompatible;
  // Upload (or get a cached copy of) the buffer.
  const void* source_ptr =
      memory_->TranslatePhysical<const void*>(info.guest_base);
  size_t source_length =
      info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
                                                       : sizeof(uint16_t));
  auto buffer_ref =
      buffer_cache_->UploadIndexBuffer(source_ptr, source_length, info.format);
  if (buffer_ref.second == VK_WHOLE_SIZE) {
    // Failed to upload buffer.
    return false;
  }

  // Bind the buffer.
  VkIndexType index_type = info.format == IndexFormat::kInt32
                               ? VK_INDEX_TYPE_UINT32
                               : VK_INDEX_TYPE_UINT16;
  vkCmdBindIndexBuffer(command_buffer, buffer_ref.first, buffer_ref.second,
                       index_type);

  return true;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::PopulateVertexBuffers() {
bool VulkanCommandProcessor::PopulateVertexBuffers(
    VkCommandBuffer command_buffer, VulkanShader* vertex_shader) {
  auto& regs = *register_file_;

#if FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES

  auto& regs = *register_file_;
  assert_not_null(active_vertex_shader_);
  auto& vertex_bindings = vertex_shader->vertex_bindings();
  assert_true(vertex_bindings.size() <= 32);
  VkBuffer all_buffers[32];
  VkDeviceSize all_buffer_offsets[32];
  uint32_t buffer_index = 0;

  for (const auto& vertex_binding : active_vertex_shader_->vertex_bindings()) {
  for (const auto& vertex_binding : vertex_bindings) {
    int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
            (vertex_binding.fetch_constant / 3) * 6;
    const auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);

@@ -492,58 +387,72 @@ VulkanCommandProcessor::PopulateVertexBuffers() {
    }
    assert_true(fetch->endian == 2);

    // TODO(benvanik): compute based on indices or vertex count.
    // THIS CAN BE MASSIVELY INCORRECT (too large).
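    // (fetch->size is expressed in dwords, hence the * 4 below to get a byte
    // length.)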
    size_t valid_range = size_t(fetch->size * 4);

    trace_writer_.WriteMemoryRead(fetch->address << 2, valid_range);

    // Upload (or get a cached copy of) the buffer.
    const void* source_ptr =
        memory_->TranslatePhysical<const void*>(fetch->address << 2);
    size_t source_length = valid_range;
    auto buffer_ref =
        buffer_cache_->UploadVertexBuffer(source_ptr, source_length);
    if (buffer_ref.second == VK_WHOLE_SIZE) {
      // Failed to upload buffer.
      return false;
    }

    // Stash the buffer reference for our bulk bind at the end.
    all_buffers[buffer_index] = buffer_ref.first;
    all_buffer_offsets[buffer_index] = buffer_ref.second;
    ++buffer_index;
  }

  return UpdateStatus::kCompatible;
  // Bind buffers.
  vkCmdBindVertexBuffers(command_buffer, 0, buffer_index, all_buffers,
                         all_buffer_offsets);

  return true;
}

VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::PopulateSamplers() {
bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer,
                                              VulkanShader* vertex_shader,
                                              VulkanShader* pixel_shader) {
#if FINE_GRAINED_DRAW_SCOPES
  SCOPE_profile_cpu_f("gpu");
#endif  // FINE_GRAINED_DRAW_SCOPES

  bool mismatch = false;
  bool any_failed = false;

  // VS and PS samplers are shared, but may be used exclusively.
  // We walk each and setup lazily.
  bool has_setup_sampler[32] = {false};

  // Vertex texture samplers.
  for (auto& texture_binding : active_vertex_shader_->texture_bindings()) {
  for (auto& texture_binding : vertex_shader->texture_bindings()) {
    if (has_setup_sampler[texture_binding.fetch_constant]) {
      continue;
    }
    has_setup_sampler[texture_binding.fetch_constant] = true;
    auto status = PopulateSampler(texture_binding);
    if (status == UpdateStatus::kError) {
      return status;
    } else if (status == UpdateStatus::kMismatch) {
      mismatch = true;
    }
    any_failed =
        !PopulateSampler(command_buffer, texture_binding) || any_failed;
  }

  // Pixel shader texture sampler.
  for (auto& texture_binding : active_pixel_shader_->texture_bindings()) {
  for (auto& texture_binding : pixel_shader->texture_bindings()) {
    if (has_setup_sampler[texture_binding.fetch_constant]) {
      continue;
    }
    has_setup_sampler[texture_binding.fetch_constant] = true;
    auto status = PopulateSampler(texture_binding);
    if (status == UpdateStatus::kError) {
      return UpdateStatus::kError;
    } else if (status == UpdateStatus::kMismatch) {
      mismatch = true;
    }
    any_failed =
        !PopulateSampler(command_buffer, texture_binding) || any_failed;
  }

  return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
  return !any_failed;
}

VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::PopulateSampler(
bool VulkanCommandProcessor::PopulateSampler(
    VkCommandBuffer command_buffer,
    const Shader::TextureBinding& texture_binding) {
  auto& regs = *register_file_;
  int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +

@@ -553,30 +462,34 @@ VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::PopulateSampler(

  // ?
  if (!fetch.type) {
    return UpdateStatus::kCompatible;
    return true;
  }
  assert_true(fetch.type == 0x2);

  TextureInfo texture_info;
  if (!TextureInfo::Prepare(fetch, &texture_info)) {
    XELOGE("Unable to parse texture fetcher info");
    return UpdateStatus::kCompatible;  // invalid texture used
    return true;  // invalid texture used
  }
  SamplerInfo sampler_info;
  if (!SamplerInfo::Prepare(fetch, texture_binding.fetch_instr,
                            &sampler_info)) {
    XELOGE("Unable to parse sampler info");
    return UpdateStatus::kCompatible;  // invalid texture used
    return true;  // invalid texture used
  }

  trace_writer_.WriteMemoryRead(texture_info.guest_address,
                                texture_info.input_length);

  return UpdateStatus::kCompatible;
  // TODO(benvanik): texture cache lookup.
  // TODO(benvanik): bind or return so PopulateSamplers can batch.

  return true;
}

bool VulkanCommandProcessor::IssueCopy() {
  SCOPE_profile_cpu_f("gpu");
  // TODO(benvanik): resolve.
  return true;
}
@@ -23,12 +23,17 @@
#include "xenia/base/threading.h"
#include "xenia/gpu/command_processor.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/buffer_cache.h"
#include "xenia/gpu/vulkan/pipeline_cache.h"
#include "xenia/gpu/vulkan/render_cache.h"
#include "xenia/gpu/vulkan/texture_cache.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/kernel/xthread.h"
#include "xenia/memory.h"
#include "xenia/ui/vulkan/fenced_pools.h"
#include "xenia/ui/vulkan/vulkan_context.h"
#include "xenia/ui/vulkan/vulkan_device.h"

namespace xe {
namespace gpu {

@@ -45,12 +50,6 @@ class VulkanCommandProcessor : public CommandProcessor {
  void ClearCaches() override;

 private:
  enum class UpdateStatus {
    kCompatible,
    kMismatch,
    kError,
  };

  bool SetupContext() override;
  void ShutdownContext() override;

@@ -65,97 +64,35 @@ class VulkanCommandProcessor : public CommandProcessor {
                         const uint32_t* host_address,
                         uint32_t dword_count) override;

  bool IssueDraw(PrimitiveType prim_type, uint32_t index_count,
  bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count,
                 IndexBufferInfo* index_buffer_info) override;
  UpdateStatus UpdateShaders(PrimitiveType prim_type);
  UpdateStatus UpdateRenderTargets();
  UpdateStatus UpdateState(PrimitiveType prim_type);
  UpdateStatus UpdateViewportState();
  UpdateStatus UpdateRasterizerState(PrimitiveType prim_type);
  UpdateStatus UpdateBlendState();
  UpdateStatus UpdateDepthStencilState();
  UpdateStatus PopulateIndexBuffer(IndexBufferInfo* index_buffer_info);
  UpdateStatus PopulateVertexBuffers();
  UpdateStatus PopulateSamplers();
  UpdateStatus PopulateSampler(const Shader::TextureBinding& texture_binding);
  bool PopulateIndexBuffer(VkCommandBuffer command_buffer,
                           IndexBufferInfo* index_buffer_info);
  bool PopulateVertexBuffers(VkCommandBuffer command_buffer,
                             VulkanShader* vertex_shader);
  bool PopulateSamplers(VkCommandBuffer command_buffer,
                        VulkanShader* vertex_shader,
                        VulkanShader* pixel_shader);
  bool PopulateSampler(VkCommandBuffer command_buffer,
                       const Shader::TextureBinding& texture_binding);
  bool IssueCopy() override;

  SpirvShaderTranslator shader_translator_;
  xe::ui::vulkan::VulkanDevice* device_ = nullptr;

 private:
  bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
  bool SetShadowRegister(float* dest, uint32_t register_name);
  struct UpdateRenderTargetsRegisters {
    uint32_t rb_modecontrol;
    uint32_t rb_surface_info;
    uint32_t rb_color_info;
    uint32_t rb_color1_info;
    uint32_t rb_color2_info;
    uint32_t rb_color3_info;
    uint32_t rb_color_mask;
    uint32_t rb_depthcontrol;
    uint32_t rb_stencilrefmask;
    uint32_t rb_depth_info;
  // TODO(benvanik): abstract behind context?
  // Queue used to submit work. This may be a dedicated queue for the command
  // processor and no locking will be required for use. If a dedicated queue
  // was not available this will be the device primary_queue and the
  // queue_mutex must be used to synchronize access to it.
  VkQueue queue_ = nullptr;
  std::mutex* queue_mutex_ = nullptr;

    UpdateRenderTargetsRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_render_targets_regs_;
  struct UpdateViewportStateRegisters {
    // uint32_t pa_cl_clip_cntl;
    uint32_t rb_surface_info;
    uint32_t pa_cl_vte_cntl;
    uint32_t pa_su_sc_mode_cntl;
    uint32_t pa_sc_window_offset;
    uint32_t pa_sc_window_scissor_tl;
    uint32_t pa_sc_window_scissor_br;
    float pa_cl_vport_xoffset;
    float pa_cl_vport_yoffset;
    float pa_cl_vport_zoffset;
    float pa_cl_vport_xscale;
    float pa_cl_vport_yscale;
    float pa_cl_vport_zscale;
  std::unique_ptr<BufferCache> buffer_cache_;
  std::unique_ptr<PipelineCache> pipeline_cache_;
  std::unique_ptr<RenderCache> render_cache_;
  std::unique_ptr<TextureCache> texture_cache_;

    UpdateViewportStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_viewport_state_regs_;
  struct UpdateRasterizerStateRegisters {
    uint32_t pa_su_sc_mode_cntl;
    uint32_t pa_sc_screen_scissor_tl;
    uint32_t pa_sc_screen_scissor_br;
    uint32_t multi_prim_ib_reset_index;
    PrimitiveType prim_type;

    UpdateRasterizerStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_rasterizer_state_regs_;
  struct UpdateBlendStateRegisters {
    uint32_t rb_blendcontrol[4];
    float rb_blend_rgba[4];

    UpdateBlendStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_blend_state_regs_;
  struct UpdateDepthStencilStateRegisters {
    uint32_t rb_depthcontrol;
    uint32_t rb_stencilrefmask;

    UpdateDepthStencilStateRegisters() { Reset(); }
    void Reset() { std::memset(this, 0, sizeof(*this)); }
  } update_depth_stencil_state_regs_;
  struct UpdateShadersRegisters {
    PrimitiveType prim_type;
    uint32_t pa_su_sc_mode_cntl;
    uint32_t sq_program_cntl;
    uint32_t sq_context_misc;
    VulkanShader* vertex_shader;
    VulkanShader* pixel_shader;

    UpdateShadersRegisters() { Reset(); }
    void Reset() {
      sq_program_cntl = 0;
      vertex_shader = pixel_shader = nullptr;
    }
  } update_shaders_regs_;
  std::unique_ptr<ui::vulkan::CommandBufferPool> command_buffer_pool_;
};

}  // namespace vulkan
@@ -0,0 +1,81 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved. *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#include "xenia/ui/vulkan/fenced_pools.h"

#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/ui/vulkan/vulkan_util.h"

namespace xe {
namespace ui {
namespace vulkan {

using xe::ui::vulkan::CheckResult;

CommandBufferPool::CommandBufferPool(VkDevice device,
                                     uint32_t queue_family_index,
                                     VkCommandBufferLevel level)
    : BaseFencedPool(device), level_(level) {
  // Create the pool used for allocating buffers.
  // They are marked as transient (short-lived) and cycled frequently.
  VkCommandPoolCreateInfo cmd_pool_info;
  cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
  cmd_pool_info.pNext = nullptr;
  cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
                        VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
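  // (VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT also permits the implicit
  // reset that happens when a recycled buffer is re-begun with
  // vkBeginCommandBuffer.)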
  cmd_pool_info.queueFamilyIndex = queue_family_index;
  auto err =
      vkCreateCommandPool(device_, &cmd_pool_info, nullptr, &command_pool_);
  CheckResult(err, "vkCreateCommandPool");

  // Allocate a bunch of command buffers to start.
  constexpr uint32_t kDefaultCount = 32;
  VkCommandBufferAllocateInfo command_buffer_info;
  command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
  command_buffer_info.pNext = nullptr;
  command_buffer_info.commandPool = command_pool_;
  command_buffer_info.level = level;
  command_buffer_info.commandBufferCount = kDefaultCount;
  VkCommandBuffer command_buffers[kDefaultCount];
  err =
      vkAllocateCommandBuffers(device_, &command_buffer_info, command_buffers);
  CheckResult(err, "vkAllocateCommandBuffers");
  for (size_t i = 0; i < xe::countof(command_buffers); ++i) {
    PushEntry(command_buffers[i]);
  }
}

CommandBufferPool::~CommandBufferPool() {
  vkDestroyCommandPool(device_, command_pool_, nullptr);
  command_pool_ = nullptr;
}

VkCommandBuffer CommandBufferPool::AllocateEntry() {
  // TODO(benvanik): allocate a bunch at once?
  VkCommandBufferAllocateInfo command_buffer_info;
  command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
  command_buffer_info.pNext = nullptr;
  command_buffer_info.commandPool = command_pool_;
  command_buffer_info.level = level_;
  command_buffer_info.commandBufferCount = 1;
  VkCommandBuffer command_buffer;
  auto err =
      vkAllocateCommandBuffers(device_, &command_buffer_info, &command_buffer);
  CheckResult(err, "vkAllocateCommandBuffers");
  return command_buffer;
}

void CommandBufferPool::FreeEntry(VkCommandBuffer handle) {
  vkFreeCommandBuffers(device_, command_pool_, 1, &handle);
}

}  // namespace vulkan
}  // namespace ui
}  // namespace xe
@@ -0,0 +1,200 @@
/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project *
 ******************************************************************************
 * Copyright 2016 Ben Vanik. All rights reserved. *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

#ifndef XENIA_UI_VULKAN_FENCED_POOLS_H_
#define XENIA_UI_VULKAN_FENCED_POOLS_H_

#include <memory>

#include "xenia/base/assert.h"
#include "xenia/ui/vulkan/vulkan.h"

namespace xe {
namespace ui {
namespace vulkan {

// Simple pool for Vulkan homogeneous objects that cannot be reused while
// in-flight.
// It batches pooled objects into groups and uses a vkQueueSubmit fence to
// indicate their availability. If no objects are free when one is requested
// the caller is expected to create them.
template <typename T, typename HANDLE>
class BaseFencedPool {
 public:
  BaseFencedPool(VkDevice device) : device_(device) {}

  virtual ~BaseFencedPool() {
    // TODO(benvanik): wait on fence until done.
    assert_null(pending_batch_list_head_);

    // Run down free lists.
    while (free_batch_list_head_) {
      auto batch = free_batch_list_head_;
      free_batch_list_head_ = batch->next;
      delete batch;
    }
    while (free_entry_list_head_) {
      auto entry = free_entry_list_head_;
      free_entry_list_head_ = entry->next;
      static_cast<T*>(this)->FreeEntry(entry->handle);
      delete entry;
    }
  }

  // Checks all pending batches for completion and scavenges their entries.
  // This should be called as frequently as reasonable.
  void Scavenge() {
    while (pending_batch_list_head_) {
      auto batch = pending_batch_list_head_;
      if (vkGetFenceStatus(device_, batch->fence) == VK_SUCCESS) {
        // Batch has completed. Reclaim.
        pending_batch_list_head_ = batch->next;
        if (batch == pending_batch_list_tail_) {
          pending_batch_list_tail_ = nullptr;
        }
        batch->next = free_batch_list_head_;
        free_batch_list_head_ = batch;
        batch->entry_list_tail->next = free_entry_list_head_;
        free_entry_list_head_ = batch->entry_list_head;
        batch->entry_list_head = nullptr;
        batch->entry_list_tail = nullptr;
      } else {
        // Batch is still in-flight. Since batches are executed in order we
        // know no others after it could have completed, so early-exit.
        return;
      }
    }
  }

  // Begins a new batch.
  // All entries acquired within this batch will be marked as in-use until
  // the fence specified in EndBatch is signalled.
  void BeginBatch() {
    assert_null(open_batch_);
    Batch* batch = nullptr;
    if (free_batch_list_head_) {
      // Reuse a batch.
      batch = free_batch_list_head_;
      free_batch_list_head_ = batch->next;
      batch->next = nullptr;
    } else {
      // Allocate new batch.
      batch = new Batch();
      batch->next = nullptr;
    }
    batch->entry_list_head = nullptr;
    batch->entry_list_tail = nullptr;
    batch->fence = nullptr;
    open_batch_ = batch;
  }

  // Attempts to acquire an entry from the pool in the current batch.
  // If none are available a new one will be allocated.
  HANDLE AcquireEntry() {
    Entry* entry = nullptr;
    if (free_entry_list_head_) {
      // Slice off an entry from the free list.
      entry = free_entry_list_head_;
      free_entry_list_head_ = entry->next;
    } else {
      // No entry available; allocate new.
      entry = new Entry();
      entry->handle = static_cast<T*>(this)->AllocateEntry();
    }
    entry->next = nullptr;
    if (!open_batch_->entry_list_head) {
      open_batch_->entry_list_head = entry;
    }
    if (open_batch_->entry_list_tail) {
      open_batch_->entry_list_tail->next = entry;
    }
    open_batch_->entry_list_tail = entry;
    return entry->handle;
  }

  // Ends the current batch using the given fence to indicate when the batch
  // has completed execution on the GPU.
  void EndBatch(VkFence fence) {
    assert_not_null(open_batch_);

    // Close and see if we have anything.
    auto batch = open_batch_;
    open_batch_ = nullptr;
    if (!batch->entry_list_head) {
      // Nothing to do.
      batch->next = free_batch_list_head_;
      free_batch_list_head_ = batch;
      return;
    }

    // Track the fence.
    batch->fence = fence;

    // Append to the end of the batch list.
    batch->next = nullptr;
    if (!pending_batch_list_head_) {
      pending_batch_list_head_ = batch;
    }
    if (pending_batch_list_tail_) {
      pending_batch_list_tail_->next = batch;
      pending_batch_list_tail_ = batch;
    } else {
      pending_batch_list_tail_ = batch;
    }
  }

 protected:
  void PushEntry(HANDLE handle) {
    auto entry = new Entry();
    entry->next = free_entry_list_head_;
    entry->handle = handle;
    free_entry_list_head_ = entry;
  }

  VkDevice device_ = nullptr;

 private:
  struct Entry {
    Entry* next;
    HANDLE handle;
  };
  struct Batch {
    Batch* next;
    Entry* entry_list_head;
    Entry* entry_list_tail;
    VkFence fence;
  };

  Batch* free_batch_list_head_ = nullptr;
  Entry* free_entry_list_head_ = nullptr;
  Batch* pending_batch_list_head_ = nullptr;
  Batch* pending_batch_list_tail_ = nullptr;
  Batch* open_batch_ = nullptr;
};

class CommandBufferPool
    : public BaseFencedPool<CommandBufferPool, VkCommandBuffer> {
 public:
  CommandBufferPool(VkDevice device, uint32_t queue_family_index,
                    VkCommandBufferLevel level);
  ~CommandBufferPool() override;

 protected:
  friend class BaseFencedPool<CommandBufferPool, VkCommandBuffer>;
  VkCommandBuffer AllocateEntry();
  void FreeEntry(VkCommandBuffer handle);

  VkCommandPool command_pool_ = nullptr;
  VkCommandBufferLevel level_ = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
};
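
// A rough usage sketch (fence creation and queue submission are illustrative
// here, not part of this header):
//   CommandBufferPool pool(device, queue_family_index,
//                          VK_COMMAND_BUFFER_LEVEL_PRIMARY);
//   pool.BeginBatch();
//   VkCommandBuffer command_buffer = pool.AcquireEntry();
//   // ... record command_buffer and vkQueueSubmit it, signalling `fence` ...
//   pool.EndBatch(fence);
//   pool.Scavenge();  // Call periodically; recycles entries once `fence`
//                     // signals.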

}  // namespace vulkan
}  // namespace ui
}  // namespace xe

#endif  // XENIA_UI_VULKAN_FENCED_POOLS_H_