WIP rough sketch of vulkan backend structure.
This commit is contained in:
parent
35e08d9428
commit
4c4a641096
|
@ -18,6 +18,10 @@ namespace xe {
|
||||||
// http://gnuradio.org/redmine/projects/gnuradio/repository/revisions/f2bc76cc65ffba51a141950f98e75364e49df874/entry/volk/kernels/volk/volk_32u_byteswap.h
|
// http://gnuradio.org/redmine/projects/gnuradio/repository/revisions/f2bc76cc65ffba51a141950f98e75364e49df874/entry/volk/kernels/volk/volk_32u_byteswap.h
|
||||||
// http://gnuradio.org/redmine/projects/gnuradio/repository/revisions/2c4c371885c31222362f70a1cd714415d1398021/entry/volk/kernels/volk/volk_64u_byteswap.h
|
// http://gnuradio.org/redmine/projects/gnuradio/repository/revisions/2c4c371885c31222362f70a1cd714415d1398021/entry/volk/kernels/volk/volk_64u_byteswap.h
|
||||||
|
|
||||||
|
// Copies `count` 128-bit (16-byte) blocks from `src` to `dest`.
// The source and destination ranges must not overlap (std::memcpy rules).
// NOTE(review): the name implies 16-byte-aligned pointers, but this
// implementation does not depend on alignment - confirm the intended contract.
void copy_128_aligned(void* dest, const void* src, size_t count) {
  if (count == 0) {
    // std::memcpy requires valid pointers even for a zero-length copy, so
    // bail out before touching possibly-null arguments.
    return;
  }
  std::memcpy(dest, src, count * 16);
}
|
||||||
|
|
||||||
void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src,
|
void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src,
|
||||||
size_t count) {
|
size_t count) {
|
||||||
return copy_and_swap_16_unaligned(dest, src, count);
|
return copy_and_swap_16_unaligned(dest, src, count);
|
||||||
|
|
|
@ -121,6 +121,8 @@ inline void* low_address(void* address) {
|
||||||
return reinterpret_cast<void*>(uint64_t(address) & 0xFFFFFFFF);
|
return reinterpret_cast<void*>(uint64_t(address) & 0xFFFFFFFF);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Copies count 128-bit (16-byte) blocks from src to dest.
// NOTE(review): presumably expects 16-byte-aligned pointers, per the name -
// confirm against callers.
void copy_128_aligned(void* dest, const void* src, size_t count);
|
||||||
|
|
||||||
void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src,
|
void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src,
|
||||||
size_t count);
|
size_t count);
|
||||||
void copy_and_swap_16_unaligned(uint16_t* dest, const uint16_t* src,
|
void copy_and_swap_16_unaligned(uint16_t* dest, const uint16_t* src,
|
||||||
|
|
|
@ -0,0 +1,330 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "xenia/gpu/vulkan/buffer_cache.h"
|
||||||
|
|
||||||
|
#include "xenia/base/logging.h"
|
||||||
|
#include "xenia/base/math.h"
|
||||||
|
#include "xenia/base/memory.h"
|
||||||
|
#include "xenia/base/profiling.h"
|
||||||
|
#include "xenia/gpu/gpu_flags.h"
|
||||||
|
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace vulkan {
|
||||||
|
|
||||||
|
using xe::ui::vulkan::CheckResult;
|
||||||
|
|
||||||
|
// Sets up the transient storage used for uploads:
//  - creates uniform, index and vertex VkBuffers of |capacity| bytes each,
//  - backs all three with one host-visible allocation (the buffers alias the
//    same memory at offset 0) that stays mapped until destruction,
//  - creates the descriptor pool, set layout and descriptor set handed out
//    via constant_descriptor_set().
// Vulkan failures are reported through CheckResult.
BufferCache::BufferCache(RegisterFile* register_file,
                         ui::vulkan::VulkanDevice* device, size_t capacity)
    : register_file_(register_file),
      device_(*device),
      transient_capacity_(capacity) {
  // All three transient buffers share the same create info apart from usage.
  const auto create_transient_buffer = [this](VkBufferUsageFlags usage,
                                              VkBuffer* out_buffer) {
    VkBufferCreateInfo buffer_info;
    buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    buffer_info.pNext = nullptr;
    buffer_info.flags = 0;
    buffer_info.size = transient_capacity_;
    buffer_info.usage = usage;
    buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    buffer_info.queueFamilyIndexCount = 0;
    buffer_info.pQueueFamilyIndices = nullptr;
    auto create_err =
        vkCreateBuffer(device_, &buffer_info, nullptr, out_buffer);
    CheckResult(create_err, "vkCreateBuffer");
  };
  create_transient_buffer(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
                          &transient_uniform_buffer_);
  create_transient_buffer(VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
                          &transient_index_buffer_);
  create_transient_buffer(VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
                          &transient_vertex_buffer_);

  // Allocate the underlying buffer we use for all storage.
  // We query all types and take the max alignment.
  VkMemoryRequirements uniform_buffer_requirements;
  VkMemoryRequirements index_buffer_requirements;
  VkMemoryRequirements vertex_buffer_requirements;
  vkGetBufferMemoryRequirements(device_, transient_uniform_buffer_,
                                &uniform_buffer_requirements);
  vkGetBufferMemoryRequirements(device_, transient_index_buffer_,
                                &index_buffer_requirements);
  vkGetBufferMemoryRequirements(device_, transient_vertex_buffer_,
                                &vertex_buffer_requirements);
  uniform_buffer_alignment_ = uniform_buffer_requirements.alignment;
  index_buffer_alignment_ = index_buffer_requirements.alignment;
  vertex_buffer_alignment_ = vertex_buffer_requirements.alignment;
  VkMemoryRequirements buffer_requirements;
  // The allocation must be at least as large as the biggest size any of the
  // aliased buffers reports, which may exceed the requested capacity.
  buffer_requirements.size = std::max<VkDeviceSize>(
      transient_capacity_,
      std::max(uniform_buffer_requirements.size,
               std::max(index_buffer_requirements.size,
                        vertex_buffer_requirements.size)));
  buffer_requirements.alignment =
      std::max(uniform_buffer_requirements.alignment,
               std::max(index_buffer_requirements.alignment,
                        vertex_buffer_requirements.alignment));
  // The memory is bound to all three buffers, so only memory types that every
  // buffer supports are acceptable: intersect the masks rather than union.
  buffer_requirements.memoryTypeBits =
      uniform_buffer_requirements.memoryTypeBits &
      index_buffer_requirements.memoryTypeBits &
      vertex_buffer_requirements.memoryTypeBits;
  transient_buffer_memory_ = device->AllocateMemory(
      buffer_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);

  // Alias all buffers to our memory.
  auto err = vkBindBufferMemory(device_, transient_uniform_buffer_,
                                transient_buffer_memory_, 0);
  CheckResult(err, "vkBindBufferMemory");
  err = vkBindBufferMemory(device_, transient_index_buffer_,
                           transient_buffer_memory_, 0);
  CheckResult(err, "vkBindBufferMemory");
  err = vkBindBufferMemory(device_, transient_vertex_buffer_,
                           transient_buffer_memory_, 0);
  CheckResult(err, "vkBindBufferMemory");

  // Map memory and keep it mapped while we use it.
  err = vkMapMemory(device_, transient_buffer_memory_, 0, VK_WHOLE_SIZE, 0,
                    &transient_buffer_data_);
  CheckResult(err, "vkMapMemory");

  // Descriptor pool used for all of our cached descriptors.
  // In the steady state we don't allocate anything, so these are all manually
  // managed.
  VkDescriptorPoolCreateInfo descriptor_pool_info;
  descriptor_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
  descriptor_pool_info.pNext = nullptr;
  descriptor_pool_info.flags =
      VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
  descriptor_pool_info.maxSets = 1;
  VkDescriptorPoolSize pool_sizes[1];
  pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
  // One descriptor per binding in the layout below (vertex + fragment).
  pool_sizes[0].descriptorCount = 2;
  descriptor_pool_info.poolSizeCount = 1;
  descriptor_pool_info.pPoolSizes = pool_sizes;
  err = vkCreateDescriptorPool(device_, &descriptor_pool_info, nullptr,
                               &descriptor_pool_);
  CheckResult(err, "vkCreateDescriptorPool");

  // Create the descriptor set layout used for our uniform buffer.
  // As it is a static binding that uses dynamic offsets during draws we can
  // create this once and reuse it forever.
  VkDescriptorSetLayoutBinding vertex_uniform_binding;
  vertex_uniform_binding.binding = 0;
  vertex_uniform_binding.descriptorType =
      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
  vertex_uniform_binding.descriptorCount = 1;
  vertex_uniform_binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
  vertex_uniform_binding.pImmutableSamplers = nullptr;
  VkDescriptorSetLayoutBinding fragment_uniform_binding;
  fragment_uniform_binding.binding = 1;
  fragment_uniform_binding.descriptorType =
      VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
  fragment_uniform_binding.descriptorCount = 1;
  fragment_uniform_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
  fragment_uniform_binding.pImmutableSamplers = nullptr;
  VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info;
  descriptor_set_layout_info.sType =
      VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
  descriptor_set_layout_info.pNext = nullptr;
  descriptor_set_layout_info.flags = 0;
  VkDescriptorSetLayoutBinding uniform_bindings[] = {
      vertex_uniform_binding, fragment_uniform_binding,
  };
  descriptor_set_layout_info.bindingCount =
      static_cast<uint32_t>(xe::countof(uniform_bindings));
  descriptor_set_layout_info.pBindings = uniform_bindings;
  err = vkCreateDescriptorSetLayout(device_, &descriptor_set_layout_info,
                                    nullptr, &descriptor_set_layout_);
  CheckResult(err, "vkCreateDescriptorSetLayout");

  // Create the descriptor we'll use for the uniform buffer.
  // This is what we hand out to everyone (who then also needs to use our
  // offsets).
  // NOTE(review): nothing in this constructor writes buffer info into the set
  // (no vkUpdateDescriptorSets call) - confirm that happens elsewhere.
  VkDescriptorSetAllocateInfo set_alloc_info;
  set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
  set_alloc_info.pNext = nullptr;
  set_alloc_info.descriptorPool = descriptor_pool_;
  set_alloc_info.descriptorSetCount = 1;
  set_alloc_info.pSetLayouts = &descriptor_set_layout_;
  err = vkAllocateDescriptorSets(device_, &set_alloc_info,
                                 &transient_descriptor_set_);
  CheckResult(err, "vkAllocateDescriptorSets");
}
|
||||||
|
|
||||||
|
// Releases everything the constructor created: the descriptor set, its layout
// and pool, the mapped backing memory, and finally the three transient buffers.
BufferCache::~BufferCache() {
  // Descriptor objects.
  vkFreeDescriptorSets(device_, descriptor_pool_, 1,
                       &transient_descriptor_set_);
  vkDestroyDescriptorSetLayout(device_, descriptor_set_layout_, nullptr);
  vkDestroyDescriptorPool(device_, descriptor_pool_, nullptr);

  // Backing storage: unmap first, then release the allocation.
  vkUnmapMemory(device_, transient_buffer_memory_);
  vkFreeMemory(device_, transient_buffer_memory_, nullptr);

  // Buffers that aliased the storage, destroyed in uniform/index/vertex order.
  const VkBuffer aliased_buffers[] = {
      transient_uniform_buffer_, transient_index_buffer_,
      transient_vertex_buffer_,
  };
  for (VkBuffer aliased_buffer : aliased_buffers) {
    vkDestroyBuffer(device_, aliased_buffer, nullptr);
  }
}
|
||||||
|
|
||||||
|
// Packs the constants selected by |constant_register_map| into freshly
// allocated transient uniform storage, in the order [floats, ints, bools].
// Returns the byte offset of the packed data within the transient buffer, or
// VK_WHOLE_SIZE if no space could be allocated.
VkDeviceSize BufferCache::UploadConstantRegisters(
    const Shader::ConstantRegisterMap& constant_register_map) {
  // Allocate space in the buffer for our data.
  auto offset = AllocateTransientData(uniform_buffer_alignment_,
                                      constant_register_map.packed_byte_length);
  if (offset == VK_WHOLE_SIZE) {
    // OOM.
    return VK_WHOLE_SIZE;
  }

  // Run through registers and copy them into the buffer.
  // TODO(benvanik): optimize this - it's hit twice every call.
  const auto& values = register_file_->values;
  uint8_t* dest_ptr =
      reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset;

  // Float constants: four bitmap words of 64 bits, one bit per constant, each
  // selected constant copied as a 16-byte block.
  for (int i = 0; i < 4; ++i) {
    auto piece = constant_register_map.float_bitmap[i];
    if (!piece) {
      continue;
    }
    // The mask must be 64 bits wide, start at bit 0 and actually advance.
    uint64_t bit = 1;
    for (int j = 0; j < 64; ++j, bit <<= 1) {
      if (piece & bit) {
        // NOTE(review): the register index advances by one per constant while
        // 16 bytes are copied; confirm whether the index should be scaled by
        // the number of registers per constant.
        xe::copy_128_aligned(
            dest_ptr,
            &values[XE_GPU_REG_SHADER_CONSTANT_000_X + i * 64 + j].f32, 1);
        dest_ptr += 16;
      }
    }
  }

  // Integer (loop) constants: one 32-bit value per set bit.
  for (int i = 0; i < 32; ++i) {
    // Unsigned literal: shifting a signed 1 into bit 31 would overflow.
    if (constant_register_map.int_bitmap & (1u << i)) {
      xe::store<uint32_t>(dest_ptr,
                          values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + i].u32);
      dest_ptr += 4;
    }
  }

  // Bool constants: a whole 32-bit register is stored for every bitmap word
  // that has any bit set.
  for (int i = 0; i < 8; ++i) {
    if (constant_register_map.bool_bitmap[i]) {
      xe::store<uint32_t>(
          dest_ptr, values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 + i].u32);
      dest_ptr += 4;
    }
  }

  return offset;
}
|
||||||
|
|
||||||
|
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
|
||||||
|
const void* source_ptr, size_t source_length, IndexFormat format) {
|
||||||
|
// TODO(benvanik): check cache.
|
||||||
|
|
||||||
|
// Allocate space in the buffer for our data.
|
||||||
|
auto offset = AllocateTransientData(index_buffer_alignment_, source_length);
|
||||||
|
if (offset == VK_WHOLE_SIZE) {
|
||||||
|
// OOM.
|
||||||
|
return {nullptr, VK_WHOLE_SIZE};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy data into the buffer.
|
||||||
|
// TODO(benvanik): get min/max indices and pass back?
|
||||||
|
// TODO(benvanik): memcpy then use compute shaders to swap?
|
||||||
|
if (format == IndexFormat::kInt16) {
|
||||||
|
// Endian::k8in16, swap half-words.
|
||||||
|
xe::copy_and_swap_16_aligned(
|
||||||
|
reinterpret_cast<uint16_t*>(transient_buffer_data_) + offset,
|
||||||
|
reinterpret_cast<const uint16_t*>(source_ptr), source_length / 2);
|
||||||
|
} else if (format == IndexFormat::kInt32) {
|
||||||
|
// Endian::k8in32, swap words.
|
||||||
|
xe::copy_and_swap_32_aligned(
|
||||||
|
reinterpret_cast<uint32_t*>(transient_buffer_data_) + offset,
|
||||||
|
reinterpret_cast<const uint32_t*>(source_ptr), source_length / 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
return {transient_index_buffer_, offset};
|
||||||
|
}
|
||||||
|
|
||||||
|
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
|
||||||
|
const void* source_ptr, size_t source_length) {
|
||||||
|
// TODO(benvanik): check cache.
|
||||||
|
|
||||||
|
// Allocate space in the buffer for our data.
|
||||||
|
auto offset = AllocateTransientData(vertex_buffer_alignment_, source_length);
|
||||||
|
if (offset == VK_WHOLE_SIZE) {
|
||||||
|
// OOM.
|
||||||
|
return {nullptr, VK_WHOLE_SIZE};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy data into the buffer.
|
||||||
|
// TODO(benvanik): memcpy then use compute shaders to swap?
|
||||||
|
// Endian::k8in32, swap words.
|
||||||
|
xe::copy_and_swap_32_aligned(
|
||||||
|
reinterpret_cast<uint32_t*>(transient_buffer_data_) + offset,
|
||||||
|
reinterpret_cast<const uint32_t*>(source_ptr), source_length / 4);
|
||||||
|
|
||||||
|
return {transient_vertex_buffer_, offset};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reserves |length| bytes at |alignment| in the transient buffer.
// Placeholder: no allocation logic exists yet, so every request currently
// reports failure by returning VK_WHOLE_SIZE.
VkDeviceSize BufferCache::AllocateTransientData(size_t alignment,
                                                size_t length) {
  // Planned steps, none implemented yet:
  //  1. Try to add to the end, wrapping if required.
  //  2. Check to ensure there is space, consuming all fences if there is not.
  //  3. Slice off our bit.
  return VK_WHOLE_SIZE;
}
|
||||||
|
|
||||||
|
// Flushes host writes to the mapped transient memory so they become visible
// to the device. |command_buffer| is not used yet; it is reserved for queuing
// the completion event sketched below.
void BufferCache::Flush(VkCommandBuffer command_buffer) {
  // If we are flushing a big enough chunk queue up an event.
  // We don't want to do this for everything but often enough so that we won't
  // run out of space.
  // TODO(benvanik): not implemented yet:
  // VkEvent finish_event;
  // vkCmdSetEvent(cmd_buffer, finish_event,
  //               VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);

  // Flush memory.
  // TODO(benvanik): subrange.
  VkMappedMemoryRange dirty_range;
  dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
  dirty_range.pNext = nullptr;
  dirty_range.memory = transient_buffer_memory_;
  dirty_range.offset = 0;
  // The whole allocation is mapped, so flush all of it. VK_WHOLE_SIZE avoids
  // the nonCoherentAtomSize rounding rules that apply to an explicit size.
  dirty_range.size = VK_WHOLE_SIZE;
  auto err = vkFlushMappedMemoryRanges(device_, 1, &dirty_range);
  CheckResult(err, "vkFlushMappedMemoryRanges");
}
|
||||||
|
|
||||||
|
// Placeholder: there is no cached content yet, so this does nothing.
void BufferCache::InvalidateCache() {
  // TODO(benvanik): caching.
}
|
||||||
|
|
||||||
|
// Placeholder: there is no cached content yet, so this does nothing.
void BufferCache::ClearCache() {
  // TODO(benvanik): caching.
}
|
||||||
|
|
||||||
|
} // namespace vulkan
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
|
@ -0,0 +1,111 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XENIA_GPU_VULKAN_BUFFER_CACHE_H_
|
||||||
|
#define XENIA_GPU_VULKAN_BUFFER_CACHE_H_
|
||||||
|
|
||||||
|
#include "xenia/gpu/register_file.h"
|
||||||
|
#include "xenia/gpu/shader.h"
|
||||||
|
#include "xenia/gpu/xenos.h"
|
||||||
|
#include "xenia/ui/vulkan/vulkan.h"
|
||||||
|
#include "xenia/ui/vulkan/vulkan_device.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace vulkan {
|
||||||
|
|
||||||
|
// Efficiently manages buffers of various kinds.
|
||||||
|
// Used primarily for uploading index and vertex data from guest memory and
|
||||||
|
// transient data like shader constants.
|
||||||
|
class BufferCache {
|
||||||
|
public:
|
||||||
|
BufferCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device,
|
||||||
|
size_t capacity);
|
||||||
|
~BufferCache();
|
||||||
|
|
||||||
|
// Descriptor set containing the dynamic uniform buffer used for constant
|
||||||
|
// uploads. Used in conjunction with a dynamic offset returned by
|
||||||
|
// UploadConstantRegisters.
|
||||||
|
// The set contains two bindings:
|
||||||
|
// binding = 0: for use in vertex shaders
|
||||||
|
// binding = 1: for use in fragment shaders
|
||||||
|
VkDescriptorSet constant_descriptor_set() const {
|
||||||
|
return transient_descriptor_set_;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Uploads the constants specified in the register maps to the transient
|
||||||
|
// uniform storage buffer.
|
||||||
|
// The registers are tightly packed in order as [floats, ints, bools].
|
||||||
|
// Returns an offset that can be used with the transient_descriptor_set or
|
||||||
|
// VK_WHOLE_SIZE if the constants could not be uploaded (OOM).
|
||||||
|
VkDeviceSize UploadConstantRegisters(
|
||||||
|
const Shader::ConstantRegisterMap& constant_register_map);
|
||||||
|
|
||||||
|
// Uploads index buffer data from guest memory, possibly eliding with
|
||||||
|
// recently uploaded data or cached copies.
|
||||||
|
// Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
|
||||||
|
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
||||||
|
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(const void* source_ptr,
|
||||||
|
size_t source_length,
|
||||||
|
IndexFormat format);
|
||||||
|
|
||||||
|
// Uploads vertex buffer data from guest memory, possibly eliding with
|
||||||
|
// recently uploaded data or cached copies.
|
||||||
|
// Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
|
||||||
|
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
|
||||||
|
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(const void* source_ptr,
|
||||||
|
size_t source_length);
|
||||||
|
|
||||||
|
// Flushes all pending data to the GPU.
|
||||||
|
// Until this is called the GPU is not guaranteed to see any data.
|
||||||
|
// The given command buffer will be used to queue up events so that the
|
||||||
|
// cache can determine when data has been consumed.
|
||||||
|
void Flush(VkCommandBuffer command_buffer);
|
||||||
|
|
||||||
|
// Marks the cache as potentially invalid.
|
||||||
|
// This is not as strong as ClearCache and is a hint that any and all data
|
||||||
|
// should be verified before being reused.
|
||||||
|
void InvalidateCache();
|
||||||
|
|
||||||
|
// Clears all cached content and prevents future elision with pending data.
|
||||||
|
void ClearCache();
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Allocates a block of memory in the transient buffer.
|
||||||
|
// Returns VK_WHOLE_SIZE if requested amount of memory is not available.
|
||||||
|
VkDeviceSize AllocateTransientData(size_t alignment, size_t length);
|
||||||
|
|
||||||
|
RegisterFile* register_file_ = nullptr;
|
||||||
|
VkDevice device_ = nullptr;
|
||||||
|
|
||||||
|
// Staging ringbuffer we cycle through fast. Used for data we don't
|
||||||
|
// plan on keeping past the current frame.
|
||||||
|
size_t transient_capacity_ = 0;
|
||||||
|
VkBuffer transient_uniform_buffer_ = nullptr;
|
||||||
|
VkBuffer transient_index_buffer_ = nullptr;
|
||||||
|
VkBuffer transient_vertex_buffer_ = nullptr;
|
||||||
|
VkDeviceMemory transient_buffer_memory_ = nullptr;
|
||||||
|
void* transient_buffer_data_ = nullptr;
|
||||||
|
|
||||||
|
// Required alignemnts for our various types.
|
||||||
|
// All allocations must start at the appropriate alignment.
|
||||||
|
VkDeviceSize uniform_buffer_alignment_ = 0;
|
||||||
|
VkDeviceSize index_buffer_alignment_ = 0;
|
||||||
|
VkDeviceSize vertex_buffer_alignment_ = 0;
|
||||||
|
|
||||||
|
VkDescriptorPool descriptor_pool_ = nullptr;
|
||||||
|
VkDescriptorSetLayout descriptor_set_layout_ = nullptr;
|
||||||
|
VkDescriptorSet transient_descriptor_set_ = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace vulkan
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
||||||
|
|
||||||
|
#endif // XENIA_GPU_VULKAN_BUFFER_CACHE_H_
|
|
@ -0,0 +1,335 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "xenia/gpu/vulkan/pipeline_cache.h"
|
||||||
|
|
||||||
|
#include "third_party/xxhash/xxhash.h"
|
||||||
|
#include "xenia/base/logging.h"
|
||||||
|
#include "xenia/base/math.h"
|
||||||
|
#include "xenia/base/memory.h"
|
||||||
|
#include "xenia/base/profiling.h"
|
||||||
|
#include "xenia/gpu/gpu_flags.h"
|
||||||
|
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace vulkan {
|
||||||
|
|
||||||
|
using xe::ui::vulkan::CheckResult;
|
||||||
|
|
||||||
|
// Records the register file to read state from and initializes device_ from
// |*device|. Shaders are not created here; LoadShader adds them on demand.
PipelineCache::PipelineCache(RegisterFile* register_file,
                             ui::vulkan::VulkanDevice* device)
    : register_file_(register_file),
      device_(*device) {
}
|
||||||
|
|
||||||
|
// Frees every shader that LoadShader allocated and empties the lookup map.
PipelineCache::~PipelineCache() {
  // The map stores raw pointers created with new, so release each one
  // explicitly before dropping the entries.
  for (const auto& hash_and_shader : shader_map_) {
    delete hash_and_shader.second;
  }
  shader_map_.clear();
}
|
||||||
|
|
||||||
|
// Returns the shader for the microcode at |host_address| (|dword_count|
// 32-bit words), creating, translating and preparing it on first use.
// Shaders are keyed by a hash of their microcode, so identical code at
// different guest addresses shares one entry. |guest_address| is used for
// logging only.
// The returned shader is never null but may be invalid if translation or
// preparation failed; it is still cached so the work is not retried.
VulkanShader* PipelineCache::LoadShader(ShaderType shader_type,
                                        uint32_t guest_address,
                                        const uint32_t* host_address,
                                        uint32_t dword_count) {
  // Hash the input memory and lookup the shader.
  uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
  auto it = shader_map_.find(data_hash);
  if (it != shader_map_.end()) {
    // Shader has been previously loaded.
    return it->second;
  }

  // Always create the shader and stash it away.
  // We need to track it even if it fails translation so we know not to try
  // again.
  VulkanShader* shader =
      new VulkanShader(shader_type, data_hash, host_address, dword_count);
  shader_map_.insert({data_hash, shader});

  // Perform translation.
  // If this fails the shader will be marked as invalid and ignored later.
  if (!shader_translator_.Translate(shader)) {
    XELOGE("Shader translation failed; marking shader as ignored");
    return shader;
  }

  // Prepare the shader for use (creates our VkShaderModule).
  // It could still fail at this point.
  if (!shader->Prepare()) {
    XELOGE("Shader preparation failed; marking shader as ignored");
    return shader;
  }

  if (shader->is_valid()) {
    // The byte count is unsigned, so it is printed with %u.
    XELOGGPU("Generated %s shader at 0x%.8X (%ub):\n%s",
             shader_type == ShaderType::kVertex ? "vertex" : "pixel",
             guest_address, dword_count * 4,
             shader->ucode_disassembly().c_str());
  }

  // Dump shader files if desired.
  if (!FLAGS_dump_shaders.empty()) {
    shader->Dump(FLAGS_dump_shaders, "vk");
  }

  return shader;
}
|
||||||
|
|
||||||
|
// Placeholder: pipeline configuration is not implemented, so this ignores its
// arguments and always returns false.
bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer,
                                      VkRenderPass render_pass,
                                      PrimitiveType primitive_type) {
  const bool configured = false;
  return configured;
}
|
||||||
|
|
||||||
|
// Placeholder: nothing is cleared yet.
void PipelineCache::ClearCache() {
  // TODO(benvanik): caching.
}
|
||||||
|
|
||||||
|
bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) {
|
||||||
|
uint32_t value = register_file_->values[register_name].u32;
|
||||||
|
if (*dest == value) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*dest = value;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool PipelineCache::SetShadowRegister(float* dest, uint32_t register_name) {
|
||||||
|
float value = register_file_->values[register_name].f32;
|
||||||
|
if (*dest == value) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*dest = value;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Refreshes the shadowed shader-related registers and the primitive type.
// Returns kCompatible when nothing changed since the previous call and
// kMismatch otherwise.
PipelineCache::UpdateStatus PipelineCache::UpdateShaders(
    PrimitiveType prim_type) {
  auto& regs = update_shaders_regs_;

  // These are the constant base addresses/ranges for shaders.
  // We have these hardcoded right now cause nothing seems to differ.
  assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 ==
                  0x000FF000 ||
              register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
  assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 ==
                  0x000FF100 ||
              register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);

  // Each call both compares and updates the shadow copy, so all of them must
  // run; |= avoids short-circuiting.
  bool dirty = false;
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
  dirty |= SetShadowRegister(&regs.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC);
  // dirty |= regs.vertex_shader != active_vertex_shader_;
  // dirty |= regs.pixel_shader != active_pixel_shader_;
  dirty |= regs.prim_type != prim_type;
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }
  // regs.vertex_shader = static_cast<VulkanShader*>(active_vertex_shader_);
  // regs.pixel_shader = static_cast<VulkanShader*>(active_pixel_shader_);
  regs.prim_type = prim_type;

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}
|
||||||
|
|
||||||
|
// Refreshes the shadowed render-target registers (mode control, surface,
// color, depth and stencil info).
// Returns kCompatible when none of them changed since the previous call and
// kMismatch otherwise.
PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargets() {
  auto& regs = update_render_targets_regs_;

  // Each call both compares and updates the shadow copy, so all of them must
  // run; |= avoids short-circuiting.
  bool dirty = false;
  dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
  dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
  dirty |= SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO);
  dirty |= SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO);
  dirty |= SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO);
  dirty |= SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO);
  dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK);
  dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
  dirty |=
      SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
  dirty |= SetShadowRegister(&regs.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO);
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}
|
||||||
|
|
||||||
|
// Runs the viewport, rasterizer, blend and depth/stencil updates in that
// order.
// Returns the failing status immediately (after logging) if any step reports
// kError; otherwise returns kMismatch if at least one step reported a
// mismatch, and kCompatible if none did.
PipelineCache::UpdateStatus PipelineCache::UpdateState(
    PrimitiveType prim_type) {
  bool any_mismatch = false;

// Logs and returns from the enclosing function on kError; records a mismatch
// in |mismatch| on kMismatch.
#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \
  {                                                          \
    if (status == UpdateStatus::kError) {                    \
      XELOGE(error_message);                                 \
      return status;                                         \
    } else if (status == UpdateStatus::kMismatch) {          \
      mismatch = true;                                       \
    }                                                        \
  }

  const UpdateStatus viewport_status = UpdateViewportState();
  CHECK_UPDATE_STATUS(viewport_status, any_mismatch,
                      "Unable to update viewport state");

  const UpdateStatus rasterizer_status = UpdateRasterizerState(prim_type);
  CHECK_UPDATE_STATUS(rasterizer_status, any_mismatch,
                      "Unable to update rasterizer state");

  const UpdateStatus blend_status = UpdateBlendState();
  CHECK_UPDATE_STATUS(blend_status, any_mismatch,
                      "Unable to update blend state");

  const UpdateStatus depth_stencil_status = UpdateDepthStencilState();
  CHECK_UPDATE_STATUS(depth_stencil_status, any_mismatch,
                      "Unable to update depth/stencil state");

  if (any_mismatch) {
    return UpdateStatus::kMismatch;
  }
  return UpdateStatus::kCompatible;
}
|
||||||
|
|
||||||
|
// Refreshes the shadow copies of the viewport-related registers held in
// update_viewport_state_regs_.
//
// Returns kMismatch if any SetShadowRegister call reported a change
// (presumably "shadow value differed from the register file" - confirm
// against SetShadowRegister), kCompatible otherwise. No Vulkan state is
// emitted here yet.
PipelineCache::UpdateStatus PipelineCache::UpdateViewportState() {
  auto& regs = update_viewport_state_regs_;

  bool dirty = false;
  // dirty |= SetShadowRegister(&state_regs.pa_cl_clip_cntl,
  //     XE_GPU_REG_PA_CL_CLIP_CNTL);
  dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
  dirty |= SetShadowRegister(&regs.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL);
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.pa_sc_window_offset,
                             XE_GPU_REG_PA_SC_WINDOW_OFFSET);
  dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl,
                             XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL);
  dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br,
                             XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_xoffset,
                             XE_GPU_REG_PA_CL_VPORT_XOFFSET);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_yoffset,
                             XE_GPU_REG_PA_CL_VPORT_YOFFSET);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_zoffset,
                             XE_GPU_REG_PA_CL_VPORT_ZOFFSET);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_xscale,
                             XE_GPU_REG_PA_CL_VPORT_XSCALE);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_yscale,
                             XE_GPU_REG_PA_CL_VPORT_YSCALE);
  dirty |= SetShadowRegister(&regs.pa_cl_vport_zscale,
                             XE_GPU_REG_PA_CL_VPORT_ZSCALE);

  // Much of this state machine is extracted from:
  // https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
  // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
  // http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf

  // From the evergreen register reference above:
  // VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
  //            = false: multiply the X, Y coordinates by 1/W0.
  // VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
  //           = false: multiply the Z coordinate by 1/W0.
  // VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to
  //                    get 1/W0.
  // draw_batcher_.set_vtx_fmt((regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f,
  //                           (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f,
  //                           (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f);

  // Done in VS, no need to flush state.
  // if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) {
  //   draw_batcher_.set_window_scalar(1.0f, 1.0f);
  // } else {
  //   draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f);
  // }

  return dirty ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
}
|
||||||
|
|
||||||
|
// Refreshes the shadow copies of the rasterizer-related registers held in
// update_rasterizer_state_regs_ and records the primitive type of the draw.
//
// prim_type is compared against the previously recorded primitive type; a
// difference counts as a change just like a register change does.
//
// Returns kCompatible if nothing changed, kMismatch otherwise.
PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState(
    PrimitiveType prim_type) {
  auto& regs = update_rasterizer_state_regs_;

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
                             XE_GPU_REG_PA_SU_SC_MODE_CNTL);
  dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_tl,
                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL);
  dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_br,
                             XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR);
  dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index,
                             XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX);
  dirty |= regs.prim_type != prim_type;
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  // Only remember the new primitive type once we know something changed.
  regs.prim_type = prim_type;

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}
|
||||||
|
|
||||||
|
// Refreshes the shadow copies of the four blend control registers and the
// blend constant color (red, green, blue, alpha) held in
// update_blend_state_regs_.
//
// Returns kCompatible if nothing changed, kMismatch otherwise.
PipelineCache::UpdateStatus PipelineCache::UpdateBlendState() {
  auto& regs = update_blend_state_regs_;

  // Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
  // Deprecated in GL, implemented in shader.
  // if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
  // (The disabled code below reads the register file directly; reintroduce a
  // local `auto& reg_file = *register_file_;` when re-enabling it.)
  // uint32_t color_control = reg_file[XE_GPU_REG_RB_COLORCONTROL].u32;
  // draw_batcher_.set_alpha_test((color_control & 0x4) != 0,  //
  //                              ALPHATESTENABLE
  //                              color_control & 0x7,         // ALPHAFUNC
  //                              reg_file[XE_GPU_REG_RB_ALPHA_REF].f32);

  bool dirty = false;
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0);
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1);
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2);
  dirty |=
      SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE);
  dirty |= SetShadowRegister(&regs.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA);
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}
|
||||||
|
|
||||||
|
// Refreshes the shadow copies of the depth control and stencil ref/mask
// registers held in update_depth_stencil_state_regs_.
//
// Returns kCompatible if nothing changed, kMismatch otherwise.
PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {
  auto& regs = update_depth_stencil_state_regs_;

  bool dirty = false;
  dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
  dirty |=
      SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
  if (!dirty) {
    return UpdateStatus::kCompatible;
  }

  SCOPE_profile_cpu_f("gpu");

  return UpdateStatus::kMismatch;
}
|
||||||
|
|
||||||
|
} // namespace vulkan
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
|
@ -0,0 +1,167 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XENIA_GPU_VULKAN_PIPELINE_CACHE_H_
|
||||||
|
#define XENIA_GPU_VULKAN_PIPELINE_CACHE_H_
|
||||||
|
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "xenia/gpu/register_file.h"
|
||||||
|
#include "xenia/gpu/spirv_shader_translator.h"
|
||||||
|
#include "xenia/gpu/vulkan/vulkan_shader.h"
|
||||||
|
#include "xenia/gpu/xenos.h"
|
||||||
|
#include "xenia/ui/spirv/spirv_disassembler.h"
|
||||||
|
#include "xenia/ui/vulkan/vulkan.h"
|
||||||
|
#include "xenia/ui/vulkan/vulkan_device.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace vulkan {
|
||||||
|
|
||||||
|
// Configures and caches pipelines based on render state.
|
||||||
|
// This is responsible for properly setting all state required for a draw
|
||||||
|
// including shaders, various blend/etc options, and input configuration.
|
||||||
|
class PipelineCache {
|
||||||
|
public:
|
||||||
|
PipelineCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device);
|
||||||
|
~PipelineCache();
|
||||||
|
|
||||||
|
// Loads a shader from the cache, possibly translating it.
|
||||||
|
VulkanShader* LoadShader(ShaderType shader_type, uint32_t guest_address,
|
||||||
|
const uint32_t* host_address, uint32_t dword_count);
|
||||||
|
|
||||||
|
// Configures a pipeline using the current render state and the given render
|
||||||
|
// pass. If a previously available pipeline is available it will be used,
|
||||||
|
// otherwise a new one may be created. Any state that can be set dynamically
|
||||||
|
// in the command buffer is issued at this time.
|
||||||
|
// Returns whether the pipeline could be successfully created.
|
||||||
|
bool ConfigurePipeline(VkCommandBuffer command_buffer,
|
||||||
|
VkRenderPass render_pass,
|
||||||
|
PrimitiveType primitive_type);
|
||||||
|
|
||||||
|
// Currently configured pipeline layout, if any.
|
||||||
|
VkPipelineLayout current_pipeline_layout() const { return nullptr; }
|
||||||
|
// Currently configured vertex shader, if any.
|
||||||
|
VulkanShader* current_vertex_shader() const { return nullptr; }
|
||||||
|
// Currently configured pixel shader, if any.
|
||||||
|
VulkanShader* current_pixel_shader() const { return nullptr; }
|
||||||
|
|
||||||
|
// Clears all cached content.
|
||||||
|
void ClearCache();
|
||||||
|
|
||||||
|
private:
|
||||||
|
// TODO(benvanik): geometry shader cache.
|
||||||
|
// TODO(benvanik): translated shader cache.
|
||||||
|
// TODO(benvanik): pipeline layouts.
|
||||||
|
// TODO(benvanik): pipeline cache.
|
||||||
|
|
||||||
|
RegisterFile* register_file_ = nullptr;
|
||||||
|
VkDevice device_ = nullptr;
|
||||||
|
|
||||||
|
SpirvShaderTranslator shader_translator_;
|
||||||
|
xe::ui::spirv::SpirvDisassembler disassembler_;
|
||||||
|
// All loaded shaders mapped by their guest hash key.
|
||||||
|
std::unordered_map<uint64_t, VulkanShader*> shader_map_;
|
||||||
|
|
||||||
|
private:
|
||||||
|
enum class UpdateStatus {
|
||||||
|
kCompatible,
|
||||||
|
kMismatch,
|
||||||
|
kError,
|
||||||
|
};
|
||||||
|
|
||||||
|
UpdateStatus UpdateShaders(PrimitiveType prim_type);
|
||||||
|
UpdateStatus UpdateRenderTargets();
|
||||||
|
UpdateStatus UpdateState(PrimitiveType prim_type);
|
||||||
|
UpdateStatus UpdateViewportState();
|
||||||
|
UpdateStatus UpdateRasterizerState(PrimitiveType prim_type);
|
||||||
|
UpdateStatus UpdateBlendState();
|
||||||
|
UpdateStatus UpdateDepthStencilState();
|
||||||
|
|
||||||
|
bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
|
||||||
|
bool SetShadowRegister(float* dest, uint32_t register_name);
|
||||||
|
|
||||||
|
struct UpdateRenderTargetsRegisters {
|
||||||
|
uint32_t rb_modecontrol;
|
||||||
|
uint32_t rb_surface_info;
|
||||||
|
uint32_t rb_color_info;
|
||||||
|
uint32_t rb_color1_info;
|
||||||
|
uint32_t rb_color2_info;
|
||||||
|
uint32_t rb_color3_info;
|
||||||
|
uint32_t rb_color_mask;
|
||||||
|
uint32_t rb_depthcontrol;
|
||||||
|
uint32_t rb_stencilrefmask;
|
||||||
|
uint32_t rb_depth_info;
|
||||||
|
|
||||||
|
UpdateRenderTargetsRegisters() { Reset(); }
|
||||||
|
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||||
|
} update_render_targets_regs_;
|
||||||
|
struct UpdateViewportStateRegisters {
|
||||||
|
// uint32_t pa_cl_clip_cntl;
|
||||||
|
uint32_t rb_surface_info;
|
||||||
|
uint32_t pa_cl_vte_cntl;
|
||||||
|
uint32_t pa_su_sc_mode_cntl;
|
||||||
|
uint32_t pa_sc_window_offset;
|
||||||
|
uint32_t pa_sc_window_scissor_tl;
|
||||||
|
uint32_t pa_sc_window_scissor_br;
|
||||||
|
float pa_cl_vport_xoffset;
|
||||||
|
float pa_cl_vport_yoffset;
|
||||||
|
float pa_cl_vport_zoffset;
|
||||||
|
float pa_cl_vport_xscale;
|
||||||
|
float pa_cl_vport_yscale;
|
||||||
|
float pa_cl_vport_zscale;
|
||||||
|
|
||||||
|
UpdateViewportStateRegisters() { Reset(); }
|
||||||
|
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||||
|
} update_viewport_state_regs_;
|
||||||
|
struct UpdateRasterizerStateRegisters {
|
||||||
|
uint32_t pa_su_sc_mode_cntl;
|
||||||
|
uint32_t pa_sc_screen_scissor_tl;
|
||||||
|
uint32_t pa_sc_screen_scissor_br;
|
||||||
|
uint32_t multi_prim_ib_reset_index;
|
||||||
|
PrimitiveType prim_type;
|
||||||
|
|
||||||
|
UpdateRasterizerStateRegisters() { Reset(); }
|
||||||
|
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||||
|
} update_rasterizer_state_regs_;
|
||||||
|
struct UpdateBlendStateRegisters {
|
||||||
|
uint32_t rb_blendcontrol[4];
|
||||||
|
float rb_blend_rgba[4];
|
||||||
|
|
||||||
|
UpdateBlendStateRegisters() { Reset(); }
|
||||||
|
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||||
|
} update_blend_state_regs_;
|
||||||
|
struct UpdateDepthStencilStateRegisters {
|
||||||
|
uint32_t rb_depthcontrol;
|
||||||
|
uint32_t rb_stencilrefmask;
|
||||||
|
|
||||||
|
UpdateDepthStencilStateRegisters() { Reset(); }
|
||||||
|
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
||||||
|
} update_depth_stencil_state_regs_;
|
||||||
|
struct UpdateShadersRegisters {
|
||||||
|
PrimitiveType prim_type;
|
||||||
|
uint32_t pa_su_sc_mode_cntl;
|
||||||
|
uint32_t sq_program_cntl;
|
||||||
|
uint32_t sq_context_misc;
|
||||||
|
VulkanShader* vertex_shader;
|
||||||
|
VulkanShader* pixel_shader;
|
||||||
|
|
||||||
|
UpdateShadersRegisters() { Reset(); }
|
||||||
|
void Reset() {
|
||||||
|
sq_program_cntl = 0;
|
||||||
|
vertex_shader = pixel_shader = nullptr;
|
||||||
|
}
|
||||||
|
} update_shaders_regs_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace vulkan
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
||||||
|
|
||||||
|
#endif // XENIA_GPU_VULKAN_PIPELINE_CACHE_H_
|
|
@ -0,0 +1,45 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "xenia/gpu/vulkan/render_cache.h"
|
||||||
|
|
||||||
|
#include "xenia/base/logging.h"
|
||||||
|
#include "xenia/base/math.h"
|
||||||
|
#include "xenia/base/memory.h"
|
||||||
|
#include "xenia/base/profiling.h"
|
||||||
|
#include "xenia/gpu/gpu_flags.h"
|
||||||
|
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace vulkan {
|
||||||
|
|
||||||
|
using xe::ui::vulkan::CheckResult;
|
||||||
|
|
||||||
|
// Stores the register file pointer and the device handle obtained by
// dereferencing |device|. |device| must therefore be non-null.
RenderCache::RenderCache(RegisterFile* register_file,
                         ui::vulkan::VulkanDevice* device)
    : register_file_(register_file),
      device_(*device) {
}
|
||||||
|
|
||||||
|
// Nothing to release explicitly; the compiler-generated destructor suffices.
RenderCache::~RenderCache() = default;
|
||||||
|
|
||||||
|
// Placeholder: no render pass is begun yet, so a null render pass is always
// returned and |command_buffer| is left untouched.
VkRenderPass RenderCache::BeginRenderPass(VkCommandBuffer command_buffer) {
  return nullptr;
}
|
||||||
|
|
||||||
|
// Placeholder: intentionally empty until render passes are implemented.
void RenderCache::EndRenderPass() {}
|
||||||
|
|
||||||
|
// Currently a no-op because nothing is cached yet.
void RenderCache::ClearCache() {
  // TODO(benvanik): caching.
}
|
||||||
|
|
||||||
|
} // namespace vulkan
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
|
@ -0,0 +1,46 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XENIA_GPU_VULKAN_RENDER_CACHE_H_
|
||||||
|
#define XENIA_GPU_VULKAN_RENDER_CACHE_H_
|
||||||
|
|
||||||
|
#include "xenia/gpu/register_file.h"
|
||||||
|
#include "xenia/gpu/shader.h"
|
||||||
|
#include "xenia/gpu/xenos.h"
|
||||||
|
#include "xenia/ui/vulkan/vulkan.h"
|
||||||
|
#include "xenia/ui/vulkan/vulkan_device.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace vulkan {
|
||||||
|
|
||||||
|
// Configures and caches pipelines based on render state.
|
||||||
|
// This is responsible for properly setting all state required for a draw
|
||||||
|
// including shaders, various blend/etc options, and input configuration.
|
||||||
|
class RenderCache {
|
||||||
|
public:
|
||||||
|
RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device);
|
||||||
|
~RenderCache();
|
||||||
|
|
||||||
|
VkRenderPass BeginRenderPass(VkCommandBuffer command_buffer);
|
||||||
|
void EndRenderPass();
|
||||||
|
|
||||||
|
// Clears all cached content.
|
||||||
|
void ClearCache();
|
||||||
|
|
||||||
|
private:
|
||||||
|
RegisterFile* register_file_ = nullptr;
|
||||||
|
VkDevice device_ = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace vulkan
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
||||||
|
|
||||||
|
#endif // XENIA_GPU_VULKAN_RENDER_CACHE_H_
|
|
@ -0,0 +1,37 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "xenia/gpu/vulkan/texture_cache.h"
|
||||||
|
|
||||||
|
#include "xenia/base/logging.h"
|
||||||
|
#include "xenia/base/math.h"
|
||||||
|
#include "xenia/base/memory.h"
|
||||||
|
#include "xenia/base/profiling.h"
|
||||||
|
#include "xenia/gpu/gpu_flags.h"
|
||||||
|
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace vulkan {
|
||||||
|
|
||||||
|
using xe::ui::vulkan::CheckResult;
|
||||||
|
|
||||||
|
// Stores the register file pointer and the device handle obtained by
// dereferencing |device|. |device| must therefore be non-null.
TextureCache::TextureCache(RegisterFile* register_file,
                           ui::vulkan::VulkanDevice* device)
    : register_file_(register_file),
      device_(*device) {
}
|
||||||
|
|
||||||
|
// Nothing to release explicitly; the compiler-generated destructor suffices.
TextureCache::~TextureCache() = default;
|
||||||
|
|
||||||
|
// Currently a no-op because nothing is cached yet.
void TextureCache::ClearCache() {
  // TODO(benvanik): caching.
}
|
||||||
|
|
||||||
|
} // namespace vulkan
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
|
@ -0,0 +1,47 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XENIA_GPU_VULKAN_TEXTURE_CACHE_H_
|
||||||
|
#define XENIA_GPU_VULKAN_TEXTURE_CACHE_H_
|
||||||
|
|
||||||
|
#include "xenia/gpu/register_file.h"
|
||||||
|
#include "xenia/gpu/shader.h"
|
||||||
|
#include "xenia/gpu/xenos.h"
|
||||||
|
#include "xenia/ui/vulkan/vulkan.h"
|
||||||
|
#include "xenia/ui/vulkan/vulkan_device.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace gpu {
|
||||||
|
namespace vulkan {
|
||||||
|
|
||||||
|
// Configures and caches pipelines based on render state.
|
||||||
|
// This is responsible for properly setting all state required for a draw
|
||||||
|
// including shaders, various blend/etc options, and input configuration.
|
||||||
|
class TextureCache {
|
||||||
|
public:
|
||||||
|
TextureCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device);
|
||||||
|
~TextureCache();
|
||||||
|
|
||||||
|
// TODO(benvanik): UploadTexture.
|
||||||
|
// TODO(benvanik): Resolve.
|
||||||
|
// TODO(benvanik): ReadTexture.
|
||||||
|
|
||||||
|
// Clears all cached content.
|
||||||
|
void ClearCache();
|
||||||
|
|
||||||
|
private:
|
||||||
|
RegisterFile* register_file_ = nullptr;
|
||||||
|
VkDevice device_ = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace vulkan
|
||||||
|
} // namespace gpu
|
||||||
|
} // namespace xe
|
||||||
|
|
||||||
|
#endif // XENIA_GPU_VULKAN_TEXTURE_CACHE_H_
|
|
@ -20,12 +20,16 @@
|
||||||
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
|
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
|
||||||
#include "xenia/gpu/vulkan/vulkan_graphics_system.h"
|
#include "xenia/gpu/vulkan/vulkan_graphics_system.h"
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
|
#include "xenia/ui/vulkan/vulkan_util.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace vulkan {
|
namespace vulkan {
|
||||||
|
|
||||||
using namespace xe::gpu::xenos;
|
using namespace xe::gpu::xenos;
|
||||||
|
using xe::ui::vulkan::CheckResult;
|
||||||
|
|
||||||
|
constexpr size_t kDefaultBufferCacheCapacity = 256 * 1024 * 1024;
|
||||||
|
|
||||||
VulkanCommandProcessor::VulkanCommandProcessor(
|
VulkanCommandProcessor::VulkanCommandProcessor(
|
||||||
VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state)
|
VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state)
|
||||||
|
@ -33,7 +37,14 @@ VulkanCommandProcessor::VulkanCommandProcessor(
|
||||||
|
|
||||||
VulkanCommandProcessor::~VulkanCommandProcessor() = default;
|
VulkanCommandProcessor::~VulkanCommandProcessor() = default;
|
||||||
|
|
||||||
void VulkanCommandProcessor::ClearCaches() { CommandProcessor::ClearCaches(); }
|
void VulkanCommandProcessor::ClearCaches() {
|
||||||
|
CommandProcessor::ClearCaches();
|
||||||
|
|
||||||
|
buffer_cache_->ClearCache();
|
||||||
|
pipeline_cache_->ClearCache();
|
||||||
|
render_cache_->ClearCache();
|
||||||
|
texture_cache_->ClearCache();
|
||||||
|
}
|
||||||
|
|
||||||
bool VulkanCommandProcessor::SetupContext() {
|
bool VulkanCommandProcessor::SetupContext() {
|
||||||
if (!CommandProcessor::SetupContext()) {
|
if (!CommandProcessor::SetupContext()) {
|
||||||
|
@ -41,10 +52,47 @@ bool VulkanCommandProcessor::SetupContext() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Acquire our device and queue.
|
||||||
|
auto context = static_cast<xe::ui::vulkan::VulkanContext*>(context_.get());
|
||||||
|
device_ = context->device();
|
||||||
|
queue_ = device_->AcquireQueue();
|
||||||
|
if (!queue_) {
|
||||||
|
// Need to reuse primary queue (with locks).
|
||||||
|
queue_ = device_->primary_queue();
|
||||||
|
queue_mutex_ = &device_->primary_queue_mutex();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Setup fenced pools used for all our per-frame/per-draw resources.
|
||||||
|
command_buffer_pool_ = std::make_unique<ui::vulkan::CommandBufferPool>(
|
||||||
|
*device_, device_->queue_family_index(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
|
||||||
|
|
||||||
|
// Initialize the state machine caches.
|
||||||
|
buffer_cache_ = std::make_unique<BufferCache>(register_file_, device_,
|
||||||
|
kDefaultBufferCacheCapacity);
|
||||||
|
pipeline_cache_ = std::make_unique<PipelineCache>(register_file_, device_);
|
||||||
|
render_cache_ = std::make_unique<RenderCache>(register_file_, device_);
|
||||||
|
texture_cache_ = std::make_unique<TextureCache>(register_file_, device_);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VulkanCommandProcessor::ShutdownContext() {
|
void VulkanCommandProcessor::ShutdownContext() {
|
||||||
|
// TODO(benvanik): wait until idle.
|
||||||
|
|
||||||
|
buffer_cache_.reset();
|
||||||
|
pipeline_cache_.reset();
|
||||||
|
render_cache_.reset();
|
||||||
|
texture_cache_.reset();
|
||||||
|
|
||||||
|
// Free all pools. This must come after all of our caches clean up.
|
||||||
|
command_buffer_pool_.reset();
|
||||||
|
|
||||||
|
// Release queue, if we were using an acquired one.
|
||||||
|
if (!queue_mutex_) {
|
||||||
|
device_->ReleaseQueue(queue_);
|
||||||
|
queue_ = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
CommandProcessor::ShutdownContext();
|
CommandProcessor::ShutdownContext();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -55,7 +103,8 @@ void VulkanCommandProcessor::MakeCoherent() {
|
||||||
CommandProcessor::MakeCoherent();
|
CommandProcessor::MakeCoherent();
|
||||||
|
|
||||||
if (status_host & 0x80000000ul) {
|
if (status_host & 0x80000000ul) {
|
||||||
// scratch_buffer_.ClearCache();
|
// TODO(benvanik): less-fine-grained clearing.
|
||||||
|
buffer_cache_->InvalidateCache();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -103,346 +152,167 @@ Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type,
|
||||||
uint32_t guest_address,
|
uint32_t guest_address,
|
||||||
const uint32_t* host_address,
|
const uint32_t* host_address,
|
||||||
uint32_t dword_count) {
|
uint32_t dword_count) {
|
||||||
// return shader_cache_.LookupOrInsertShader(shader_type, host_address,
|
return pipeline_cache_->LoadShader(shader_type, guest_address, host_address,
|
||||||
// dword_count);
|
dword_count);
|
||||||
return nullptr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VulkanCommandProcessor::IssueDraw(PrimitiveType prim_type,
|
bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
uint32_t index_count,
|
uint32_t index_count,
|
||||||
IndexBufferInfo* index_buffer_info) {
|
IndexBufferInfo* index_buffer_info) {
|
||||||
|
auto& regs = *register_file_;
|
||||||
|
|
||||||
#if FINE_GRAINED_DRAW_SCOPES
|
#if FINE_GRAINED_DRAW_SCOPES
|
||||||
SCOPE_profile_cpu_f("gpu");
|
SCOPE_profile_cpu_f("gpu");
|
||||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||||
|
|
||||||
// Skip all drawing for now - what did you expect? :)
|
|
||||||
return true;
|
|
||||||
|
|
||||||
bool draw_valid = false;
|
|
||||||
// if (index_buffer_info) {
|
|
||||||
// draw_valid = draw_batcher_.BeginDrawElements(prim_type, index_count,
|
|
||||||
// index_buffer_info->format);
|
|
||||||
//} else {
|
|
||||||
// draw_valid = draw_batcher_.BeginDrawArrays(prim_type, index_count);
|
|
||||||
//}
|
|
||||||
if (!draw_valid) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto& regs = *register_file_;
|
|
||||||
|
|
||||||
auto enable_mode =
|
auto enable_mode =
|
||||||
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
|
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
|
||||||
if (enable_mode == ModeControl::kIgnore) {
|
if (enable_mode == ModeControl::kIgnore) {
|
||||||
// Ignored.
|
// Ignored.
|
||||||
// draw_batcher_.DiscardDraw();
|
|
||||||
return true;
|
return true;
|
||||||
} else if (enable_mode == ModeControl::kCopy) {
|
} else if (enable_mode == ModeControl::kCopy) {
|
||||||
// Special copy handling.
|
// Special copy handling.
|
||||||
// draw_batcher_.DiscardDraw();
|
|
||||||
return IssueCopy();
|
return IssueCopy();
|
||||||
}
|
}
|
||||||
|
|
||||||
#define CHECK_ISSUE_UPDATE_STATUS(status, mismatch, error_message) \
|
// TODO(benvanik): bigger batches.
|
||||||
{ \
|
command_buffer_pool_->BeginBatch();
|
||||||
if (status == UpdateStatus::kError) { \
|
VkCommandBuffer command_buffer = command_buffer_pool_->AcquireEntry();
|
||||||
XELOGE(error_message); \
|
VkCommandBufferBeginInfo command_buffer_begin_info;
|
||||||
/*draw_batcher_.DiscardDraw(); */ \
|
command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||||
return false; \
|
command_buffer_begin_info.pNext = nullptr;
|
||||||
} else if (status == UpdateStatus::kMismatch) { \
|
command_buffer_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
|
||||||
mismatch = true; \
|
command_buffer_begin_info.pInheritanceInfo = nullptr;
|
||||||
} \
|
auto err = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info);
|
||||||
}
|
CheckResult(err, "vkBeginCommandBuffer");
|
||||||
|
|
||||||
UpdateStatus status;
|
// Begin the render pass.
|
||||||
bool mismatch = false;
|
// This will setup our framebuffer and begin the pass in the command buffer.
|
||||||
status = UpdateShaders(prim_type);
|
VkRenderPass render_pass = render_cache_->BeginRenderPass(command_buffer);
|
||||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to prepare draw shaders");
|
if (!render_pass) {
|
||||||
status = UpdateRenderTargets();
|
|
||||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render targets");
|
|
||||||
// if (!active_framebuffer_) {
|
|
||||||
// // No framebuffer, so nothing we do will actually have an effect.
|
|
||||||
// // Treat it as a no-op.
|
|
||||||
// // TODO(benvanik): if we have a vs export, still allow it to go.
|
|
||||||
// draw_batcher_.DiscardDraw();
|
|
||||||
// return true;
|
|
||||||
//}
|
|
||||||
|
|
||||||
status = UpdateState(prim_type);
|
|
||||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render state");
|
|
||||||
status = PopulateSamplers();
|
|
||||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch,
|
|
||||||
"Unable to prepare draw samplers");
|
|
||||||
|
|
||||||
status = PopulateIndexBuffer(index_buffer_info);
|
|
||||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup index buffer");
|
|
||||||
status = PopulateVertexBuffers();
|
|
||||||
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup vertex buffers");
|
|
||||||
|
|
||||||
// if (!draw_batcher_.CommitDraw()) {
|
|
||||||
// return false;
|
|
||||||
//}
|
|
||||||
|
|
||||||
// draw_batcher_.Flush(DrawBatcher::FlushMode::kMakeCoherent);
|
|
||||||
if (context_->WasLost()) {
|
|
||||||
// This draw lost us the context. This typically isn't hit.
|
|
||||||
assert_always();
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Configure the pipeline for drawing.
|
||||||
|
// This encodes all render state (blend, depth, etc), our shader stages,
|
||||||
|
// and our vertex input layout.
|
||||||
|
if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_pass,
|
||||||
|
primitive_type)) {
|
||||||
|
render_cache_->EndRenderPass();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upload the constants the shaders require.
|
||||||
|
auto vertex_shader = pipeline_cache_->current_vertex_shader();
|
||||||
|
auto pixel_shader = pipeline_cache_->current_pixel_shader();
|
||||||
|
auto vertex_constant_offset = buffer_cache_->UploadConstantRegisters(
|
||||||
|
vertex_shader->constant_register_map());
|
||||||
|
auto pixel_constant_offset = buffer_cache_->UploadConstantRegisters(
|
||||||
|
pixel_shader->constant_register_map());
|
||||||
|
if (vertex_constant_offset == VK_WHOLE_SIZE ||
|
||||||
|
pixel_constant_offset == VK_WHOLE_SIZE) {
|
||||||
|
render_cache_->EndRenderPass();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Configure constant uniform access to point at our offsets.
|
||||||
|
auto constant_descriptor_set = buffer_cache_->constant_descriptor_set();
|
||||||
|
auto pipeline_layout = pipeline_cache_->current_pipeline_layout();
|
||||||
|
uint32_t constant_offsets[2] = {static_cast<uint32_t>(vertex_constant_offset),
|
||||||
|
static_cast<uint32_t>(pixel_constant_offset)};
|
||||||
|
vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||||
|
pipeline_layout, 0, 1, &constant_descriptor_set,
|
||||||
|
static_cast<uint32_t>(xe::countof(constant_offsets)),
|
||||||
|
constant_offsets);
|
||||||
|
|
||||||
|
// Upload and bind index buffer data (if we have any).
|
||||||
|
if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) {
|
||||||
|
render_cache_->EndRenderPass();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upload and bind all vertex buffer data.
|
||||||
|
if (!PopulateVertexBuffers(command_buffer, vertex_shader)) {
|
||||||
|
render_cache_->EndRenderPass();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Upload and set descriptors for all textures.
|
||||||
|
if (!PopulateSamplers(command_buffer, vertex_shader, pixel_shader)) {
|
||||||
|
render_cache_->EndRenderPass();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
// Actually issue the draw.
|
||||||
|
if (!index_buffer_info) {
|
||||||
|
// Auto-indexed draw.
|
||||||
|
uint32_t instance_count = 1;
|
||||||
|
uint32_t first_vertex = 0;
|
||||||
|
uint32_t first_instance = 0;
|
||||||
|
vkCmdDraw(command_buffer, index_count, instance_count, first_vertex,
|
||||||
|
first_instance);
|
||||||
|
} else {
|
||||||
|
// Index buffer draw.
|
||||||
|
uint32_t instance_count = 1;
|
||||||
|
uint32_t first_index =
|
||||||
|
register_file_->values[XE_GPU_REG_VGT_INDX_OFFSET].u32;
|
||||||
|
uint32_t vertex_offset = 0;
|
||||||
|
uint32_t first_instance = 0;
|
||||||
|
vkCmdDrawIndexed(command_buffer, index_count, instance_count, first_index,
|
||||||
|
vertex_offset, first_instance);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// End the rendering pass.
|
||||||
|
render_cache_->EndRenderPass();
|
||||||
|
|
||||||
|
// TODO(benvanik): bigger batches.
|
||||||
|
err = vkEndCommandBuffer(command_buffer);
|
||||||
|
CheckResult(err, "vkEndCommandBuffer");
|
||||||
|
VkFence fence;
|
||||||
|
VkFenceCreateInfo fence_info;
|
||||||
|
fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||||
|
fence_info.pNext = nullptr;
|
||||||
|
fence_info.flags = 0;
|
||||||
|
vkCreateFence(*device_, &fence_info, nullptr, &fence);
|
||||||
|
command_buffer_pool_->EndBatch(fence);
|
||||||
|
VkSubmitInfo submit_info;
|
||||||
|
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
||||||
|
submit_info.pNext = nullptr;
|
||||||
|
submit_info.waitSemaphoreCount = 0;
|
||||||
|
submit_info.pWaitSemaphores = nullptr;
|
||||||
|
submit_info.commandBufferCount = 1;
|
||||||
|
submit_info.pCommandBuffers = &command_buffer;
|
||||||
|
submit_info.signalSemaphoreCount = 0;
|
||||||
|
submit_info.pSignalSemaphores = nullptr;
|
||||||
|
if (queue_mutex_) {
|
||||||
|
queue_mutex_->lock();
|
||||||
|
}
|
||||||
|
err = vkQueueSubmit(queue_, 1, &submit_info, fence);
|
||||||
|
if (queue_mutex_) {
|
||||||
|
queue_mutex_->unlock();
|
||||||
|
}
|
||||||
|
CheckResult(err, "vkQueueSubmit");
|
||||||
|
if (queue_mutex_) {
|
||||||
|
queue_mutex_->lock();
|
||||||
|
}
|
||||||
|
vkQueueWaitIdle(queue_);
|
||||||
|
if (queue_mutex_) {
|
||||||
|
queue_mutex_->unlock();
|
||||||
|
}
|
||||||
|
command_buffer_pool_->Scavenge();
|
||||||
|
vkDestroyFence(*device_, fence, nullptr);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VulkanCommandProcessor::SetShadowRegister(uint32_t* dest,
|
bool VulkanCommandProcessor::PopulateIndexBuffer(
|
||||||
uint32_t register_name) {
|
VkCommandBuffer command_buffer, IndexBufferInfo* index_buffer_info) {
|
||||||
uint32_t value = register_file_->values[register_name].u32;
|
|
||||||
if (*dest == value) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
*dest = value;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool VulkanCommandProcessor::SetShadowRegister(float* dest,
|
|
||||||
uint32_t register_name) {
|
|
||||||
float value = register_file_->values[register_name].f32;
|
|
||||||
if (*dest == value) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
*dest = value;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::UpdateShaders(
|
|
||||||
PrimitiveType prim_type) {
|
|
||||||
auto& regs = update_shaders_regs_;
|
|
||||||
|
|
||||||
// These are the constant base addresses/ranges for shaders.
|
|
||||||
// We have these hardcoded right now cause nothing seems to differ.
|
|
||||||
assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 ==
|
|
||||||
0x000FF000 ||
|
|
||||||
register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
|
|
||||||
assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 ==
|
|
||||||
0x000FF100 ||
|
|
||||||
register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
|
|
||||||
|
|
||||||
bool dirty = false;
|
|
||||||
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
|
|
||||||
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
|
|
||||||
dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
|
|
||||||
dirty |= SetShadowRegister(&regs.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC);
|
|
||||||
dirty |= regs.vertex_shader != active_vertex_shader_;
|
|
||||||
dirty |= regs.pixel_shader != active_pixel_shader_;
|
|
||||||
dirty |= regs.prim_type != prim_type;
|
|
||||||
if (!dirty) {
|
|
||||||
return UpdateStatus::kCompatible;
|
|
||||||
}
|
|
||||||
regs.vertex_shader = static_cast<VulkanShader*>(active_vertex_shader_);
|
|
||||||
regs.pixel_shader = static_cast<VulkanShader*>(active_pixel_shader_);
|
|
||||||
regs.prim_type = prim_type;
|
|
||||||
|
|
||||||
SCOPE_profile_cpu_f("gpu");
|
|
||||||
|
|
||||||
return UpdateStatus::kMismatch;
|
|
||||||
}
|
|
||||||
|
|
||||||
VulkanCommandProcessor::UpdateStatus
|
|
||||||
VulkanCommandProcessor::UpdateRenderTargets() {
|
|
||||||
auto& regs = update_render_targets_regs_;
|
|
||||||
|
|
||||||
bool dirty = false;
|
|
||||||
dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
|
|
||||||
dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
|
|
||||||
dirty |= SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO);
|
|
||||||
dirty |= SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO);
|
|
||||||
dirty |= SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO);
|
|
||||||
dirty |= SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO);
|
|
||||||
dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK);
|
|
||||||
dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
|
|
||||||
dirty |=
|
|
||||||
SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
|
|
||||||
dirty |= SetShadowRegister(&regs.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO);
|
|
||||||
if (!dirty) {
|
|
||||||
return UpdateStatus::kCompatible;
|
|
||||||
}
|
|
||||||
|
|
||||||
SCOPE_profile_cpu_f("gpu");
|
|
||||||
|
|
||||||
return UpdateStatus::kMismatch;
|
|
||||||
}
|
|
||||||
|
|
||||||
VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::UpdateState(
|
|
||||||
PrimitiveType prim_type) {
|
|
||||||
bool mismatch = false;
|
|
||||||
|
|
||||||
#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \
|
|
||||||
{ \
|
|
||||||
if (status == UpdateStatus::kError) { \
|
|
||||||
XELOGE(error_message); \
|
|
||||||
return status; \
|
|
||||||
} else if (status == UpdateStatus::kMismatch) { \
|
|
||||||
mismatch = true; \
|
|
||||||
} \
|
|
||||||
}
|
|
||||||
|
|
||||||
UpdateStatus status;
|
|
||||||
status = UpdateViewportState();
|
|
||||||
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state");
|
|
||||||
status = UpdateRasterizerState(prim_type);
|
|
||||||
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state");
|
|
||||||
status = UpdateBlendState();
|
|
||||||
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state");
|
|
||||||
status = UpdateDepthStencilState();
|
|
||||||
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state");
|
|
||||||
|
|
||||||
return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
|
|
||||||
}
|
|
||||||
|
|
||||||
VulkanCommandProcessor::UpdateStatus
|
|
||||||
VulkanCommandProcessor::UpdateViewportState() {
|
|
||||||
auto& regs = update_viewport_state_regs_;
|
|
||||||
|
|
||||||
bool dirty = false;
|
|
||||||
// dirty |= SetShadowRegister(&state_regs.pa_cl_clip_cntl,
|
|
||||||
// XE_GPU_REG_PA_CL_CLIP_CNTL);
|
|
||||||
dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
|
|
||||||
dirty |= SetShadowRegister(&regs.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL);
|
|
||||||
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
|
|
||||||
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
|
|
||||||
dirty |= SetShadowRegister(&regs.pa_sc_window_offset,
|
|
||||||
XE_GPU_REG_PA_SC_WINDOW_OFFSET);
|
|
||||||
dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl,
|
|
||||||
XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL);
|
|
||||||
dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br,
|
|
||||||
XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR);
|
|
||||||
dirty |= SetShadowRegister(&regs.pa_cl_vport_xoffset,
|
|
||||||
XE_GPU_REG_PA_CL_VPORT_XOFFSET);
|
|
||||||
dirty |= SetShadowRegister(&regs.pa_cl_vport_yoffset,
|
|
||||||
XE_GPU_REG_PA_CL_VPORT_YOFFSET);
|
|
||||||
dirty |= SetShadowRegister(&regs.pa_cl_vport_zoffset,
|
|
||||||
XE_GPU_REG_PA_CL_VPORT_ZOFFSET);
|
|
||||||
dirty |= SetShadowRegister(&regs.pa_cl_vport_xscale,
|
|
||||||
XE_GPU_REG_PA_CL_VPORT_XSCALE);
|
|
||||||
dirty |= SetShadowRegister(&regs.pa_cl_vport_yscale,
|
|
||||||
XE_GPU_REG_PA_CL_VPORT_YSCALE);
|
|
||||||
dirty |= SetShadowRegister(&regs.pa_cl_vport_zscale,
|
|
||||||
XE_GPU_REG_PA_CL_VPORT_ZSCALE);
|
|
||||||
|
|
||||||
// Much of this state machine is extracted from:
|
|
||||||
// https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
|
|
||||||
// http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
|
|
||||||
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
|
|
||||||
|
|
||||||
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
|
|
||||||
// VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
|
|
||||||
// = false: multiply the X, Y coordinates by 1/W0.
|
|
||||||
// VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
|
|
||||||
// = false: multiply the Z coordinate by 1/W0.
|
|
||||||
// VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to
|
|
||||||
// get 1/W0.
|
|
||||||
// draw_batcher_.set_vtx_fmt((regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f,
|
|
||||||
// (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f,
|
|
||||||
// (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f);
|
|
||||||
|
|
||||||
// Done in VS, no need to flush state.
|
|
||||||
// if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) {
|
|
||||||
// draw_batcher_.set_window_scalar(1.0f, 1.0f);
|
|
||||||
//} else {
|
|
||||||
// draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f);
|
|
||||||
//}
|
|
||||||
|
|
||||||
if (!dirty) {
|
|
||||||
return UpdateStatus::kCompatible;
|
|
||||||
}
|
|
||||||
|
|
||||||
return UpdateStatus::kMismatch;
|
|
||||||
}
|
|
||||||
|
|
||||||
VulkanCommandProcessor::UpdateStatus
|
|
||||||
VulkanCommandProcessor::UpdateRasterizerState(PrimitiveType prim_type) {
|
|
||||||
auto& regs = update_rasterizer_state_regs_;
|
|
||||||
|
|
||||||
bool dirty = false;
|
|
||||||
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
|
|
||||||
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
|
|
||||||
dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_tl,
|
|
||||||
XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL);
|
|
||||||
dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_br,
|
|
||||||
XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR);
|
|
||||||
dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index,
|
|
||||||
XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX);
|
|
||||||
dirty |= regs.prim_type != prim_type;
|
|
||||||
if (!dirty) {
|
|
||||||
return UpdateStatus::kCompatible;
|
|
||||||
}
|
|
||||||
|
|
||||||
regs.prim_type = prim_type;
|
|
||||||
|
|
||||||
SCOPE_profile_cpu_f("gpu");
|
|
||||||
|
|
||||||
return UpdateStatus::kMismatch;
|
|
||||||
}
|
|
||||||
|
|
||||||
VulkanCommandProcessor::UpdateStatus
|
|
||||||
VulkanCommandProcessor::UpdateBlendState() {
|
|
||||||
auto& reg_file = *register_file_;
|
|
||||||
auto& regs = update_blend_state_regs_;
|
|
||||||
|
|
||||||
// Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
|
|
||||||
// Deprecated in GL, implemented in shader.
|
|
||||||
// if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
|
|
||||||
// uint32_t color_control = reg_file[XE_GPU_REG_RB_COLORCONTROL].u32;
|
|
||||||
// draw_batcher_.set_alpha_test((color_control & 0x4) != 0, //
|
|
||||||
// ALPHATESTENABLE
|
|
||||||
// color_control & 0x7, // ALPHAFUNC
|
|
||||||
// reg_file[XE_GPU_REG_RB_ALPHA_REF].f32);
|
|
||||||
|
|
||||||
bool dirty = false;
|
|
||||||
dirty |=
|
|
||||||
SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0);
|
|
||||||
dirty |=
|
|
||||||
SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1);
|
|
||||||
dirty |=
|
|
||||||
SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2);
|
|
||||||
dirty |=
|
|
||||||
SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3);
|
|
||||||
dirty |= SetShadowRegister(&regs.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED);
|
|
||||||
dirty |= SetShadowRegister(&regs.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN);
|
|
||||||
dirty |= SetShadowRegister(&regs.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE);
|
|
||||||
dirty |= SetShadowRegister(&regs.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA);
|
|
||||||
if (!dirty) {
|
|
||||||
return UpdateStatus::kCompatible;
|
|
||||||
}
|
|
||||||
|
|
||||||
SCOPE_profile_cpu_f("gpu");
|
|
||||||
|
|
||||||
return UpdateStatus::kMismatch;
|
|
||||||
}
|
|
||||||
|
|
||||||
VulkanCommandProcessor::UpdateStatus
|
|
||||||
VulkanCommandProcessor::UpdateDepthStencilState() {
|
|
||||||
auto& regs = update_depth_stencil_state_regs_;
|
|
||||||
|
|
||||||
bool dirty = false;
|
|
||||||
dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
|
|
||||||
dirty |=
|
|
||||||
SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
|
|
||||||
if (!dirty) {
|
|
||||||
return UpdateStatus::kCompatible;
|
|
||||||
}
|
|
||||||
|
|
||||||
SCOPE_profile_cpu_f("gpu");
|
|
||||||
|
|
||||||
return UpdateStatus::kMismatch;
|
|
||||||
}
|
|
||||||
|
|
||||||
VulkanCommandProcessor::UpdateStatus
|
|
||||||
VulkanCommandProcessor::PopulateIndexBuffer(
|
|
||||||
IndexBufferInfo* index_buffer_info) {
|
|
||||||
auto& regs = *register_file_;
|
auto& regs = *register_file_;
|
||||||
if (!index_buffer_info || !index_buffer_info->guest_base) {
|
if (!index_buffer_info || !index_buffer_info->guest_base) {
|
||||||
// No index buffer or auto draw.
|
// No index buffer or auto draw.
|
||||||
return UpdateStatus::kCompatible;
|
return true;
|
||||||
}
|
}
|
||||||
auto& info = *index_buffer_info;
|
auto& info = *index_buffer_info;
|
||||||
|
|
||||||
|
@ -462,19 +332,44 @@ VulkanCommandProcessor::PopulateIndexBuffer(
|
||||||
|
|
||||||
trace_writer_.WriteMemoryRead(info.guest_base, info.length);
|
trace_writer_.WriteMemoryRead(info.guest_base, info.length);
|
||||||
|
|
||||||
return UpdateStatus::kCompatible;
|
// Upload (or get a cached copy of) the buffer.
|
||||||
|
const void* source_ptr =
|
||||||
|
memory_->TranslatePhysical<const void*>(info.guest_base);
|
||||||
|
size_t source_length =
|
||||||
|
info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
|
||||||
|
: sizeof(uint16_t));
|
||||||
|
auto buffer_ref =
|
||||||
|
buffer_cache_->UploadIndexBuffer(source_ptr, source_length, info.format);
|
||||||
|
if (buffer_ref.second == VK_WHOLE_SIZE) {
|
||||||
|
// Failed to upload buffer.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Bind the buffer.
|
||||||
|
VkIndexType index_type = info.format == IndexFormat::kInt32
|
||||||
|
? VK_INDEX_TYPE_UINT32
|
||||||
|
: VK_INDEX_TYPE_UINT16;
|
||||||
|
vkCmdBindIndexBuffer(command_buffer, buffer_ref.first, buffer_ref.second,
|
||||||
|
index_type);
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
VulkanCommandProcessor::UpdateStatus
|
bool VulkanCommandProcessor::PopulateVertexBuffers(
|
||||||
VulkanCommandProcessor::PopulateVertexBuffers() {
|
VkCommandBuffer command_buffer, VulkanShader* vertex_shader) {
|
||||||
|
auto& regs = *register_file_;
|
||||||
|
|
||||||
#if FINE_GRAINED_DRAW_SCOPES
|
#if FINE_GRAINED_DRAW_SCOPES
|
||||||
SCOPE_profile_cpu_f("gpu");
|
SCOPE_profile_cpu_f("gpu");
|
||||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||||
|
|
||||||
auto& regs = *register_file_;
|
auto& vertex_bindings = vertex_shader->vertex_bindings();
|
||||||
assert_not_null(active_vertex_shader_);
|
assert_true(vertex_bindings.size() <= 32);
|
||||||
|
VkBuffer all_buffers[32];
|
||||||
|
VkDeviceSize all_buffer_offsets[32];
|
||||||
|
uint32_t buffer_index = 0;
|
||||||
|
|
||||||
for (const auto& vertex_binding : active_vertex_shader_->vertex_bindings()) {
|
for (const auto& vertex_binding : vertex_bindings) {
|
||||||
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
|
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
|
||||||
(vertex_binding.fetch_constant / 3) * 6;
|
(vertex_binding.fetch_constant / 3) * 6;
|
||||||
const auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
|
const auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
|
||||||
|
@ -492,58 +387,72 @@ VulkanCommandProcessor::PopulateVertexBuffers() {
|
||||||
}
|
}
|
||||||
assert_true(fetch->endian == 2);
|
assert_true(fetch->endian == 2);
|
||||||
|
|
||||||
|
// TODO(benvanik): compute based on indices or vertex count.
|
||||||
|
// THIS CAN BE MASSIVELY INCORRECT (too large).
|
||||||
size_t valid_range = size_t(fetch->size * 4);
|
size_t valid_range = size_t(fetch->size * 4);
|
||||||
|
|
||||||
trace_writer_.WriteMemoryRead(fetch->address << 2, valid_range);
|
trace_writer_.WriteMemoryRead(fetch->address << 2, valid_range);
|
||||||
|
|
||||||
|
// Upload (or get a cached copy of) the buffer.
|
||||||
|
const void* source_ptr =
|
||||||
|
memory_->TranslatePhysical<const void*>(fetch->address << 2);
|
||||||
|
size_t source_length = valid_range;
|
||||||
|
auto buffer_ref =
|
||||||
|
buffer_cache_->UploadVertexBuffer(source_ptr, source_length);
|
||||||
|
if (buffer_ref.second == VK_WHOLE_SIZE) {
|
||||||
|
// Failed to upload buffer.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stash the buffer reference for our bulk bind at the end.
|
||||||
|
all_buffers[buffer_index] = buffer_ref.first;
|
||||||
|
all_buffer_offsets[buffer_index] = buffer_ref.second;
|
||||||
|
++buffer_index;
|
||||||
}
|
}
|
||||||
|
|
||||||
return UpdateStatus::kCompatible;
|
// Bind buffers.
|
||||||
|
vkCmdBindVertexBuffers(command_buffer, 0, buffer_index, all_buffers,
|
||||||
|
all_buffer_offsets);
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
VulkanCommandProcessor::UpdateStatus
|
bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer,
|
||||||
VulkanCommandProcessor::PopulateSamplers() {
|
VulkanShader* vertex_shader,
|
||||||
|
VulkanShader* pixel_shader) {
|
||||||
#if FINE_GRAINED_DRAW_SCOPES
|
#if FINE_GRAINED_DRAW_SCOPES
|
||||||
SCOPE_profile_cpu_f("gpu");
|
SCOPE_profile_cpu_f("gpu");
|
||||||
#endif // FINE_GRAINED_DRAW_SCOPES
|
#endif // FINE_GRAINED_DRAW_SCOPES
|
||||||
|
|
||||||
bool mismatch = false;
|
bool any_failed = false;
|
||||||
|
|
||||||
// VS and PS samplers are shared, but may be used exclusively.
|
// VS and PS samplers are shared, but may be used exclusively.
|
||||||
// We walk each and setup lazily.
|
// We walk each and setup lazily.
|
||||||
bool has_setup_sampler[32] = {false};
|
bool has_setup_sampler[32] = {false};
|
||||||
|
|
||||||
// Vertex texture samplers.
|
// Vertex texture samplers.
|
||||||
for (auto& texture_binding : active_vertex_shader_->texture_bindings()) {
|
for (auto& texture_binding : vertex_shader->texture_bindings()) {
|
||||||
if (has_setup_sampler[texture_binding.fetch_constant]) {
|
if (has_setup_sampler[texture_binding.fetch_constant]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
has_setup_sampler[texture_binding.fetch_constant] = true;
|
has_setup_sampler[texture_binding.fetch_constant] = true;
|
||||||
auto status = PopulateSampler(texture_binding);
|
any_failed = PopulateSampler(command_buffer, texture_binding) || any_failed;
|
||||||
if (status == UpdateStatus::kError) {
|
|
||||||
return status;
|
|
||||||
} else if (status == UpdateStatus::kMismatch) {
|
|
||||||
mismatch = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pixel shader texture sampler.
|
// Pixel shader texture sampler.
|
||||||
for (auto& texture_binding : active_pixel_shader_->texture_bindings()) {
|
for (auto& texture_binding : pixel_shader->texture_bindings()) {
|
||||||
if (has_setup_sampler[texture_binding.fetch_constant]) {
|
if (has_setup_sampler[texture_binding.fetch_constant]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
has_setup_sampler[texture_binding.fetch_constant] = true;
|
has_setup_sampler[texture_binding.fetch_constant] = true;
|
||||||
auto status = PopulateSampler(texture_binding);
|
any_failed = PopulateSampler(command_buffer, texture_binding) || any_failed;
|
||||||
if (status == UpdateStatus::kError) {
|
|
||||||
return UpdateStatus::kError;
|
|
||||||
} else if (status == UpdateStatus::kMismatch) {
|
|
||||||
mismatch = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
|
return !any_failed;
|
||||||
}
|
}
|
||||||
|
|
||||||
VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::PopulateSampler(
|
bool VulkanCommandProcessor::PopulateSampler(
|
||||||
|
VkCommandBuffer command_buffer,
|
||||||
const Shader::TextureBinding& texture_binding) {
|
const Shader::TextureBinding& texture_binding) {
|
||||||
auto& regs = *register_file_;
|
auto& regs = *register_file_;
|
||||||
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
|
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
|
||||||
|
@ -553,30 +462,34 @@ VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::PopulateSampler(
|
||||||
|
|
||||||
// ?
|
// ?
|
||||||
if (!fetch.type) {
|
if (!fetch.type) {
|
||||||
return UpdateStatus::kCompatible;
|
return true;
|
||||||
}
|
}
|
||||||
assert_true(fetch.type == 0x2);
|
assert_true(fetch.type == 0x2);
|
||||||
|
|
||||||
TextureInfo texture_info;
|
TextureInfo texture_info;
|
||||||
if (!TextureInfo::Prepare(fetch, &texture_info)) {
|
if (!TextureInfo::Prepare(fetch, &texture_info)) {
|
||||||
XELOGE("Unable to parse texture fetcher info");
|
XELOGE("Unable to parse texture fetcher info");
|
||||||
return UpdateStatus::kCompatible; // invalid texture used
|
return true; // invalid texture used
|
||||||
}
|
}
|
||||||
SamplerInfo sampler_info;
|
SamplerInfo sampler_info;
|
||||||
if (!SamplerInfo::Prepare(fetch, texture_binding.fetch_instr,
|
if (!SamplerInfo::Prepare(fetch, texture_binding.fetch_instr,
|
||||||
&sampler_info)) {
|
&sampler_info)) {
|
||||||
XELOGE("Unable to parse sampler info");
|
XELOGE("Unable to parse sampler info");
|
||||||
return UpdateStatus::kCompatible; // invalid texture used
|
return true; // invalid texture used
|
||||||
}
|
}
|
||||||
|
|
||||||
trace_writer_.WriteMemoryRead(texture_info.guest_address,
|
trace_writer_.WriteMemoryRead(texture_info.guest_address,
|
||||||
texture_info.input_length);
|
texture_info.input_length);
|
||||||
|
|
||||||
return UpdateStatus::kCompatible;
|
// TODO(benvanik): texture cache lookup.
|
||||||
|
// TODO(benvanik): bind or return so PopulateSamplers can batch.
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool VulkanCommandProcessor::IssueCopy() {
|
bool VulkanCommandProcessor::IssueCopy() {
|
||||||
SCOPE_profile_cpu_f("gpu");
|
SCOPE_profile_cpu_f("gpu");
|
||||||
|
// TODO(benvanik): resolve.
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -23,12 +23,17 @@
|
||||||
#include "xenia/base/threading.h"
|
#include "xenia/base/threading.h"
|
||||||
#include "xenia/gpu/command_processor.h"
|
#include "xenia/gpu/command_processor.h"
|
||||||
#include "xenia/gpu/register_file.h"
|
#include "xenia/gpu/register_file.h"
|
||||||
#include "xenia/gpu/spirv_shader_translator.h"
|
#include "xenia/gpu/vulkan/buffer_cache.h"
|
||||||
|
#include "xenia/gpu/vulkan/pipeline_cache.h"
|
||||||
|
#include "xenia/gpu/vulkan/render_cache.h"
|
||||||
|
#include "xenia/gpu/vulkan/texture_cache.h"
|
||||||
#include "xenia/gpu/vulkan/vulkan_shader.h"
|
#include "xenia/gpu/vulkan/vulkan_shader.h"
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
#include "xenia/kernel/xthread.h"
|
#include "xenia/kernel/xthread.h"
|
||||||
#include "xenia/memory.h"
|
#include "xenia/memory.h"
|
||||||
|
#include "xenia/ui/vulkan/fenced_pools.h"
|
||||||
#include "xenia/ui/vulkan/vulkan_context.h"
|
#include "xenia/ui/vulkan/vulkan_context.h"
|
||||||
|
#include "xenia/ui/vulkan/vulkan_device.h"
|
||||||
|
|
||||||
namespace xe {
|
namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
|
@ -45,12 +50,6 @@ class VulkanCommandProcessor : public CommandProcessor {
|
||||||
void ClearCaches() override;
|
void ClearCaches() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
enum class UpdateStatus {
|
|
||||||
kCompatible,
|
|
||||||
kMismatch,
|
|
||||||
kError,
|
|
||||||
};
|
|
||||||
|
|
||||||
bool SetupContext() override;
|
bool SetupContext() override;
|
||||||
void ShutdownContext() override;
|
void ShutdownContext() override;
|
||||||
|
|
||||||
|
@ -65,97 +64,35 @@ class VulkanCommandProcessor : public CommandProcessor {
|
||||||
const uint32_t* host_address,
|
const uint32_t* host_address,
|
||||||
uint32_t dword_count) override;
|
uint32_t dword_count) override;
|
||||||
|
|
||||||
bool IssueDraw(PrimitiveType prim_type, uint32_t index_count,
|
bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count,
|
||||||
IndexBufferInfo* index_buffer_info) override;
|
IndexBufferInfo* index_buffer_info) override;
|
||||||
UpdateStatus UpdateShaders(PrimitiveType prim_type);
|
bool PopulateIndexBuffer(VkCommandBuffer command_buffer,
|
||||||
UpdateStatus UpdateRenderTargets();
|
IndexBufferInfo* index_buffer_info);
|
||||||
UpdateStatus UpdateState(PrimitiveType prim_type);
|
bool PopulateVertexBuffers(VkCommandBuffer command_buffer,
|
||||||
UpdateStatus UpdateViewportState();
|
VulkanShader* vertex_shader);
|
||||||
UpdateStatus UpdateRasterizerState(PrimitiveType prim_type);
|
bool PopulateSamplers(VkCommandBuffer command_buffer,
|
||||||
UpdateStatus UpdateBlendState();
|
VulkanShader* vertex_shader,
|
||||||
UpdateStatus UpdateDepthStencilState();
|
VulkanShader* pixel_shader);
|
||||||
UpdateStatus PopulateIndexBuffer(IndexBufferInfo* index_buffer_info);
|
bool PopulateSampler(VkCommandBuffer command_buffer,
|
||||||
UpdateStatus PopulateVertexBuffers();
|
const Shader::TextureBinding& texture_binding);
|
||||||
UpdateStatus PopulateSamplers();
|
|
||||||
UpdateStatus PopulateSampler(const Shader::TextureBinding& texture_binding);
|
|
||||||
bool IssueCopy() override;
|
bool IssueCopy() override;
|
||||||
|
|
||||||
SpirvShaderTranslator shader_translator_;
|
xe::ui::vulkan::VulkanDevice* device_ = nullptr;
|
||||||
|
|
||||||
private:
|
// TODO(benvanik): abstract behind context?
|
||||||
bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
|
// Queue used to submit work. This may be a dedicated queue for the command
|
||||||
bool SetShadowRegister(float* dest, uint32_t register_name);
|
// processor and no locking will be required for use. If a dedicated queue
|
||||||
struct UpdateRenderTargetsRegisters {
|
// was not available this will be the device primary_queue and the
|
||||||
uint32_t rb_modecontrol;
|
// queue_mutex must be used to synchronize access to it.
|
||||||
uint32_t rb_surface_info;
|
VkQueue queue_ = nullptr;
|
||||||
uint32_t rb_color_info;
|
std::mutex* queue_mutex_ = nullptr;
|
||||||
uint32_t rb_color1_info;
|
|
||||||
uint32_t rb_color2_info;
|
|
||||||
uint32_t rb_color3_info;
|
|
||||||
uint32_t rb_color_mask;
|
|
||||||
uint32_t rb_depthcontrol;
|
|
||||||
uint32_t rb_stencilrefmask;
|
|
||||||
uint32_t rb_depth_info;
|
|
||||||
|
|
||||||
UpdateRenderTargetsRegisters() { Reset(); }
|
std::unique_ptr<BufferCache> buffer_cache_;
|
||||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
std::unique_ptr<PipelineCache> pipeline_cache_;
|
||||||
} update_render_targets_regs_;
|
std::unique_ptr<RenderCache> render_cache_;
|
||||||
struct UpdateViewportStateRegisters {
|
std::unique_ptr<TextureCache> texture_cache_;
|
||||||
// uint32_t pa_cl_clip_cntl;
|
|
||||||
uint32_t rb_surface_info;
|
|
||||||
uint32_t pa_cl_vte_cntl;
|
|
||||||
uint32_t pa_su_sc_mode_cntl;
|
|
||||||
uint32_t pa_sc_window_offset;
|
|
||||||
uint32_t pa_sc_window_scissor_tl;
|
|
||||||
uint32_t pa_sc_window_scissor_br;
|
|
||||||
float pa_cl_vport_xoffset;
|
|
||||||
float pa_cl_vport_yoffset;
|
|
||||||
float pa_cl_vport_zoffset;
|
|
||||||
float pa_cl_vport_xscale;
|
|
||||||
float pa_cl_vport_yscale;
|
|
||||||
float pa_cl_vport_zscale;
|
|
||||||
|
|
||||||
UpdateViewportStateRegisters() { Reset(); }
|
std::unique_ptr<ui::vulkan::CommandBufferPool> command_buffer_pool_;
|
||||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
|
||||||
} update_viewport_state_regs_;
|
|
||||||
struct UpdateRasterizerStateRegisters {
|
|
||||||
uint32_t pa_su_sc_mode_cntl;
|
|
||||||
uint32_t pa_sc_screen_scissor_tl;
|
|
||||||
uint32_t pa_sc_screen_scissor_br;
|
|
||||||
uint32_t multi_prim_ib_reset_index;
|
|
||||||
PrimitiveType prim_type;
|
|
||||||
|
|
||||||
UpdateRasterizerStateRegisters() { Reset(); }
|
|
||||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
|
||||||
} update_rasterizer_state_regs_;
|
|
||||||
struct UpdateBlendStateRegisters {
|
|
||||||
uint32_t rb_blendcontrol[4];
|
|
||||||
float rb_blend_rgba[4];
|
|
||||||
|
|
||||||
UpdateBlendStateRegisters() { Reset(); }
|
|
||||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
|
||||||
} update_blend_state_regs_;
|
|
||||||
struct UpdateDepthStencilStateRegisters {
|
|
||||||
uint32_t rb_depthcontrol;
|
|
||||||
uint32_t rb_stencilrefmask;
|
|
||||||
|
|
||||||
UpdateDepthStencilStateRegisters() { Reset(); }
|
|
||||||
void Reset() { std::memset(this, 0, sizeof(*this)); }
|
|
||||||
} update_depth_stencil_state_regs_;
|
|
||||||
struct UpdateShadersRegisters {
|
|
||||||
PrimitiveType prim_type;
|
|
||||||
uint32_t pa_su_sc_mode_cntl;
|
|
||||||
uint32_t sq_program_cntl;
|
|
||||||
uint32_t sq_context_misc;
|
|
||||||
VulkanShader* vertex_shader;
|
|
||||||
VulkanShader* pixel_shader;
|
|
||||||
|
|
||||||
UpdateShadersRegisters() { Reset(); }
|
|
||||||
void Reset() {
|
|
||||||
sq_program_cntl = 0;
|
|
||||||
vertex_shader = pixel_shader = nullptr;
|
|
||||||
}
|
|
||||||
} update_shaders_regs_;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace vulkan
|
} // namespace vulkan
|
||||||
|
|
|
@ -0,0 +1,81 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "xenia/ui/vulkan/fenced_pools.h"
|
||||||
|
|
||||||
|
#include "xenia/base/assert.h"
|
||||||
|
#include "xenia/base/math.h"
|
||||||
|
#include "xenia/ui/vulkan/vulkan_util.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace ui {
|
||||||
|
namespace vulkan {
|
||||||
|
|
||||||
|
using xe::ui::vulkan::CheckResult;
|
||||||
|
|
||||||
|
// Creates the Vulkan command pool backing this object and seeds the free
// list with an initial set of command buffers.
//
// device: logical device handed to the base pool and used for all Vulkan
//     calls made here.
// queue_family_index: queue family the command pool is created for.
// level: command buffer level used for the initial buffers and remembered in
//     level_ for later allocations.
CommandBufferPool::CommandBufferPool(VkDevice device,
                                     uint32_t queue_family_index,
                                     VkCommandBufferLevel level)
    : BaseFencedPool(device), level_(level) {
  // Create the pool used for allocating buffers.
  // They are marked as transient (short-lived) and cycled frequently.
  VkCommandPoolCreateInfo cmd_pool_info;
  cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
  cmd_pool_info.pNext = nullptr;
  cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
                        VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
  cmd_pool_info.queueFamilyIndex = queue_family_index;
  auto err =
      vkCreateCommandPool(device_, &cmd_pool_info, nullptr, &command_pool_);
  CheckResult(err, "vkCreateCommandPool");

  // Allocate a bunch of command buffers to start.
  constexpr uint32_t kDefaultCount = 32;
  VkCommandBufferAllocateInfo command_buffer_info;
  command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
  command_buffer_info.pNext = nullptr;
  command_buffer_info.commandPool = command_pool_;
  command_buffer_info.level = level;
  command_buffer_info.commandBufferCount = kDefaultCount;
  // Zero-initialized so the handles have a defined value even if the
  // allocation below fails.
  VkCommandBuffer command_buffers[kDefaultCount] = {};
  err =
      vkAllocateCommandBuffers(device_, &command_buffer_info, command_buffers);
  // Label the result with the call that actually produced it.
  CheckResult(err, "vkAllocateCommandBuffers");
  for (VkCommandBuffer command_buffer : command_buffers) {
    PushEntry(command_buffer);
  }
}
|
||||||
|
|
||||||
|
// Destroys the Vulkan command pool owned by this object.
CommandBufferPool::~CommandBufferPool() {
  // NOTE(review): ~BaseFencedPool runs after this body and still calls
  // FreeEntry() for every handle left on the free list, i.e. after the pool
  // has been destroyed - confirm that ordering is intended.
  vkDestroyCommandPool(device_, command_pool_, nullptr);

  // Forget the stale handle.
  command_pool_ = nullptr;
}
|
||||||
|
|
||||||
|
// Allocates a single command buffer of level_ from command_pool_.
// Returns the new handle; if the allocation fails the result is passed to
// CheckResult and a null handle is returned.
VkCommandBuffer CommandBufferPool::AllocateEntry() {
  // TODO(benvanik): allocate a bunch at once?
  VkCommandBufferAllocateInfo command_buffer_info;
  command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
  command_buffer_info.pNext = nullptr;
  command_buffer_info.commandPool = command_pool_;
  command_buffer_info.level = level_;
  command_buffer_info.commandBufferCount = 1;
  // Start from a null handle so a failed allocation never hands back an
  // uninitialized value.
  VkCommandBuffer command_buffer = nullptr;
  auto err =
      vkAllocateCommandBuffers(device_, &command_buffer_info, &command_buffer);
  // Label the result with the call that actually produced it.
  CheckResult(err, "vkAllocateCommandBuffers");
  return command_buffer;
}
|
||||||
|
|
||||||
|
// Returns a single command buffer to command_pool_.
//
// The base class destructor calls this for every pooled handle after
// ~CommandBufferPool has already destroyed the pool and cleared
// command_pool_. Destroying a command pool frees all command buffers
// allocated from it, so in that case there is nothing left to release and
// the Vulkan call must be skipped.
void CommandBufferPool::FreeEntry(VkCommandBuffer handle) {
  if (!command_pool_) {
    return;
  }
  vkFreeCommandBuffers(device_, command_pool_, 1, &handle);
}
|
||||||
|
|
||||||
|
} // namespace vulkan
|
||||||
|
} // namespace ui
|
||||||
|
} // namespace xe
|
|
@ -0,0 +1,200 @@
|
||||||
|
/**
|
||||||
|
******************************************************************************
|
||||||
|
* Xenia : Xbox 360 Emulator Research Project *
|
||||||
|
******************************************************************************
|
||||||
|
* Copyright 2016 Ben Vanik. All rights reserved. *
|
||||||
|
* Released under the BSD license - see LICENSE in the root for more details. *
|
||||||
|
******************************************************************************
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef XENIA_UI_VULKAN_FENCED_POOLS_H_
|
||||||
|
#define XENIA_UI_VULKAN_FENCED_POOLS_H_
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "xenia/base/assert.h"
|
||||||
|
#include "xenia/ui/vulkan/vulkan.h"
|
||||||
|
|
||||||
|
namespace xe {
|
||||||
|
namespace ui {
|
||||||
|
namespace vulkan {
|
||||||
|
|
||||||
|
// Simple pool for Vulkan homogenous objects that cannot be reused while
|
||||||
|
// in-flight.
|
||||||
|
// It batches pooled objects into groups and uses a vkQueueSubmit fence to
|
||||||
|
// indicate their availability. If no objects are free when one is requested
|
||||||
|
// the caller is expected to create them.
|
||||||
|
template <typename T, typename HANDLE>
|
||||||
|
class BaseFencedPool {
|
||||||
|
public:
|
||||||
|
BaseFencedPool(VkDevice device) : device_(device) {}
|
||||||
|
|
||||||
|
virtual ~BaseFencedPool() {
|
||||||
|
// TODO(benvanik): wait on fence until done.
|
||||||
|
assert_null(pending_batch_list_head_);
|
||||||
|
|
||||||
|
// Run down free lists.
|
||||||
|
while (free_batch_list_head_) {
|
||||||
|
auto batch = free_batch_list_head_;
|
||||||
|
free_batch_list_head_ = batch->next;
|
||||||
|
delete batch;
|
||||||
|
}
|
||||||
|
while (free_entry_list_head_) {
|
||||||
|
auto entry = free_entry_list_head_;
|
||||||
|
free_entry_list_head_ = entry->next;
|
||||||
|
static_cast<T*>(this)->FreeEntry(entry->handle);
|
||||||
|
delete entry;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Checks all pending batches for completion and scavenges their entries.
|
||||||
|
// This should be called as frequently as reasonable.
|
||||||
|
void Scavenge() {
|
||||||
|
while (pending_batch_list_head_) {
|
||||||
|
auto batch = pending_batch_list_head_;
|
||||||
|
if (vkGetFenceStatus(device_, batch->fence) == VK_SUCCESS) {
|
||||||
|
// Batch has completed. Reclaim.
|
||||||
|
pending_batch_list_head_ = batch->next;
|
||||||
|
if (batch == pending_batch_list_tail_) {
|
||||||
|
pending_batch_list_tail_ = nullptr;
|
||||||
|
}
|
||||||
|
batch->next = free_batch_list_head_;
|
||||||
|
free_batch_list_head_ = batch;
|
||||||
|
batch->entry_list_tail->next = free_entry_list_head_;
|
||||||
|
free_entry_list_head_ = batch->entry_list_head;
|
||||||
|
batch->entry_list_head = nullptr;
|
||||||
|
batch->entry_list_tail = nullptr;
|
||||||
|
} else {
|
||||||
|
// Batch is still in-flight. Since batches are executed in order we know
|
||||||
|
// no others after it could have completed, so early-exit.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Begins a new batch.
|
||||||
|
// All entries acquired within this batch will be marked as in-use until
|
||||||
|
// the fence specified in EndBatch is signalled.
|
||||||
|
void BeginBatch() {
|
||||||
|
assert_null(open_batch_);
|
||||||
|
Batch* batch = nullptr;
|
||||||
|
if (free_batch_list_head_) {
|
||||||
|
// Reuse a batch.
|
||||||
|
batch = free_batch_list_head_;
|
||||||
|
free_batch_list_head_ = batch->next;
|
||||||
|
batch->next = nullptr;
|
||||||
|
} else {
|
||||||
|
// Allocate new batch.
|
||||||
|
batch = new Batch();
|
||||||
|
batch->next = nullptr;
|
||||||
|
}
|
||||||
|
batch->entry_list_head = nullptr;
|
||||||
|
batch->entry_list_tail = nullptr;
|
||||||
|
batch->fence = nullptr;
|
||||||
|
open_batch_ = batch;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Attempts to acquire an entry from the pool in the current batch.
|
||||||
|
// If none are available a new one will be allocated.
|
||||||
|
HANDLE AcquireEntry() {
|
||||||
|
Entry* entry = nullptr;
|
||||||
|
if (free_entry_list_head_) {
|
||||||
|
// Slice off an entry from the free list.
|
||||||
|
entry = free_entry_list_head_;
|
||||||
|
free_entry_list_head_ = entry->next;
|
||||||
|
} else {
|
||||||
|
// No entry available; allocate new.
|
||||||
|
entry = new Entry();
|
||||||
|
entry->handle = static_cast<T*>(this)->AllocateEntry();
|
||||||
|
}
|
||||||
|
entry->next = nullptr;
|
||||||
|
if (!open_batch_->entry_list_head) {
|
||||||
|
open_batch_->entry_list_head = entry;
|
||||||
|
}
|
||||||
|
if (open_batch_->entry_list_tail) {
|
||||||
|
open_batch_->entry_list_tail->next = entry;
|
||||||
|
}
|
||||||
|
open_batch_->entry_list_tail = entry;
|
||||||
|
return entry->handle;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ends the current batch using the given fence to indicate when the batch
|
||||||
|
// has completed execution on the GPU.
|
||||||
|
void EndBatch(VkFence fence) {
|
||||||
|
assert_not_null(open_batch_);
|
||||||
|
|
||||||
|
// Close and see if we have anything.
|
||||||
|
auto batch = open_batch_;
|
||||||
|
open_batch_ = nullptr;
|
||||||
|
if (!batch->entry_list_head) {
|
||||||
|
// Nothing to do.
|
||||||
|
batch->next = free_batch_list_head_;
|
||||||
|
free_batch_list_head_ = batch;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Track the fence.
|
||||||
|
batch->fence = fence;
|
||||||
|
|
||||||
|
// Append to the end of the batch list.
|
||||||
|
batch->next = nullptr;
|
||||||
|
if (!pending_batch_list_head_) {
|
||||||
|
pending_batch_list_head_ = batch;
|
||||||
|
}
|
||||||
|
if (pending_batch_list_tail_) {
|
||||||
|
pending_batch_list_tail_->next = batch;
|
||||||
|
} else {
|
||||||
|
pending_batch_list_tail_ = batch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void PushEntry(HANDLE handle) {
|
||||||
|
auto entry = new Entry();
|
||||||
|
entry->next = free_entry_list_head_;
|
||||||
|
entry->handle = handle;
|
||||||
|
free_entry_list_head_ = entry;
|
||||||
|
}
|
||||||
|
|
||||||
|
VkDevice device_ = nullptr;
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct Entry {
|
||||||
|
Entry* next;
|
||||||
|
HANDLE handle;
|
||||||
|
};
|
||||||
|
struct Batch {
|
||||||
|
Batch* next;
|
||||||
|
Entry* entry_list_head;
|
||||||
|
Entry* entry_list_tail;
|
||||||
|
VkFence fence;
|
||||||
|
};
|
||||||
|
|
||||||
|
Batch* free_batch_list_head_ = nullptr;
|
||||||
|
Entry* free_entry_list_head_ = nullptr;
|
||||||
|
Batch* pending_batch_list_head_ = nullptr;
|
||||||
|
Batch* pending_batch_list_tail_ = nullptr;
|
||||||
|
Batch* open_batch_ = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
class CommandBufferPool
|
||||||
|
: public BaseFencedPool<CommandBufferPool, VkCommandBuffer> {
|
||||||
|
public:
|
||||||
|
CommandBufferPool(VkDevice device, uint32_t queue_family_index,
|
||||||
|
VkCommandBufferLevel level);
|
||||||
|
~CommandBufferPool() override;
|
||||||
|
|
||||||
|
protected:
|
||||||
|
friend class BaseFencedPool<CommandBufferPool, VkCommandBuffer>;
|
||||||
|
VkCommandBuffer AllocateEntry();
|
||||||
|
void FreeEntry(VkCommandBuffer handle);
|
||||||
|
|
||||||
|
VkCommandPool command_pool_ = nullptr;
|
||||||
|
VkCommandBufferLevel level_ = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace vulkan
|
||||||
|
} // namespace ui
|
||||||
|
} // namespace xe
|
||||||
|
|
||||||
|
#endif // XENIA_UI_VULKAN_FENCED_POOLS_H_
|
Loading…
Reference in New Issue