WIP rough sketch of vulkan backend structure.

Ben Vanik 2016-02-18 16:43:17 -08:00
parent 35e08d9428
commit 4c4a641096
14 changed files with 1691 additions and 436 deletions

View File

@ -18,6 +18,10 @@ namespace xe {
// http://gnuradio.org/redmine/projects/gnuradio/repository/revisions/f2bc76cc65ffba51a141950f98e75364e49df874/entry/volk/kernels/volk/volk_32u_byteswap.h
// http://gnuradio.org/redmine/projects/gnuradio/repository/revisions/2c4c371885c31222362f70a1cd714415d1398021/entry/volk/kernels/volk/volk_64u_byteswap.h
void copy_128_aligned(void* dest, const void* src, size_t count) {
std::memcpy(dest, src, count * 16);
}
void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src,
size_t count) {
return copy_and_swap_16_unaligned(dest, src, count);

View File

@ -121,6 +121,8 @@ inline void* low_address(void* address) {
return reinterpret_cast<void*>(uint64_t(address) & 0xFFFFFFFF);
}
void copy_128_aligned(void* dest, const void* src, size_t count);
void copy_and_swap_16_aligned(uint16_t* dest, const uint16_t* src,
size_t count);
void copy_and_swap_16_unaligned(uint16_t* dest, const uint16_t* src,

View File

@ -0,0 +1,330 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/buffer_cache.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
namespace xe {
namespace gpu {
namespace vulkan {
using xe::ui::vulkan::CheckResult;
BufferCache::BufferCache(RegisterFile* register_file,
ui::vulkan::VulkanDevice* device, size_t capacity)
: register_file_(register_file),
device_(*device),
transient_capacity_(capacity) {
// Uniform buffer.
VkBufferCreateInfo uniform_buffer_info;
uniform_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
uniform_buffer_info.pNext = nullptr;
uniform_buffer_info.flags = 0;
uniform_buffer_info.size = transient_capacity_;
uniform_buffer_info.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT;
uniform_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
uniform_buffer_info.queueFamilyIndexCount = 0;
uniform_buffer_info.pQueueFamilyIndices = nullptr;
auto err = vkCreateBuffer(device_, &uniform_buffer_info, nullptr,
&transient_uniform_buffer_);
CheckResult(err, "vkCreateBuffer");
// Index buffer.
VkBufferCreateInfo index_buffer_info;
index_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
index_buffer_info.pNext = nullptr;
index_buffer_info.flags = 0;
index_buffer_info.size = transient_capacity_;
index_buffer_info.usage = VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
index_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
index_buffer_info.queueFamilyIndexCount = 0;
index_buffer_info.pQueueFamilyIndices = nullptr;
err = vkCreateBuffer(device_, &index_buffer_info, nullptr,
&transient_index_buffer_);
CheckResult(err, "vkCreateBuffer");
// Vertex buffer.
VkBufferCreateInfo vertex_buffer_info;
vertex_buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
vertex_buffer_info.pNext = nullptr;
vertex_buffer_info.flags = 0;
vertex_buffer_info.size = transient_capacity_;
vertex_buffer_info.usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT;
vertex_buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
vertex_buffer_info.queueFamilyIndexCount = 0;
vertex_buffer_info.pQueueFamilyIndices = nullptr;
err = vkCreateBuffer(*device, &vertex_buffer_info, nullptr,
&transient_vertex_buffer_);
CheckResult(err, "vkCreateBuffer");
// Allocate the underlying buffer we use for all storage.
// We query all types and take the max alignment.
VkMemoryRequirements uniform_buffer_requirements;
VkMemoryRequirements index_buffer_requirements;
VkMemoryRequirements vertex_buffer_requirements;
vkGetBufferMemoryRequirements(device_, transient_uniform_buffer_,
&uniform_buffer_requirements);
vkGetBufferMemoryRequirements(device_, transient_index_buffer_,
&index_buffer_requirements);
vkGetBufferMemoryRequirements(device_, transient_vertex_buffer_,
&vertex_buffer_requirements);
uniform_buffer_alignment_ = uniform_buffer_requirements.alignment;
index_buffer_alignment_ = index_buffer_requirements.alignment;
vertex_buffer_alignment_ = vertex_buffer_requirements.alignment;
VkMemoryRequirements buffer_requirements;
buffer_requirements.size = transient_capacity_;
buffer_requirements.alignment =
std::max(uniform_buffer_requirements.alignment,
std::max(index_buffer_requirements.alignment,
vertex_buffer_requirements.alignment));
buffer_requirements.memoryTypeBits =
uniform_buffer_requirements.memoryTypeBits |
index_buffer_requirements.memoryTypeBits |
vertex_buffer_requirements.memoryTypeBits;
transient_buffer_memory_ = device->AllocateMemory(
buffer_requirements, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
// Alias all buffers to our memory.
vkBindBufferMemory(device_, transient_uniform_buffer_,
transient_buffer_memory_, 0);
vkBindBufferMemory(device_, transient_index_buffer_, transient_buffer_memory_,
0);
vkBindBufferMemory(device_, transient_vertex_buffer_,
transient_buffer_memory_, 0);
// Map memory and keep it mapped while we use it.
err = vkMapMemory(device_, transient_buffer_memory_, 0, VK_WHOLE_SIZE, 0,
&transient_buffer_data_);
CheckResult(err, "vkMapMemory");
// Descriptor pool used for all of our cached descriptors.
// In the steady state we don't allocate anything, so these are all manually
// managed.
VkDescriptorPoolCreateInfo descriptor_pool_info;
descriptor_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
descriptor_pool_info.pNext = nullptr;
descriptor_pool_info.flags =
VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
descriptor_pool_info.maxSets = 1;
VkDescriptorPoolSize pool_sizes[1];
pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
pool_sizes[0].descriptorCount = 2;
descriptor_pool_info.poolSizeCount = 1;
descriptor_pool_info.pPoolSizes = pool_sizes;
err = vkCreateDescriptorPool(device_, &descriptor_pool_info, nullptr,
&descriptor_pool_);
CheckResult(err, "vkCreateDescriptorPool");
// Create the descriptor set layout used for our uniform buffer.
// As it is a static binding that uses dynamic offsets during draws we can
// create this once and reuse it forever.
VkDescriptorSetLayoutBinding vertex_uniform_binding;
vertex_uniform_binding.binding = 0;
vertex_uniform_binding.descriptorType =
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
vertex_uniform_binding.descriptorCount = 1;
vertex_uniform_binding.stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
vertex_uniform_binding.pImmutableSamplers = nullptr;
VkDescriptorSetLayoutBinding fragment_uniform_binding;
fragment_uniform_binding.binding = 1;
fragment_uniform_binding.descriptorType =
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
fragment_uniform_binding.descriptorCount = 1;
fragment_uniform_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
fragment_uniform_binding.pImmutableSamplers = nullptr;
VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info;
descriptor_set_layout_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
descriptor_set_layout_info.pNext = nullptr;
descriptor_set_layout_info.flags = 0;
VkDescriptorSetLayoutBinding uniform_bindings[] = {
vertex_uniform_binding, fragment_uniform_binding,
};
descriptor_set_layout_info.bindingCount =
static_cast<uint32_t>(xe::countof(uniform_bindings));
descriptor_set_layout_info.pBindings = uniform_bindings;
err = vkCreateDescriptorSetLayout(device_, &descriptor_set_layout_info,
nullptr, &descriptor_set_layout_);
CheckResult(err, "vkCreateDescriptorSetLayout");
// Create the descriptor we'll use for the uniform buffer.
// This is what we hand out to everyone (who then also needs to use our
// offsets).
VkDescriptorSetAllocateInfo set_alloc_info;
set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
set_alloc_info.pNext = nullptr;
set_alloc_info.descriptorPool = descriptor_pool_;
set_alloc_info.descriptorSetCount = 1;
set_alloc_info.pSetLayouts = &descriptor_set_layout_;
err = vkAllocateDescriptorSets(device_, &set_alloc_info,
&transient_descriptor_set_);
CheckResult(err, "vkAllocateDescriptorSets");
}
BufferCache::~BufferCache() {
vkFreeDescriptorSets(device_, descriptor_pool_, 1,
&transient_descriptor_set_);
vkDestroyDescriptorSetLayout(device_, descriptor_set_layout_, nullptr);
vkDestroyDescriptorPool(device_, descriptor_pool_, nullptr);
vkUnmapMemory(device_, transient_buffer_memory_);
vkFreeMemory(device_, transient_buffer_memory_, nullptr);
vkDestroyBuffer(device_, transient_uniform_buffer_, nullptr);
vkDestroyBuffer(device_, transient_index_buffer_, nullptr);
vkDestroyBuffer(device_, transient_vertex_buffer_, nullptr);
}
VkDeviceSize BufferCache::UploadConstantRegisters(
const Shader::ConstantRegisterMap& constant_register_map) {
// Allocate space in the buffer for our data.
auto offset = AllocateTransientData(uniform_buffer_alignment_,
constant_register_map.packed_byte_length);
if (offset == VK_WHOLE_SIZE) {
// OOM.
return VK_WHOLE_SIZE;
}
// Run through registers and copy them into the buffer.
// TODO(benvanik): optimize this - it's hit twice every call.
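// Packed layout example: a map using float constants c3 and c10, one loop
// constant, and one bool register packs as
// [c3.xyzw 16B][c10.xyzw 16B][loop 4B][bool 4B] = 40 bytes total.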
const auto& values = register_file_->values;
uint8_t* dest_ptr =
reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset;
for (int i = 0; i < 4; ++i) {
auto piece = constant_register_map.float_bitmap[i];
if (!piece) {
continue;
}
for (int j = 0; j < 64; ++j) {
if (piece & (1ull << j)) {
// Each float constant is a vec4 spanning 4 consecutive registers.
xe::copy_128_aligned(
dest_ptr,
&values[XE_GPU_REG_SHADER_CONSTANT_000_X + (i * 64 + j) * 4].f32, 1);
dest_ptr += 16;
}
}
}
for (int i = 0; i < 32; ++i) {
if (constant_register_map.int_bitmap & (1 << i)) {
xe::store<uint32_t>(dest_ptr,
values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + i].u32);
dest_ptr += 4;
}
}
for (int i = 0; i < 8; ++i) {
if (constant_register_map.bool_bitmap[i]) {
xe::store<uint32_t>(
dest_ptr, values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 + i].u32);
dest_ptr += 4;
}
}
return offset;
}
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
const void* source_ptr, size_t source_length, IndexFormat format) {
// TODO(benvanik): check cache.
// Allocate space in the buffer for our data.
auto offset = AllocateTransientData(index_buffer_alignment_, source_length);
if (offset == VK_WHOLE_SIZE) {
// OOM.
return {nullptr, VK_WHOLE_SIZE};
}
// Copy data into the buffer.
// TODO(benvanik): get min/max indices and pass back?
// TODO(benvanik): memcpy then use compute shaders to swap?
if (format == IndexFormat::kInt16) {
// Endian::k8in16, swap half-words.
// Note: offset is a byte offset, so advance through a byte pointer first.
xe::copy_and_swap_16_aligned(
reinterpret_cast<uint16_t*>(
reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset),
reinterpret_cast<const uint16_t*>(source_ptr), source_length / 2);
} else if (format == IndexFormat::kInt32) {
// Endian::k8in32, swap words.
xe::copy_and_swap_32_aligned(
reinterpret_cast<uint32_t*>(
reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset),
reinterpret_cast<const uint32_t*>(source_ptr), source_length / 4);
}
return {transient_index_buffer_, offset};
}
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
const void* source_ptr, size_t source_length) {
// TODO(benvanik): check cache.
// Allocate space in the buffer for our data.
auto offset = AllocateTransientData(vertex_buffer_alignment_, source_length);
if (offset == VK_WHOLE_SIZE) {
// OOM.
return {nullptr, VK_WHOLE_SIZE};
}
// Copy data into the buffer.
// TODO(benvanik): memcpy then use compute shaders to swap?
// Endian::k8in32, swap words.
// Note: offset is a byte offset, so advance through a byte pointer first.
xe::copy_and_swap_32_aligned(
reinterpret_cast<uint32_t*>(
reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset),
reinterpret_cast<const uint32_t*>(source_ptr), source_length / 4);
return {transient_vertex_buffer_, offset};
}
VkDeviceSize BufferCache::AllocateTransientData(size_t alignment,
size_t length) {
// Try to add to end, wrapping if required.
// Check to ensure there is space.
if (false) {
// Consume all fences.
}
// Slice off our bit.
return VK_WHOLE_SIZE;
}
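As an editorial aside: the allocator above is still a stub that always reports OOM. Below is a minimal sketch of the wrapping ring-buffer allocation its comments describe, assuming a hypothetical transient_tail_offset_ member and omitting the fence tracking needed before reusing regions still in flight on the GPU.

VkDeviceSize BufferCache::AllocateTransientData(size_t alignment,
                                                size_t length) {
  // Editorial sketch only; transient_tail_offset_ is a hypothetical member.
  if (length > transient_capacity_) {
    return VK_WHOLE_SIZE;  // Can never fit.
  }
  // Vulkan buffer alignments are powers of two, so round up with a mask.
  VkDeviceSize offset =
      (transient_tail_offset_ + alignment - 1) & ~VkDeviceSize(alignment - 1);
  if (offset + length > transient_capacity_) {
    // Wrap to the start; a real implementation must first wait on the fences
    // covering the region about to be reused.
    offset = 0;
  }
  transient_tail_offset_ = offset + length;
  return offset;
}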
void BufferCache::Flush(VkCommandBuffer command_buffer) {
// If we are flushing a big enough chunk, queue up an event.
// We don't want to do this for every flush, but often enough that we won't
// run out of space.
if (true) {
// VkEvent finish_event;
// vkCmdSetEvent(cmd_buffer, finish_event,
// VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
}
// Flush memory.
// TODO(benvanik): subrange.
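// Note: the flush is required because only HOST_VISIBLE was requested for the
// transient memory; if the chosen memory type also happens to be HOST_COHERENT
// it is redundant but harmless.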
VkMappedMemoryRange dirty_range;
dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
dirty_range.pNext = nullptr;
dirty_range.memory = transient_buffer_memory_;
dirty_range.offset = 0;
dirty_range.size = transient_capacity_;
vkFlushMappedMemoryRanges(device_, 1, &dirty_range);
}
void BufferCache::InvalidateCache() {
// TODO(benvanik): caching.
}
void BufferCache::ClearCache() {
// TODO(benvanik): caching.
}
} // namespace vulkan
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,111 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_BUFFER_CACHE_H_
#define XENIA_GPU_VULKAN_BUFFER_CACHE_H_
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h"
namespace xe {
namespace gpu {
namespace vulkan {
// Efficiently manages buffers of various kinds.
// Used primarily for uploading index and vertex data from guest memory and
// transient data like shader constants.
class BufferCache {
public:
BufferCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device,
size_t capacity);
~BufferCache();
// Descriptor set containing the dynamic uniform buffer used for constant
// uploads. Used in conjunction with a dynamic offset returned by
// UploadConstantRegisters.
// The set contains two bindings:
// binding = 0: for use in vertex shaders
// binding = 1: for use in fragment shaders
VkDescriptorSet constant_descriptor_set() const {
return transient_descriptor_set_;
}
// Uploads the constants specified in the register maps to the transient
// uniform storage buffer.
// The registers are tightly packed in order as [floats, ints, bools].
// Returns an offset that can be used with the transient_descriptor_set or
// VK_WHOLE_SIZE if the constants could not be uploaded (OOM).
VkDeviceSize UploadConstantRegisters(
const Shader::ConstantRegisterMap& constant_register_map);
// Uploads index buffer data from guest memory, possibly eliding with
// recently uploaded data or cached copies.
// Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
// The returned offset will be VK_WHOLE_SIZE if the data could not be
// uploaded (OOM).
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(const void* source_ptr,
size_t source_length,
IndexFormat format);
// Uploads vertex buffer data from guest memory, possibly eliding with
// recently uploaded data or cached copies.
// Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
// The returned offset will be VK_WHOLE_SIZE if the data could not be
// uploaded (OOM).
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(const void* source_ptr,
size_t source_length);
// Flushes all pending data to the GPU.
// Until this is called the GPU is not guaranteed to see any data.
// The given command buffer will be used to queue up events so that the
// cache can determine when data has been consumed.
void Flush(VkCommandBuffer command_buffer);
// Marks the cache as potentially invalid.
// This is not as strong as ClearCache and is a hint that any and all data
// should be verified before being reused.
void InvalidateCache();
// Clears all cached content and prevents future elision with pending data.
void ClearCache();
private:
// Allocates a block of memory in the transient buffer.
// Returns VK_WHOLE_SIZE if the requested amount of memory is not available.
VkDeviceSize AllocateTransientData(size_t alignment, size_t length);
RegisterFile* register_file_ = nullptr;
VkDevice device_ = nullptr;
// Staging ringbuffer we cycle through fast. Used for data we don't
// plan on keeping past the current frame.
size_t transient_capacity_ = 0;
VkBuffer transient_uniform_buffer_ = nullptr;
VkBuffer transient_index_buffer_ = nullptr;
VkBuffer transient_vertex_buffer_ = nullptr;
VkDeviceMemory transient_buffer_memory_ = nullptr;
void* transient_buffer_data_ = nullptr;
// Required alignments for our various types.
// All allocations must start at the appropriate alignment.
VkDeviceSize uniform_buffer_alignment_ = 0;
VkDeviceSize index_buffer_alignment_ = 0;
VkDeviceSize vertex_buffer_alignment_ = 0;
VkDescriptorPool descriptor_pool_ = nullptr;
VkDescriptorSetLayout descriptor_set_layout_ = nullptr;
VkDescriptorSet transient_descriptor_set_ = nullptr;
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_BUFFER_CACHE_H_
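A short usage sketch tying this interface together; it mirrors what VulkanCommandProcessor::IssueDraw does later in this commit. buffer_cache, command_buffer, pipeline_layout, the shaders, and the index data are assumed to already be in scope, and error handling is elided.

// Usage sketch; see the draw path in vulkan_command_processor.cc below.
auto vs_offset = buffer_cache->UploadConstantRegisters(
    vertex_shader->constant_register_map());
auto ps_offset = buffer_cache->UploadConstantRegisters(
    pixel_shader->constant_register_map());
uint32_t dynamic_offsets[2] = {static_cast<uint32_t>(vs_offset),
                               static_cast<uint32_t>(ps_offset)};
VkDescriptorSet set = buffer_cache->constant_descriptor_set();
vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
                        pipeline_layout, 0, 1, &set, 2, dynamic_offsets);
auto ib = buffer_cache->UploadIndexBuffer(index_ptr, index_bytes,
                                          IndexFormat::kInt16);
vkCmdBindIndexBuffer(command_buffer, ib.first, ib.second,
                     VK_INDEX_TYPE_UINT16);
buffer_cache->Flush(command_buffer);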

View File

@ -0,0 +1,335 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/pipeline_cache.h"
#include "third_party/xxhash/xxhash.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
namespace xe {
namespace gpu {
namespace vulkan {
using xe::ui::vulkan::CheckResult;
PipelineCache::PipelineCache(RegisterFile* register_file,
ui::vulkan::VulkanDevice* device)
: register_file_(register_file), device_(*device) {}
PipelineCache::~PipelineCache() {
// Destroy all shaders.
for (auto it : shader_map_) {
delete it.second;
}
shader_map_.clear();
}
VulkanShader* PipelineCache::LoadShader(ShaderType shader_type,
uint32_t guest_address,
const uint32_t* host_address,
uint32_t dword_count) {
// Hash the input memory and lookup the shader.
uint64_t data_hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0);
auto it = shader_map_.find(data_hash);
if (it != shader_map_.end()) {
// Shader has been previously loaded.
return it->second;
}
// Always create the shader and stash it away.
// We need to track it even if it fails translation so we know not to try
// again.
VulkanShader* shader =
new VulkanShader(shader_type, data_hash, host_address, dword_count);
shader_map_.insert({data_hash, shader});
// Perform translation.
// If this fails the shader will be marked as invalid and ignored later.
if (!shader_translator_.Translate(shader)) {
XELOGE("Shader translation failed; marking shader as ignored");
return shader;
}
// Prepare the shader for use (creates our VkShaderModule).
// It could still fail at this point.
if (!shader->Prepare()) {
XELOGE("Shader preparation failed; marking shader as ignored");
return shader;
}
if (shader->is_valid()) {
XELOGGPU("Generated %s shader at 0x%.8X (%db):\n%s",
shader_type == ShaderType::kVertex ? "vertex" : "pixel",
guest_address, dword_count * 4,
shader->ucode_disassembly().c_str());
}
// Dump shader files if desired.
if (!FLAGS_dump_shaders.empty()) {
shader->Dump(FLAGS_dump_shaders, "vk");
}
return shader;
}
bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer,
VkRenderPass render_pass,
PrimitiveType primitive_type) {
return false;
}
void PipelineCache::ClearCache() {
// TODO(benvanik): caching.
}
bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) {
uint32_t value = register_file_->values[register_name].u32;
if (*dest == value) {
return false;
}
*dest = value;
return true;
}
bool PipelineCache::SetShadowRegister(float* dest, uint32_t register_name) {
float value = register_file_->values[register_name].f32;
if (*dest == value) {
return false;
}
*dest = value;
return true;
}
PipelineCache::UpdateStatus PipelineCache::UpdateShaders(
PrimitiveType prim_type) {
auto& regs = update_shaders_regs_;
// These are the constant base addresses/ranges for shaders.
// We have these hardcoded right now because nothing seems to differ.
assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 ==
0x000FF000 ||
register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 ==
0x000FF100 ||
register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
bool dirty = false;
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
dirty |= SetShadowRegister(&regs.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC);
// dirty |= regs.vertex_shader != active_vertex_shader_;
// dirty |= regs.pixel_shader != active_pixel_shader_;
dirty |= regs.prim_type != prim_type;
if (!dirty) {
return UpdateStatus::kCompatible;
}
// regs.vertex_shader = static_cast<VulkanShader*>(active_vertex_shader_);
// regs.pixel_shader = static_cast<VulkanShader*>(active_pixel_shader_);
regs.prim_type = prim_type;
SCOPE_profile_cpu_f("gpu");
return UpdateStatus::kMismatch;
}
PipelineCache::UpdateStatus PipelineCache::UpdateRenderTargets() {
auto& regs = update_render_targets_regs_;
bool dirty = false;
dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
dirty |= SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO);
dirty |= SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO);
dirty |= SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO);
dirty |= SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO);
dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK);
dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
dirty |=
SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
dirty |= SetShadowRegister(&regs.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO);
if (!dirty) {
return UpdateStatus::kCompatible;
}
SCOPE_profile_cpu_f("gpu");
return UpdateStatus::kMismatch;
}
PipelineCache::UpdateStatus PipelineCache::UpdateState(
PrimitiveType prim_type) {
bool mismatch = false;
#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \
{ \
if (status == UpdateStatus::kError) { \
XELOGE(error_message); \
return status; \
} else if (status == UpdateStatus::kMismatch) { \
mismatch = true; \
} \
}
UpdateStatus status;
status = UpdateViewportState();
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state");
status = UpdateRasterizerState(prim_type);
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state");
status = UpdateBlendState();
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state");
status = UpdateDepthStencilState();
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state");
return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
}
PipelineCache::UpdateStatus PipelineCache::UpdateViewportState() {
auto& regs = update_viewport_state_regs_;
bool dirty = false;
// dirty |= SetShadowRegister(&state_regs.pa_cl_clip_cntl,
// XE_GPU_REG_PA_CL_CLIP_CNTL);
dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
dirty |= SetShadowRegister(&regs.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL);
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
dirty |= SetShadowRegister(&regs.pa_sc_window_offset,
XE_GPU_REG_PA_SC_WINDOW_OFFSET);
dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl,
XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL);
dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br,
XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR);
dirty |= SetShadowRegister(&regs.pa_cl_vport_xoffset,
XE_GPU_REG_PA_CL_VPORT_XOFFSET);
dirty |= SetShadowRegister(&regs.pa_cl_vport_yoffset,
XE_GPU_REG_PA_CL_VPORT_YOFFSET);
dirty |= SetShadowRegister(&regs.pa_cl_vport_zoffset,
XE_GPU_REG_PA_CL_VPORT_ZOFFSET);
dirty |= SetShadowRegister(&regs.pa_cl_vport_xscale,
XE_GPU_REG_PA_CL_VPORT_XSCALE);
dirty |= SetShadowRegister(&regs.pa_cl_vport_yscale,
XE_GPU_REG_PA_CL_VPORT_YSCALE);
dirty |= SetShadowRegister(&regs.pa_cl_vport_zscale,
XE_GPU_REG_PA_CL_VPORT_ZSCALE);
// Much of this state machine is extracted from:
// https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
// http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
// VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
// = false: multiply the X, Y coordinates by 1/W0.
// VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
// = false: multiply the Z coordinate by 1/W0.
// VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to
// get 1/W0.
// draw_batcher_.set_vtx_fmt((regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f,
// (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f,
// (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f);
// Done in VS, no need to flush state.
// if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) {
// draw_batcher_.set_window_scalar(1.0f, 1.0f);
//} else {
// draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f);
//}
if (!dirty) {
return UpdateStatus::kCompatible;
}
return UpdateStatus::kMismatch;
}
PipelineCache::UpdateStatus PipelineCache::UpdateRasterizerState(
PrimitiveType prim_type) {
auto& regs = update_rasterizer_state_regs_;
bool dirty = false;
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_tl,
XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL);
dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_br,
XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR);
dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index,
XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX);
dirty |= regs.prim_type != prim_type;
if (!dirty) {
return UpdateStatus::kCompatible;
}
regs.prim_type = prim_type;
SCOPE_profile_cpu_f("gpu");
return UpdateStatus::kMismatch;
}
PipelineCache::UpdateStatus PipelineCache::UpdateBlendState() {
auto& reg_file = *register_file_;
auto& regs = update_blend_state_regs_;
// Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
// Deprecated in GL, implemented in shader.
// if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
// uint32_t color_control = reg_file[XE_GPU_REG_RB_COLORCONTROL].u32;
// draw_batcher_.set_alpha_test((color_control & 0x4) != 0, //
// ALPHATESTENABLE
// color_control & 0x7, // ALPHAFUNC
// reg_file[XE_GPU_REG_RB_ALPHA_REF].f32);
bool dirty = false;
dirty |=
SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0);
dirty |=
SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1);
dirty |=
SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2);
dirty |=
SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3);
dirty |= SetShadowRegister(&regs.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED);
dirty |= SetShadowRegister(&regs.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN);
dirty |= SetShadowRegister(&regs.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE);
dirty |= SetShadowRegister(&regs.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA);
if (!dirty) {
return UpdateStatus::kCompatible;
}
SCOPE_profile_cpu_f("gpu");
return UpdateStatus::kMismatch;
}
PipelineCache::UpdateStatus PipelineCache::UpdateDepthStencilState() {
auto& regs = update_depth_stencil_state_regs_;
bool dirty = false;
dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
dirty |=
SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
if (!dirty) {
return UpdateStatus::kCompatible;
}
SCOPE_profile_cpu_f("gpu");
return UpdateStatus::kMismatch;
}
} // namespace vulkan
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,167 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_PIPELINE_CACHE_H_
#define XENIA_GPU_VULKAN_PIPELINE_CACHE_H_
#include <unordered_map>
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/spirv/spirv_disassembler.h"
#include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h"
namespace xe {
namespace gpu {
namespace vulkan {
// Configures and caches pipelines based on render state.
// This is responsible for properly setting all state required for a draw
// including shaders, various blend/etc options, and input configuration.
class PipelineCache {
public:
PipelineCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device);
~PipelineCache();
// Loads a shader from the cache, possibly translating it.
VulkanShader* LoadShader(ShaderType shader_type, uint32_t guest_address,
const uint32_t* host_address, uint32_t dword_count);
// Configures a pipeline using the current render state and the given render
// pass. If a matching pipeline already exists it is reused; otherwise a new
// one may be created. Any state that can be set dynamically in the command
// buffer is issued at this time.
// Returns whether the pipeline could be successfully configured.
bool ConfigurePipeline(VkCommandBuffer command_buffer,
VkRenderPass render_pass,
PrimitiveType primitive_type);
// Currently configured pipeline layout, if any.
VkPipelineLayout current_pipeline_layout() const { return nullptr; }
// Currently configured vertex shader, if any.
VulkanShader* current_vertex_shader() const { return nullptr; }
// Currently configured pixel shader, if any.
VulkanShader* current_pixel_shader() const { return nullptr; }
// Clears all cached content.
void ClearCache();
private:
// TODO(benvanik): geometry shader cache.
// TODO(benvanik): translated shader cache.
// TODO(benvanik): pipeline layouts.
// TODO(benvanik): pipeline cache.
RegisterFile* register_file_ = nullptr;
VkDevice device_ = nullptr;
SpirvShaderTranslator shader_translator_;
xe::ui::spirv::SpirvDisassembler disassembler_;
// All loaded shaders mapped by their guest hash key.
std::unordered_map<uint64_t, VulkanShader*> shader_map_;
private:
enum class UpdateStatus {
kCompatible,
kMismatch,
kError,
};
UpdateStatus UpdateShaders(PrimitiveType prim_type);
UpdateStatus UpdateRenderTargets();
UpdateStatus UpdateState(PrimitiveType prim_type);
UpdateStatus UpdateViewportState();
UpdateStatus UpdateRasterizerState(PrimitiveType prim_type);
UpdateStatus UpdateBlendState();
UpdateStatus UpdateDepthStencilState();
bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
bool SetShadowRegister(float* dest, uint32_t register_name);
struct UpdateRenderTargetsRegisters {
uint32_t rb_modecontrol;
uint32_t rb_surface_info;
uint32_t rb_color_info;
uint32_t rb_color1_info;
uint32_t rb_color2_info;
uint32_t rb_color3_info;
uint32_t rb_color_mask;
uint32_t rb_depthcontrol;
uint32_t rb_stencilrefmask;
uint32_t rb_depth_info;
UpdateRenderTargetsRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_render_targets_regs_;
struct UpdateViewportStateRegisters {
// uint32_t pa_cl_clip_cntl;
uint32_t rb_surface_info;
uint32_t pa_cl_vte_cntl;
uint32_t pa_su_sc_mode_cntl;
uint32_t pa_sc_window_offset;
uint32_t pa_sc_window_scissor_tl;
uint32_t pa_sc_window_scissor_br;
float pa_cl_vport_xoffset;
float pa_cl_vport_yoffset;
float pa_cl_vport_zoffset;
float pa_cl_vport_xscale;
float pa_cl_vport_yscale;
float pa_cl_vport_zscale;
UpdateViewportStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_viewport_state_regs_;
struct UpdateRasterizerStateRegisters {
uint32_t pa_su_sc_mode_cntl;
uint32_t pa_sc_screen_scissor_tl;
uint32_t pa_sc_screen_scissor_br;
uint32_t multi_prim_ib_reset_index;
PrimitiveType prim_type;
UpdateRasterizerStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_rasterizer_state_regs_;
struct UpdateBlendStateRegisters {
uint32_t rb_blendcontrol[4];
float rb_blend_rgba[4];
UpdateBlendStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_blend_state_regs_;
struct UpdateDepthStencilStateRegisters {
uint32_t rb_depthcontrol;
uint32_t rb_stencilrefmask;
UpdateDepthStencilStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_depth_stencil_state_regs_;
struct UpdateShadersRegisters {
PrimitiveType prim_type;
uint32_t pa_su_sc_mode_cntl;
uint32_t sq_program_cntl;
uint32_t sq_context_misc;
VulkanShader* vertex_shader;
VulkanShader* pixel_shader;
UpdateShadersRegisters() { Reset(); }
void Reset() {
sq_program_cntl = 0;
vertex_shader = pixel_shader = nullptr;
}
} update_shaders_regs_;
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_PIPELINE_CACHE_H_
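current_pipeline_layout() above still returns nullptr. Below is a hypothetical sketch of how a real layout could be built around the buffer cache's constant descriptor set layout; constant_set_layout and device are assumed inputs, not members that exist in this commit.

// Hypothetical sketch; constant_set_layout and device are assumed inputs.
VkDescriptorSetLayout set_layouts[] = {constant_set_layout};
VkPipelineLayoutCreateInfo layout_info;
layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
layout_info.pNext = nullptr;
layout_info.flags = 0;
layout_info.setLayoutCount = static_cast<uint32_t>(xe::countof(set_layouts));
layout_info.pSetLayouts = set_layouts;
layout_info.pushConstantRangeCount = 0;
layout_info.pPushConstantRanges = nullptr;
VkPipelineLayout pipeline_layout = nullptr;
auto err =
    vkCreatePipelineLayout(device, &layout_info, nullptr, &pipeline_layout);
CheckResult(err, "vkCreatePipelineLayout");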

View File

@ -0,0 +1,45 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/render_cache.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
namespace xe {
namespace gpu {
namespace vulkan {
using xe::ui::vulkan::CheckResult;
RenderCache::RenderCache(RegisterFile* register_file,
ui::vulkan::VulkanDevice* device)
: register_file_(register_file), device_(*device) {}
RenderCache::~RenderCache() = default;
VkRenderPass RenderCache::BeginRenderPass(VkCommandBuffer command_buffer) {
return nullptr;
}
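BeginRenderPass is likewise a stub. A hedged sketch of the recording it will eventually perform is below; cached_render_pass_ and cached_framebuffer_ are hypothetical members, and the extent would really be derived from the render target registers rather than hardcoded.

// Editorial sketch; cached_render_pass_/cached_framebuffer_ are hypothetical.
uint32_t surface_width = 1280;   // Hypothetical; derived from register state.
uint32_t surface_height = 720;   // Hypothetical; derived from register state.
VkRenderPassBeginInfo begin_info;
begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
begin_info.pNext = nullptr;
begin_info.renderPass = cached_render_pass_;
begin_info.framebuffer = cached_framebuffer_;
begin_info.renderArea.offset = {0, 0};
begin_info.renderArea.extent = {surface_width, surface_height};
begin_info.clearValueCount = 0;
begin_info.pClearValues = nullptr;
vkCmdBeginRenderPass(command_buffer, &begin_info, VK_SUBPASS_CONTENTS_INLINE);
// EndRenderPass would then record the matching vkCmdEndRenderPass call.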
void RenderCache::EndRenderPass() {
//
}
void RenderCache::ClearCache() {
// TODO(benvanik): caching.
}
} // namespace vulkan
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,46 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_RENDER_CACHE_H_
#define XENIA_GPU_VULKAN_RENDER_CACHE_H_
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h"
namespace xe {
namespace gpu {
namespace vulkan {
// Caches render pass and framebuffer objects derived from the guest render
// target state and manages beginning/ending the render pass that surrounds
// each draw.
class RenderCache {
public:
RenderCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device);
~RenderCache();
VkRenderPass BeginRenderPass(VkCommandBuffer command_buffer);
void EndRenderPass();
// Clears all cached content.
void ClearCache();
private:
RegisterFile* register_file_ = nullptr;
VkDevice device_ = nullptr;
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_RENDER_CACHE_H_

View File

@ -0,0 +1,37 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/texture_cache.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
namespace xe {
namespace gpu {
namespace vulkan {
using xe::ui::vulkan::CheckResult;
TextureCache::TextureCache(RegisterFile* register_file,
ui::vulkan::VulkanDevice* device)
: register_file_(register_file), device_(*device) {}
TextureCache::~TextureCache() = default;
void TextureCache::ClearCache() {
// TODO(benvanik): caching.
}
} // namespace vulkan
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,47 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_TEXTURE_CACHE_H_
#define XENIA_GPU_VULKAN_TEXTURE_CACHE_H_
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan.h"
#include "xenia/ui/vulkan/vulkan_device.h"
namespace xe {
namespace gpu {
namespace vulkan {
// Uploads and caches guest textures for use by shaders.
// Resolves and readbacks are still TODO.
class TextureCache {
public:
TextureCache(RegisterFile* register_file, ui::vulkan::VulkanDevice* device);
~TextureCache();
// TODO(benvanik): UploadTexture.
// TODO(benvanik): Resolve.
// TODO(benvanik): ReadTexture.
// Clears all cached content.
void ClearCache();
private:
RegisterFile* register_file_ = nullptr;
VkDevice device_ = nullptr;
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_TEXTURE_CACHE_H_

View File

@ -20,12 +20,16 @@
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_graphics_system.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan_util.h"
namespace xe {
namespace gpu {
namespace vulkan {
using namespace xe::gpu::xenos;
using xe::ui::vulkan::CheckResult;
constexpr size_t kDefaultBufferCacheCapacity = 256 * 1024 * 1024;
VulkanCommandProcessor::VulkanCommandProcessor(
VulkanGraphicsSystem* graphics_system, kernel::KernelState* kernel_state)
@ -33,7 +37,14 @@ VulkanCommandProcessor::VulkanCommandProcessor(
VulkanCommandProcessor::~VulkanCommandProcessor() = default;
void VulkanCommandProcessor::ClearCaches() { CommandProcessor::ClearCaches(); }
void VulkanCommandProcessor::ClearCaches() {
CommandProcessor::ClearCaches();
buffer_cache_->ClearCache();
pipeline_cache_->ClearCache();
render_cache_->ClearCache();
texture_cache_->ClearCache();
}
bool VulkanCommandProcessor::SetupContext() {
if (!CommandProcessor::SetupContext()) {
@ -41,10 +52,47 @@ bool VulkanCommandProcessor::SetupContext() {
return false;
}
// Acquire our device and queue.
auto context = static_cast<xe::ui::vulkan::VulkanContext*>(context_.get());
device_ = context->device();
queue_ = device_->AcquireQueue();
if (!queue_) {
// Need to reuse primary queue (with locks).
queue_ = device_->primary_queue();
queue_mutex_ = &device_->primary_queue_mutex();
}
// Setup fenced pools used for all our per-frame/per-draw resources.
command_buffer_pool_ = std::make_unique<ui::vulkan::CommandBufferPool>(
*device_, device_->queue_family_index(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
// Initialize the state machine caches.
buffer_cache_ = std::make_unique<BufferCache>(register_file_, device_,
kDefaultBufferCacheCapacity);
pipeline_cache_ = std::make_unique<PipelineCache>(register_file_, device_);
render_cache_ = std::make_unique<RenderCache>(register_file_, device_);
texture_cache_ = std::make_unique<TextureCache>(register_file_, device_);
return true;
}
void VulkanCommandProcessor::ShutdownContext() {
// TODO(benvanik): wait until idle.
buffer_cache_.reset();
pipeline_cache_.reset();
render_cache_.reset();
texture_cache_.reset();
// Free all pools. This must come after all of our caches clean up.
command_buffer_pool_.reset();
// Release the queue, if we were using an acquired one.
if (!queue_mutex_) {
device_->ReleaseQueue(queue_);
queue_ = nullptr;
}
CommandProcessor::ShutdownContext();
}
@ -55,7 +103,8 @@ void VulkanCommandProcessor::MakeCoherent() {
CommandProcessor::MakeCoherent();
if (status_host & 0x80000000ul) {
// scratch_buffer_.ClearCache();
// TODO(benvanik): less-fine-grained clearing.
buffer_cache_->InvalidateCache();
}
}
@ -103,346 +152,167 @@ Shader* VulkanCommandProcessor::LoadShader(ShaderType shader_type,
uint32_t guest_address,
const uint32_t* host_address,
uint32_t dword_count) {
// return shader_cache_.LookupOrInsertShader(shader_type, host_address,
// dword_count);
return nullptr;
return pipeline_cache_->LoadShader(shader_type, guest_address, host_address,
dword_count);
}
bool VulkanCommandProcessor::IssueDraw(PrimitiveType prim_type,
bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
uint32_t index_count,
IndexBufferInfo* index_buffer_info) {
auto& regs = *register_file_;
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
// Skip all drawing for now - what did you expect? :)
return true;
bool draw_valid = false;
// if (index_buffer_info) {
// draw_valid = draw_batcher_.BeginDrawElements(prim_type, index_count,
// index_buffer_info->format);
//} else {
// draw_valid = draw_batcher_.BeginDrawArrays(prim_type, index_count);
//}
if (!draw_valid) {
return false;
}
auto& regs = *register_file_;
auto enable_mode =
static_cast<ModeControl>(regs[XE_GPU_REG_RB_MODECONTROL].u32 & 0x7);
if (enable_mode == ModeControl::kIgnore) {
// Ignored.
// draw_batcher_.DiscardDraw();
return true;
} else if (enable_mode == ModeControl::kCopy) {
// Special copy handling.
// draw_batcher_.DiscardDraw();
return IssueCopy();
}
#define CHECK_ISSUE_UPDATE_STATUS(status, mismatch, error_message) \
{ \
if (status == UpdateStatus::kError) { \
XELOGE(error_message); \
/*draw_batcher_.DiscardDraw(); */ \
return false; \
} else if (status == UpdateStatus::kMismatch) { \
mismatch = true; \
} \
}
// TODO(benvanik): bigger batches.
command_buffer_pool_->BeginBatch();
VkCommandBuffer command_buffer = command_buffer_pool_->AcquireEntry();
VkCommandBufferBeginInfo command_buffer_begin_info;
command_buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
command_buffer_begin_info.pNext = nullptr;
command_buffer_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
command_buffer_begin_info.pInheritanceInfo = nullptr;
auto err = vkBeginCommandBuffer(command_buffer, &command_buffer_begin_info);
CheckResult(err, "vkBeginCommandBuffer");
UpdateStatus status;
bool mismatch = false;
status = UpdateShaders(prim_type);
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to prepare draw shaders");
status = UpdateRenderTargets();
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render targets");
// if (!active_framebuffer_) {
// // No framebuffer, so nothing we do will actually have an effect.
// // Treat it as a no-op.
// // TODO(benvanik): if we have a vs export, still allow it to go.
// draw_batcher_.DiscardDraw();
// return true;
//}
status = UpdateState(prim_type);
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup render state");
status = PopulateSamplers();
CHECK_ISSUE_UPDATE_STATUS(status, mismatch,
"Unable to prepare draw samplers");
status = PopulateIndexBuffer(index_buffer_info);
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup index buffer");
status = PopulateVertexBuffers();
CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup vertex buffers");
// if (!draw_batcher_.CommitDraw()) {
// return false;
//}
// draw_batcher_.Flush(DrawBatcher::FlushMode::kMakeCoherent);
if (context_->WasLost()) {
// This draw lost us the context. This typically isn't hit.
assert_always();
// Begin the render pass.
// This will setup our framebuffer and begin the pass in the command buffer.
VkRenderPass render_pass = render_cache_->BeginRenderPass(command_buffer);
if (!render_pass) {
return false;
}
// Configure the pipeline for drawing.
// This encodes all render state (blend, depth, etc), our shader stages,
// and our vertex input layout.
if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_pass,
primitive_type)) {
render_cache_->EndRenderPass();
return false;
}
// Upload the constants the shaders require.
auto vertex_shader = pipeline_cache_->current_vertex_shader();
auto pixel_shader = pipeline_cache_->current_pixel_shader();
auto vertex_constant_offset = buffer_cache_->UploadConstantRegisters(
vertex_shader->constant_register_map());
auto pixel_constant_offset = buffer_cache_->UploadConstantRegisters(
pixel_shader->constant_register_map());
if (vertex_constant_offset == VK_WHOLE_SIZE ||
pixel_constant_offset == VK_WHOLE_SIZE) {
render_cache_->EndRenderPass();
return false;
}
// Configure constant uniform access to point at our offsets.
auto constant_descriptor_set = buffer_cache_->constant_descriptor_set();
auto pipeline_layout = pipeline_cache_->current_pipeline_layout();
uint32_t constant_offsets[2] = {static_cast<uint32_t>(vertex_constant_offset),
static_cast<uint32_t>(pixel_constant_offset)};
vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
pipeline_layout, 0, 1, &constant_descriptor_set,
static_cast<uint32_t>(xe::countof(constant_offsets)),
constant_offsets);
// Upload and bind index buffer data (if we have any).
if (!PopulateIndexBuffer(command_buffer, index_buffer_info)) {
render_cache_->EndRenderPass();
return false;
}
// Upload and bind all vertex buffer data.
if (!PopulateVertexBuffers(command_buffer, vertex_shader)) {
render_cache_->EndRenderPass();
return false;
}
// Upload and set descriptors for all textures.
if (!PopulateSamplers(command_buffer, vertex_shader, pixel_shader)) {
render_cache_->EndRenderPass();
return false;
}
#if 0
// Actually issue the draw.
if (!index_buffer_info) {
// Auto-indexed draw.
uint32_t instance_count = 1;
uint32_t first_vertex = 0;
uint32_t first_instance = 0;
vkCmdDraw(command_buffer, index_count, instance_count, first_vertex,
first_instance);
} else {
// Index buffer draw.
uint32_t instance_count = 1;
uint32_t first_index =
register_file_->values[XE_GPU_REG_VGT_INDX_OFFSET].u32;
uint32_t vertex_offset = 0;
uint32_t first_instance = 0;
vkCmdDrawIndexed(command_buffer, index_count, instance_count, first_index,
vertex_offset, first_instance);
}
#endif
// End the rendering pass.
render_cache_->EndRenderPass();
// TODO(benvanik): bigger batches.
err = vkEndCommandBuffer(command_buffer);
CheckResult(err, "vkEndCommandBuffer");
VkFence fence;
VkFenceCreateInfo fence_info;
fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
fence_info.pNext = nullptr;
fence_info.flags = 0;
vkCreateFence(*device_, &fence_info, nullptr, &fence);
command_buffer_pool_->EndBatch(fence);
VkSubmitInfo submit_info;
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submit_info.pNext = nullptr;
submit_info.waitSemaphoreCount = 0;
submit_info.pWaitSemaphores = nullptr;
submit_info.commandBufferCount = 1;
submit_info.pCommandBuffers = &command_buffer;
submit_info.signalSemaphoreCount = 0;
submit_info.pSignalSemaphores = nullptr;
if (queue_mutex_) {
queue_mutex_->lock();
}
err = vkQueueSubmit(queue_, 1, &submit_info, fence);
if (queue_mutex_) {
queue_mutex_->unlock();
}
CheckResult(err, "vkQueueSubmit");
if (queue_mutex_) {
queue_mutex_->lock();
}
vkQueueWaitIdle(queue_);
if (queue_mutex_) {
queue_mutex_->unlock();
}
command_buffer_pool_->Scavenge();
vkDestroyFence(*device_, fence, nullptr);
return true;
}
bool VulkanCommandProcessor::SetShadowRegister(uint32_t* dest,
uint32_t register_name) {
uint32_t value = register_file_->values[register_name].u32;
if (*dest == value) {
return false;
}
*dest = value;
return true;
}
bool VulkanCommandProcessor::SetShadowRegister(float* dest,
uint32_t register_name) {
float value = register_file_->values[register_name].f32;
if (*dest == value) {
return false;
}
*dest = value;
return true;
}
VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::UpdateShaders(
PrimitiveType prim_type) {
auto& regs = update_shaders_regs_;
// These are the constant base addresses/ranges for shaders.
// We have these hardcoded right now cause nothing seems to differ.
assert_true(register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 ==
0x000FF000 ||
register_file_->values[XE_GPU_REG_SQ_VS_CONST].u32 == 0x00000000);
assert_true(register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 ==
0x000FF100 ||
register_file_->values[XE_GPU_REG_SQ_PS_CONST].u32 == 0x00000000);
bool dirty = false;
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
dirty |= SetShadowRegister(&regs.sq_program_cntl, XE_GPU_REG_SQ_PROGRAM_CNTL);
dirty |= SetShadowRegister(&regs.sq_context_misc, XE_GPU_REG_SQ_CONTEXT_MISC);
dirty |= regs.vertex_shader != active_vertex_shader_;
dirty |= regs.pixel_shader != active_pixel_shader_;
dirty |= regs.prim_type != prim_type;
if (!dirty) {
return UpdateStatus::kCompatible;
}
regs.vertex_shader = static_cast<VulkanShader*>(active_vertex_shader_);
regs.pixel_shader = static_cast<VulkanShader*>(active_pixel_shader_);
regs.prim_type = prim_type;
SCOPE_profile_cpu_f("gpu");
return UpdateStatus::kMismatch;
}
VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::UpdateRenderTargets() {
auto& regs = update_render_targets_regs_;
bool dirty = false;
dirty |= SetShadowRegister(&regs.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
dirty |= SetShadowRegister(&regs.rb_color_info, XE_GPU_REG_RB_COLOR_INFO);
dirty |= SetShadowRegister(&regs.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO);
dirty |= SetShadowRegister(&regs.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO);
dirty |= SetShadowRegister(&regs.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO);
dirty |= SetShadowRegister(&regs.rb_color_mask, XE_GPU_REG_RB_COLOR_MASK);
dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
dirty |=
SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
dirty |= SetShadowRegister(&regs.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO);
if (!dirty) {
return UpdateStatus::kCompatible;
}
SCOPE_profile_cpu_f("gpu");
return UpdateStatus::kMismatch;
}
VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::UpdateState(
PrimitiveType prim_type) {
bool mismatch = false;
#define CHECK_UPDATE_STATUS(status, mismatch, error_message) \
{ \
if (status == UpdateStatus::kError) { \
XELOGE(error_message); \
return status; \
} else if (status == UpdateStatus::kMismatch) { \
mismatch = true; \
} \
}
UpdateStatus status;
status = UpdateViewportState();
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update viewport state");
status = UpdateRasterizerState(prim_type);
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update rasterizer state");
status = UpdateBlendState();
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update blend state");
status = UpdateDepthStencilState();
CHECK_UPDATE_STATUS(status, mismatch, "Unable to update depth/stencil state");
return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
}
VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::UpdateViewportState() {
auto& regs = update_viewport_state_regs_;
bool dirty = false;
// dirty |= SetShadowRegister(&state_regs.pa_cl_clip_cntl,
// XE_GPU_REG_PA_CL_CLIP_CNTL);
dirty |= SetShadowRegister(&regs.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
dirty |= SetShadowRegister(&regs.pa_cl_vte_cntl, XE_GPU_REG_PA_CL_VTE_CNTL);
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
dirty |= SetShadowRegister(&regs.pa_sc_window_offset,
XE_GPU_REG_PA_SC_WINDOW_OFFSET);
dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_tl,
XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL);
dirty |= SetShadowRegister(&regs.pa_sc_window_scissor_br,
XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR);
dirty |= SetShadowRegister(&regs.pa_cl_vport_xoffset,
XE_GPU_REG_PA_CL_VPORT_XOFFSET);
dirty |= SetShadowRegister(&regs.pa_cl_vport_yoffset,
XE_GPU_REG_PA_CL_VPORT_YOFFSET);
dirty |= SetShadowRegister(&regs.pa_cl_vport_zoffset,
XE_GPU_REG_PA_CL_VPORT_ZOFFSET);
dirty |= SetShadowRegister(&regs.pa_cl_vport_xscale,
XE_GPU_REG_PA_CL_VPORT_XSCALE);
dirty |= SetShadowRegister(&regs.pa_cl_vport_yscale,
XE_GPU_REG_PA_CL_VPORT_YSCALE);
dirty |= SetShadowRegister(&regs.pa_cl_vport_zscale,
XE_GPU_REG_PA_CL_VPORT_ZSCALE);
// Much of this state machine is extracted from:
// https://github.com/freedreno/mesa/blob/master/src/mesa/drivers/dri/r200/r200_state.c
// http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
// http://www.x.org/docs/AMD/old/evergreen_3D_registers_v2.pdf
// VTX_XY_FMT = true: the incoming X, Y have already been multiplied by 1/W0.
// = false: multiply the X, Y coordinates by 1/W0.
// VTX_Z_FMT = true: the incoming Z has already been multiplied by 1/W0.
// = false: multiply the Z coordinate by 1/W0.
// VTX_W0_FMT = true: the incoming W0 is not 1/W0. Perform the reciprocal to
// get 1/W0.
// draw_batcher_.set_vtx_fmt((regs.pa_cl_vte_cntl >> 8) & 0x1 ? 1.0f : 0.0f,
// (regs.pa_cl_vte_cntl >> 9) & 0x1 ? 1.0f : 0.0f,
// (regs.pa_cl_vte_cntl >> 10) & 0x1 ? 1.0f : 0.0f);
// Done in VS, no need to flush state.
// if ((regs.pa_cl_vte_cntl & (1 << 0)) > 0) {
// draw_batcher_.set_window_scalar(1.0f, 1.0f);
//} else {
// draw_batcher_.set_window_scalar(1.0f / 2560.0f, -1.0f / 2560.0f);
//}
if (!dirty) {
return UpdateStatus::kCompatible;
}
return UpdateStatus::kMismatch;
}
VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::UpdateRasterizerState(PrimitiveType prim_type) {
auto& regs = update_rasterizer_state_regs_;
bool dirty = false;
dirty |= SetShadowRegister(&regs.pa_su_sc_mode_cntl,
XE_GPU_REG_PA_SU_SC_MODE_CNTL);
dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_tl,
XE_GPU_REG_PA_SC_SCREEN_SCISSOR_TL);
dirty |= SetShadowRegister(&regs.pa_sc_screen_scissor_br,
XE_GPU_REG_PA_SC_SCREEN_SCISSOR_BR);
dirty |= SetShadowRegister(&regs.multi_prim_ib_reset_index,
XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX);
dirty |= regs.prim_type != prim_type;
if (!dirty) {
return UpdateStatus::kCompatible;
}
regs.prim_type = prim_type;
SCOPE_profile_cpu_f("gpu");
return UpdateStatus::kMismatch;
}
VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::UpdateBlendState() {
auto& reg_file = *register_file_;
auto& regs = update_blend_state_regs_;
// Alpha testing -- ALPHAREF, ALPHAFUNC, ALPHATESTENABLE
// Deprecated in GL, implemented in shader.
// if(ALPHATESTENABLE && frag_out.a [<=/ALPHAFUNC] ALPHAREF) discard;
// uint32_t color_control = reg_file[XE_GPU_REG_RB_COLORCONTROL].u32;
// draw_batcher_.set_alpha_test((color_control & 0x4) != 0, //
// ALPAHTESTENABLE
// color_control & 0x7, // ALPHAFUNC
// reg_file[XE_GPU_REG_RB_ALPHA_REF].f32);
bool dirty = false;
dirty |=
SetShadowRegister(&regs.rb_blendcontrol[0], XE_GPU_REG_RB_BLENDCONTROL_0);
dirty |=
SetShadowRegister(&regs.rb_blendcontrol[1], XE_GPU_REG_RB_BLENDCONTROL_1);
dirty |=
SetShadowRegister(&regs.rb_blendcontrol[2], XE_GPU_REG_RB_BLENDCONTROL_2);
dirty |=
SetShadowRegister(&regs.rb_blendcontrol[3], XE_GPU_REG_RB_BLENDCONTROL_3);
dirty |= SetShadowRegister(&regs.rb_blend_rgba[0], XE_GPU_REG_RB_BLEND_RED);
dirty |= SetShadowRegister(&regs.rb_blend_rgba[1], XE_GPU_REG_RB_BLEND_GREEN);
dirty |= SetShadowRegister(&regs.rb_blend_rgba[2], XE_GPU_REG_RB_BLEND_BLUE);
dirty |= SetShadowRegister(&regs.rb_blend_rgba[3], XE_GPU_REG_RB_BLEND_ALPHA);
if (!dirty) {
return UpdateStatus::kCompatible;
}
SCOPE_profile_cpu_f("gpu");
return UpdateStatus::kMismatch;
}
VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::UpdateDepthStencilState() {
auto& regs = update_depth_stencil_state_regs_;
bool dirty = false;
dirty |= SetShadowRegister(&regs.rb_depthcontrol, XE_GPU_REG_RB_DEPTHCONTROL);
dirty |=
SetShadowRegister(&regs.rb_stencilrefmask, XE_GPU_REG_RB_STENCILREFMASK);
if (!dirty) {
return UpdateStatus::kCompatible;
}
SCOPE_profile_cpu_f("gpu");
return UpdateStatus::kMismatch;
}
VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::PopulateIndexBuffer(
IndexBufferInfo* index_buffer_info) {
bool VulkanCommandProcessor::PopulateIndexBuffer(
VkCommandBuffer command_buffer, IndexBufferInfo* index_buffer_info) {
auto& regs = *register_file_;
if (!index_buffer_info || !index_buffer_info->guest_base) {
// No index buffer or auto draw.
return UpdateStatus::kCompatible;
return true;
}
auto& info = *index_buffer_info;
@ -462,19 +332,44 @@ VulkanCommandProcessor::PopulateIndexBuffer(
trace_writer_.WriteMemoryRead(info.guest_base, info.length);
return UpdateStatus::kCompatible;
// Upload (or get a cached copy of) the buffer.
const void* source_ptr =
memory_->TranslatePhysical<const void*>(info.guest_base);
size_t source_length =
info.count * (info.format == IndexFormat::kInt32 ? sizeof(uint32_t)
: sizeof(uint16_t));
auto buffer_ref =
buffer_cache_->UploadIndexBuffer(source_ptr, source_length, info.format);
if (buffer_ref.second == VK_WHOLE_SIZE) {
// Failed to upload buffer.
return false;
}
// Bind the buffer.
VkIndexType index_type = info.format == IndexFormat::kInt32
? VK_INDEX_TYPE_UINT32
: VK_INDEX_TYPE_UINT16;
vkCmdBindIndexBuffer(command_buffer, buffer_ref.first, buffer_ref.second,
index_type);
return true;
}
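// Rough sketch of what the cache-side upload is assumed to do (the real
// implementation is BufferCache::UploadIndexBuffer): copy the guest index
// data into the transient index buffer, byte-swapping to host order, and
// return the buffer plus the offset written to, with VK_WHOLE_SIZE as the
// failure sentinel checked above. The mapped_ptr/write_offset bookkeeping
// here is hypothetical.
inline std::pair<VkBuffer, VkDeviceSize> UploadIndexDataSketch(
    VkBuffer transient_index_buffer, uint8_t* mapped_ptr,
    VkDeviceSize write_offset, VkDeviceSize capacity, const void* source,
    size_t length, IndexFormat format) {
  if (!mapped_ptr || write_offset + length > capacity) {
    return {transient_index_buffer, VK_WHOLE_SIZE};  // Out of transient space.
  }
  if (format == IndexFormat::kInt16) {
    xe::copy_and_swap_16_aligned(
        reinterpret_cast<uint16_t*>(mapped_ptr + write_offset),
        reinterpret_cast<const uint16_t*>(source), length / 2);
  } else {
    xe::copy_and_swap_32_aligned(
        reinterpret_cast<uint32_t*>(mapped_ptr + write_offset),
        reinterpret_cast<const uint32_t*>(source), length / 4);
  }
  return {transient_index_buffer, write_offset};
}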
VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::PopulateVertexBuffers() {
bool VulkanCommandProcessor::PopulateVertexBuffers(
VkCommandBuffer command_buffer, VulkanShader* vertex_shader) {
auto& regs = *register_file_;
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
auto& regs = *register_file_;
assert_not_null(active_vertex_shader_);
auto& vertex_bindings = vertex_shader->vertex_bindings();
assert_true(vertex_bindings.size() <= 32);
VkBuffer all_buffers[32];
VkDeviceSize all_buffer_offsets[32];
uint32_t buffer_index = 0;
for (const auto& vertex_binding : active_vertex_shader_->vertex_bindings()) {
for (const auto& vertex_binding : vertex_bindings) {
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
(vertex_binding.fetch_constant / 3) * 6;
const auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
@ -492,58 +387,72 @@ VulkanCommandProcessor::PopulateVertexBuffers() {
}
assert_true(fetch->endian == 2);
// TODO(benvanik): compute based on indices or vertex count.
// THIS CAN BE MASSIVELY INCORRECT (too large); see the address/size sketch
// after this function.
size_t valid_range = size_t(fetch->size * 4);
trace_writer_.WriteMemoryRead(fetch->address << 2, valid_range);
// Upload (or get a cached copy of) the buffer.
const void* source_ptr =
memory_->TranslatePhysical<const void*>(fetch->address << 2);
size_t source_length = valid_range;
auto buffer_ref =
buffer_cache_->UploadVertexBuffer(source_ptr, source_length);
if (buffer_ref.second == VK_WHOLE_SIZE) {
// Failed to upload buffer.
return false;
}
// Stash the buffer reference for our bulk bind at the end.
all_buffers[buffer_index] = buffer_ref.first;
all_buffer_offsets[buffer_index] = buffer_ref.second;
++buffer_index;
}
return UpdateStatus::kCompatible;
// Bind buffers.
vkCmdBindVertexBuffers(command_buffer, 0, buffer_index, all_buffers,
all_buffer_offsets);
return true;
}
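// Illustrative sketch of the fetch-constant arithmetic used above: vertex
// fetch constants are two dwords each, packed three to a six-dword fetch
// group (hence fetch_constant / 3 * 6), guest addresses are stored as word
// addresses (hence address << 2), and sizes are counted in 32-bit words
// (hence size * 4). The struct and helper names are hypothetical.
struct VertexFetchRange {
  uint32_t guest_byte_address;
  uint32_t byte_length;
};
inline VertexFetchRange GetVertexFetchRange(uint32_t fetch_address_words,
                                            uint32_t fetch_size_words) {
  VertexFetchRange range;
  range.guest_byte_address = fetch_address_words << 2;
  range.byte_length = fetch_size_words * 4;
  return range;
}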
VulkanCommandProcessor::UpdateStatus
VulkanCommandProcessor::PopulateSamplers() {
bool VulkanCommandProcessor::PopulateSamplers(VkCommandBuffer command_buffer,
VulkanShader* vertex_shader,
VulkanShader* pixel_shader) {
#if FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // FINE_GRAINED_DRAW_SCOPES
bool mismatch = false;
bool any_failed = false;
// VS and PS samplers are shared, but may be used exclusively.
// We walk each and set up lazily.
bool has_setup_sampler[32] = {false};
// Vertex texture samplers.
for (auto& texture_binding : active_vertex_shader_->texture_bindings()) {
for (auto& texture_binding : vertex_shader->texture_bindings()) {
if (has_setup_sampler[texture_binding.fetch_constant]) {
continue;
}
has_setup_sampler[texture_binding.fetch_constant] = true;
auto status = PopulateSampler(texture_binding);
if (status == UpdateStatus::kError) {
return status;
} else if (status == UpdateStatus::kMismatch) {
mismatch = true;
}
any_failed = !PopulateSampler(command_buffer, texture_binding) || any_failed;
}
// Pixel shader texture sampler.
for (auto& texture_binding : active_pixel_shader_->texture_bindings()) {
for (auto& texture_binding : pixel_shader->texture_bindings()) {
if (has_setup_sampler[texture_binding.fetch_constant]) {
continue;
}
has_setup_sampler[texture_binding.fetch_constant] = true;
auto status = PopulateSampler(texture_binding);
if (status == UpdateStatus::kError) {
return UpdateStatus::kError;
} else if (status == UpdateStatus::kMismatch) {
mismatch = true;
}
any_failed = !PopulateSampler(command_buffer, texture_binding) || any_failed;
}
return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible;
return !any_failed;
}
VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::PopulateSampler(
bool VulkanCommandProcessor::PopulateSampler(
VkCommandBuffer command_buffer,
const Shader::TextureBinding& texture_binding) {
auto& regs = *register_file_;
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
@ -553,30 +462,34 @@ VulkanCommandProcessor::UpdateStatus VulkanCommandProcessor::PopulateSampler(
// ?
if (!fetch.type) {
return UpdateStatus::kCompatible;
return true;
}
assert_true(fetch.type == 0x2);
TextureInfo texture_info;
if (!TextureInfo::Prepare(fetch, &texture_info)) {
XELOGE("Unable to parse texture fetcher info");
return UpdateStatus::kCompatible; // invalid texture used
return true; // invalid texture used
}
SamplerInfo sampler_info;
if (!SamplerInfo::Prepare(fetch, texture_binding.fetch_instr,
&sampler_info)) {
XELOGE("Unable to parse sampler info");
return UpdateStatus::kCompatible; // invalid texture used
return true; // invalid texture used
}
trace_writer_.WriteMemoryRead(texture_info.guest_address,
texture_info.input_length);
return UpdateStatus::kCompatible;
// TODO(benvanik): texture cache lookup.
// TODO(benvanik): bind or return so PopulateSamplers can batch.
return true;
}
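// Hypothetical shape for the "return so PopulateSamplers can batch" TODO
// above: instead of binding immediately, PopulateSampler could hand back the
// parsed descriptions and let the caller build a single descriptor update.
// Nothing below exists yet in this WIP; it is a sketch only.
struct SamplerBindingSketch {
  uint32_t fetch_constant;
  TextureInfo texture_info;
  SamplerInfo sampler_info;
};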
bool VulkanCommandProcessor::IssueCopy() {
SCOPE_profile_cpu_f("gpu");
// TODO(benvanik): resolve.
return true;
}

View File

@ -23,12 +23,17 @@
#include "xenia/base/threading.h"
#include "xenia/gpu/command_processor.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/buffer_cache.h"
#include "xenia/gpu/vulkan/pipeline_cache.h"
#include "xenia/gpu/vulkan/render_cache.h"
#include "xenia/gpu/vulkan/texture_cache.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/kernel/xthread.h"
#include "xenia/memory.h"
#include "xenia/ui/vulkan/fenced_pools.h"
#include "xenia/ui/vulkan/vulkan_context.h"
#include "xenia/ui/vulkan/vulkan_device.h"
namespace xe {
namespace gpu {
@ -45,12 +50,6 @@ class VulkanCommandProcessor : public CommandProcessor {
void ClearCaches() override;
private:
enum class UpdateStatus {
kCompatible,
kMismatch,
kError,
};
bool SetupContext() override;
void ShutdownContext() override;
@ -65,97 +64,35 @@ class VulkanCommandProcessor : public CommandProcessor {
const uint32_t* host_address,
uint32_t dword_count) override;
bool IssueDraw(PrimitiveType prim_type, uint32_t index_count,
bool IssueDraw(PrimitiveType primitive_type, uint32_t index_count,
IndexBufferInfo* index_buffer_info) override;
UpdateStatus UpdateShaders(PrimitiveType prim_type);
UpdateStatus UpdateRenderTargets();
UpdateStatus UpdateState(PrimitiveType prim_type);
UpdateStatus UpdateViewportState();
UpdateStatus UpdateRasterizerState(PrimitiveType prim_type);
UpdateStatus UpdateBlendState();
UpdateStatus UpdateDepthStencilState();
UpdateStatus PopulateIndexBuffer(IndexBufferInfo* index_buffer_info);
UpdateStatus PopulateVertexBuffers();
UpdateStatus PopulateSamplers();
UpdateStatus PopulateSampler(const Shader::TextureBinding& texture_binding);
bool PopulateIndexBuffer(VkCommandBuffer command_buffer,
IndexBufferInfo* index_buffer_info);
bool PopulateVertexBuffers(VkCommandBuffer command_buffer,
VulkanShader* vertex_shader);
bool PopulateSamplers(VkCommandBuffer command_buffer,
VulkanShader* vertex_shader,
VulkanShader* pixel_shader);
bool PopulateSampler(VkCommandBuffer command_buffer,
const Shader::TextureBinding& texture_binding);
bool IssueCopy() override;
SpirvShaderTranslator shader_translator_;
xe::ui::vulkan::VulkanDevice* device_ = nullptr;
private:
bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
bool SetShadowRegister(float* dest, uint32_t register_name);
struct UpdateRenderTargetsRegisters {
uint32_t rb_modecontrol;
uint32_t rb_surface_info;
uint32_t rb_color_info;
uint32_t rb_color1_info;
uint32_t rb_color2_info;
uint32_t rb_color3_info;
uint32_t rb_color_mask;
uint32_t rb_depthcontrol;
uint32_t rb_stencilrefmask;
uint32_t rb_depth_info;
// TODO(benvanik): abstract behind context?
// Queue used to submit work. This may be a dedicated queue for the command
// processor and no locking will be required for use. If a dedicated queue
// was not available this will be the device primary_queue and the
// queue_mutex must be used to synchronize access to it.
VkQueue queue_ = nullptr;
std::mutex* queue_mutex_ = nullptr;
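// Illustrative sketch (comment only) of how a submission would honor the
// contract above; the actual submission path is not part of this WIP:
//   if (queue_mutex_) {
//     std::lock_guard<std::mutex> lock(*queue_mutex_);
//     vkQueueSubmit(queue_, 1, &submit_info, fence);
//   } else {
//     vkQueueSubmit(queue_, 1, &submit_info, fence);
//   }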
UpdateRenderTargetsRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_render_targets_regs_;
struct UpdateViewportStateRegisters {
// uint32_t pa_cl_clip_cntl;
uint32_t rb_surface_info;
uint32_t pa_cl_vte_cntl;
uint32_t pa_su_sc_mode_cntl;
uint32_t pa_sc_window_offset;
uint32_t pa_sc_window_scissor_tl;
uint32_t pa_sc_window_scissor_br;
float pa_cl_vport_xoffset;
float pa_cl_vport_yoffset;
float pa_cl_vport_zoffset;
float pa_cl_vport_xscale;
float pa_cl_vport_yscale;
float pa_cl_vport_zscale;
std::unique_ptr<BufferCache> buffer_cache_;
std::unique_ptr<PipelineCache> pipeline_cache_;
std::unique_ptr<RenderCache> render_cache_;
std::unique_ptr<TextureCache> texture_cache_;
UpdateViewportStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_viewport_state_regs_;
struct UpdateRasterizerStateRegisters {
uint32_t pa_su_sc_mode_cntl;
uint32_t pa_sc_screen_scissor_tl;
uint32_t pa_sc_screen_scissor_br;
uint32_t multi_prim_ib_reset_index;
PrimitiveType prim_type;
UpdateRasterizerStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_rasterizer_state_regs_;
struct UpdateBlendStateRegisters {
uint32_t rb_blendcontrol[4];
float rb_blend_rgba[4];
UpdateBlendStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_blend_state_regs_;
struct UpdateDepthStencilStateRegisters {
uint32_t rb_depthcontrol;
uint32_t rb_stencilrefmask;
UpdateDepthStencilStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_depth_stencil_state_regs_;
struct UpdateShadersRegisters {
PrimitiveType prim_type;
uint32_t pa_su_sc_mode_cntl;
uint32_t sq_program_cntl;
uint32_t sq_context_misc;
VulkanShader* vertex_shader;
VulkanShader* pixel_shader;
UpdateShadersRegisters() { Reset(); }
void Reset() {
sq_program_cntl = 0;
vertex_shader = pixel_shader = nullptr;
}
} update_shaders_regs_;
std::unique_ptr<ui::vulkan::CommandBufferPool> command_buffer_pool_;
};
} // namespace vulkan

View File

@ -0,0 +1,81 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/vulkan/fenced_pools.h"
#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/ui/vulkan/vulkan_util.h"
namespace xe {
namespace ui {
namespace vulkan {
using xe::ui::vulkan::CheckResult;
CommandBufferPool::CommandBufferPool(VkDevice device,
uint32_t queue_family_index,
VkCommandBufferLevel level)
: BaseFencedPool(device), level_(level) {
// Create the pool used for allocating buffers.
// They are marked as transient (short-lived) and cycled frequently.
VkCommandPoolCreateInfo cmd_pool_info;
cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
cmd_pool_info.pNext = nullptr;
cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
cmd_pool_info.queueFamilyIndex = queue_family_index;
auto err =
vkCreateCommandPool(device_, &cmd_pool_info, nullptr, &command_pool_);
CheckResult(err, "vkCreateCommandPool");
// Allocate a bunch of command buffers to start.
constexpr uint32_t kDefaultCount = 32;
VkCommandBufferAllocateInfo command_buffer_info;
command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
command_buffer_info.pNext = nullptr;
command_buffer_info.commandPool = command_pool_;
command_buffer_info.level = level;
command_buffer_info.commandBufferCount = kDefaultCount;
VkCommandBuffer command_buffers[kDefaultCount];
err =
vkAllocateCommandBuffers(device_, &command_buffer_info, command_buffers);
CheckResult(err, "vkCreateCommandBuffer");
for (size_t i = 0; i < xe::countof(command_buffers); ++i) {
PushEntry(command_buffers[i]);
}
}
CommandBufferPool::~CommandBufferPool() {
vkDestroyCommandPool(device_, command_pool_, nullptr);
command_pool_ = nullptr;
}
VkCommandBuffer CommandBufferPool::AllocateEntry() {
// TODO(benvanik): allocate a bunch at once?
VkCommandBufferAllocateInfo command_buffer_info;
command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
command_buffer_info.pNext = nullptr;
command_buffer_info.commandPool = command_pool_;
command_buffer_info.level = level_;
command_buffer_info.commandBufferCount = 1;
VkCommandBuffer command_buffer;
auto err =
vkAllocateCommandBuffers(device_, &command_buffer_info, &command_buffer);
CheckResult(err, "vkCreateCommandBuffer");
return command_buffer;
}
void CommandBufferPool::FreeEntry(VkCommandBuffer handle) {
vkFreeCommandBuffers(device_, command_pool_, 1, &handle);
}
} // namespace vulkan
} // namespace ui
} // namespace xe

View File

@ -0,0 +1,200 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_VULKAN_FENCED_POOLS_H_
#define XENIA_UI_VULKAN_FENCED_POOLS_H_
#include <memory>
#include "xenia/base/assert.h"
#include "xenia/ui/vulkan/vulkan.h"
namespace xe {
namespace ui {
namespace vulkan {
// Simple pool for Vulkan homogeneous objects that cannot be reused while
// in-flight.
// It batches pooled objects into groups and uses a vkQueueSubmit fence to
// indicate their availability. If no objects are free when one is requested,
// the caller is expected to create them. A usage sketch follows the
// CommandBufferPool declaration at the end of this header.
template <typename T, typename HANDLE>
class BaseFencedPool {
public:
BaseFencedPool(VkDevice device) : device_(device) {}
virtual ~BaseFencedPool() {
// TODO(benvanik): wait on fence until done.
assert_null(pending_batch_list_head_);
// Run down free lists.
while (free_batch_list_head_) {
auto batch = free_batch_list_head_;
free_batch_list_head_ = batch->next;
delete batch;
}
while (free_entry_list_head_) {
auto entry = free_entry_list_head_;
free_entry_list_head_ = entry->next;
static_cast<T*>(this)->FreeEntry(entry->handle);
delete entry;
}
}
// Checks all pending batches for completion and scavenges their entries.
// This should be called as frequently as reasonable.
void Scavenge() {
while (pending_batch_list_head_) {
auto batch = pending_batch_list_head_;
if (vkGetFenceStatus(device_, batch->fence) == VK_SUCCESS) {
// Batch has completed. Reclaim.
pending_batch_list_head_ = batch->next;
if (batch == pending_batch_list_tail_) {
pending_batch_list_tail_ = nullptr;
}
batch->next = free_batch_list_head_;
free_batch_list_head_ = batch;
batch->entry_list_tail->next = free_entry_list_head_;
free_entry_list_head_ = batch->entry_list_head;
batch->entry_list_head = nullptr;
batch->entry_list_tail = nullptr;
} else {
// Batch is still in-flight. Since batches are executed in order we know
// no others after it could have completed, so early-exit.
return;
}
}
}
// Begins a new batch.
// All entries acquired within this batch will be marked as in-use until
// the fence specified in EndBatch is signalled.
void BeginBatch() {
assert_null(open_batch_);
Batch* batch = nullptr;
if (free_batch_list_head_) {
// Reuse a batch.
batch = free_batch_list_head_;
free_batch_list_head_ = batch->next;
batch->next = nullptr;
} else {
// Allocate new batch.
batch = new Batch();
batch->next = nullptr;
}
batch->entry_list_head = nullptr;
batch->entry_list_tail = nullptr;
batch->fence = nullptr;
open_batch_ = batch;
}
// Attempts to acquire an entry from the pool in the current batch.
// If none are available a new one will be allocated.
HANDLE AcquireEntry() {
Entry* entry = nullptr;
if (free_entry_list_head_) {
// Slice off an entry from the free list.
entry = free_entry_list_head_;
free_entry_list_head_ = entry->next;
} else {
// No entry available; allocate new.
entry = new Entry();
entry->handle = static_cast<T*>(this)->AllocateEntry();
}
entry->next = nullptr;
if (!open_batch_->entry_list_head) {
open_batch_->entry_list_head = entry;
}
if (open_batch_->entry_list_tail) {
open_batch_->entry_list_tail->next = entry;
}
open_batch_->entry_list_tail = entry;
return entry->handle;
}
// Ends the current batch using the given fence to indicate when the batch
// has completed execution on the GPU.
void EndBatch(VkFence fence) {
assert_not_null(open_batch_);
// Close and see if we have anything.
auto batch = open_batch_;
open_batch_ = nullptr;
if (!batch->entry_list_head) {
// Nothing to do.
batch->next = free_batch_list_head_;
free_batch_list_head_ = batch;
return;
}
// Track the fence.
batch->fence = fence;
// Append to the end of the batch list.
batch->next = nullptr;
if (!pending_batch_list_head_) {
pending_batch_list_head_ = batch;
}
if (pending_batch_list_tail_) {
pending_batch_list_tail_->next = batch;
pending_batch_list_tail_ = batch;
} else {
pending_batch_list_tail_ = batch;
}
}
protected:
void PushEntry(HANDLE handle) {
auto entry = new Entry();
entry->next = free_entry_list_head_;
entry->handle = handle;
free_entry_list_head_ = entry;
}
VkDevice device_ = nullptr;
private:
struct Entry {
Entry* next;
HANDLE handle;
};
struct Batch {
Batch* next;
Entry* entry_list_head;
Entry* entry_list_tail;
VkFence fence;
};
Batch* free_batch_list_head_ = nullptr;
Entry* free_entry_list_head_ = nullptr;
Batch* pending_batch_list_head_ = nullptr;
Batch* pending_batch_list_tail_ = nullptr;
Batch* open_batch_ = nullptr;
};
class CommandBufferPool
: public BaseFencedPool<CommandBufferPool, VkCommandBuffer> {
public:
CommandBufferPool(VkDevice device, uint32_t queue_family_index,
VkCommandBufferLevel level);
~CommandBufferPool() override;
protected:
friend class BaseFencedPool<CommandBufferPool, VkCommandBuffer>;
VkCommandBuffer AllocateEntry();
void FreeEntry(VkCommandBuffer handle);
VkCommandPool command_pool_ = nullptr;
VkCommandBufferLevel level_ = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
};
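// Usage sketch for the pool above (illustrative only; frame_fence and the
// recorded work are hypothetical): acquire a command buffer for one frame,
// submit it with a fence, and let Scavenge() recycle it once that fence
// signals.
inline void ExampleCommandBufferPoolUsage(CommandBufferPool* pool,
                                          VkQueue queue, VkFence frame_fence) {
  pool->BeginBatch();
  VkCommandBuffer command_buffer = pool->AcquireEntry();
  VkCommandBufferBeginInfo begin_info = {};
  begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
  begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
  vkBeginCommandBuffer(command_buffer, &begin_info);
  // ... record GPU work here ...
  vkEndCommandBuffer(command_buffer);
  VkSubmitInfo submit_info = {};
  submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
  submit_info.commandBufferCount = 1;
  submit_info.pCommandBuffers = &command_buffer;
  vkQueueSubmit(queue, 1, &submit_info, frame_fence);
  pool->EndBatch(frame_fence);
  // Later, typically once per frame:
  pool->Scavenge();
}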
} // namespace vulkan
} // namespace ui
} // namespace xe
#endif // XENIA_UI_VULKAN_FENCED_POOLS_H_