Not crashing (but also likely not working) EDRAM emulation.
This commit is contained in:
parent
8820c73532
commit
de1e4661ff
|
@ -13,6 +13,7 @@
|
||||||
|
|
||||||
#include "third_party/stb/stb_image_write.h"
|
#include "third_party/stb/stb_image_write.h"
|
||||||
#include "xenia/base/logging.h"
|
#include "xenia/base/logging.h"
|
||||||
|
#include "xenia/base/profiling.h"
|
||||||
#include "xenia/base/string.h"
|
#include "xenia/base/string.h"
|
||||||
#include "xenia/base/threading.h"
|
#include "xenia/base/threading.h"
|
||||||
#include "xenia/gpu/command_processor.h"
|
#include "xenia/gpu/command_processor.h"
|
||||||
|
@ -189,10 +190,16 @@ void TraceDump::Run() {
|
||||||
});
|
});
|
||||||
|
|
||||||
xe::threading::Fence capture_fence;
|
xe::threading::Fence capture_fence;
|
||||||
|
bool did_capture = false;
|
||||||
loop_->PostDelayed(
|
loop_->PostDelayed(
|
||||||
[&]() {
|
[&]() {
|
||||||
// Capture.
|
// Capture.
|
||||||
auto raw_image = window_->context()->Capture();
|
auto raw_image = window_->context()->Capture();
|
||||||
|
if (!raw_image) {
|
||||||
|
// Failed to capture anything.
|
||||||
|
capture_fence.Signal();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Save framebuffer png.
|
// Save framebuffer png.
|
||||||
std::string png_path = xe::to_string(base_output_path_ + L".png");
|
std::string png_path = xe::to_string(base_output_path_ + L".png");
|
||||||
|
@ -201,6 +208,7 @@ void TraceDump::Run() {
|
||||||
raw_image->data.data(),
|
raw_image->data.data(),
|
||||||
static_cast<int>(raw_image->stride));
|
static_cast<int>(raw_image->stride));
|
||||||
|
|
||||||
|
did_capture = true;
|
||||||
capture_fence.Signal();
|
capture_fence.Signal();
|
||||||
},
|
},
|
||||||
50);
|
50);
|
||||||
|
@ -211,10 +219,13 @@ void TraceDump::Run() {
|
||||||
loop_->Quit();
|
loop_->Quit();
|
||||||
loop_->AwaitQuit();
|
loop_->AwaitQuit();
|
||||||
|
|
||||||
player_.reset();
|
Profiler::Shutdown();
|
||||||
emulator_.reset();
|
|
||||||
window_.reset();
|
window_.reset();
|
||||||
loop_.reset();
|
loop_.reset();
|
||||||
|
player_.reset();
|
||||||
|
emulator_.reset();
|
||||||
|
|
||||||
|
// TODO(benvanik): die if failed to capture?
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
|
|
|
@ -84,7 +84,7 @@ VulkanShader* PipelineCache::LoadShader(ShaderType shader_type,
|
||||||
}
|
}
|
||||||
|
|
||||||
bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer,
|
bool PipelineCache::ConfigurePipeline(VkCommandBuffer command_buffer,
|
||||||
VkRenderPass render_pass,
|
const RenderState* render_state,
|
||||||
VulkanShader* vertex_shader,
|
VulkanShader* vertex_shader,
|
||||||
VulkanShader* pixel_shader,
|
VulkanShader* pixel_shader,
|
||||||
PrimitiveType primitive_type) {
|
PrimitiveType primitive_type) {
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
|
|
||||||
#include "xenia/gpu/register_file.h"
|
#include "xenia/gpu/register_file.h"
|
||||||
#include "xenia/gpu/spirv_shader_translator.h"
|
#include "xenia/gpu/spirv_shader_translator.h"
|
||||||
|
#include "xenia/gpu/vulkan/render_cache.h"
|
||||||
#include "xenia/gpu/vulkan/vulkan_shader.h"
|
#include "xenia/gpu/vulkan/vulkan_shader.h"
|
||||||
#include "xenia/gpu/xenos.h"
|
#include "xenia/gpu/xenos.h"
|
||||||
#include "xenia/ui/spirv/spirv_disassembler.h"
|
#include "xenia/ui/spirv/spirv_disassembler.h"
|
||||||
|
@ -42,7 +43,8 @@ class PipelineCache {
|
||||||
// in the command buffer is issued at this time.
|
// in the command buffer is issued at this time.
|
||||||
// Returns whether the pipeline could be successfully created.
|
// Returns whether the pipeline could be successfully created.
|
||||||
bool ConfigurePipeline(VkCommandBuffer command_buffer,
|
bool ConfigurePipeline(VkCommandBuffer command_buffer,
|
||||||
VkRenderPass render_pass, VulkanShader* vertex_shader,
|
const RenderState* render_state,
|
||||||
|
VulkanShader* vertex_shader,
|
||||||
VulkanShader* pixel_shader,
|
VulkanShader* pixel_shader,
|
||||||
PrimitiveType primitive_type);
|
PrimitiveType primitive_type);
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,8 @@
|
||||||
|
|
||||||
#include "xenia/gpu/vulkan/render_cache.h"
|
#include "xenia/gpu/vulkan/render_cache.h"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
|
||||||
#include "xenia/base/logging.h"
|
#include "xenia/base/logging.h"
|
||||||
#include "xenia/base/math.h"
|
#include "xenia/base/math.h"
|
||||||
#include "xenia/base/memory.h"
|
#include "xenia/base/memory.h"
|
||||||
|
@ -20,55 +22,711 @@ namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace vulkan {
|
namespace vulkan {
|
||||||
|
|
||||||
|
using namespace xe::gpu::xenos;
|
||||||
using xe::ui::vulkan::CheckResult;
|
using xe::ui::vulkan::CheckResult;
|
||||||
|
|
||||||
|
// Size in bytes (10 MiB) of the buffer and device memory backing the emulated
// EDRAM; tile view images are asserted to fit within this range.
constexpr uint32_t kEdramBufferCapacity = 10 * 1024 * 1024;
|
||||||
|
|
||||||
|
// Translates a guest color render target format into the Vulkan format used
// for the image that backs it.
// Returns VK_FORMAT_UNDEFINED (after asserting) when the format has no mapping.
VkFormat ColorRenderTargetFormatToVkFormat(ColorRenderTargetFormat format) {
  switch (format) {
    case ColorRenderTargetFormat::k_8_8_8_8:
    case ColorRenderTargetFormat::k_8_8_8_8_GAMMA:
      return VK_FORMAT_R8G8B8A8_UNORM;
    case ColorRenderTargetFormat::k_2_10_10_10:
    case ColorRenderTargetFormat::k_2_10_10_10_unknown:
      return VK_FORMAT_A2R10G10B10_UNORM_PACK32;
    case ColorRenderTargetFormat::k_2_10_10_10_FLOAT:
    case ColorRenderTargetFormat::k_2_10_10_10_FLOAT_unknown:
      // WARNING: this is wrong, most likely - no float form in vulkan?
      XELOGW("Unsupported EDRAM format k_2_10_10_10_FLOAT used");
      return VK_FORMAT_A2R10G10B10_SSCALED_PACK32;
    case ColorRenderTargetFormat::k_16_16:
      return VK_FORMAT_R16G16_UNORM;
    case ColorRenderTargetFormat::k_16_16_16_16:
      return VK_FORMAT_R16G16B16A16_UNORM;
    case ColorRenderTargetFormat::k_16_16_FLOAT:
      return VK_FORMAT_R16G16_SFLOAT;
    case ColorRenderTargetFormat::k_16_16_16_16_FLOAT:
      return VK_FORMAT_R16G16B16A16_SFLOAT;
    case ColorRenderTargetFormat::k_32_FLOAT:
      return VK_FORMAT_R32_SFLOAT;
    case ColorRenderTargetFormat::k_32_32_FLOAT:
      return VK_FORMAT_R32G32_SFLOAT;
    default:
      // Name the value actually being switched on; there is no `key` in this
      // function's scope.
      assert_unhandled_case(format);
      return VK_FORMAT_UNDEFINED;
  }
}
|
||||||
|
|
||||||
|
// Translates a guest depth render target format into the Vulkan format used
// for the image that backs it.
// Returns VK_FORMAT_UNDEFINED when the format has no mapping.
VkFormat DepthRenderTargetFormatToVkFormat(DepthRenderTargetFormat format) {
  if (format == DepthRenderTargetFormat::kD24S8) {
    return VK_FORMAT_D24_UNORM_S8_UINT;
  }
  if (format == DepthRenderTargetFormat::kD24FS8) {
    // TODO(benvanik): some way to emulate? resolve-time flag?
    // Falls back to the fixed-point depth format after warning.
    XELOGW("Unsupported EDRAM format kD24FS8 used");
    return VK_FORMAT_D24_UNORM_S8_UINT;
  }
  return VK_FORMAT_UNDEFINED;
}
|
||||||
|
|
||||||
|
// Cached view into the EDRAM memory.
|
||||||
|
// The image is aliased to a region of the edram_memory_ based on the tile
|
||||||
|
// parameters.
|
||||||
|
// TODO(benvanik): reuse VkImage's with multiple VkViews for compatible
|
||||||
|
// formats?
|
||||||
|
class CachedTileView {
|
||||||
|
public:
|
||||||
|
// Key identifying the view in the cache.
|
||||||
|
TileViewKey key;
|
||||||
|
// Image mapped into EDRAM.
|
||||||
|
VkImage image = nullptr;
|
||||||
|
// Simple view on the image matching the format.
|
||||||
|
VkImageView image_view = nullptr;
|
||||||
|
|
||||||
|
CachedTileView(VkDevice device, VkDeviceMemory edram_memory,
|
||||||
|
TileViewKey view_key);
|
||||||
|
~CachedTileView();
|
||||||
|
|
||||||
|
bool IsEqual(const TileViewKey& other_key) const {
|
||||||
|
auto a = reinterpret_cast<const uint64_t*>(&key);
|
||||||
|
auto b = reinterpret_cast<const uint64_t*>(&other_key);
|
||||||
|
return *a == *b;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
VkDevice device_ = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Cached framebuffer referencing tile attachments.
|
||||||
|
// Each framebuffer is specific to a render pass. Ugh.
|
||||||
|
class CachedFramebuffer {
|
||||||
|
public:
|
||||||
|
// TODO(benvanik): optimized key? tile base + format for each?
|
||||||
|
|
||||||
|
// Framebuffer with the attachments ready for use in the parent render pass.
|
||||||
|
VkFramebuffer handle = nullptr;
|
||||||
|
// Width of the framebuffer in pixels.
|
||||||
|
uint32_t width = 0;
|
||||||
|
// Height of the framebuffer in pixels.
|
||||||
|
uint32_t height = 0;
|
||||||
|
// References to color attachments, if used.
|
||||||
|
CachedTileView* color_attachments[4] = {nullptr};
|
||||||
|
// Reference to depth/stencil attachment, if used.
|
||||||
|
CachedTileView* depth_stencil_attachment = nullptr;
|
||||||
|
|
||||||
|
CachedFramebuffer(VkDevice device, VkRenderPass render_pass,
|
||||||
|
uint32_t surface_width, uint32_t surface_height,
|
||||||
|
CachedTileView* target_color_attachments[4],
|
||||||
|
CachedTileView* target_depth_stencil_attachment);
|
||||||
|
~CachedFramebuffer();
|
||||||
|
|
||||||
|
bool IsCompatible(const RenderConfiguration& desired_config) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
VkDevice device_ = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Cached render passes based on register states.
|
||||||
|
// Each render pass is dependent on the format, dimensions, and use of
|
||||||
|
// all attachments. The same render pass can be reused for multiple
|
||||||
|
// framebuffers pointing at various tile views, though those cached
|
||||||
|
// framebuffers are specific to the render pass.
|
||||||
|
class CachedRenderPass {
|
||||||
|
public:
|
||||||
|
// Configuration this pass was created with.
|
||||||
|
RenderConfiguration config;
|
||||||
|
// Initialized render pass for the register state.
|
||||||
|
VkRenderPass handle = nullptr;
|
||||||
|
// Cache of framebuffers for the various tile attachments.
|
||||||
|
std::vector<CachedFramebuffer*> cached_framebuffers;
|
||||||
|
|
||||||
|
CachedRenderPass(VkDevice device, const RenderConfiguration& desired_config);
|
||||||
|
~CachedRenderPass();
|
||||||
|
|
||||||
|
bool IsCompatible(const RenderConfiguration& desired_config) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
VkDevice device_ = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Creates an image for the tile region described by view_key, binds it into
// edram_memory at the tile's offset, and creates a view over it.
CachedTileView::CachedTileView(VkDevice device, VkDeviceMemory edram_memory,
                               TileViewKey view_key)
    : key(std::move(view_key)), device_(device) {
  const bool is_color = key.color_or_depth;

  // Resolve the Vulkan format and the bytes-per-pixel of the guest format.
  VkFormat vulkan_format = VK_FORMAT_UNDEFINED;
  uint32_t bpp = 4;
  if (is_color) {
    const auto color_format =
        static_cast<ColorRenderTargetFormat>(key.edram_format);
    vulkan_format = ColorRenderTargetFormatToVkFormat(color_format);
    const bool is_64bpp =
        color_format == ColorRenderTargetFormat::k_16_16_16_16 ||
        color_format == ColorRenderTargetFormat::k_16_16_16_16_FLOAT ||
        color_format == ColorRenderTargetFormat::k_32_32_FLOAT;
    bpp = is_64bpp ? 8 : 4;
  } else {
    const auto depth_format =
        static_cast<DepthRenderTargetFormat>(key.edram_format);
    vulkan_format = DepthRenderTargetFormatToVkFormat(depth_format);
  }
  assert_true(vulkan_format != VK_FORMAT_UNDEFINED);
  // Only 32bpp formats pass this check.
  assert_true(bpp == 4);

  // Describe the image.
  VkImageCreateInfo image_info;
  image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
  image_info.pNext = nullptr;
  // TODO(benvanik): exploit VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT so we can have
  //                 multiple views.
  image_info.flags = 0;
  image_info.imageType = VK_IMAGE_TYPE_2D;
  image_info.format = vulkan_format;
  // The key's tile dimensions are scaled by 80x16 to get the pixel extent.
  image_info.extent.width = key.tile_width * 80;
  image_info.extent.height = key.tile_height * 16;
  image_info.extent.depth = 1;
  image_info.mipLevels = 1;
  image_info.arrayLayers = 1;
  // TODO(benvanik): native MSAA support?
  image_info.samples = VK_SAMPLE_COUNT_1_BIT;
  image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
  image_info.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                     VK_IMAGE_USAGE_TRANSFER_DST_BIT |
                     VK_IMAGE_USAGE_SAMPLED_BIT;
  if (is_color) {
    image_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
  } else {
    image_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
  }
  image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  image_info.queueFamilyIndexCount = 0;
  image_info.pQueueFamilyIndices = nullptr;
  image_info.initialLayout = VK_IMAGE_LAYOUT_PREINITIALIZED;
  auto err = vkCreateImage(device_, &image_info, nullptr, &image);
  CheckResult(err, "vkCreateImage");

  // Sanity check that the image fits inside EDRAM at its tile offset and that
  // the offset satisfies the image's alignment requirement.
  VkMemoryRequirements memory_requirements;
  vkGetImageMemoryRequirements(device, image, &memory_requirements);
  VkDeviceSize edram_offset = key.tile_offset * 5120;
  assert_true(edram_offset + memory_requirements.size <= kEdramBufferCapacity);
  assert_true(edram_offset % memory_requirements.alignment == 0);

  // Place the image over its region of EDRAM.
  err = vkBindImageMemory(device_, image, edram_memory, edram_offset);
  CheckResult(err, "vkBindImageMemory");

  // Build the view that framebuffers will attach.
  VkImageViewCreateInfo image_view_info;
  image_view_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
  image_view_info.pNext = nullptr;
  image_view_info.flags = 0;
  image_view_info.image = image;
  image_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
  image_view_info.format = image_info.format;
  // TODO(benvanik): manipulate? may not be able to when attached.
  image_view_info.components.r = VK_COMPONENT_SWIZZLE_R;
  image_view_info.components.g = VK_COMPONENT_SWIZZLE_G;
  image_view_info.components.b = VK_COMPONENT_SWIZZLE_B;
  image_view_info.components.a = VK_COMPONENT_SWIZZLE_A;
  if (is_color) {
    image_view_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
  } else {
    image_view_info.subresourceRange.aspectMask =
        VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
  }
  image_view_info.subresourceRange.baseMipLevel = 0;
  image_view_info.subresourceRange.levelCount = 1;
  image_view_info.subresourceRange.baseArrayLayer = 0;
  image_view_info.subresourceRange.layerCount = 1;
  err = vkCreateImageView(device_, &image_view_info, nullptr, &image_view);
  CheckResult(err, "vkCreateImageView");

  // TODO(benvanik): transition to general layout?
}
|
||||||
|
|
||||||
|
// Destroys the view first, then the image it was created from.
CachedTileView::~CachedTileView() {
  vkDestroyImageView(device_, image_view, nullptr);
  vkDestroyImage(device_, image, nullptr);
}
|
||||||
|
|
||||||
|
// Creates a framebuffer for render_pass of the given pixel dimensions from
// the provided tile views. Null entries in target_color_attachments and a
// null target_depth_stencil_attachment are treated as unused.
CachedFramebuffer::CachedFramebuffer(
    VkDevice device, VkRenderPass render_pass, uint32_t surface_width,
    uint32_t surface_height, CachedTileView* target_color_attachments[4],
    CachedTileView* target_depth_stencil_attachment)
    : width(surface_width),
      height(surface_height),
      depth_stencil_attachment(target_depth_stencil_attachment),
      device_(device) {
  for (int i = 0; i < 4; ++i) {
    color_attachments[i] = target_color_attachments[i];
  }

  // Collect the views of all attachments that are present, color first and
  // depth/stencil last.
  // NOTE(review): CachedRenderPass always declares five attachments, while
  // unused slots are skipped here; confirm callers always supply all five.
  VkImageView image_views[5] = {nullptr};
  uint32_t image_view_count = 0;
  for (int i = 0; i < 4; ++i) {
    if (color_attachments[i]) {
      image_views[image_view_count++] = color_attachments[i]->image_view;
    }
  }
  if (depth_stencil_attachment) {
    image_views[image_view_count++] = depth_stencil_attachment->image_view;
  }

  // Create framebuffer.
  VkFramebufferCreateInfo framebuffer_info;
  framebuffer_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
  framebuffer_info.pNext = nullptr;
  // Must be explicitly zeroed; the struct is otherwise uninitialized.
  framebuffer_info.flags = 0;
  framebuffer_info.renderPass = render_pass;
  framebuffer_info.attachmentCount = image_view_count;
  framebuffer_info.pAttachments = image_views;
  framebuffer_info.width = width;
  framebuffer_info.height = height;
  framebuffer_info.layers = 1;
  auto err = vkCreateFramebuffer(device_, &framebuffer_info, nullptr, &handle);
  CheckResult(err, "vkCreateFramebuffer");
}
|
||||||
|
|
||||||
|
// Destroys the framebuffer handle; the attached tile views are not touched.
CachedFramebuffer::~CachedFramebuffer() {
  vkDestroyFramebuffer(device_, handle, nullptr);
}
|
||||||
|
|
||||||
|
bool CachedFramebuffer::IsCompatible(
|
||||||
|
const RenderConfiguration& desired_config) const {
|
||||||
|
// We already know all render pass things line up, so let's verify dimensions,
|
||||||
|
// edram offsets, etc. We need an exact match.
|
||||||
|
// TODO(benvanik): separate image views from images in tiles and store in fb?
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
// Ensure the the attachment points to the same tile.
|
||||||
|
if (!color_attachments[i]) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto& color_info = color_attachments[i]->key;
|
||||||
|
auto& desired_color_info = desired_config.color[i];
|
||||||
|
if (color_info.tile_offset != desired_color_info.edram_base ||
|
||||||
|
color_info.edram_format !=
|
||||||
|
static_cast<uint16_t>(desired_color_info.format)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Ensure depth attachment is correct.
|
||||||
|
if (depth_stencil_attachment &&
|
||||||
|
(depth_stencil_attachment->key.tile_offset !=
|
||||||
|
desired_config.depth_stencil.edram_base ||
|
||||||
|
depth_stencil_attachment->key.edram_format !=
|
||||||
|
static_cast<uint16_t>(desired_config.depth_stencil.format))) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds a single-subpass render pass with four color attachments and one
// depth/stencil attachment whose formats come from desired_config. A copy of
// desired_config is kept in `config`.
CachedRenderPass::CachedRenderPass(VkDevice device,
                                   const RenderConfiguration& desired_config)
    : device_(device) {
  std::memcpy(&config, &desired_config, sizeof(config));

  // Slots 0-3 are color and slot 4 is depth/stencil. All of them load and
  // store their contents and stay in the GENERAL layout.
  // As we set layout(location=RT) in shaders we must always provide 4.
  VkAttachmentDescription attachments[5];
  for (int i = 0; i < 5; ++i) {
    attachments[i].flags = 0;
    attachments[i].format = VK_FORMAT_UNDEFINED;
    attachments[i].samples = VK_SAMPLE_COUNT_1_BIT;
    attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
    attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
    attachments[i].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
    attachments[i].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
    attachments[i].initialLayout = VK_IMAGE_LAYOUT_GENERAL;
    attachments[i].finalLayout = VK_IMAGE_LAYOUT_GENERAL;
  }

  // Configure color attachments from the copied configuration.
  VkAttachmentReference color_attachment_refs[4];
  for (int i = 0; i < 4; ++i) {
    auto& color_config = config.color[i];
    // TODO(benvanik): see how loose we can be with these.
    attachments[i].format =
        ColorRenderTargetFormatToVkFormat(color_config.format);
    auto& color_attachment_ref = color_attachment_refs[i];
    color_attachment_ref.attachment = i;
    color_attachment_ref.layout = VK_IMAGE_LAYOUT_GENERAL;
  }

  // Configure the depth/stencil attachment (always slot 4).
  auto& depth_config = config.depth_stencil;
  auto& depth_stencil_attachment = attachments[4];
  depth_stencil_attachment.format =
      DepthRenderTargetFormatToVkFormat(depth_config.format);
  VkAttachmentReference depth_stencil_attachment_ref;
  depth_stencil_attachment_ref.attachment = 4;
  depth_stencil_attachment_ref.layout = VK_IMAGE_LAYOUT_GENERAL;

  // Single subpass that writes to our attachments.
  VkSubpassDescription subpass_info;
  subpass_info.flags = 0;
  subpass_info.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
  subpass_info.inputAttachmentCount = 0;
  subpass_info.pInputAttachments = nullptr;
  subpass_info.colorAttachmentCount = 4;
  subpass_info.pColorAttachments = color_attachment_refs;
  subpass_info.pResolveAttachments = nullptr;
  subpass_info.pDepthStencilAttachment = &depth_stencil_attachment_ref;
  subpass_info.preserveAttachmentCount = 0;
  subpass_info.pPreserveAttachments = nullptr;

  // Create the render pass.
  VkRenderPassCreateInfo render_pass_info;
  render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
  render_pass_info.pNext = nullptr;
  // Must be explicitly zeroed; the struct is otherwise uninitialized.
  render_pass_info.flags = 0;
  render_pass_info.attachmentCount = 5;
  render_pass_info.pAttachments = attachments;
  render_pass_info.subpassCount = 1;
  render_pass_info.pSubpasses = &subpass_info;
  render_pass_info.dependencyCount = 0;
  render_pass_info.pDependencies = nullptr;
  auto err = vkCreateRenderPass(device_, &render_pass_info, nullptr, &handle);
  CheckResult(err, "vkCreateRenderPass");
}
|
||||||
|
|
||||||
|
// Deletes every framebuffer cached on this pass, then destroys the pass.
CachedRenderPass::~CachedRenderPass() {
  // Framebuffers were created against this pass, so they go first.
  for (CachedFramebuffer* cached_framebuffer : cached_framebuffers) {
    delete cached_framebuffer;
  }
  cached_framebuffers.clear();

  vkDestroyRenderPass(device_, handle, nullptr);
}
|
||||||
|
|
||||||
|
bool CachedRenderPass::IsCompatible(
|
||||||
|
const RenderConfiguration& desired_config) const {
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
// TODO(benvanik): allow compatible vulkan formats.
|
||||||
|
if (config.color[i].format != desired_config.color[i].format) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (config.depth_stencil.format != desired_config.depth_stencil.format) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// Creates the EDRAM buffer, allocates device memory usable by both that
// buffer and render target images, and binds the buffer over the whole
// allocation.
RenderCache::RenderCache(RegisterFile* register_file,
                         ui::vulkan::VulkanDevice* device)
    : register_file_(register_file), device_(*device) {
  // Create the buffer we'll bind to our memory.
  // We do this first so we can get the right memory type.
  VkBufferCreateInfo buffer_info;
  buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
  buffer_info.pNext = nullptr;
  buffer_info.flags = 0;
  buffer_info.size = kEdramBufferCapacity;
  buffer_info.usage =
      VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT;
  buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  buffer_info.queueFamilyIndexCount = 0;
  buffer_info.pQueueFamilyIndices = nullptr;
  auto err = vkCreateBuffer(*device, &buffer_info, nullptr, &edram_buffer_);
  CheckResult(err, "vkCreateBuffer");

  // Query requirements for the buffer.
  // It should be 1:1.
  VkMemoryRequirements buffer_requirements;
  vkGetBufferMemoryRequirements(device_, edram_buffer_, &buffer_requirements);
  assert_true(buffer_requirements.size == kEdramBufferCapacity);

  // Create a dummy image so we can see what memory bits it requires.
  // They should overlap with the buffer requirements but are likely more
  // strict.
  VkImageCreateInfo test_image_info;
  test_image_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
  test_image_info.pNext = nullptr;
  test_image_info.flags = 0;
  test_image_info.imageType = VK_IMAGE_TYPE_2D;
  test_image_info.format = VK_FORMAT_R8G8B8A8_UINT;
  test_image_info.extent.width = 128;
  test_image_info.extent.height = 128;
  test_image_info.extent.depth = 1;
  test_image_info.mipLevels = 1;
  test_image_info.arrayLayers = 1;
  test_image_info.samples = VK_SAMPLE_COUNT_1_BIT;
  test_image_info.tiling = VK_IMAGE_TILING_OPTIMAL;
  test_image_info.usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
  test_image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
  test_image_info.queueFamilyIndexCount = 0;
  test_image_info.pQueueFamilyIndices = nullptr;
  // Images may only be created in the UNDEFINED or PREINITIALIZED layout;
  // this image is never used beyond the requirements query below.
  test_image_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
  VkImage test_image = nullptr;
  err = vkCreateImage(device_, &test_image_info, nullptr, &test_image);
  CheckResult(err, "vkCreateImage");
  VkMemoryRequirements image_requirements;
  vkGetImageMemoryRequirements(device_, test_image, &image_requirements);
  vkDestroyImage(device_, test_image, nullptr);

  // The allocation backs both the buffer and images, so only memory types
  // acceptable to both may be used.
  const uint32_t shared_memory_type_bits =
      image_requirements.memoryTypeBits & buffer_requirements.memoryTypeBits;
  assert_true(shared_memory_type_bits != 0);

  // Allocate EDRAM memory.
  VkMemoryRequirements memory_requirements;
  memory_requirements.size = buffer_requirements.size;
  memory_requirements.alignment = buffer_requirements.alignment;
  memory_requirements.memoryTypeBits = shared_memory_type_bits;
  // TODO(benvanik): do we need it host visible?
  edram_memory_ = device->AllocateMemory(memory_requirements, 0);

  // Bind buffer to map our entire memory.
  err = vkBindBufferMemory(device_, edram_buffer_, edram_memory_, 0);
  CheckResult(err, "vkBindBufferMemory");
}
|
||||||
|
|
||||||
|
// Tears down all cached objects, then the EDRAM buffer and its memory.
RenderCache::~RenderCache() {
  // TODO(benvanik): wait for idle.

  // Render passes first; each one deletes the framebuffers cached on it.
  for (auto cached_pass : cached_render_passes_) {
    delete cached_pass;
  }
  cached_render_passes_.clear();

  // Then the tile views those framebuffers referenced.
  for (auto cached_view : cached_tile_views_) {
    delete cached_view;
  }
  cached_tile_views_.clear();

  // Finally the buffer and the EDRAM allocation it was bound to.
  vkDestroyBuffer(device_, edram_buffer_, nullptr);
  vkFreeMemory(device_, edram_memory_, nullptr);
}
|
||||||
|
|
||||||
|
const RenderState* RenderCache::BeginRenderPass(VkCommandBuffer command_buffer,
|
||||||
VulkanShader* vertex_shader,
|
VulkanShader* vertex_shader,
|
||||||
VulkanShader* pixel_shader) {
|
VulkanShader* pixel_shader) {
|
||||||
assert_null(current_command_buffer_);
|
assert_null(current_command_buffer_);
|
||||||
current_command_buffer_ = command_buffer;
|
current_command_buffer_ = command_buffer;
|
||||||
|
|
||||||
// Lookup or construct a render pass compatible with our current state.
|
// Lookup or construct a render pass compatible with our current state.
|
||||||
VkRenderPass render_pass = nullptr;
|
auto config = ¤t_state_.config;
|
||||||
|
CachedRenderPass* render_pass = nullptr;
|
||||||
|
CachedFramebuffer* framebuffer = nullptr;
|
||||||
|
auto& regs = shadow_registers_;
|
||||||
|
bool dirty = false;
|
||||||
|
dirty |= SetShadowRegister(®s.rb_modecontrol, XE_GPU_REG_RB_MODECONTROL);
|
||||||
|
dirty |= SetShadowRegister(®s.rb_surface_info, XE_GPU_REG_RB_SURFACE_INFO);
|
||||||
|
dirty |= SetShadowRegister(®s.rb_color_info, XE_GPU_REG_RB_COLOR_INFO);
|
||||||
|
dirty |= SetShadowRegister(®s.rb_color1_info, XE_GPU_REG_RB_COLOR1_INFO);
|
||||||
|
dirty |= SetShadowRegister(®s.rb_color2_info, XE_GPU_REG_RB_COLOR2_INFO);
|
||||||
|
dirty |= SetShadowRegister(®s.rb_color3_info, XE_GPU_REG_RB_COLOR3_INFO);
|
||||||
|
dirty |= SetShadowRegister(®s.rb_depth_info, XE_GPU_REG_RB_DEPTH_INFO);
|
||||||
|
dirty |= SetShadowRegister(®s.pa_sc_window_scissor_tl,
|
||||||
|
XE_GPU_REG_PA_SC_WINDOW_SCISSOR_TL);
|
||||||
|
dirty |= SetShadowRegister(®s.pa_sc_window_scissor_br,
|
||||||
|
XE_GPU_REG_PA_SC_WINDOW_SCISSOR_BR);
|
||||||
|
if (!dirty && current_state_.render_pass) {
|
||||||
|
// No registers have changed so we can reuse the previous render pass -
|
||||||
|
// just begin with what we had.
|
||||||
|
render_pass = current_state_.render_pass;
|
||||||
|
framebuffer = current_state_.framebuffer;
|
||||||
|
} else {
|
||||||
|
// Re-parse configuration.
|
||||||
|
if (!ParseConfiguration(config)) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
// Begin render pass.
|
// Lookup or generate a new render pass and framebuffer for the new state.
|
||||||
|
if (!ConfigureRenderPass(config, &render_pass, &framebuffer)) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
current_state_.render_pass = render_pass;
|
||||||
|
current_state_.framebuffer = framebuffer;
|
||||||
|
}
|
||||||
|
if (!render_pass) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Setup render pass in command buffer.
|
||||||
|
// This is meant to preserve previous contents as we may be called
|
||||||
|
// repeatedly.
|
||||||
VkRenderPassBeginInfo render_pass_begin_info;
|
VkRenderPassBeginInfo render_pass_begin_info;
|
||||||
render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
|
render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
|
||||||
render_pass_begin_info.pNext = nullptr;
|
render_pass_begin_info.pNext = nullptr;
|
||||||
render_pass_begin_info.renderPass = render_pass;
|
render_pass_begin_info.renderPass = render_pass->handle;
|
||||||
|
render_pass_begin_info.framebuffer = framebuffer->handle;
|
||||||
// Target framebuffer.
|
|
||||||
// render_pass_begin_info.framebuffer = current_buffer.framebuffer;
|
|
||||||
|
|
||||||
// Render into the entire buffer (or at least tell the API we are doing
|
// Render into the entire buffer (or at least tell the API we are doing
|
||||||
// this). In theory it'd be better to clip this to the scissor region, but
|
// this). In theory it'd be better to clip this to the scissor region, but
|
||||||
// the docs warn anything but the full framebuffer may be slow.
|
// the docs warn anything but the full framebuffer may be slow.
|
||||||
render_pass_begin_info.renderArea.offset.x = 0;
|
render_pass_begin_info.renderArea.offset.x = 0;
|
||||||
render_pass_begin_info.renderArea.offset.y = 0;
|
render_pass_begin_info.renderArea.offset.y = 0;
|
||||||
// render_pass_begin_info.renderArea.extent.width = surface_width_;
|
render_pass_begin_info.renderArea.extent.width = config->surface_pitch_px;
|
||||||
// render_pass_begin_info.renderArea.extent.height = surface_height_;
|
render_pass_begin_info.renderArea.extent.height = config->surface_height_px;
|
||||||
|
|
||||||
// Configure clear color, if clearing.
|
// Configure clear color, if clearing.
|
||||||
VkClearValue color_clear_value;
|
// TODO(benvanik): enable clearing here during resolve?
|
||||||
color_clear_value.color.float32[0] = 238 / 255.0f;
|
render_pass_begin_info.clearValueCount = 0;
|
||||||
color_clear_value.color.float32[1] = 238 / 255.0f;
|
render_pass_begin_info.pClearValues = nullptr;
|
||||||
color_clear_value.color.float32[2] = 238 / 255.0f;
|
|
||||||
color_clear_value.color.float32[3] = 1.0f;
|
|
||||||
VkClearValue clear_values[] = {color_clear_value};
|
|
||||||
render_pass_begin_info.clearValueCount =
|
|
||||||
static_cast<uint32_t>(xe::countof(clear_values));
|
|
||||||
render_pass_begin_info.pClearValues = clear_values;
|
|
||||||
|
|
||||||
|
// Begin the render pass.
|
||||||
vkCmdBeginRenderPass(command_buffer, &render_pass_begin_info,
|
vkCmdBeginRenderPass(command_buffer, &render_pass_begin_info,
|
||||||
VK_SUBPASS_CONTENTS_INLINE);
|
VK_SUBPASS_CONTENTS_INLINE);
|
||||||
|
|
||||||
return render_pass;
|
return &current_state_;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decodes the shadowed register values into |config|.
//
// Fills in the mode control, surface pitch/MSAA, an estimated surface height
// and the color and depth/stencil attachment descriptions. Attachments that
// are not used by the current mode get an EDRAM base of 0 and a default
// format (k_8_8_8_8 for color, kD24S8 for depth/stencil).
// Always returns true.
bool RenderCache::ParseConfiguration(RenderConfiguration* config) {
  const auto& shadow = shadow_registers_;

  // RB_MODECONTROL
  // Rough mode control (color, color+depth, etc) lives in the low 3 bits.
  const auto mode = static_cast<ModeControl>(shadow.rb_modecontrol & 0x7);
  config->mode_control = mode;
  const bool uses_color = mode == ModeControl::kColorDepth;
  const bool uses_depth = uses_color || mode == ModeControl::kDepth;

  // RB_SURFACE_INFO
  // http://fossies.org/dox/MesaLib-10.3.5/fd2__gmem_8c_source.html
  const uint32_t surface_info = shadow.rb_surface_info;
  config->surface_pitch_px = surface_info & 0x3FFF;
  config->surface_msaa = static_cast<MsaaSamples>((surface_info >> 16) & 0x3);

  // TODO(benvanik): verify min/max so we don't go out of bounds.
  // TODO(benvanik): has to be a good way to get height.
  // The height is guessed from the window scissor height. It's wildly
  // inaccurate, but has never been seen to be bigger than the EDRAM tiling.
  const uint32_t scissor_top = (shadow.pa_sc_window_scissor_tl >> 16) & 0x7FFF;
  const uint32_t scissor_bottom =
      (shadow.pa_sc_window_scissor_br >> 16) & 0x7FFF;
  uint32_t scissor_height = scissor_bottom - scissor_top;
  config->surface_height_px =
      std::min(2560u, xe::round_up(scissor_height, 16));

  // Color attachment configuration.
  const uint32_t color_registers[4] = {
      shadow.rb_color_info, shadow.rb_color1_info, shadow.rb_color2_info,
      shadow.rb_color3_info,
  };
  for (int target_index = 0; target_index < 4; ++target_index) {
    auto& target = config->color[target_index];
    if (!uses_color) {
      // No color output in this mode; use placeholder values.
      target.edram_base = 0;
      target.format = ColorRenderTargetFormat::k_8_8_8_8;
      continue;
    }
    const uint32_t info = color_registers[target_index];
    target.edram_base = info & 0xFFF;
    auto format = static_cast<ColorRenderTargetFormat>((info >> 16) & 0xF);
    // We don't support GAMMA formats, so switch them to what we do support.
    if (format == ColorRenderTargetFormat::k_8_8_8_8_GAMMA) {
      format = ColorRenderTargetFormat::k_8_8_8_8;
    }
    target.format = format;
  }

  // Depth/stencil attachment configuration.
  if (uses_depth) {
    const uint32_t depth_info = shadow.rb_depth_info;
    config->depth_stencil.edram_base = depth_info & 0xFFF;
    config->depth_stencil.format =
        static_cast<DepthRenderTargetFormat>((depth_info >> 16) & 0x1);
  } else {
    config->depth_stencil.edram_base = 0;
    config->depth_stencil.format = DepthRenderTargetFormat::kD24S8;
  }

  return true;
}
|
||||||
|
|
||||||
|
// Gets or creates the render pass and framebuffer matching |config|.
//
// The render pass cache is searched linearly for an entry compatible with
// |config|; if none is found a new CachedRenderPass is created and added to
// the cache. The same is then done for the framebuffer within that render
// pass, creating tile views for the 4 color attachments and the depth/stencil
// attachment when a new framebuffer is needed.
//
// |out_render_pass| and |out_framebuffer| are reset to nullptr on entry and
// are only assigned on success. Returns false if any required tile view could
// not be obtained, true otherwise.
bool RenderCache::ConfigureRenderPass(RenderConfiguration* config,
                                      CachedRenderPass** out_render_pass,
                                      CachedFramebuffer** out_framebuffer) {
  *out_render_pass = nullptr;
  *out_framebuffer = nullptr;

  // TODO(benvanik): better lookup.
  // Attempt to find the render pass in our cache.
  CachedRenderPass* render_pass = nullptr;
  for (auto cached_render_pass : cached_render_passes_) {
    if (cached_render_pass->IsCompatible(*config)) {
      // Found a match.
      render_pass = cached_render_pass;
      break;
    }
  }

  // If no render pass was found in the cache create a new one.
  if (!render_pass) {
    render_pass = new CachedRenderPass(device_, *config);
    cached_render_passes_.push_back(render_pass);
  }

  // TODO(benvanik): better lookup.
  // Attempt to find the framebuffer in the render pass cache.
  CachedFramebuffer* framebuffer = nullptr;
  for (auto cached_framebuffer : render_pass->cached_framebuffers) {
    if (cached_framebuffer->IsCompatible(*config)) {
      // Found a match.
      framebuffer = cached_framebuffer;
      break;
    }
  }

  // If no framebuffer was found in the cache create a new one.
  if (!framebuffer) {
    CachedTileView* target_color_attachments[4] = {nullptr, nullptr, nullptr,
                                                   nullptr};
    for (int i = 0; i < 4; ++i) {
      // Tile dimensions are expressed in base 80x16 tiles.
      TileViewKey color_key;
      color_key.tile_offset = config->color[i].edram_base;
      color_key.tile_width = config->surface_pitch_px / 80;
      color_key.tile_height = config->surface_height_px / 16;
      color_key.color_or_depth = 1;
      color_key.edram_format = static_cast<uint16_t>(config->color[i].format);
      target_color_attachments[i] = GetTileView(color_key);
      // Check the element just fetched: the array itself decays to a non-null
      // pointer, so testing it would never detect a failure.
      if (!target_color_attachments[i]) {
        XELOGE("Failed to get tile view for color attachment");
        return false;
      }
    }

    TileViewKey depth_stencil_key;
    depth_stencil_key.tile_offset = config->depth_stencil.edram_base;
    depth_stencil_key.tile_width = config->surface_pitch_px / 80;
    depth_stencil_key.tile_height = config->surface_height_px / 16;
    depth_stencil_key.color_or_depth = 0;
    depth_stencil_key.edram_format =
        static_cast<uint16_t>(config->depth_stencil.format);
    auto target_depth_stencil_attachment = GetTileView(depth_stencil_key);
    if (!target_depth_stencil_attachment) {
      XELOGE("Failed to get tile view for depth/stencil attachment");
      return false;
    }

    framebuffer = new CachedFramebuffer(
        device_, render_pass->handle, config->surface_pitch_px,
        config->surface_height_px, target_color_attachments,
        target_depth_stencil_attachment);
    render_pass->cached_framebuffers.push_back(framebuffer);
  }

  *out_render_pass = render_pass;
  *out_framebuffer = framebuffer;
  return true;
}
|
||||||
|
|
||||||
|
// Returns the cached tile view equal to |view_key|, creating a new one and
// adding it to the cache when no existing entry matches.
CachedTileView* RenderCache::GetTileView(const TileViewKey& view_key) {
  // TODO(benvanik): better lookup.
  // Linear scan of the existing views for an exact key match.
  for (auto it = cached_tile_views_.begin(); it != cached_tile_views_.end();
       ++it) {
    CachedTileView* candidate = *it;
    if (candidate->IsEqual(view_key)) {
      return candidate;
    }
  }

  // Nothing matched: build a fresh view and remember it for next time.
  CachedTileView* created =
      new CachedTileView(device_, edram_memory_, view_key);
  cached_tile_views_.push_back(created);
  return created;
}
|
||||||
|
|
||||||
void RenderCache::EndRenderPass() {
|
void RenderCache::EndRenderPass() {
|
||||||
|
@ -84,6 +742,15 @@ void RenderCache::ClearCache() {
|
||||||
// TODO(benvanik): caching.
|
// TODO(benvanik): caching.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool RenderCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) {
|
||||||
|
uint32_t value = register_file_->values[register_name].u32;
|
||||||
|
if (*dest == value) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
*dest = value;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace vulkan
|
} // namespace vulkan
|
||||||
} // namespace gpu
|
} // namespace gpu
|
||||||
} // namespace xe
|
} // namespace xe
|
||||||
|
|
|
@ -21,6 +21,58 @@ namespace xe {
|
||||||
namespace gpu {
|
namespace gpu {
|
||||||
namespace vulkan {
|
namespace vulkan {
|
||||||
|
|
||||||
|
// TODO(benvanik): make public API?
|
||||||
|
class CachedTileView;
|
||||||
|
class CachedFramebuffer;
|
||||||
|
class CachedRenderPass;
|
||||||
|
|
||||||
|
// Uniquely identifies a view over EDRAM tiles; RenderCache::GetTileView looks up cached views by this key.
|
||||||
|
struct TileViewKey {
|
||||||
|
// Offset of the view into EDRAM, in 5120b tiles.
|
||||||
|
uint16_t tile_offset;
|
||||||
|
// Width of the view in base 80x16 tiles (surface pitch in pixels / 80).
|
||||||
|
uint16_t tile_width;
|
||||||
|
// Height of the view in base 80x16 tiles (surface height in pixels / 16).
|
||||||
|
uint16_t tile_height;
|
||||||
|
// 1 if edram_format is a ColorRenderTargetFormat, 0 if it is a DepthRenderTargetFormat.
|
||||||
|
uint16_t color_or_depth : 1;
|
||||||
|
// Either a ColorRenderTargetFormat or a DepthRenderTargetFormat value, as selected by color_or_depth.
|
||||||
|
uint16_t edram_format : 15;
|
||||||
|
};
|
||||||
|
static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed");
|
||||||
|
|
||||||
|
// Render configuration parsed from the shadowed registers (filled in by RenderCache::ParseConfiguration).
|
||||||
|
struct RenderConfiguration {
|
||||||
|
// Render mode (color+depth, depth-only, etc); taken from the low 3 bits of RB_MODECONTROL.
|
||||||
|
xenos::ModeControl mode_control;
|
||||||
|
// Target surface pitch, in pixels; taken from the low 14 bits of RB_SURFACE_INFO.
|
||||||
|
uint32_t surface_pitch_px;
|
||||||
|
// ESTIMATED target surface height, in pixels: the window scissor height rounded up to 16 and capped at 2560.
|
||||||
|
uint32_t surface_height_px;
|
||||||
|
// Surface MSAA setting; taken from bits 16-17 of RB_SURFACE_INFO.
|
||||||
|
MsaaSamples surface_msaa;
|
||||||
|
// Color attachments for the 4 render targets (EDRAM base and format from RB_COLOR*_INFO).
|
||||||
|
struct {
|
||||||
|
uint32_t edram_base;
|
||||||
|
ColorRenderTargetFormat format;
|
||||||
|
} color[4];
|
||||||
|
// Depth/stencil attachment (EDRAM base and format from RB_DEPTH_INFO).
|
||||||
|
struct {
|
||||||
|
uint32_t edram_base;
|
||||||
|
DepthRenderTargetFormat format;
|
||||||
|
} depth_stencil;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Current render state based on the register-specified configuration; BeginRenderPass returns a pointer to one of these.
|
||||||
|
struct RenderState {
|
||||||
|
// Configuration parsed from the shadowed registers.
|
||||||
|
RenderConfiguration config;
|
||||||
|
// Render pass (to be used with pipelines/etc); initially nullptr.
|
||||||
|
CachedRenderPass* render_pass = nullptr;
|
||||||
|
// Target framebuffer bound to the render pass; initially nullptr.
|
||||||
|
CachedFramebuffer* framebuffer = nullptr;
|
||||||
|
};
|
||||||
|
|
||||||
// Manages the virtualized EDRAM and the render target cache.
|
// Manages the virtualized EDRAM and the render target cache.
|
||||||
//
|
//
|
||||||
// On the 360 the render target is an opaque block of memory in EDRAM that's
|
// On the 360 the render target is an opaque block of memory in EDRAM that's
|
||||||
|
@ -165,7 +217,7 @@ class RenderCache {
|
||||||
|
|
||||||
// Begins a render pass targeting the state-specified framebuffer formats.
|
// Begins a render pass targeting the state-specified framebuffer formats.
|
||||||
// The command buffer will be transitioned into the render pass phase.
|
// The command buffer will be transitioned into the render pass phase.
|
||||||
VkRenderPass BeginRenderPass(VkCommandBuffer command_buffer,
|
const RenderState* BeginRenderPass(VkCommandBuffer command_buffer,
|
||||||
VulkanShader* vertex_shader,
|
VulkanShader* vertex_shader,
|
||||||
VulkanShader* pixel_shader);
|
VulkanShader* pixel_shader);
|
||||||
|
|
||||||
|
@ -177,9 +229,57 @@ class RenderCache {
|
||||||
void ClearCache();
|
void ClearCache();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
// Parses the current state into a configuration object.
|
||||||
|
bool ParseConfiguration(RenderConfiguration* config);
|
||||||
|
|
||||||
|
// Gets or creates a render pass and frame buffer for the given configuration.
|
||||||
|
// This attempts to reuse as much as possible across render passes and
|
||||||
|
// framebuffers.
|
||||||
|
bool ConfigureRenderPass(RenderConfiguration* config,
|
||||||
|
CachedRenderPass** out_render_pass,
|
||||||
|
CachedFramebuffer** out_framebuffer);
|
||||||
|
|
||||||
|
// Gets or creates a tile view with the given parameters.
|
||||||
|
CachedTileView* GetTileView(const TileViewKey& view_key);
|
||||||
|
|
||||||
RegisterFile* register_file_ = nullptr;
|
RegisterFile* register_file_ = nullptr;
|
||||||
VkDevice device_ = nullptr;
|
VkDevice device_ = nullptr;
|
||||||
|
|
||||||
|
// Entire 10MiB of EDRAM, aliased to hell by various VkImages.
|
||||||
|
VkDeviceMemory edram_memory_ = nullptr;
|
||||||
|
// Buffer overlayed 1:1 with edram_memory_ to allow raw access.
|
||||||
|
VkBuffer edram_buffer_ = nullptr;
|
||||||
|
|
||||||
|
// Cache of VkImage and VkImageView's for all of our EDRAM tilings.
|
||||||
|
// TODO(benvanik): non-linear lookup? Should only be a small number of these.
|
||||||
|
std::vector<CachedTileView*> cached_tile_views_;
|
||||||
|
|
||||||
|
// Cache of render passes based on formats.
|
||||||
|
std::vector<CachedRenderPass*> cached_render_passes_;
|
||||||
|
|
||||||
|
// Shadows of the registers that impact the render pass we choose.
|
||||||
|
// If the registers don't change between passes we can quickly reuse the
|
||||||
|
// previous one. ParseConfiguration decodes these raw values.
|
||||||
|
struct ShadowRegisters {
|
||||||
|
uint32_t rb_modecontrol;
|
||||||
|
uint32_t rb_surface_info;
|
||||||
|
uint32_t rb_color_info;
|
||||||
|
uint32_t rb_color1_info;
|
||||||
|
uint32_t rb_color2_info;
|
||||||
|
uint32_t rb_color3_info;
|
||||||
|
uint32_t rb_depth_info;
|
||||||
|
uint32_t pa_sc_window_scissor_tl;
|
||||||
|
uint32_t pa_sc_window_scissor_br;
|
||||||
|
|
||||||
|
ShadowRegisters() { Reset(); }  // Start with every shadow value zeroed.
|
||||||
|
void Reset() { std::memset(this, 0, sizeof(*this)); }  // Zero-fills the whole struct.
|
||||||
|
} shadow_registers_;
|
||||||
|
bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
|
||||||
|
|
||||||
|
// Configuration used for the current/previous Begin/End, representing the
|
||||||
|
// current shadow register state.
|
||||||
|
RenderState current_state_;
|
||||||
|
|
||||||
// Only valid during a BeginRenderPass/EndRenderPass block.
|
// Only valid during a BeginRenderPass/EndRenderPass block.
|
||||||
VkCommandBuffer current_command_buffer_ = nullptr;
|
VkCommandBuffer current_command_buffer_ = nullptr;
|
||||||
};
|
};
|
||||||
|
|
|
@ -179,9 +179,17 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
// We need the shaders to do just about anything, so validate them here.
|
// We need the shaders to do just about anything, so validate them here.
|
||||||
auto vertex_shader = static_cast<VulkanShader*>(active_vertex_shader());
|
auto vertex_shader = static_cast<VulkanShader*>(active_vertex_shader());
|
||||||
auto pixel_shader = static_cast<VulkanShader*>(active_pixel_shader());
|
auto pixel_shader = static_cast<VulkanShader*>(active_pixel_shader());
|
||||||
if (!vertex_shader || !vertex_shader->is_valid() || !pixel_shader ||
|
if (!vertex_shader || !vertex_shader->is_valid()) {
|
||||||
!pixel_shader->is_valid()) {
|
// Always need a vertex shader.
|
||||||
// Skipped because we can't understand the shader.
|
return true;
|
||||||
|
}
|
||||||
|
// Depth-only mode doesn't need a pixel shader (we'll use a fake one).
|
||||||
|
if (enable_mode == ModeControl::kDepth) {
|
||||||
|
// Use a dummy pixel shader when required.
|
||||||
|
// TODO(benvanik): dummy pixel shader.
|
||||||
|
assert_not_null(pixel_shader);
|
||||||
|
} else if (!pixel_shader || !pixel_shader->is_valid()) {
|
||||||
|
// Need a pixel shader in normal color mode.
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -198,16 +206,16 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
|
|
||||||
// Begin the render pass.
|
// Begin the render pass.
|
||||||
// This will setup our framebuffer and begin the pass in the command buffer.
|
// This will setup our framebuffer and begin the pass in the command buffer.
|
||||||
VkRenderPass render_pass = render_cache_->BeginRenderPass(
|
auto render_state = render_cache_->BeginRenderPass(
|
||||||
command_buffer, vertex_shader, pixel_shader);
|
command_buffer, vertex_shader, pixel_shader);
|
||||||
if (!render_pass) {
|
if (!render_state) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Configure the pipeline for drawing.
|
// Configure the pipeline for drawing.
|
||||||
// This encodes all render state (blend, depth, etc), our shader stages,
|
// This encodes all render state (blend, depth, etc), our shader stages,
|
||||||
// and our vertex input layout.
|
// and our vertex input layout.
|
||||||
if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_pass,
|
if (!pipeline_cache_->ConfigurePipeline(command_buffer, render_state,
|
||||||
vertex_shader, pixel_shader,
|
vertex_shader, pixel_shader,
|
||||||
primitive_type)) {
|
primitive_type)) {
|
||||||
render_cache_->EndRenderPass();
|
render_cache_->EndRenderPass();
|
||||||
|
@ -215,12 +223,14 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Upload the constants the shaders require.
|
// Upload the constants the shaders require.
|
||||||
auto vertex_constant_offset = buffer_cache_->UploadConstantRegisters(
|
// These are optional, and if none are defined 0 will be returned.
|
||||||
|
VkDeviceSize vertex_constant_offset = buffer_cache_->UploadConstantRegisters(
|
||||||
vertex_shader->constant_register_map());
|
vertex_shader->constant_register_map());
|
||||||
auto pixel_constant_offset = buffer_cache_->UploadConstantRegisters(
|
VkDeviceSize pixel_constant_offset = buffer_cache_->UploadConstantRegisters(
|
||||||
pixel_shader->constant_register_map());
|
pixel_shader->constant_register_map());
|
||||||
if (vertex_constant_offset == VK_WHOLE_SIZE ||
|
if (vertex_constant_offset == VK_WHOLE_SIZE ||
|
||||||
pixel_constant_offset == VK_WHOLE_SIZE) {
|
pixel_constant_offset == VK_WHOLE_SIZE) {
|
||||||
|
// Shader wants constants but we couldn't upload them.
|
||||||
render_cache_->EndRenderPass();
|
render_cache_->EndRenderPass();
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -307,11 +317,17 @@ bool VulkanCommandProcessor::IssueDraw(PrimitiveType primitive_type,
|
||||||
if (queue_mutex_) {
|
if (queue_mutex_) {
|
||||||
queue_mutex_->lock();
|
queue_mutex_->lock();
|
||||||
}
|
}
|
||||||
vkQueueWaitIdle(queue_);
|
err = vkQueueWaitIdle(queue_);
|
||||||
|
CheckResult(err, "vkQueueWaitIdle");
|
||||||
|
err = vkDeviceWaitIdle(*device_);
|
||||||
|
CheckResult(err, "vkDeviceWaitIdle");
|
||||||
if (queue_mutex_) {
|
if (queue_mutex_) {
|
||||||
queue_mutex_->unlock();
|
queue_mutex_->unlock();
|
||||||
}
|
}
|
||||||
|
while (command_buffer_pool_->has_pending()) {
|
||||||
command_buffer_pool_->Scavenge();
|
command_buffer_pool_->Scavenge();
|
||||||
|
xe::threading::MaybeYield();
|
||||||
|
}
|
||||||
vkDestroyFence(*device_, fence, nullptr);
|
vkDestroyFence(*device_, fence, nullptr);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
|
@ -47,6 +47,9 @@ class BaseFencedPool {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// True if one or more batches are still pending on the GPU (i.e. the pending batch list is non-empty).
|
||||||
|
bool has_pending() const { return pending_batch_list_head_ != nullptr; }
|
||||||
|
|
||||||
// Checks all pending batches for completion and scavenges their entries.
|
// Checks all pending batches for completion and scavenges their entries.
|
||||||
// This should be called as frequently as reasonable.
|
// This should be called as frequently as reasonable.
|
||||||
void Scavenge() {
|
void Scavenge() {
|
||||||
|
|
|
@ -10,3 +10,7 @@
|
||||||
#include "xenia/ui/vulkan/vulkan.h"
|
#include "xenia/ui/vulkan/vulkan.h"
|
||||||
|
|
||||||
DEFINE_bool(vulkan_validation, false, "Enable Vulkan validation layers.");
|
DEFINE_bool(vulkan_validation, false, "Enable Vulkan validation layers.");
|
||||||
|
|
||||||
|
DEFINE_bool(vulkan_primary_queue_only, false,
|
||||||
|
"Force the use of the primary queue, ignoring any additional that "
|
||||||
|
"may be present.");
|
||||||
|
|
|
@ -30,5 +30,6 @@
|
||||||
#define XELOGVK XELOGI
|
#define XELOGVK XELOGI
|
||||||
|
|
||||||
DECLARE_bool(vulkan_validation);
|
DECLARE_bool(vulkan_validation);
|
||||||
|
DECLARE_bool(vulkan_primary_queue_only);
|
||||||
|
|
||||||
#endif // XENIA_UI_VULKAN_VULKAN_H_
|
#endif // XENIA_UI_VULKAN_VULKAN_H_
|
||||||
|
|
|
@ -142,7 +142,7 @@ void VulkanContext::EndSwap() {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<RawImage> VulkanContext::Capture() {
|
std::unique_ptr<RawImage> VulkanContext::Capture() {
|
||||||
assert_always();
|
// TODO(benvanik): read back swap chain front buffer.
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -129,6 +129,11 @@ bool VulkanDevice::Initialize(DeviceInfo device_info) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Some tools *cough* renderdoc *cough* can't handle multiple queues.
|
||||||
|
if (FLAGS_vulkan_primary_queue_only) {
|
||||||
|
queue_count = 1;
|
||||||
|
}
|
||||||
|
|
||||||
VkDeviceQueueCreateInfo queue_info;
|
VkDeviceQueueCreateInfo queue_info;
|
||||||
queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
|
||||||
queue_info.pNext = nullptr;
|
queue_info.pNext = nullptr;
|
||||||
|
|
Loading…
Reference in New Issue