Merge branch 'master' of https://github.com/xenia-project/xenia into canary_experimental

Gliniak 2022-07-04 08:04:31 +02:00
commit 6e753c6399
149 changed files with 27259 additions and 42442 deletions

.gitmodules
View File

@@ -7,9 +7,6 @@
[submodule "third_party/binutils-ppc-cygwin"]
path = third_party/binutils-ppc-cygwin
url = https://github.com/benvanik/binutils-ppc-cygwin.git
[submodule "third_party/spirv-tools"]
path = third_party/spirv-tools
url = https://github.com/xenia-project/SPIRV-Tools.git
[submodule "third_party/catch"]
path = third_party/catch
url = https://github.com/catchorg/Catch2.git
@@ -22,12 +19,6 @@
[submodule "third_party/premake-export-compile-commands"]
path = third_party/premake-export-compile-commands
url = https://github.com/xenia-project/premake-export-compile-commands.git
[submodule "third_party/spirv-headers"]
path = third_party/spirv-headers
url = https://github.com/KhronosGroup/SPIRV-Headers.git
[submodule "third_party/volk"]
path = third_party/volk
url = https://github.com/zeux/volk.git
[submodule "third_party/discord-rpc"]
path = third_party/discord-rpc
url = https://github.com/discordapp/discord-rpc.git
@@ -85,6 +76,12 @@
[submodule "third_party/Vulkan-Headers"]
path = third_party/Vulkan-Headers
url = https://github.com/KhronosGroup/Vulkan-Headers.git
[submodule "third_party/glslang"]
path = third_party/glslang
url = https://github.com/KhronosGroup/glslang.git
[submodule "third_party/SPIRV-Tools"]
path = third_party/SPIRV-Tools
url = https://github.com/KhronosGroup/SPIRV-Tools.git
[submodule "third_party/VulkanMemoryAllocator"]
path = third_party/VulkanMemoryAllocator
url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator.git

View File

@@ -247,7 +247,6 @@ workspace("xenia")
include("third_party/imgui.lua")
include("third_party/mspack.lua")
include("third_party/snappy.lua")
include("third_party/spirv-tools.lua")
include("third_party/xxhash.lua")
if not os.istarget("android") then
@@ -288,7 +287,6 @@ workspace("xenia")
include("src/xenia/kernel")
include("src/xenia/patcher")
include("src/xenia/ui")
include("src/xenia/ui/spirv")
include("src/xenia/ui/vulkan")
include("src/xenia/vfs")

View File

@@ -27,7 +27,6 @@ project("xenia-app")
"xenia-kernel",
"xenia-patcher",
"xenia-ui",
"xenia-ui-spirv",
"xenia-ui-vulkan",
"xenia-patcher",
"xenia-vfs",
@@ -44,7 +43,6 @@ project("xenia-app")
"libavutil",
"mspack",
"snappy",
"spirv-tools",
"xxhash",
})
defines({

View File

@@ -59,7 +59,7 @@
#include "third_party/fmt/include/fmt/format.h"
DEFINE_string(apu, "any", "Audio system. Use: [any, nop, sdl, xaudio2]", "APU");
DEFINE_string(gpu, "any", "Graphics system. Use: [any, d3d12, null]",
DEFINE_string(gpu, "any", "Graphics system. Use: [any, d3d12, vulkan, null]",
"GPU");
DEFINE_string(hid, "any", "Input system. Use: [any, nop, sdl, winkey, xinput]",
"HID");
@@ -259,11 +259,82 @@ std::unique_ptr<apu::AudioSystem> EmulatorApp::CreateAudioSystem(
}
std::unique_ptr<gpu::GraphicsSystem> EmulatorApp::CreateGraphicsSystem() {
// While Vulkan is supported by a large variety of operating systems (Windows,
// GNU/Linux, Android, also via the MoltenVK translation layer on top of Metal
// on macOS and iOS), please don't remove platform-specific GPU backends from
// Xenia.
//
// Regardless of the operating system, having multiple options provides more
// stability to users. In case of driver issues, users may try switching
// between the available backends. For example, in June 2022, on Nvidia Ampere
// (RTX 30xx), Xenia had synchronization issues that resulted in flickering,
// most prominently in 4D5307E6, on Direct3D 12 - but the same issue was not
// reproducible in the Vulkan backend. The Vulkan backend, however, used
// ImageSampleExplicitLod with explicit gradients for cubemaps, which
// triggered a different driver bug on Nvidia (every 1 out of 2x2 pixels
// receiving junk).
//
// Specifically on Microsoft platforms, there are a few reasons why supporting
// Direct3D 12 is desirable rather than limiting Xenia to Vulkan only:
// - Wider hardware support for Direct3D 12 on x86 Windows desktops.
// Direct3D 12 requires a minimum of Nvidia Fermi, or, with a pre-2021
// driver version, Intel HD Graphics 4200. Vulkan, however, is supported
// only starting with Nvidia Kepler and a much more recent Intel UHD
// Graphics generation.
// - Wider hardware support on other kinds of Microsoft devices. The Xbox One
// and the Xbox Series X|S only support Direct3D as the GPU API in their UWP
// runtime, and only version 12 can be granted expanded resource access.
// Qualcomm, as of June 2022, also doesn't provide a Vulkan implementation
// for their Arm-based Windows devices, while Direct3D 12 is available.
// - Both older Intel GPUs and, apparently, the Xbox One, as well as earlier
// Windows 10 versions, also require Shader Model 5.1 DXBC shaders rather
// than Shader Model 6 DXIL ones, so a DXBC shader translator should be
// available in Xenia too; a DXIL one doesn't fully replace it.
// - As of June 2022, AMD also refuses to implement the
// VK_EXT_fragment_shader_interlock Vulkan extension in their drivers, as
// well as its OpenGL counterpart, which is heavily utilized for accurate
// support of Xenos render target formats that don't have PC equivalents
// (8_8_8_8_GAMMA, 2_10_10_10_FLOAT, 16_16 and 16_16_16_16 with -32 to 32
// range, D24FS8) with correct blending. Direct3D 12, however, requires
// support for similar functionality (rasterizer-ordered views) on the
// feature level 12_1, and the AMD driver implements it on Direct3D, as well
// as raster order groups in their Metal driver.
//
// Additionally, different host GPU APIs receive feature support at different
// paces. VK_EXT_fragment_shader_interlock first appeared in 2019, for
// instance, while Xenia had been taking advantage of rasterizer-ordered views
// on Direct3D 12 for over half a year at that point (they have existed in
// Direct3D 12 since the first version).
//
// MoltenVK on top of Metal also has its flaws and limitations. Metal, for
// instance, as of June 2022, doesn't provide a switch for primitive restart,
// while Vulkan does - so MoltenVK is not completely transparent to Xenia, and
// many of its issues, which may not be very obvious (unlike when the Metal API
// is used directly), should be taken into account in Xenia. Also, as of June
// 2022, MoltenVK translates SPIR-V shaders into the C++-based Metal Shading
// Language rather than AIR directly, which likely massively increases
// pipeline object creation time - and Xenia translates shaders and creates
// pipelines when they're first actually used for a draw command by the game,
// thus it can't precompile anything that hasn't ever been encountered before,
// and by that point there's already no time to waste.
//
// Very old hardware (Direct3D 10 level) is also not supported by most Vulkan
// drivers. However, in the future, Xenia may be ported to it using the
// Direct3D 11 API with the feature level 10_1 or 10_0. OpenGL, however, had
// been lagging behind Direct3D prior to versions 4.x, and didn't receive
// compute shaders until a 4.2 extension (while 4.2 already corresponds
// roughly to Direct3D 11 features) - and replacing Xenia compute shaders with
// transform feedback / stream output is not always trivial (in particular,
// it will need to rely on GL_ARB_transform_feedback3 for skipping over memory
// locations that shouldn't be overwritten).
//
// For maintainability, as much implementation code as possible should be
// placed in `xe::gpu` and shared between the backends rather than duplicated
// between them.
Factory<gpu::GraphicsSystem> factory;
#if XE_PLATFORM_WIN32
factory.Add<gpu::d3d12::D3D12GraphicsSystem>("d3d12");
#endif // XE_PLATFORM_WIN32
-//factory.Add<gpu::vulkan::VulkanGraphicsSystem>("vulkan");
+factory.Add<gpu::vulkan::VulkanGraphicsSystem>("vulkan");
factory.Add<gpu::null::NullGraphicsSystem>("null");
return factory.Create(cvars::gpu);
}
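As a sketch of the backend-selection pattern above (this is not Xenia's actual Factory implementation, just a simplified illustration): each backend registers a creator under the name matched against the gpu cvar, and "any" falls back to the first backend that constructs successfully.

#include <functional>
#include <memory>
#include <string>
#include <vector>

template <typename T>
class BackendFactory {
 public:
  template <typename Impl>
  void Add(const std::string& name) {
    // Store a deferred constructor under the backend's cvar name.
    creators_.push_back({name, [] { return std::make_unique<Impl>(); }});
  }
  std::unique_ptr<T> Create(const std::string& name) {
    for (auto& entry : creators_) {
      // "any" tries the registered backends in order of addition.
      if (name == "any" || name == entry.name) {
        if (auto instance = entry.create()) {
          return instance;
        }
      }
    }
    return nullptr;
  }

 private:
  struct Entry {
    std::string name;
    std::function<std::unique_ptr<T>()> create;
  };
  std::vector<Entry> creators_;
};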

View File

@@ -11,20 +11,14 @@ project("xenia-gpu")
"fmt",
"glslang-spirv",
"snappy",
"spirv-tools",
"xenia-base",
"xenia-ui",
"xenia-ui-spirv",
"xxhash",
})
defines({
})
includedirs({
project_root.."/third_party/spirv-tools/external/include",
project_root.."/third_party/Vulkan-Headers/include",
})
local_platform_files()
-- local_platform_files("spirv")
-- local_platform_files("spirv/passes")
group("src")
project("xenia-gpu-shader-compiler")
@@ -36,13 +30,13 @@ project("xenia-gpu-shader-compiler")
"fmt",
"glslang-spirv",
"snappy",
"spirv-tools",
"xenia-base",
"xenia-gpu",
"xenia-ui",
"xenia-ui-spirv",
"xenia-ui-vulkan",
})
defines({
})
includedirs({
project_root.."/third_party/Vulkan-Headers/include",
})
files({
"shader_compiler_main.cc",

View File

@@ -807,6 +807,9 @@ class Shader {
Translation(Shader& shader, uint64_t modification)
: shader_(shader), modification_(modification) {}
// If there was some failure during preparation on the implementation side.
void MakeInvalid() { is_valid_ = false; }
private:
friend class Shader;
friend class ShaderTranslator;

View File

@@ -9,9 +9,12 @@
#include <cinttypes>
#include <cstring>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
#include "third_party/glslang/SPIRV/disassemble.h"
#include "xenia/base/assert.h"
#include "xenia/base/console_app_main.h"
#include "xenia/base/cvar.h"
@@ -23,7 +26,7 @@
#include "xenia/gpu/shader_translator.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/spirv/spirv_disassembler.h"
#include "xenia/ui/vulkan/spirv_tools_context.h"
// For D3DDisassemble:
#if XE_PLATFORM_WIN32
@@ -118,9 +121,10 @@ int shader_compiler_main(const std::vector<std::string>& args) {
shader->AnalyzeUcode(ucode_disasm_buffer);
std::unique_ptr<ShaderTranslator> translator;
SpirvShaderTranslator::Features spirv_features(true);
if (cvars::shader_output_type == "spirv" ||
cvars::shader_output_type == "spirvtext") {
-translator = std::make_unique<SpirvShaderTranslator>();
+translator = std::make_unique<SpirvShaderTranslator>(spirv_features);
} else if (cvars::shader_output_type == "dxbc" ||
cvars::shader_output_type == "dxbctext") {
translator = std::make_unique<DxbcShaderTranslator>(
@@ -183,13 +187,30 @@ int shader_compiler_main(const std::vector<std::string>& args) {
const void* source_data = translation->translated_binary().data();
size_t source_data_size = translation->translated_binary().size();
std::unique_ptr<xe::ui::spirv::SpirvDisassembler::Result> spirv_disasm_result;
std::string spirv_disasm;
if (cvars::shader_output_type == "spirvtext") {
// Disassemble SPIRV.
spirv_disasm_result = xe::ui::spirv::SpirvDisassembler().Disassemble(
reinterpret_cast<const uint32_t*>(source_data), source_data_size / 4);
source_data = spirv_disasm_result->text();
source_data_size = std::strlen(spirv_disasm_result->text()) + 1;
std::ostringstream spirv_disasm_stream;
std::vector<unsigned int> spirv_source;
spirv_source.reserve(source_data_size / sizeof(unsigned int));
spirv_source.insert(spirv_source.cend(),
reinterpret_cast<const unsigned int*>(source_data),
reinterpret_cast<const unsigned int*>(source_data) +
source_data_size / sizeof(unsigned int));
spv::Disassemble(spirv_disasm_stream, spirv_source);
spirv_disasm = std::move(spirv_disasm_stream.str());
ui::vulkan::SpirvToolsContext spirv_tools_context;
if (spirv_tools_context.Initialize(spirv_features.spirv_version)) {
std::string spirv_validation_error;
spirv_tools_context.Validate(
reinterpret_cast<const uint32_t*>(spirv_source.data()),
spirv_source.size(), &spirv_validation_error);
if (!spirv_validation_error.empty()) {
spirv_disasm.append(1, '\n');
spirv_disasm.append(spirv_validation_error);
}
}
source_data = spirv_disasm.c_str();
source_data_size = spirv_disasm.size();
}
#if XE_PLATFORM_WIN32
ID3DBlob* dxbc_disasm_blob = nullptr;
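For reference, the glslang disassembly path used in the "spirvtext" branch above can be exercised standalone. A minimal sketch, assuming only that glslang's SPIRV/disassemble.h is on the include path:

#include <sstream>
#include <string>
#include <vector>

#include "SPIRV/disassemble.h"  // glslang; declares spv::Disassemble

std::string DisassembleSpirv(const std::vector<unsigned int>& words) {
  std::ostringstream stream;
  // Writes a human-readable listing of the SPIR-V module to the stream.
  spv::Disassemble(stream, words);
  return stream.str();
}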

View File

@@ -1,36 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/spirv/compiler.h"
namespace xe {
namespace gpu {
namespace spirv {
Compiler::Compiler() {}
void Compiler::AddPass(std::unique_ptr<CompilerPass> pass) {
compiler_passes_.push_back(std::move(pass));
}
bool Compiler::Compile(spv::Module* module) {
for (auto& pass : compiler_passes_) {
if (!pass->Run(module)) {
return false;
}
}
return true;
}
void Compiler::Reset() { compiler_passes_.clear(); }
} // namespace spirv
} // namespace gpu
} // namespace xe

View File

@@ -1,41 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_SPIRV_COMPILER_H_
#define XENIA_GPU_SPIRV_COMPILER_H_
#include "xenia/base/arena.h"
#include "xenia/gpu/spirv/compiler_pass.h"
#include "third_party/glslang-spirv/SpvBuilder.h"
#include "third_party/spirv/GLSL.std.450.hpp11"
namespace xe {
namespace gpu {
namespace spirv {
// SPIR-V Compiler. Designed to optimize SPIR-V code before feeding it into the
// drivers.
class Compiler {
public:
Compiler();
void AddPass(std::unique_ptr<CompilerPass> pass);
void Reset();
bool Compile(spv::Module* module);
private:
std::vector<std::unique_ptr<CompilerPass>> compiler_passes_;
};
} // namespace spirv
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_SPIRV_COMPILER_H_

View File

@@ -1,37 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_SPIRV_COMPILER_PASS_H_
#define XENIA_GPU_SPIRV_COMPILER_PASS_H_
#include "xenia/base/arena.h"
#include "third_party/glslang-spirv/SpvBuilder.h"
#include "third_party/spirv/GLSL.std.450.hpp11"
namespace xe {
namespace gpu {
namespace spirv {
class CompilerPass {
public:
CompilerPass() = default;
virtual ~CompilerPass() {}
virtual bool Run(spv::Module* module) = 0;
private:
xe::Arena ir_arena_;
};
} // namespace spirv
} // namespace gpu
} // namespace xe
#endif

View File

@@ -1,30 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/spirv/passes/control_flow_analysis_pass.h"
namespace xe {
namespace gpu {
namespace spirv {
ControlFlowAnalysisPass::ControlFlowAnalysisPass() {}
bool ControlFlowAnalysisPass::Run(spv::Module* module) {
for (auto function : module->getFunctions()) {
// For each OpBranchConditional, see if we can find a point where control
// flow converges and then append an OpSelectionMerge.
// Potential problems: while loops constructed from branch instructions
}
return true;
}
} // namespace spirv
} // namespace gpu
} // namespace xe

View File

@@ -1,34 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_
#include "xenia/gpu/spirv/compiler_pass.h"
namespace xe {
namespace gpu {
namespace spirv {
// Control-flow analysis pass. Runs through control-flow and adds merge opcodes
// where necessary.
class ControlFlowAnalysisPass : public CompilerPass {
public:
ControlFlowAnalysisPass();
bool Run(spv::Module* module) override;
private:
};
} // namespace spirv
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_ANALYSIS_PASS_H_

View File

@@ -1,48 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/spirv/passes/control_flow_simplification_pass.h"
namespace xe {
namespace gpu {
namespace spirv {
ControlFlowSimplificationPass::ControlFlowSimplificationPass() {}
bool ControlFlowSimplificationPass::Run(spv::Module* module) {
for (auto function : module->getFunctions()) {
// Walk through the blocks in the function and merge any blocks which are
// unconditionally dominated.
for (auto it = function->getBlocks().end() - 1;
it != function->getBlocks().begin();) {
auto block = *it;
if (!block->isUnreachable() && block->getPredecessors().size() == 1) {
auto prev_block = block->getPredecessors()[0];
auto last_instr =
prev_block->getInstruction(prev_block->getInstructionCount() - 1);
if (last_instr->getOpCode() == spv::Op::OpBranch) {
if (prev_block->getSuccessors().size() == 1 &&
prev_block->getSuccessors()[0] == block) {
// We're dominated by this block. Merge into it.
prev_block->merge(block);
block->setUnreachable();
}
}
}
--it;
}
}
return true;
}
} // namespace spirv
} // namespace gpu
} // namespace xe

View File

@@ -1,34 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_
#define XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_
#include "xenia/gpu/spirv/compiler_pass.h"
namespace xe {
namespace gpu {
namespace spirv {
// Control-flow simplification pass. Combines adjacent blocks and marks
// any unreachable blocks.
class ControlFlowSimplificationPass : public CompilerPass {
public:
ControlFlowSimplificationPass();
bool Run(spv::Module* module) override;
private:
};
} // namespace spirv
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_SPIRV_PASSES_CONTROL_FLOW_SIMPLIFICATION_PASS_H_

View File

@@ -0,0 +1,30 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/spirv_shader.h"
#include <cstring>
namespace xe {
namespace gpu {
SpirvShader::SpirvShader(xenos::ShaderType shader_type,
uint64_t ucode_data_hash, const uint32_t* ucode_dwords,
size_t ucode_dword_count,
std::endian ucode_source_endian)
: Shader(shader_type, ucode_data_hash, ucode_dwords, ucode_dword_count,
ucode_source_endian) {}
Shader::Translation* SpirvShader::CreateTranslationInstance(
uint64_t modification) {
return new SpirvTranslation(*this, modification);
}
} // namespace gpu
} // namespace xe

View File

@@ -0,0 +1,81 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_SPIRV_SHADER_H_
#define XENIA_GPU_SPIRV_SHADER_H_
#include <atomic>
#include <vector>
#include "xenia/gpu/shader.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/xenos.h"
namespace xe {
namespace gpu {
class SpirvShader : public Shader {
public:
class SpirvTranslation : public Translation {
public:
explicit SpirvTranslation(SpirvShader& shader, uint64_t modification)
: Translation(shader, modification) {}
};
explicit SpirvShader(xenos::ShaderType shader_type, uint64_t ucode_data_hash,
const uint32_t* ucode_dwords, size_t ucode_dword_count,
std::endian ucode_source_endian = std::endian::big);
// Resource bindings are gathered after the successful translation of any
// modification for simplicity of translation (and they don't depend on
// modification bits).
struct TextureBinding {
uint32_t fetch_constant : 5;
// Stacked and 3D are separate TextureBindings.
xenos::FetchOpDimension dimension : 2;
uint32_t is_signed : 1;
};
// Safe to hash and compare with memcmp for layout hashing.
const std::vector<TextureBinding>& GetTextureBindingsAfterTranslation()
const {
return texture_bindings_;
}
const uint32_t GetUsedTextureMaskAfterTranslation() const {
return used_texture_mask_;
}
struct SamplerBinding {
uint32_t fetch_constant : 5;
xenos::TextureFilter mag_filter : 2;
xenos::TextureFilter min_filter : 2;
xenos::TextureFilter mip_filter : 2;
xenos::AnisoFilter aniso_filter : 3;
};
const std::vector<SamplerBinding>& GetSamplerBindingsAfterTranslation()
const {
return sampler_bindings_;
}
protected:
Translation* CreateTranslationInstance(uint64_t modification) override;
private:
friend class SpirvShaderTranslator;
std::atomic_flag bindings_setup_entered_ = ATOMIC_FLAG_INIT;
std::vector<TextureBinding> texture_bindings_;
std::vector<SamplerBinding> sampler_bindings_;
uint32_t used_texture_mask_ = 0;
};
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_SPIRV_SHADER_H_
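Since the binding structs above are plain bit-field bundles, "safe to hash and compare with memcmp" means layout keys can be built directly from the vectors returned by the getters. A minimal sketch (the helper name and template are illustrative, not from this diff):

#include <cstring>
#include <vector>

template <typename Binding>
bool BindingLayoutsEqual(const std::vector<Binding>& a,
                         const std::vector<Binding>& b) {
  // Raw byte comparison is valid because the bindings are tightly packed
  // bit-field structs with deterministic contents.
  return a.size() == b.size() &&
         (a.empty() ||
          std::memcmp(a.data(), b.data(), a.size() * sizeof(Binding)) == 0);
}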

File diff suppressed because it is too large

View File

@@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
-* Copyright 2016 Ben Vanik. All rights reserved. *
+* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@@ -10,91 +10,274 @@
#ifndef XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_
#define XENIA_GPU_SPIRV_SHADER_TRANSLATOR_H_
#include <array>
#include <cstdint>
#include <memory>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "third_party/glslang-spirv/SpvBuilder.h"
#include "third_party/spirv/GLSL.std.450.hpp11"
#include "third_party/glslang/SPIRV/SpvBuilder.h"
#include "xenia/gpu/shader_translator.h"
#include "xenia/ui/spirv/spirv_disassembler.h"
#include "xenia/ui/spirv/spirv_validator.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace gpu {
// Push constants embedded within the command buffer.
// The total size of this struct must be <= 128b (as that's the commonly
// supported size).
struct SpirvPushConstants {
// Accessible to vertex shader only:
float window_scale[4]; // scale x/y, offset x/y (pixels)
float vtx_fmt[4];
// Accessible to geometry shader only:
float point_size[4]; // psx, psy, unused, unused
// Accessible to fragment shader only:
float alpha_test[4]; // alpha test enable, func, ref
float color_exp_bias[4];
uint32_t ps_param_gen;
};
static_assert(sizeof(SpirvPushConstants) <= 128,
"Push constants must fit <= 128b");
constexpr uint32_t kSpirvPushConstantVertexRangeOffset = 0;
constexpr uint32_t kSpirvPushConstantVertexRangeSize = (sizeof(float) * 4) * 2;
constexpr uint32_t kSpirvPushConstantGeometryRangeOffset =
kSpirvPushConstantVertexRangeOffset + kSpirvPushConstantVertexRangeSize;
constexpr uint32_t kSpirvPushConstantGeometryRangeSize = (sizeof(float) * 4);
constexpr uint32_t kSpirvPushConstantFragmentRangeOffset =
kSpirvPushConstantGeometryRangeOffset + kSpirvPushConstantGeometryRangeSize;
constexpr uint32_t kSpirvPushConstantFragmentRangeSize =
(sizeof(float) * 4) + sizeof(uint32_t);
constexpr uint32_t kSpirvPushConstantsSize = sizeof(SpirvPushConstants);
class SpirvShaderTranslator : public ShaderTranslator {
public:
SpirvShaderTranslator();
~SpirvShaderTranslator() override;
union Modification {
// If anything in this structure is changed in a way not compatible with
// the previous layout, invalidate the pipeline storages by increasing this
// version number (0xYYYYMMDD)!
// TODO(Triang3l): Change to 0xYYYYMMDD once it's out of the rapid
// prototyping stage (easier to do small granular updates with an
// incremental counter).
static constexpr uint32_t kVersion = 4;
enum class DepthStencilMode : uint32_t {
kNoModifiers,
// Early fragment tests - enable if alpha test and alpha to coverage are
// disabled; ignored if anything in the shader blocks early Z writing.
kEarlyHint,
// TODO(Triang3l): Unorm24 (rounding) and float24 (truncating and
// rounding) output modes.
};
struct {
// Dynamically indexable register count from SQ_PROGRAM_CNTL.
uint32_t dynamic_addressable_register_count : 8;
// Pipeline stage and input configuration.
Shader::HostVertexShaderType host_vertex_shader_type
: Shader::kHostVertexShaderTypeBitCount;
} vertex;
struct PixelShaderModification {
// Dynamically indexable register count from SQ_PROGRAM_CNTL.
uint32_t dynamic_addressable_register_count : 8;
uint32_t param_gen_enable : 1;
uint32_t param_gen_interpolator : 4;
// If param_gen_enable is set, this must be set for point primitives, and
// must not be set for other primitive types - enables the point sprite
// coordinates input, and also effects the flag bits in PsParamGen.
uint32_t param_gen_point : 1;
// For host render targets - depth / stencil output mode.
DepthStencilMode depth_stencil_mode : 3;
} pixel;
uint64_t value = 0;
Modification(uint64_t modification_value = 0) : value(modification_value) {}
};
enum : uint32_t {
kSysFlag_XYDividedByW_Shift,
kSysFlag_ZDividedByW_Shift,
kSysFlag_WNotReciprocal_Shift,
kSysFlag_PrimitivePolygonal_Shift,
kSysFlag_PrimitiveLine_Shift,
kSysFlag_AlphaPassIfLess_Shift,
kSysFlag_AlphaPassIfEqual_Shift,
kSysFlag_AlphaPassIfGreater_Shift,
kSysFlag_ConvertColor0ToGamma_Shift,
kSysFlag_ConvertColor1ToGamma_Shift,
kSysFlag_ConvertColor2ToGamma_Shift,
kSysFlag_ConvertColor3ToGamma_Shift,
kSysFlag_Count,
kSysFlag_XYDividedByW = 1u << kSysFlag_XYDividedByW_Shift,
kSysFlag_ZDividedByW = 1u << kSysFlag_ZDividedByW_Shift,
kSysFlag_WNotReciprocal = 1u << kSysFlag_WNotReciprocal_Shift,
kSysFlag_PrimitivePolygonal = 1u << kSysFlag_PrimitivePolygonal_Shift,
kSysFlag_PrimitiveLine = 1u << kSysFlag_PrimitiveLine_Shift,
kSysFlag_AlphaPassIfLess = 1u << kSysFlag_AlphaPassIfLess_Shift,
kSysFlag_AlphaPassIfEqual = 1u << kSysFlag_AlphaPassIfEqual_Shift,
kSysFlag_AlphaPassIfGreater = 1u << kSysFlag_AlphaPassIfGreater_Shift,
kSysFlag_ConvertColor0ToGamma = 1u << kSysFlag_ConvertColor0ToGamma_Shift,
kSysFlag_ConvertColor1ToGamma = 1u << kSysFlag_ConvertColor1ToGamma_Shift,
kSysFlag_ConvertColor2ToGamma = 1u << kSysFlag_ConvertColor2ToGamma_Shift,
kSysFlag_ConvertColor3ToGamma = 1u << kSysFlag_ConvertColor3ToGamma_Shift,
};
static_assert(kSysFlag_Count <= 32, "Too many flags in the system constants");
// IF SYSTEM CONSTANTS ARE CHANGED OR ADDED, THE FOLLOWING MUST BE UPDATED:
// - SystemConstantIndex enum.
// - Structure members in BeginTranslation.
struct SystemConstants {
uint32_t flags;
xenos::Endian vertex_index_endian;
int32_t vertex_base_index;
uint32_t padding_vertex_base_index;
float ndc_scale[3];
uint32_t padding_ndc_scale;
float ndc_offset[3];
uint32_t padding_ndc_offset;
// Each byte contains post-swizzle TextureSign values for each of the needed
// components of each of the 32 used texture fetch constants.
uint32_t texture_swizzled_signs[8];
// If the imageViewFormatSwizzle portability subset is not supported, the
// component swizzle (taking both guest and host swizzles into account) to
// apply to the result directly in the shader code. In each uint32_t,
// swizzles for 2 texture fetch constants (in bits 0:11 and 12:23).
uint32_t texture_swizzles[16];
float alpha_test_reference;
float padding_alpha_test_reference[3];
float color_exp_bias[4];
};
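Given the packing described in the comment above (12 bits per fetch constant, two fetch constants per uint32_t), a swizzle would be unpacked roughly as follows; the helper is hypothetical, not part of the diff:

#include <cstdint>

// fetch_constant is 0-31; each uint32_t holds two 12-bit swizzles,
// in bits 0:11 for even fetch constants and 12:23 for odd ones.
uint32_t GetTextureSwizzle(const uint32_t texture_swizzles[16],
                           uint32_t fetch_constant) {
  return (texture_swizzles[fetch_constant >> 1] >>
          (12 * (fetch_constant & 1))) &
         0xFFFu;
}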
// The minimum limit for maxPerStageDescriptorStorageBuffers is 4, and for
// maxStorageBufferRange it's 128 MB. These are the values of those limits on
// Arm Mali as of November 2020. Xenia needs 512 MB shared memory to be bound,
// therefore SSBOs must only be used for shared memory - all other storage
// resources must be images or texel buffers.
enum DescriptorSet : uint32_t {
// According to the "Pipeline Layout Compatibility" section of the Vulkan
// specification:
// "Two pipeline layouts are defined to be "compatible for set N" if they
// were created with identically defined descriptor set layouts for sets
// zero through N, and if they were created with identical push constant
// ranges."
// "Place the least frequently changing descriptor sets near the start of
// the pipeline layout, and place the descriptor sets representing the most
// frequently changing resources near the end. When pipelines are switched,
// only the descriptor set bindings that have been invalidated will need to
// be updated and the remainder of the descriptor set bindings will remain
// in place."
// This is partially the reverse of the Direct3D 12's rule of placing the
// most frequently changed descriptor sets in the beginning. Here all
// descriptor sets with an immutable layout are placed first, in reverse
// frequency of changing, and sets that may be different for different
// pipeline states last.
// Always the same descriptor set layouts for all pipeline layouts:
// Never changed.
kDescriptorSetSharedMemoryAndEdram,
// Pretty rarely used and rarely changed - flow control constants.
kDescriptorSetBoolLoopConstants,
// May stay the same across many draws.
kDescriptorSetSystemConstants,
// Less frequently changed (per-material).
kDescriptorSetFloatConstantsPixel,
// Quite frequently changed (for one object drawn multiple times, for
// instance - may contain projection matrices).
kDescriptorSetFloatConstantsVertex,
// Very frequently changed, especially for UI draws, and for models drawn in
// multiple parts - contains vertex and texture fetch constants.
kDescriptorSetFetchConstants,
// Mutable part of the pipeline layout:
kDescriptorSetMutableLayoutsStart,
// Rarely used at all, but may be changed at an unpredictable rate when
// vertex textures are used.
kDescriptorSetSamplersVertex = kDescriptorSetMutableLayoutsStart,
kDescriptorSetTexturesVertex,
// Per-material textures.
kDescriptorSetSamplersPixel,
kDescriptorSetTexturesPixel,
kDescriptorSetCount,
};
// "Xenia Emulator Microcode Translator".
// https://github.com/KhronosGroup/SPIRV-Headers/blob/c43a43c7cc3af55910b9bec2a71e3e8a622443cf/include/spirv/spir-v.xml#L79
static constexpr uint32_t kSpirvMagicToolId = 26;
struct Features {
explicit Features(const ui::vulkan::VulkanProvider& provider);
explicit Features(bool all = false);
unsigned int spirv_version;
uint32_t max_storage_buffer_range;
bool clip_distance;
bool cull_distance;
bool image_view_format_swizzle;
bool signed_zero_inf_nan_preserve_float32;
bool denorm_flush_to_zero_float32;
};
SpirvShaderTranslator(const Features& features);
// Not storing anything else in modifications (as this shader translator is
// being replaced anyway).
uint64_t GetDefaultVertexShaderModification(
uint32_t dynamic_addressable_register_count,
Shader::HostVertexShaderType host_vertex_shader_type =
-Shader::HostVertexShaderType::kVertex) const override {
-return dynamic_addressable_register_count;
-}
+Shader::HostVertexShaderType::kVertex) const override;
uint64_t GetDefaultPixelShaderModification(
-uint32_t dynamic_addressable_register_count) const override {
-return dynamic_addressable_register_count;
-}
+uint32_t dynamic_addressable_register_count) const override;
static constexpr uint32_t GetSharedMemoryStorageBufferCountLog2(
uint32_t max_storage_buffer_range) {
if (max_storage_buffer_range >= 512 * 1024 * 1024) {
return 0;
}
if (max_storage_buffer_range >= 256 * 1024 * 1024) {
return 1;
}
return 2;
}
uint32_t GetSharedMemoryStorageBufferCountLog2() const {
return GetSharedMemoryStorageBufferCountLog2(
features_.max_storage_buffer_range);
}
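The thresholds above can be restated as a compile-time check; a sketch mirroring GetSharedMemoryStorageBufferCountLog2 (the free function here is illustrative): the 512 MB of guest shared memory is bound as one, two, or four equal ranges depending on maxStorageBufferRange.

#include <cstdint>

constexpr uint32_t SharedMemoryBufferCountLog2(uint32_t max_range) {
  return max_range >= (512u << 20) ? 0 : max_range >= (256u << 20) ? 1 : 2;
}
// One 512 MB, two 256 MB or four 128 MB bound ranges respectively.
static_assert(SharedMemoryBufferCountLog2(512u << 20) == 0, "1 buffer");
static_assert(SharedMemoryBufferCountLog2(256u << 20) == 1, "2 buffers");
static_assert(SharedMemoryBufferCountLog2(128u << 20) == 2, "4 buffers");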
// Common functions useful not only for the translator, but also for EDRAM
// emulation via conventional render targets.
// Converts the color value externally clamped to [0, 31.875] to 7e3 floating
// point, with zeros in bits 10:31, rounding to the nearest even.
static spv::Id PreClampedFloat32To7e3(spv::Builder& builder,
spv::Id f32_scalar,
spv::Id ext_inst_glsl_std_450);
// Same as PreClampedFloat32To7e3, but clamps the input to [0, 31.875].
static spv::Id UnclampedFloat32To7e3(spv::Builder& builder,
spv::Id f32_scalar,
spv::Id ext_inst_glsl_std_450);
// Converts the 7e3 number in bits [f10_shift, f10_shift + 10) to a 32-bit
// float.
static spv::Id Float7e3To32(spv::Builder& builder, spv::Id f10_uint_scalar,
uint32_t f10_shift, bool result_as_uint,
spv::Id ext_inst_glsl_std_450);
// Converts the depth value externally clamped to the representable [0, 2)
// range to 20e4 floating point, with zeros in bits 24:31, rounding to the
// nearest even or towards zero. If remap_from_0_to_0_5 is true, it's assumed
// that 0...1 is pre-remapped to 0...0.5 in the input.
static spv::Id PreClampedDepthTo20e4(spv::Builder& builder,
spv::Id f32_scalar,
bool round_to_nearest_even,
bool remap_from_0_to_0_5,
spv::Id ext_inst_glsl_std_450);
// Converts the 20e4 number in bits [f24_shift, f24_shift + 24) to a 32-bit
// float.
static spv::Id Depth20e4To32(spv::Builder& builder, spv::Id f24_uint_scalar,
uint32_t f24_shift, bool remap_to_0_to_0_5,
bool result_as_uint,
spv::Id ext_inst_glsl_std_450);
protected:
-uint32_t GetModificationRegisterCount() const override {
-return uint32_t(current_translation().modification());
-}
void Reset() override;
+uint32_t GetModificationRegisterCount() const override;
void StartTranslation() override;
std::vector<uint8_t> CompleteTranslation() override;
void PostTranslation() override;
void PreProcessControlFlowInstructions(
std::vector<ucode::ControlFlowInstruction> instrs) override;
void ProcessLabel(uint32_t cf_index) override;
void ProcessControlFlowInstructionBegin(uint32_t cf_index) override;
void ProcessControlFlowInstructionEnd(uint32_t cf_index) override;
void ProcessControlFlowNopInstruction(uint32_t cf_index) override;
void ProcessExecInstructionBegin(const ParsedExecInstruction& instr) override;
void ProcessExecInstructionEnd(const ParsedExecInstruction& instr) override;
void ProcessLoopStartInstruction(
const ParsedLoopStartInstruction& instr) override;
void ProcessLoopEndInstruction(
const ParsedLoopEndInstruction& instr) override;
void ProcessCallInstruction(const ParsedCallInstruction& instr) override;
void ProcessReturnInstruction(const ParsedReturnInstruction& instr) override;
void ProcessJumpInstruction(const ParsedJumpInstruction& instr) override;
void ProcessAllocInstruction(const ParsedAllocInstruction& instr) override;
void ProcessVertexFetchInstruction(
const ParsedVertexFetchInstruction& instr) override;
void ProcessTextureFetchInstruction(
@@ -102,99 +285,374 @@ class SpirvShaderTranslator : public ShaderTranslator {
void ProcessAluInstruction(const ParsedAluInstruction& instr) override;
private:
spv::Function* CreateCubeFunction();
struct TextureBinding {
uint32_t fetch_constant;
// Stacked and 3D are separate TextureBindings.
xenos::FetchOpDimension dimension;
bool is_signed;
bool ProcessVectorAluOperation(const ParsedAluInstruction& instr,
bool& close_predicate_block);
bool ProcessScalarAluOperation(const ParsedAluInstruction& instr,
bool& close_predicate_block);
spv::Id BitfieldExtract(spv::Id result_type, spv::Id base, bool is_signed,
uint32_t offset, uint32_t count);
spv::Id ConvertNormVar(spv::Id var, spv::Id result_type, uint32_t bits,
bool is_signed);
// Creates a call to the given GLSL intrinsic.
spv::Id CreateGlslStd450InstructionCall(spv::Decoration precision,
spv::Id result_type,
spv::GLSLstd450 instruction_ordinal,
std::vector<spv::Id> args);
// Loads an operand into a value.
// The value returned will be in the form described in the operand (number of
// components, etc).
spv::Id LoadFromOperand(const InstructionOperand& op);
// Stores a value based on the specified result information.
// The value will be transformed into the appropriate form for the result and
// the proper components will be selected.
void StoreToResult(spv::Id source_value_id, const InstructionResult& result);
xe::ui::spirv::SpirvDisassembler disassembler_;
xe::ui::spirv::SpirvValidator validator_;
// True if there's an open predicated block
bool open_predicated_block_ = false;
bool predicated_block_cond_ = false;
spv::Block* predicated_block_end_ = nullptr;
// Exec block conditional?
bool exec_cond_ = false;
spv::Block* exec_skip_block_ = nullptr;
// TODO(benvanik): replace with something better, make reusable, etc.
std::unique_ptr<spv::Builder> builder_;
spv::Id glsl_std_450_instruction_set_ = 0;
// Generated function
spv::Function* translated_main_ = nullptr;
spv::Function* cube_function_ = nullptr;
// Types.
spv::Id float_type_ = 0, bool_type_ = 0, int_type_ = 0, uint_type_ = 0;
spv::Id vec2_int_type_ = 0, vec2_uint_type_ = 0, vec3_int_type_ = 0;
spv::Id vec2_float_type_ = 0, vec3_float_type_ = 0, vec4_float_type_ = 0;
spv::Id vec4_int_type_ = 0, vec4_uint_type_ = 0;
spv::Id vec2_bool_type_ = 0, vec3_bool_type_ = 0, vec4_bool_type_ = 0;
spv::Id image_2d_type_ = 0, image_3d_type_ = 0, image_cube_type_ = 0;
// Constants.
spv::Id vec4_float_zero_ = 0, vec4_float_one_ = 0;
// Array of AMD registers.
// These values are all pointers.
spv::Id registers_ptr_ = 0, registers_type_ = 0;
spv::Id consts_ = 0, a0_ = 0, p0_ = 0;
spv::Id aL_ = 0; // Loop index stack - .x is active loop
spv::Id loop_count_ = 0; // Loop counter stack
spv::Id ps_ = 0, pv_ = 0; // IDs of previous results
spv::Id pc_ = 0; // Program counter
spv::Id lod_ = 0; // LOD register
spv::Id pos_ = 0;
spv::Id push_consts_ = 0;
spv::Id interpolators_ = 0;
spv::Id point_size_ = 0;
spv::Id point_coord_ = 0;
spv::Id vertex_idx_ = 0;
spv::Id frag_outputs_ = 0, frag_depth_ = 0;
spv::Id samplers_ = 0;
spv::Id tex_[3] = {0}; // Images {2D, 3D, Cube}
std::unordered_map<uint32_t, uint32_t> tex_binding_map_;
spv::Id vtx_ = 0; // Vertex buffer array (32 runtime arrays)
std::unordered_map<uint32_t, uint32_t> vtx_binding_map_;
// SPIR-V IDs that are part of the in/out interface.
std::vector<spv::Id> interface_ids_;
struct CFBlock {
spv::Block* block = nullptr;
bool labelled = false;
spv::Id variable;
};
std::vector<CFBlock> cf_blocks_;
spv::Block* switch_break_block_ = nullptr;
spv::Block* loop_head_block_ = nullptr;
spv::Block* loop_body_block_ = nullptr;
spv::Block* loop_cont_block_ = nullptr;
spv::Block* loop_exit_block_ = nullptr;
struct SamplerBinding {
uint32_t fetch_constant;
xenos::TextureFilter mag_filter;
xenos::TextureFilter min_filter;
xenos::TextureFilter mip_filter;
xenos::AnisoFilter aniso_filter;
spv::Id variable;
};
// Builder helpers.
spv::Id SpirvSmearScalarResultOrConstant(spv::Id scalar, spv::Id vector_type);
void SpirvCreateSelectionMerge(
spv::Id merge_block_id, spv::SelectionControlMask selection_control_mask =
spv::SelectionControlMaskNone) {
std::unique_ptr<spv::Instruction> selection_merge_op =
std::make_unique<spv::Instruction>(spv::OpSelectionMerge);
selection_merge_op->addIdOperand(merge_block_id);
selection_merge_op->addImmediateOperand(selection_control_mask);
builder_->getBuildPoint()->addInstruction(std::move(selection_merge_op));
}
Modification GetSpirvShaderModification() const {
return Modification(current_translation().modification());
}
bool IsSpirvVertexShader() const {
return is_vertex_shader() &&
!Shader::IsHostVertexShaderTypeDomain(
GetSpirvShaderModification().vertex.host_vertex_shader_type);
}
bool IsSpirvTessEvalShader() const {
return is_vertex_shader() &&
Shader::IsHostVertexShaderTypeDomain(
GetSpirvShaderModification().vertex.host_vertex_shader_type);
}
bool IsExecutionModeEarlyFragmentTests() const {
// TODO(Triang3l): Not applicable to fragment shader interlock.
return is_pixel_shader() &&
GetSpirvShaderModification().pixel.depth_stencil_mode ==
Modification::DepthStencilMode::kEarlyHint &&
current_shader().implicit_early_z_write_allowed();
}
// Returns UINT32_MAX if PsParamGen doesn't need to be written.
uint32_t GetPsParamGenInterpolator() const;
// Must be called before emitting any SPIR-V operations that must be in a
// block in translator callbacks to ensure that if the last instruction added
// was something like OpBranch - in this case, an unreachable block is
// created.
void EnsureBuildPointAvailable();
void StartVertexOrTessEvalShaderBeforeMain();
void StartVertexOrTessEvalShaderInMain();
void CompleteVertexOrTessEvalShaderInMain();
void StartFragmentShaderBeforeMain();
void StartFragmentShaderInMain();
void CompleteFragmentShaderInMain();
// Updates the current flow control condition (to be called in the beginning
// of exec and in jumps), closing the previous conditionals if needed.
// However, if the condition is not different, the instruction-level predicate
// conditional also won't be closed - this must be checked separately if
// needed (for example, in jumps).
void UpdateExecConditionals(ParsedExecInstruction::Type type,
uint32_t bool_constant_index, bool condition);
// Opens or reopens the predicate check conditional for the instruction.
// Should be called before processing a non-control-flow instruction.
void UpdateInstructionPredication(bool predicated, bool condition);
// Closes the instruction-level predicate conditional if it's open, useful if
// a control flow instruction needs to do some code which needs to respect the
// current exec conditional, but can't itself be predicated.
void CloseInstructionPredication();
// Closes conditionals opened by exec and instructions within them (but not by
// labels) and updates the state accordingly.
void CloseExecConditionals();
spv::Id GetStorageAddressingIndex(
InstructionStorageAddressingMode addressing_mode, uint32_t storage_index,
bool is_float_constant = false);
// Loads unswizzled operand without sign modifiers as float4.
spv::Id LoadOperandStorage(const InstructionOperand& operand);
spv::Id ApplyOperandModifiers(spv::Id operand_value,
const InstructionOperand& original_operand,
bool invert_negate = false,
bool force_absolute = false);
// Returns the requested components, with the operand's swizzle applied, in a
// condensed form, but without negation / absolute value modifiers. The
// storage is float4, no matter what the component count of original_operand
// is (the storage will be either r# or c#, but the instruction may be
// scalar).
spv::Id GetUnmodifiedOperandComponents(
spv::Id operand_storage, const InstructionOperand& original_operand,
uint32_t components);
spv::Id GetOperandComponents(spv::Id operand_storage,
const InstructionOperand& original_operand,
uint32_t components, bool invert_negate = false,
bool force_absolute = false) {
return ApplyOperandModifiers(
GetUnmodifiedOperandComponents(operand_storage, original_operand,
components),
original_operand, invert_negate, force_absolute);
}
// If components are identical, the same Id will be written to both outputs.
void GetOperandScalarXY(spv::Id operand_storage,
const InstructionOperand& original_operand,
spv::Id& a_out, spv::Id& b_out,
bool invert_negate = false,
bool force_absolute = false);
// Gets the absolute value of the loaded operand if it's not absolute already.
spv::Id GetAbsoluteOperand(spv::Id operand_storage,
const InstructionOperand& original_operand);
// The type of the value must be a float vector consisting of
// xe::bit_count(result.GetUsedResultComponents()) elements, or (to replicate
// a scalar into all used components) float, or the value can be spv::NoResult
// if there's no result to store (like constants only).
void StoreResult(const InstructionResult& result, spv::Id value);
// For Shader Model 3 multiplication (+-0 or denormal * anything = +0),
// replaces the value with +0 if the minimum of the two operands is 0. This
// must be called with absolute values of operands - use GetAbsoluteOperand!
spv::Id ZeroIfAnyOperandIsZero(spv::Id value, spv::Id operand_0_abs,
spv::Id operand_1_abs);
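A scalar sketch of the Shader Model 3 rule that ZeroIfAnyOperandIsZero implements (the real helper emits SPIR-V for vectors rather than executing this); it assumes denormals are flushed to zero as on the guest GPU, so min(|a|, |b|) == 0 covers both the signed-zero and the denormal cases.

#include <algorithm>
#include <cmath>

float Sm3Multiply(float a, float b) {
  // +-0 or a denormal multiplied by anything must yield +0 (never NaN,
  // even for 0 * infinity).
  return std::min(std::fabs(a), std::fabs(b)) == 0.0f ? 0.0f : a * b;
}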
// Return type is a xe::bit_count(result.GetUsedResultComponents())-component
// float vector or a single float, depending on whether it's a reduction
// instruction (check getTypeId of the result), or returns spv::NoResult if
// nothing to store.
spv::Id ProcessVectorAluOperation(const ParsedAluInstruction& instr,
bool& predicate_written);
// Returns a float value to write to the previous scalar register and to the
// destination. If the return value is ps itself (in the retain_prev case),
// returns spv::NoResult (handled as a special case, so if it's retain_prev,
// but don't need to write to anywhere, no OpLoad(ps) will be done).
spv::Id ProcessScalarAluOperation(const ParsedAluInstruction& instr,
bool& predicate_written);
// Perform endian swap of a uint scalar or vector.
spv::Id EndianSwap32Uint(spv::Id value, spv::Id endian);
spv::Id LoadUint32FromSharedMemory(spv::Id address_dwords_int);
// The source may be a floating-point scalar or a vector.
spv::Id PWLGammaToLinear(spv::Id gamma, bool gamma_pre_saturated);
spv::Id LinearToPWLGamma(spv::Id linear, bool linear_pre_saturated);
size_t FindOrAddTextureBinding(uint32_t fetch_constant,
xenos::FetchOpDimension dimension,
bool is_signed);
size_t FindOrAddSamplerBinding(uint32_t fetch_constant,
xenos::TextureFilter mag_filter,
xenos::TextureFilter min_filter,
xenos::TextureFilter mip_filter,
xenos::AnisoFilter aniso_filter);
// `texture_parameters` need to be set up except for `sampler`, which will be
// set internally, optionally doing linear interpolation between an
// existing value and the new one (the result location may be the same as for
// the first lerp endpoint, but not across signedness).
void SampleTexture(spv::Builder::TextureParameters& texture_parameters,
spv::ImageOperandsMask image_operands_mask,
spv::Id image_unsigned, spv::Id image_signed,
spv::Id sampler, spv::Id is_all_signed,
spv::Id is_any_signed, spv::Id& result_unsigned_out,
spv::Id& result_signed_out,
spv::Id lerp_factor = spv::NoResult,
spv::Id lerp_first_unsigned = spv::NoResult,
spv::Id lerp_first_signed = spv::NoResult);
// `texture_parameters` need to be set up except for `sampler`, which will be
// set internally.
spv::Id QueryTextureLod(spv::Builder::TextureParameters& texture_parameters,
spv::Id image_unsigned, spv::Id image_signed,
spv::Id sampler, spv::Id is_all_signed);
Features features_;
std::unique_ptr<spv::Builder> builder_;
std::vector<spv::Id> id_vector_temp_;
// For helper functions like operand loading, so they don't conflict with
// id_vector_temp_ usage in bigger callbacks.
std::vector<spv::Id> id_vector_temp_util_;
std::vector<unsigned int> uint_vector_temp_;
std::vector<unsigned int> uint_vector_temp_util_;
spv::Id ext_inst_glsl_std_450_;
spv::Id type_void_;
union {
struct {
spv::Id type_bool_;
spv::Id type_bool2_;
spv::Id type_bool3_;
spv::Id type_bool4_;
};
// Index = component count - 1.
spv::Id type_bool_vectors_[4];
};
union {
struct {
spv::Id type_int_;
spv::Id type_int2_;
spv::Id type_int3_;
spv::Id type_int4_;
};
spv::Id type_int_vectors_[4];
};
union {
struct {
spv::Id type_uint_;
spv::Id type_uint2_;
spv::Id type_uint3_;
spv::Id type_uint4_;
};
spv::Id type_uint_vectors_[4];
};
union {
struct {
spv::Id type_float_;
spv::Id type_float2_;
spv::Id type_float3_;
spv::Id type_float4_;
};
spv::Id type_float_vectors_[4];
};
spv::Id const_int_0_;
spv::Id const_int4_0_;
spv::Id const_uint_0_;
spv::Id const_uint4_0_;
union {
struct {
spv::Id const_float_0_;
spv::Id const_float2_0_;
spv::Id const_float3_0_;
spv::Id const_float4_0_;
};
spv::Id const_float_vectors_0_[4];
};
union {
struct {
spv::Id const_float_1_;
spv::Id const_float2_1_;
spv::Id const_float3_1_;
spv::Id const_float4_1_;
};
spv::Id const_float_vectors_1_[4];
};
// vec2(0.0, 1.0), to arbitrarily VectorShuffle non-constant and constant
// components.
spv::Id const_float2_0_1_;
enum SystemConstantIndex : unsigned int {
kSystemConstantFlags,
kSystemConstantIndexVertexIndexEndian,
kSystemConstantIndexVertexBaseIndex,
kSystemConstantNdcScale,
kSystemConstantNdcOffset,
kSystemConstantTextureSwizzledSigns,
kSystemConstantTextureSwizzles,
kSystemConstantAlphaTestReference,
kSystemConstantColorExpBias,
};
spv::Id uniform_system_constants_;
spv::Id uniform_float_constants_;
spv::Id uniform_bool_loop_constants_;
spv::Id uniform_fetch_constants_;
spv::Id buffers_shared_memory_;
// Not using combined images and samplers because
// maxPerStageDescriptorSamplers is often lower than
// maxPerStageDescriptorSampledImages, and for every fetch constant, there
// are, for regular fetches, two bindings (unsigned and signed).
std::vector<TextureBinding> texture_bindings_;
std::vector<SamplerBinding> sampler_bindings_;
// VS as VS only - int.
spv::Id input_vertex_index_;
// VS as TES only - int.
spv::Id input_primitive_id_;
// PS, only when needed - float4.
spv::Id input_fragment_coord_;
// PS, only when needed - bool.
spv::Id input_front_facing_;
// In vertex or tessellation evaluation shaders - outputs, always
// xenos::kMaxInterpolators.
// In pixel shaders - inputs, min(xenos::kMaxInterpolators, register_count()).
spv::Id input_output_interpolators_[xenos::kMaxInterpolators];
static const std::string kInterpolatorNamePrefix;
enum OutputPerVertexMember : unsigned int {
kOutputPerVertexMemberPosition,
kOutputPerVertexMemberCount,
};
spv::Id output_per_vertex_;
std::array<spv::Id, xenos::kMaxColorRenderTargets> output_fragment_data_;
std::vector<spv::Id> main_interface_;
spv::Function* function_main_;
spv::Id main_system_constant_flags_;
// bool.
spv::Id var_main_predicate_;
// uint4.
spv::Id var_main_loop_count_;
// int4.
spv::Id var_main_loop_address_;
// int.
spv::Id var_main_address_register_;
// float.
spv::Id var_main_previous_scalar_;
// `base + index * stride` in dwords from the last vfetch_full as it may be
// needed by vfetch_mini - int.
spv::Id var_main_vfetch_address_;
// float.
spv::Id var_main_tfetch_lod_;
// float3.
spv::Id var_main_tfetch_gradients_h_;
spv::Id var_main_tfetch_gradients_v_;
// float4[register_count()].
spv::Id var_main_registers_;
// VS only - float3 (special exports).
spv::Id var_main_point_size_edge_flag_kill_vertex_;
spv::Block* main_loop_header_;
spv::Block* main_loop_continue_;
spv::Block* main_loop_merge_;
spv::Id main_loop_pc_next_;
spv::Block* main_switch_header_;
std::unique_ptr<spv::Instruction> main_switch_op_;
spv::Block* main_switch_merge_;
std::vector<spv::Id> main_switch_next_pc_phi_operands_;
// If the exec bool constant / predicate conditional is open, block after it
// (not added to the function yet).
spv::Block* cf_exec_conditional_merge_;
// If the instruction-level predicate conditional is open, block after it (not
// added to the function yet).
spv::Block* cf_instruction_predicate_merge_;
// When cf_exec_conditional_merge_ is not null:
// If the current exec conditional is based on a bool constant: the number of
// the bool constant.
// If it's based on the predicate value: kCfExecBoolConstantPredicate.
uint32_t cf_exec_bool_constant_or_predicate_;
static constexpr uint32_t kCfExecBoolConstantPredicate = UINT32_MAX;
// When cf_exec_conditional_merge_ is not null, the expected bool constant or
// predicate value for the current exec conditional.
bool cf_exec_condition_;
// When cf_instruction_predicate_merge_ is not null, the expected predicate
// value for the current or the last instruction.
bool cf_instruction_predicate_condition_;
// Whether there was a `setp` in the current exec before the current
// instruction, thus instruction-level predicate value can be different than
// the exec-level predicate value, and can't merge two execs with the same
// predicate condition anymore.
bool cf_exec_predicate_written_;
};
} // namespace gpu

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,648 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/spirv_shader_translator.h"
#include <cstdint>
#include <memory>
#include <utility>
#include "third_party/glslang/SPIRV/GLSL.std.450.h"
#include "xenia/base/assert.h"
#include "xenia/base/math.h"
namespace xe {
namespace gpu {
spv::Id SpirvShaderTranslator::PreClampedFloat32To7e3(
spv::Builder& builder, spv::Id f32_scalar, spv::Id ext_inst_glsl_std_450) {
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
// Assuming the value is already clamped to [0, 31.875].
spv::Id type_uint = builder.makeUintType(32);
// Need the source as uint for bit operations.
{
spv::Id source_type = builder.getTypeId(f32_scalar);
assert_true(builder.isScalarType(source_type));
if (!builder.isUintType(source_type)) {
f32_scalar = builder.createUnaryOp(spv::OpBitcast, type_uint, f32_scalar);
}
}
// The denormal 7e3 case.
// denormal_biased_f32 = (f32 & 0x7FFFFF) | 0x800000
spv::Id denormal_biased_f32;
{
spv::Instruction* denormal_insert_instruction = new spv::Instruction(
builder.getUniqueId(), type_uint, spv::OpBitFieldInsert);
denormal_insert_instruction->addIdOperand(f32_scalar);
denormal_insert_instruction->addIdOperand(builder.makeUintConstant(1));
denormal_insert_instruction->addIdOperand(builder.makeUintConstant(23));
denormal_insert_instruction->addIdOperand(builder.makeUintConstant(9));
builder.getBuildPoint()->addInstruction(
std::unique_ptr<spv::Instruction>(denormal_insert_instruction));
denormal_biased_f32 = denormal_insert_instruction->getResultId();
}
// denormal_biased_f32_shift_amount = min(125 - (f32 >> 23), 24)
// Not allowing the shift to overflow as that's undefined in SPIR-V.
spv::Id denormal_biased_f32_shift_amount;
{
spv::Instruction* denormal_shift_amount_instruction =
new spv::Instruction(builder.getUniqueId(), type_uint, spv::OpExtInst);
denormal_shift_amount_instruction->addIdOperand(ext_inst_glsl_std_450);
denormal_shift_amount_instruction->addImmediateOperand(GLSLstd450UMin);
denormal_shift_amount_instruction->addIdOperand(builder.createBinOp(
spv::OpISub, type_uint, builder.makeUintConstant(125),
builder.createBinOp(spv::OpShiftRightLogical, type_uint, f32_scalar,
builder.makeUintConstant(23))));
denormal_shift_amount_instruction->addIdOperand(
builder.makeUintConstant(24));
builder.getBuildPoint()->addInstruction(
std::unique_ptr<spv::Instruction>(denormal_shift_amount_instruction));
denormal_biased_f32_shift_amount =
denormal_shift_amount_instruction->getResultId();
}
// denormal_biased_f32 =
// ((f32 & 0x7FFFFF) | 0x800000) >> min(125 - (f32 >> 23), 24)
denormal_biased_f32 = builder.createBinOp(spv::OpShiftRightLogical, type_uint,
denormal_biased_f32,
denormal_biased_f32_shift_amount);
// The normal 7e3 case.
// Bias the exponent.
// normal_biased_f32 = f32 - (124 << 23)
spv::Id normal_biased_f32 =
builder.createBinOp(spv::OpISub, type_uint, f32_scalar,
builder.makeUintConstant(UINT32_C(124) << 23));
// Select the needed conversion depending on whether the number is too small
// to be represented as normalized 7e3.
spv::Id biased_f32 = builder.createTriOp(
spv::OpSelect, type_uint,
builder.createBinOp(spv::OpULessThan, builder.makeBoolType(), f32_scalar,
builder.makeUintConstant(0x3E800000)),
denormal_biased_f32, normal_biased_f32);
// Build the 7e3 number rounding to the nearest even.
// ((biased_f32 + 0x7FFF + ((biased_f32 >> 16) & 1)) >> 16) & 0x3FF
return builder.createTriOp(
spv::OpBitFieldUExtract, type_uint,
builder.createBinOp(
spv::OpIAdd, type_uint,
builder.createBinOp(spv::OpIAdd, type_uint, biased_f32,
builder.makeUintConstant(0x7FFF)),
builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32,
builder.makeUintConstant(16),
builder.makeUintConstant(1))),
builder.makeUintConstant(16), builder.makeUintConstant(10));
}
spv::Id SpirvShaderTranslator::UnclampedFloat32To7e3(
spv::Builder& builder, spv::Id f32_scalar, spv::Id ext_inst_glsl_std_450) {
spv::Id type_float = builder.makeFloatType(32);
// Need the source as float for clamping.
{
spv::Id source_type = builder.getTypeId(f32_scalar);
assert_true(builder.isScalarType(source_type));
if (!builder.isFloatType(source_type)) {
f32_scalar =
builder.createUnaryOp(spv::OpBitcast, type_float, f32_scalar);
}
}
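// Clamp to [0, 31.875], the 7e3 range - the maximum representable value is
// (1 + 127/128) * 2^(7-3) = 31.875. NClamp (rather than FClamp) is used so
// that a NaN input is clamped to 0.0 instead of staying undefined.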
{
spv::Instruction* clamp_instruction =
new spv::Instruction(builder.getUniqueId(), type_float, spv::OpExtInst);
clamp_instruction->addIdOperand(ext_inst_glsl_std_450);
clamp_instruction->addImmediateOperand(GLSLstd450NClamp);
clamp_instruction->addIdOperand(f32_scalar);
clamp_instruction->addIdOperand(builder.makeFloatConstant(0.0f));
clamp_instruction->addIdOperand(builder.makeFloatConstant(31.875f));
builder.getBuildPoint()->addInstruction(
std::unique_ptr<spv::Instruction>(clamp_instruction));
f32_scalar = clamp_instruction->getResultId();
}
return PreClampedFloat32To7e3(builder, f32_scalar, ext_inst_glsl_std_450);
}
spv::Id SpirvShaderTranslator::Float7e3To32(spv::Builder& builder,
spv::Id f10_uint_scalar,
uint32_t f10_shift,
bool result_as_uint,
spv::Id ext_inst_glsl_std_450) {
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
assert_true(builder.isUintType(builder.getTypeId(f10_uint_scalar)));
assert_true(f10_shift <= (32 - 10));
spv::Id type_bool = builder.makeBoolType();
spv::Id type_int = builder.makeIntType(32);
spv::Id type_uint = builder.makeUintType(32);
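// The conversion implemented below, as an illustrative host-side sketch (not
// part of the translator; assumes f10_shift == 0 and a float result; uses
// C++20 std::bit_width for findMSB):
//   float Float7e3To32(uint32_t f10) {
//     uint32_t exponent = (f10 >> 7) & 0x7;
//     uint32_t mantissa = f10 & 0x7F;
//     if (!exponent) {
//       if (!mantissa) return 0.0f;
//       // Normalize the denormal - the leading 1 becomes the implicit bit.
//       uint32_t msb = uint32_t(std::bit_width(mantissa)) - 1;
//       exponent = msb - 6;  // Wraps for msb < 6; + 124 below re-wraps it.
//       mantissa = (mantissa << (7 - msb)) & 0x7F;
//     }
//     uint32_t f32 = ((exponent + 124) << 23) | (mantissa << (23 - 7));
//     float result;
//     std::memcpy(&result, &f32, sizeof(result));
//     return result;
//   }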
spv::Id f10_unbiased_exponent = builder.createTriOp(
spv::OpBitFieldUExtract, type_uint, f10_uint_scalar,
builder.makeUintConstant(f10_shift + 7), builder.makeUintConstant(3));
spv::Id f10_mantissa = builder.createTriOp(
spv::OpBitFieldUExtract, type_uint, f10_uint_scalar,
builder.makeUintConstant(f10_shift), builder.makeUintConstant(7));
// The denormal nonzero 7e3 case.
// denormal_mantissa_msb = findMSB(f10_mantissa)
spv::Id denormal_mantissa_msb;
{
spv::Instruction* denormal_mantissa_msb_instruction =
new spv::Instruction(builder.getUniqueId(), type_int, spv::OpExtInst);
denormal_mantissa_msb_instruction->addIdOperand(ext_inst_glsl_std_450);
denormal_mantissa_msb_instruction->addImmediateOperand(GLSLstd450FindUMsb);
denormal_mantissa_msb_instruction->addIdOperand(f10_mantissa);
builder.getBuildPoint()->addInstruction(
std::unique_ptr<spv::Instruction>(denormal_mantissa_msb_instruction));
denormal_mantissa_msb = denormal_mantissa_msb_instruction->getResultId();
}
denormal_mantissa_msb =
builder.createUnaryOp(spv::OpBitcast, type_uint, denormal_mantissa_msb);
// denormal_f32_unbiased_exponent = 1 - (7 - findMSB(f10_mantissa))
// Or:
// denormal_f32_unbiased_exponent = findMSB(f10_mantissa) - 6
spv::Id denormal_f32_unbiased_exponent =
builder.createBinOp(spv::OpISub, type_uint, denormal_mantissa_msb,
builder.makeUintConstant(6));
// Normalize the mantissa.
// denormal_f32_mantissa = f10_mantissa << (7 - findMSB(f10_mantissa))
spv::Id denormal_f32_mantissa = builder.createBinOp(
spv::OpShiftLeftLogical, type_uint, f10_mantissa,
builder.createBinOp(spv::OpISub, type_uint, builder.makeUintConstant(7),
denormal_mantissa_msb));
// If the 7e3 number is zero, make sure the float32 number is zero too.
spv::Id f10_mantissa_is_nonzero = builder.createBinOp(
spv::OpINotEqual, type_bool, f10_mantissa, builder.makeUintConstant(0));
// Set the unbiased exponent to -124 for zero - 124 will be added later,
// resulting in zero float32.
denormal_f32_unbiased_exponent = builder.createTriOp(
spv::OpSelect, type_uint, f10_mantissa_is_nonzero,
denormal_f32_unbiased_exponent, builder.makeUintConstant(uint32_t(-124)));
denormal_f32_mantissa =
builder.createTriOp(spv::OpSelect, type_uint, f10_mantissa_is_nonzero,
denormal_f32_mantissa, builder.makeUintConstant(0));
// Select the needed conversion depending on whether the number is normal.
spv::Id f10_is_normal =
builder.createBinOp(spv::OpINotEqual, type_bool, f10_unbiased_exponent,
builder.makeUintConstant(0));
spv::Id f32_unbiased_exponent = builder.createTriOp(
spv::OpSelect, type_uint, f10_is_normal, f10_unbiased_exponent,
denormal_f32_unbiased_exponent);
spv::Id f32_mantissa =
builder.createTriOp(spv::OpSelect, type_uint, f10_is_normal, f10_mantissa,
denormal_f32_mantissa);
// Bias the exponent and build the float32 number.
spv::Id f32_shifted;
{
spv::Instruction* f32_insert_instruction = new spv::Instruction(
builder.getUniqueId(), type_uint, spv::OpBitFieldInsert);
f32_insert_instruction->addIdOperand(f32_mantissa);
f32_insert_instruction->addIdOperand(
builder.createBinOp(spv::OpIAdd, type_uint, f32_unbiased_exponent,
builder.makeUintConstant(124)));
f32_insert_instruction->addIdOperand(builder.makeUintConstant(7));
f32_insert_instruction->addIdOperand(builder.makeUintConstant(8));
builder.getBuildPoint()->addInstruction(
std::unique_ptr<spv::Instruction>(f32_insert_instruction));
f32_shifted = f32_insert_instruction->getResultId();
}
spv::Id f32 =
builder.createBinOp(spv::OpShiftLeftLogical, type_uint, f32_shifted,
builder.makeUintConstant(23 - 7));
if (!result_as_uint) {
f32 = builder.createUnaryOp(spv::OpBitcast, builder.makeFloatType(32), f32);
}
return f32;
}
spv::Id SpirvShaderTranslator::PreClampedDepthTo20e4(
spv::Builder& builder, spv::Id f32_scalar, bool round_to_nearest_even,
bool remap_from_0_to_0_5, spv::Id ext_inst_glsl_std_450) {
// CFloat24 from d3dref9.dll +
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
// Assuming the value is already clamped to [0, 2) (in all places, the depth
// is written with saturation).
uint32_t remap_bias = uint32_t(remap_from_0_to_0_5);
spv::Id type_uint = builder.makeUintType(32);
// Need the source as uint for bit operations.
{
spv::Id source_type = builder.getTypeId(f32_scalar);
assert_true(builder.isScalarType(source_type));
if (!builder.isUintType(source_type)) {
f32_scalar = builder.createUnaryOp(spv::OpBitcast, type_uint, f32_scalar);
}
}
// The denormal 20e4 case.
// denormal_biased_f32 = (f32 & 0x7FFFFF) | 0x800000
spv::Id denormal_biased_f32;
{
spv::Instruction* denormal_insert_instruction = new spv::Instruction(
builder.getUniqueId(), type_uint, spv::OpBitFieldInsert);
denormal_insert_instruction->addIdOperand(f32_scalar);
denormal_insert_instruction->addIdOperand(builder.makeUintConstant(1));
denormal_insert_instruction->addIdOperand(builder.makeUintConstant(23));
denormal_insert_instruction->addIdOperand(builder.makeUintConstant(9));
builder.getBuildPoint()->addInstruction(
std::unique_ptr<spv::Instruction>(denormal_insert_instruction));
denormal_biased_f32 = denormal_insert_instruction->getResultId();
}
// denormal_biased_f32_shift_amount = min(113 - (f32 >> 23), 24)
// Not allowing the shift to overflow as that's undefined in SPIR-V.
spv::Id denormal_biased_f32_shift_amount;
{
spv::Instruction* denormal_shift_amount_instruction =
new spv::Instruction(builder.getUniqueId(), type_uint, spv::OpExtInst);
denormal_shift_amount_instruction->addIdOperand(ext_inst_glsl_std_450);
denormal_shift_amount_instruction->addImmediateOperand(GLSLstd450UMin);
denormal_shift_amount_instruction->addIdOperand(builder.createBinOp(
spv::OpISub, type_uint, builder.makeUintConstant(113 - remap_bias),
builder.createBinOp(spv::OpShiftRightLogical, type_uint, f32_scalar,
builder.makeUintConstant(23))));
denormal_shift_amount_instruction->addIdOperand(
builder.makeUintConstant(24));
builder.getBuildPoint()->addInstruction(
std::unique_ptr<spv::Instruction>(denormal_shift_amount_instruction));
denormal_biased_f32_shift_amount =
denormal_shift_amount_instruction->getResultId();
}
// denormal_biased_f32 =
// ((f32 & 0x7FFFFF) | 0x800000) >> min(113 - (f32 >> 23), 24)
denormal_biased_f32 = builder.createBinOp(spv::OpShiftRightLogical, type_uint,
denormal_biased_f32,
denormal_biased_f32_shift_amount);
// The normal 20e4 case.
// Bias the exponent.
// normal_biased_f32 = f32 - (112 << 23)
spv::Id normal_biased_f32 = builder.createBinOp(
spv::OpISub, type_uint, f32_scalar,
builder.makeUintConstant((UINT32_C(112) - remap_bias) << 23));
// Select the needed conversion depending on whether the number is too small
// to be represented as normalized 20e4.
spv::Id biased_f32 = builder.createTriOp(
spv::OpSelect, type_uint,
builder.createBinOp(
spv::OpULessThan, builder.makeBoolType(), f32_scalar,
builder.makeUintConstant(0x38800000 - (remap_bias << 23))),
denormal_biased_f32, normal_biased_f32);
// Build the 20e4 number rounding to the nearest even or towards zero.
if (round_to_nearest_even) {
// biased_f32 += 3 + ((biased_f32 >> 3) & 1)
biased_f32 = builder.createBinOp(
spv::OpIAdd, type_uint,
builder.createBinOp(spv::OpIAdd, type_uint, biased_f32,
builder.makeUintConstant(3)),
builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32,
builder.makeUintConstant(3),
builder.makeUintConstant(1)));
}
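// Extract the 24-bit result from bits 3..26. For example, depth 1.0f
// (0x3F800000, no remap) takes the normal path: biased_f32 = 15 << 23, the
// round-to-nearest-even addend is 3, and the extracted value is 0xF00000 -
// exponent 15 (bias 15), mantissa 0, i.e. 1.0 in 20e4.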
return builder.createTriOp(spv::OpBitFieldUExtract, type_uint, biased_f32,
builder.makeUintConstant(3),
builder.makeUintConstant(24));
}
spv::Id SpirvShaderTranslator::Depth20e4To32(spv::Builder& builder,
spv::Id f24_uint_scalar,
uint32_t f24_shift,
bool remap_to_0_to_0_5,
bool result_as_uint,
spv::Id ext_inst_glsl_std_450) {
// CFloat24 from d3dref9.dll +
// https://github.com/Microsoft/DirectXTex/blob/master/DirectXTex/DirectXTexConvert.cpp
assert_true(builder.isUintType(builder.getTypeId(f24_uint_scalar)));
assert_true(f24_shift <= (32 - 24));
uint32_t remap_bias = uint32_t(remap_to_0_to_0_5);
spv::Id type_bool = builder.makeBoolType();
spv::Id type_int = builder.makeIntType(32);
spv::Id type_uint = builder.makeUintType(32);
spv::Id f24_unbiased_exponent = builder.createTriOp(
spv::OpBitFieldUExtract, type_uint, f24_uint_scalar,
builder.makeUintConstant(f24_shift + 20), builder.makeUintConstant(4));
spv::Id f24_mantissa = builder.createTriOp(
spv::OpBitFieldUExtract, type_uint, f24_uint_scalar,
builder.makeUintConstant(f24_shift), builder.makeUintConstant(20));
// The denormal nonzero 20e4 case.
// denormal_mantissa_msb = findMSB(f24_mantissa)
spv::Id denormal_mantissa_msb;
{
spv::Instruction* denormal_mantissa_msb_instruction =
new spv::Instruction(builder.getUniqueId(), type_int, spv::OpExtInst);
denormal_mantissa_msb_instruction->addIdOperand(ext_inst_glsl_std_450);
denormal_mantissa_msb_instruction->addImmediateOperand(GLSLstd450FindUMsb);
denormal_mantissa_msb_instruction->addIdOperand(f24_mantissa);
builder.getBuildPoint()->addInstruction(
std::unique_ptr<spv::Instruction>(denormal_mantissa_msb_instruction));
denormal_mantissa_msb = denormal_mantissa_msb_instruction->getResultId();
}
denormal_mantissa_msb =
builder.createUnaryOp(spv::OpBitcast, type_uint, denormal_mantissa_msb);
// denormal_f32_unbiased_exponent = 1 - (20 - findMSB(f24_mantissa))
// Or:
// denormal_f32_unbiased_exponent = findMSB(f24_mantissa) - 19
spv::Id denormal_f32_unbiased_exponent =
builder.createBinOp(spv::OpISub, type_uint, denormal_mantissa_msb,
builder.makeUintConstant(19));
// Normalize the mantissa.
// denormal_f32_mantissa = f24_mantissa << (20 - findMSB(f24_mantissa))
spv::Id denormal_f32_mantissa = builder.createBinOp(
spv::OpShiftLeftLogical, type_uint, f24_mantissa,
builder.createBinOp(spv::OpISub, type_uint, builder.makeUintConstant(20),
denormal_mantissa_msb));
// If the 20e4 number is zero, make sure the float32 number is zero too.
spv::Id f24_mantissa_is_nonzero = builder.createBinOp(
spv::OpINotEqual, type_bool, f24_mantissa, builder.makeUintConstant(0));
// Set the unbiased exponent to -112 for zero - 112 will be added later,
// resulting in zero float32.
denormal_f32_unbiased_exponent = builder.createTriOp(
spv::OpSelect, type_uint, f24_mantissa_is_nonzero,
denormal_f32_unbiased_exponent,
builder.makeUintConstant(uint32_t(-int32_t(112 - remap_bias))));
denormal_f32_mantissa =
builder.createTriOp(spv::OpSelect, type_uint, f24_mantissa_is_nonzero,
denormal_f32_mantissa, builder.makeUintConstant(0));
// Select the needed conversion depending on whether the number is normal.
spv::Id f24_is_normal =
builder.createBinOp(spv::OpINotEqual, type_bool, f24_unbiased_exponent,
builder.makeUintConstant(0));
spv::Id f32_unbiased_exponent = builder.createTriOp(
spv::OpSelect, type_uint, f24_is_normal, f24_unbiased_exponent,
denormal_f32_unbiased_exponent);
spv::Id f32_mantissa =
builder.createTriOp(spv::OpSelect, type_uint, f24_is_normal, f24_mantissa,
denormal_f32_mantissa);
// Bias the exponent and build the float32 number.
spv::Id f32_shifted;
{
spv::Instruction* f32_insert_instruction = new spv::Instruction(
builder.getUniqueId(), type_uint, spv::OpBitFieldInsert);
f32_insert_instruction->addIdOperand(f32_mantissa);
f32_insert_instruction->addIdOperand(
builder.createBinOp(spv::OpIAdd, type_uint, f32_unbiased_exponent,
builder.makeUintConstant(112 - remap_bias)));
f32_insert_instruction->addIdOperand(builder.makeUintConstant(20));
f32_insert_instruction->addIdOperand(builder.makeUintConstant(8));
builder.getBuildPoint()->addInstruction(
std::unique_ptr<spv::Instruction>(f32_insert_instruction));
f32_shifted = f32_insert_instruction->getResultId();
}
spv::Id f32 =
builder.createBinOp(spv::OpShiftLeftLogical, type_uint, f32_shifted,
builder.makeUintConstant(23 - 20));
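// For example, 0xF00000 (1.0 in 20e4, no remap) is normal: the inserted
// exponent is 15 + 112 = 127, the mantissa is 0, giving 0x3F800000 = 1.0f.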
if (!result_as_uint) {
f32 = builder.createUnaryOp(spv::OpBitcast, builder.makeFloatType(32), f32);
}
return f32;
}
void SpirvShaderTranslator::CompleteFragmentShaderInMain() {
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeIntConstant(kSystemConstantFlags));
spv::Id system_constant_flags = builder_->createLoad(
builder_->createAccessChain(spv::StorageClassUniform,
uniform_system_constants_, id_vector_temp_),
spv::NoPrecision);
if (current_shader().writes_color_target(0) &&
!IsExecutionModeEarlyFragmentTests()) {
// Alpha test.
// TODO(Triang3l): Check how alpha test works with NaN on Direct3D 9.
// Extract the comparison function (less, equal, greater bits).
spv::Id alpha_test_function = builder_->createTriOp(
spv::OpBitFieldUExtract, type_uint_, main_system_constant_flags_,
builder_->makeUintConstant(kSysFlag_AlphaPassIfLess_Shift),
builder_->makeUintConstant(3));
// Check if the comparison function is not "always" - "always" should likely
// pass even for NaN, unlike the "less, equal or greater" combination.
spv::Id alpha_test_function_is_non_always = builder_->createBinOp(
spv::OpINotEqual, type_bool_, alpha_test_function,
builder_->makeUintConstant(uint32_t(xenos::CompareFunction::kAlways)));
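// The extracted bits coincide with xenos::CompareFunction: bit 0 = pass if
// less, bit 1 = pass if equal, bit 2 = pass if greater; so kAlways is 0b111
// and kNotEqual is 0b101 (less | greater).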
spv::Block& block_alpha_test = builder_->makeNewBlock();
spv::Block& block_alpha_test_merge = builder_->makeNewBlock();
SpirvCreateSelectionMerge(block_alpha_test_merge.getId(),
spv::SelectionControlDontFlattenMask);
builder_->createConditionalBranch(alpha_test_function_is_non_always,
&block_alpha_test,
&block_alpha_test_merge);
builder_->setBuildPoint(&block_alpha_test);
{
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeIntConstant(3));
spv::Id alpha_test_alpha =
builder_->createLoad(builder_->createAccessChain(
spv::StorageClassOutput,
output_fragment_data_[0], id_vector_temp_),
spv::NoPrecision);
id_vector_temp_.clear();
id_vector_temp_.push_back(
builder_->makeIntConstant(kSystemConstantAlphaTestReference));
spv::Id alpha_test_reference =
builder_->createLoad(builder_->createAccessChain(
spv::StorageClassUniform,
uniform_system_constants_, id_vector_temp_),
spv::NoPrecision);
// The comparison function is not "always" - perform the alpha test.
// Handle "not equal" specially (specifically as "not equal" so it's true
// for NaN, not "less or greater" which is false for NaN).
spv::Id alpha_test_function_is_not_equal = builder_->createBinOp(
spv::OpIEqual, type_bool_, alpha_test_function,
builder_->makeUintConstant(
uint32_t(xenos::CompareFunction::kNotEqual)));
spv::Block& block_alpha_test_not_equal = builder_->makeNewBlock();
spv::Block& block_alpha_test_non_not_equal = builder_->makeNewBlock();
spv::Block& block_alpha_test_not_equal_merge = builder_->makeNewBlock();
SpirvCreateSelectionMerge(block_alpha_test_not_equal_merge.getId(),
spv::SelectionControlDontFlattenMask);
builder_->createConditionalBranch(alpha_test_function_is_not_equal,
&block_alpha_test_not_equal,
&block_alpha_test_non_not_equal);
spv::Id alpha_test_result_not_equal, alpha_test_result_non_not_equal;
builder_->setBuildPoint(&block_alpha_test_not_equal);
{
// "Not equal" function.
alpha_test_result_not_equal =
builder_->createBinOp(spv::OpFUnordNotEqual, type_bool_,
alpha_test_alpha, alpha_test_reference);
builder_->createBranch(&block_alpha_test_not_equal_merge);
}
builder_->setBuildPoint(&block_alpha_test_non_not_equal);
{
// Function other than "not equal".
static const spv::Op kAlphaTestOps[] = {
spv::OpFOrdLessThan, spv::OpFOrdEqual, spv::OpFOrdGreaterThan};
for (uint32_t i = 0; i < 3; ++i) {
spv::Id alpha_test_comparison_result = builder_->createBinOp(
spv::OpLogicalAnd, type_bool_,
builder_->createBinOp(kAlphaTestOps[i], type_bool_,
alpha_test_alpha, alpha_test_reference),
builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(
spv::OpBitwiseAnd, type_uint_, alpha_test_function,
builder_->makeUintConstant(UINT32_C(1) << i)),
const_uint_0_));
if (i) {
alpha_test_result_non_not_equal = builder_->createBinOp(
spv::OpLogicalOr, type_bool_, alpha_test_result_non_not_equal,
alpha_test_comparison_result);
} else {
alpha_test_result_non_not_equal = alpha_test_comparison_result;
}
}
builder_->createBranch(&block_alpha_test_not_equal_merge);
}
builder_->setBuildPoint(&block_alpha_test_not_equal_merge);
spv::Id alpha_test_result;
{
std::unique_ptr<spv::Instruction> alpha_test_result_phi_op =
std::make_unique<spv::Instruction>(builder_->getUniqueId(),
type_bool_, spv::OpPhi);
alpha_test_result_phi_op->addIdOperand(alpha_test_result_not_equal);
alpha_test_result_phi_op->addIdOperand(
block_alpha_test_not_equal.getId());
alpha_test_result_phi_op->addIdOperand(alpha_test_result_non_not_equal);
alpha_test_result_phi_op->addIdOperand(
block_alpha_test_non_not_equal.getId());
alpha_test_result = alpha_test_result_phi_op->getResultId();
builder_->getBuildPoint()->addInstruction(
std::move(alpha_test_result_phi_op));
}
// Discard the pixel if the alpha test has failed. A merge block is created
// even though it will contain just one OpBranch, since SPIR-V requires
// structured control flow in shaders.
spv::Block& block_alpha_test_kill = builder_->makeNewBlock();
spv::Block& block_alpha_test_kill_merge = builder_->makeNewBlock();
SpirvCreateSelectionMerge(block_alpha_test_kill_merge.getId(),
spv::SelectionControlDontFlattenMask);
builder_->createConditionalBranch(alpha_test_result,
&block_alpha_test_kill_merge,
&block_alpha_test_kill);
builder_->setBuildPoint(&block_alpha_test_kill);
builder_->createNoResultOp(spv::OpKill);
// OpKill terminates the block.
builder_->setBuildPoint(&block_alpha_test_kill_merge);
builder_->createBranch(&block_alpha_test_merge);
}
builder_->setBuildPoint(&block_alpha_test_merge);
}
uint32_t color_targets_remaining = current_shader().writes_color_targets();
uint32_t color_target_index;
while (xe::bit_scan_forward(color_targets_remaining, &color_target_index)) {
color_targets_remaining &= ~(UINT32_C(1) << color_target_index);
spv::Id color_variable = output_fragment_data_[color_target_index];
spv::Id color = builder_->createLoad(color_variable, spv::NoPrecision);
// Apply the exponent bias after the alpha test and alpha to coverage
// because they need the unbiased alpha from the shader.
id_vector_temp_.clear();
id_vector_temp_.reserve(2);
id_vector_temp_.push_back(
builder_->makeIntConstant(kSystemConstantColorExpBias));
id_vector_temp_.push_back(
builder_->makeIntConstant(int32_t(color_target_index)));
color = builder_->createBinOp(
spv::OpVectorTimesScalar, type_float4_, color,
builder_->createLoad(builder_->createAccessChain(
spv::StorageClassUniform,
uniform_system_constants_, id_vector_temp_),
spv::NoPrecision));
builder_->addDecoration(color, spv::DecorationNoContraction);
// Convert to gamma space - this is incorrect, since it must be done after
// blending on the Xbox 360, but this is just one of many blending issues in
// the host render target path.
// TODO(Triang3l): Gamma as sRGB check.
spv::Id color_rgb;
{
std::unique_ptr<spv::Instruction> color_rgb_shuffle_op =
std::make_unique<spv::Instruction>(
builder_->getUniqueId(), type_float3_, spv::OpVectorShuffle);
color_rgb_shuffle_op->addIdOperand(color);
color_rgb_shuffle_op->addIdOperand(color);
color_rgb_shuffle_op->addImmediateOperand(0);
color_rgb_shuffle_op->addImmediateOperand(1);
color_rgb_shuffle_op->addImmediateOperand(2);
color_rgb = color_rgb_shuffle_op->getResultId();
builder_->getBuildPoint()->addInstruction(
std::move(color_rgb_shuffle_op));
}
spv::Id is_gamma = builder_->createBinOp(
spv::OpINotEqual, type_bool_,
builder_->createBinOp(
spv::OpBitwiseAnd, type_uint_, main_system_constant_flags_,
builder_->makeUintConstant(kSysFlag_ConvertColor0ToGamma
<< color_target_index)),
const_uint_0_);
spv::Block& block_gamma_head = *builder_->getBuildPoint();
spv::Block& block_gamma = builder_->makeNewBlock();
spv::Block& block_gamma_merge = builder_->makeNewBlock();
SpirvCreateSelectionMerge(block_gamma_merge.getId());
builder_->createConditionalBranch(is_gamma, &block_gamma,
&block_gamma_merge);
builder_->setBuildPoint(&block_gamma);
spv::Id color_rgb_gamma = LinearToPWLGamma(color_rgb, false);
builder_->createBranch(&block_gamma_merge);
builder_->setBuildPoint(&block_gamma_merge);
{
std::unique_ptr<spv::Instruction> gamma_phi_op =
std::make_unique<spv::Instruction>(builder_->getUniqueId(),
type_float3_, spv::OpPhi);
gamma_phi_op->addIdOperand(color_rgb_gamma);
gamma_phi_op->addIdOperand(block_gamma.getId());
gamma_phi_op->addIdOperand(color_rgb);
gamma_phi_op->addIdOperand(block_gamma_head.getId());
color_rgb = gamma_phi_op->getResultId();
builder_->getBuildPoint()->addInstruction(std::move(gamma_phi_op));
}
{
std::unique_ptr<spv::Instruction> color_rgba_shuffle_op =
std::make_unique<spv::Instruction>(
builder_->getUniqueId(), type_float4_, spv::OpVectorShuffle);
color_rgba_shuffle_op->addIdOperand(color_rgb);
color_rgba_shuffle_op->addIdOperand(color);
color_rgba_shuffle_op->addImmediateOperand(0);
color_rgba_shuffle_op->addImmediateOperand(1);
color_rgba_shuffle_op->addImmediateOperand(2);
color_rgba_shuffle_op->addImmediateOperand(3 + 3);
color = color_rgba_shuffle_op->getResultId();
builder_->getBuildPoint()->addInstruction(
std::move(color_rgba_shuffle_op));
}
builder_->createStore(color, color_variable);
}
}
} // namespace gpu
} // namespace xe

View File

@@ -1,850 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/buffer_cache.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/gpu_flags.h"
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
#include "xenia/ui/vulkan/vulkan_util.h"
using namespace xe::gpu::xenos;
namespace xe {
namespace gpu {
namespace vulkan {
#if XE_ARCH_AMD64
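// Byte-swaps each 16-bit element and replaces any element equal to cmp_value
// (the guest primitive reset index) with 0xFFFF, eight elements per iteration
// via SSSE3 pshufb plus pcmpeqw/por.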
void copy_cmp_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
uint16_t cmp_value, size_t count) {
auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
auto src = reinterpret_cast<const uint16_t*>(src_ptr);
__m128i shufmask =
_mm_set_epi8(0x0E, 0x0F, 0x0C, 0x0D, 0x0A, 0x0B, 0x08, 0x09, 0x06, 0x07,
0x04, 0x05, 0x02, 0x03, 0x00, 0x01);
__m128i cmpval = _mm_set1_epi16(cmp_value);
size_t i;
for (i = 0; i + 8 <= count; i += 8) {
__m128i input = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&src[i]));
__m128i output = _mm_shuffle_epi8(input, shufmask);
__m128i mask = _mm_cmpeq_epi16(output, cmpval);
output = _mm_or_si128(output, mask);
_mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
}
for (; i < count; ++i) { // handle residual elements
uint16_t value = byte_swap(src[i]);
dest[i] = value == cmp_value ? 0xFFFF : value;
}
}
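// The 32-bit variant: byte-swaps each element and replaces any element equal
// to cmp_value with 0xFFFFFFFF, four elements per iteration.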
void copy_cmp_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
uint32_t cmp_value, size_t count) {
auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
auto src = reinterpret_cast<const uint32_t*>(src_ptr);
__m128i shufmask =
_mm_set_epi8(0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B, 0x04, 0x05,
0x06, 0x07, 0x00, 0x01, 0x02, 0x03);
__m128i cmpval = _mm_set1_epi32(cmp_value);
size_t i;
for (i = 0; i + 4 <= count; i += 4) {
__m128i input = _mm_loadu_si128(reinterpret_cast<const __m128i*>(&src[i]));
__m128i output = _mm_shuffle_epi8(input, shufmask);
__m128i mask = _mm_cmpeq_epi32(output, cmpval);
output = _mm_or_si128(output, mask);
_mm_storeu_si128(reinterpret_cast<__m128i*>(&dest[i]), output);
}
for (; i < count; ++i) { // handle residual elements
uint32_t value = byte_swap(src[i]);
dest[i] = value == cmp_value ? 0xFFFFFFFF : value;
}
}
#else
void copy_cmp_swap_16_unaligned(void* dest_ptr, const void* src_ptr,
uint16_t cmp_value, size_t count) {
auto dest = reinterpret_cast<uint16_t*>(dest_ptr);
auto src = reinterpret_cast<const uint16_t*>(src_ptr);
for (size_t i = 0; i < count; ++i) {
uint16_t value = byte_swap(src[i]);
dest[i] = value == cmp_value ? 0xFFFF : value;
}
}
void copy_cmp_swap_32_unaligned(void* dest_ptr, const void* src_ptr,
uint32_t cmp_value, size_t count) {
auto dest = reinterpret_cast<uint32_t*>(dest_ptr);
auto src = reinterpret_cast<const uint32_t*>(src_ptr);
for (size_t i = 0; i < count; ++i) {
uint32_t value = byte_swap(src[i]);
dest[i] = value == cmp_value ? 0xFFFFFFFF : value;
}
}
#endif
using xe::ui::vulkan::util::CheckResult;
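// 512 float4 registers (8192 bytes) + 8 bool dwords (32 bytes) + 32 loop
// dwords (128 bytes) = 8352 bytes.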
constexpr VkDeviceSize kConstantRegisterUniformRange =
512 * 4 * 4 + 8 * 4 + 32 * 4;
BufferCache::BufferCache(RegisterFile* register_file, Memory* memory,
const ui::vulkan::VulkanProvider& provider,
size_t capacity)
: register_file_(register_file), memory_(memory), provider_(provider) {
transient_buffer_ = std::make_unique<ui::vulkan::CircularBuffer>(
provider_,
VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
capacity, 256);
}
BufferCache::~BufferCache() { Shutdown(); }
VkResult BufferCache::Initialize() {
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
VkResult status = VK_SUCCESS;
VkMemoryRequirements pool_reqs;
transient_buffer_->GetBufferMemoryRequirements(&pool_reqs);
VkMemoryAllocateInfo pool_allocate_info;
pool_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
pool_allocate_info.pNext = nullptr;
pool_allocate_info.allocationSize = pool_reqs.size;
pool_allocate_info.memoryTypeIndex = ui::vulkan::util::ChooseHostMemoryType(
provider_, pool_reqs.memoryTypeBits, false);
if (pool_allocate_info.memoryTypeIndex == UINT32_MAX) {
return VK_ERROR_INITIALIZATION_FAILED;
}
status = dfn.vkAllocateMemory(device, &pool_allocate_info, nullptr,
&gpu_memory_pool_);
if (status != VK_SUCCESS) {
return status;
}
status = transient_buffer_->Initialize(gpu_memory_pool_, 0);
if (status != VK_SUCCESS) {
return status;
}
// Create a memory allocator for textures.
VmaVulkanFunctions vulkan_funcs = {};
ui::vulkan::FillVMAVulkanFunctions(&vulkan_funcs, provider_);
VmaAllocatorCreateInfo alloc_info = {};
alloc_info.physicalDevice = provider_.physical_device();
alloc_info.device = device;
alloc_info.pVulkanFunctions = &vulkan_funcs;
alloc_info.instance = provider_.instance();
status = vmaCreateAllocator(&alloc_info, &mem_allocator_);
if (status != VK_SUCCESS) {
return status;
}
status = CreateConstantDescriptorSet();
if (status != VK_SUCCESS) {
return status;
}
status = CreateVertexDescriptorPool();
if (status != VK_SUCCESS) {
return status;
}
return VK_SUCCESS;
}
VkResult BufferCache::CreateVertexDescriptorPool() {
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
VkResult status;
std::vector<VkDescriptorPoolSize> pool_sizes;
pool_sizes.push_back({
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
32 * 16384,
});
vertex_descriptor_pool_ = std::make_unique<ui::vulkan::DescriptorPool>(
provider_, 32 * 16384, pool_sizes);
// 32 storage buffers available to vertex shader.
// TODO(DrChat): In the future, this could hold memexport staging data.
VkDescriptorSetLayoutBinding binding = {
0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
32, VK_SHADER_STAGE_VERTEX_BIT,
nullptr,
};
VkDescriptorSetLayoutCreateInfo layout_info = {
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
nullptr,
0,
1,
&binding,
};
status = dfn.vkCreateDescriptorSetLayout(device, &layout_info, nullptr,
&vertex_descriptor_set_layout_);
if (status != VK_SUCCESS) {
return status;
}
return VK_SUCCESS;
}
void BufferCache::FreeVertexDescriptorPool() {
vertex_descriptor_pool_.reset();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout,
device, vertex_descriptor_set_layout_);
}
VkResult BufferCache::CreateConstantDescriptorSet() {
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
VkResult status = VK_SUCCESS;
// Descriptor pool used for all of our cached descriptors.
// In the steady state we don't allocate anything, so these are all manually
// managed.
VkDescriptorPoolCreateInfo transient_descriptor_pool_info;
transient_descriptor_pool_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
transient_descriptor_pool_info.pNext = nullptr;
transient_descriptor_pool_info.flags =
VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
transient_descriptor_pool_info.maxSets = 1;
VkDescriptorPoolSize pool_sizes[1];
pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
pool_sizes[0].descriptorCount = 2;
transient_descriptor_pool_info.poolSizeCount = 1;
transient_descriptor_pool_info.pPoolSizes = pool_sizes;
status = dfn.vkCreateDescriptorPool(device, &transient_descriptor_pool_info,
nullptr, &constant_descriptor_pool_);
if (status != VK_SUCCESS) {
return status;
}
// Create the descriptor set layout used for our uniform buffer.
// As it is a static binding that uses dynamic offsets during draws we can
// create this once and reuse it forever.
VkDescriptorSetLayoutBinding bindings[2] = {};
// Vertex constants
bindings[0].binding = 0;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
bindings[0].descriptorCount = 1;
bindings[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[0].pImmutableSamplers = nullptr;
// Fragment constants
bindings[1].binding = 1;
bindings[1].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
bindings[1].descriptorCount = 1;
bindings[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[1].pImmutableSamplers = nullptr;
VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {};
descriptor_set_layout_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
descriptor_set_layout_info.pNext = nullptr;
descriptor_set_layout_info.flags = 0;
descriptor_set_layout_info.bindingCount =
static_cast<uint32_t>(xe::countof(bindings));
descriptor_set_layout_info.pBindings = bindings;
status = dfn.vkCreateDescriptorSetLayout(device, &descriptor_set_layout_info,
nullptr,
&constant_descriptor_set_layout_);
if (status != VK_SUCCESS) {
return status;
}
// Create the descriptor we'll use for the uniform buffer.
// This is what we hand out to everyone (who then also needs to use our
// offsets).
VkDescriptorSetAllocateInfo set_alloc_info;
set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
set_alloc_info.pNext = nullptr;
set_alloc_info.descriptorPool = constant_descriptor_pool_;
set_alloc_info.descriptorSetCount = 1;
set_alloc_info.pSetLayouts = &constant_descriptor_set_layout_;
status = dfn.vkAllocateDescriptorSets(device, &set_alloc_info,
&constant_descriptor_set_);
if (status != VK_SUCCESS) {
return status;
}
// Initialize descriptor set with our buffers.
VkDescriptorBufferInfo buffer_info;
buffer_info.buffer = transient_buffer_->gpu_buffer();
buffer_info.offset = 0;
buffer_info.range = kConstantRegisterUniformRange;
VkWriteDescriptorSet descriptor_writes[2];
auto& vertex_uniform_binding_write = descriptor_writes[0];
vertex_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
vertex_uniform_binding_write.pNext = nullptr;
vertex_uniform_binding_write.dstSet = constant_descriptor_set_;
vertex_uniform_binding_write.dstBinding = 0;
vertex_uniform_binding_write.dstArrayElement = 0;
vertex_uniform_binding_write.descriptorCount = 1;
vertex_uniform_binding_write.descriptorType =
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
vertex_uniform_binding_write.pBufferInfo = &buffer_info;
auto& fragment_uniform_binding_write = descriptor_writes[1];
fragment_uniform_binding_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
fragment_uniform_binding_write.pNext = nullptr;
fragment_uniform_binding_write.dstSet = constant_descriptor_set_;
fragment_uniform_binding_write.dstBinding = 1;
fragment_uniform_binding_write.dstArrayElement = 0;
fragment_uniform_binding_write.descriptorCount = 1;
fragment_uniform_binding_write.descriptorType =
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
fragment_uniform_binding_write.pBufferInfo = &buffer_info;
dfn.vkUpdateDescriptorSets(device, 2, descriptor_writes, 0, nullptr);
return VK_SUCCESS;
}
void BufferCache::FreeConstantDescriptorSet() {
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
if (constant_descriptor_set_) {
dfn.vkFreeDescriptorSets(device, constant_descriptor_pool_, 1,
&constant_descriptor_set_);
constant_descriptor_set_ = nullptr;
}
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout,
device,
constant_descriptor_set_layout_);
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device,
constant_descriptor_pool_);
}
void BufferCache::Shutdown() {
if (mem_allocator_) {
vmaDestroyAllocator(mem_allocator_);
mem_allocator_ = nullptr;
}
FreeConstantDescriptorSet();
FreeVertexDescriptorPool();
transient_buffer_->Shutdown();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
gpu_memory_pool_);
}
std::pair<VkDeviceSize, VkDeviceSize> BufferCache::UploadConstantRegisters(
VkCommandBuffer command_buffer,
const Shader::ConstantRegisterMap& vertex_constant_register_map,
const Shader::ConstantRegisterMap& pixel_constant_register_map,
VkFence fence) {
// Fat struct, including all registers:
// struct {
// vec4 float[512];
// uint bool[8];
// uint loop[32];
// };
auto offset = AllocateTransientData(kConstantRegisterUniformRange, fence);
if (offset == VK_WHOLE_SIZE) {
// OOM.
return {VK_WHOLE_SIZE, VK_WHOLE_SIZE};
}
// Copy over all the registers.
const auto& values = register_file_->values;
uint8_t* dest_ptr = transient_buffer_->host_base() + offset;
std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_000_X].f32,
(512 * 4 * 4));
dest_ptr += 512 * 4 * 4;
std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031].u32,
8 * 4);
dest_ptr += 8 * 4;
std::memcpy(dest_ptr, &values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00].u32,
32 * 4);
dest_ptr += 32 * 4;
transient_buffer_->Flush(offset, kConstantRegisterUniformRange);
// Append a barrier to the command buffer.
VkBufferMemoryBarrier barrier = {
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_HOST_WRITE_BIT,
VK_ACCESS_UNIFORM_READ_BIT | VK_ACCESS_SHADER_READ_BIT,
VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED,
transient_buffer_->gpu_buffer(),
offset,
kConstantRegisterUniformRange,
};
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 1,
&barrier, 0, nullptr);
return {offset, offset};
// Packed upload code.
// This is not currently supported by the shaders, but would be awesome.
// We should be able to use this for any shader that does not do dynamic
// constant indexing.
#if 0
// Allocate space in the buffer for our data.
auto offset =
AllocateTransientData(constant_register_map.packed_byte_length, fence);
if (offset == VK_WHOLE_SIZE) {
// OOM.
return VK_WHOLE_SIZE;
}
// Run through registers and copy them into the buffer.
// TODO(benvanik): optimize this - it's hit twice every call.
const auto& values = register_file_->values;
uint8_t* dest_ptr =
reinterpret_cast<uint8_t*>(transient_buffer_data_) + offset;
for (int i = 0; i < 4; ++i) {
auto piece = constant_register_map.float_bitmap[i];
if (!piece) {
continue;
}
for (int j = 0; j < 64; ++j) {
if (piece & (uint64_t(1) << j)) {
xe::copy_128_aligned(
dest_ptr,
&values[XE_GPU_REG_SHADER_CONSTANT_000_X + i * 64 + j].f32, 1);
dest_ptr += 16;
}
}
}
for (int i = 0; i < 32; ++i) {
if (constant_register_map.loop_bitmap & (1 << i)) {
xe::store<uint32_t>(dest_ptr,
values[XE_GPU_REG_SHADER_CONSTANT_LOOP_00 + i].u32);
dest_ptr += 4;
}
}
for (int i = 0; i < 8; ++i) {
if (constant_register_map.bool_bitmap[i]) {
xe::store<uint32_t>(
dest_ptr, values[XE_GPU_REG_SHADER_CONSTANT_BOOL_000_031 + i].u32);
dest_ptr += 4;
}
}
return offset;
#endif // 0
}
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadIndexBuffer(
VkCommandBuffer command_buffer, uint32_t source_addr,
uint32_t source_length, xenos::IndexFormat format, VkFence fence) {
// Allocate space in the buffer for our data.
auto offset = AllocateTransientData(source_length, fence);
if (offset == VK_WHOLE_SIZE) {
// OOM.
return {nullptr, VK_WHOLE_SIZE};
}
const void* source_ptr = memory_->TranslatePhysical(source_addr);
uint32_t prim_reset_index =
register_file_->values[XE_GPU_REG_VGT_MULTI_PRIM_IB_RESET_INDX].u32;
bool prim_reset_enabled =
!!(register_file_->values[XE_GPU_REG_PA_SU_SC_MODE_CNTL].u32 & (1 << 21));
// Copy data into the buffer. If primitive reset is enabled, translate any
// primitive reset indices to something Vulkan understands.
// TODO(benvanik): memcpy then use compute shaders to swap?
if (prim_reset_enabled) {
if (format == xenos::IndexFormat::kInt16) {
// Endian::k8in16, swap half-words.
copy_cmp_swap_16_unaligned(
transient_buffer_->host_base() + offset, source_ptr,
static_cast<uint16_t>(prim_reset_index), source_length / 2);
} else if (format == xenos::IndexFormat::kInt32) {
// Endian::k8in32, swap words.
copy_cmp_swap_32_unaligned(transient_buffer_->host_base() + offset,
source_ptr, prim_reset_index,
source_length / 4);
}
} else {
if (format == xenos::IndexFormat::kInt16) {
// Endian::k8in16, swap half-words.
xe::copy_and_swap_16_unaligned(transient_buffer_->host_base() + offset,
source_ptr, source_length / 2);
} else if (format == xenos::IndexFormat::kInt32) {
// Endian::k8in32, swap words.
xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset,
source_ptr, source_length / 4);
}
}
transient_buffer_->Flush(offset, source_length);
// Append a barrier to the command buffer.
VkBufferMemoryBarrier barrier = {
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_HOST_WRITE_BIT,
VK_ACCESS_INDEX_READ_BIT,
VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED,
transient_buffer_->gpu_buffer(),
offset,
source_length,
};
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, nullptr, 1,
&barrier, 0, nullptr);
return {transient_buffer_->gpu_buffer(), offset};
}
std::pair<VkBuffer, VkDeviceSize> BufferCache::UploadVertexBuffer(
VkCommandBuffer command_buffer, uint32_t source_addr,
uint32_t source_length, xenos::Endian endian, VkFence fence) {
auto offset = FindCachedTransientData(source_addr, source_length);
if (offset != VK_WHOLE_SIZE) {
return {transient_buffer_->gpu_buffer(), offset};
}
// Slow path :)
// Expand the region up to the allocation boundary
auto physical_heap = memory_->GetPhysicalHeap();
uint32_t upload_base = source_addr;
uint32_t upload_size = source_length;
// Ping the memory subsystem for allocation size.
// TODO(DrChat): Artifacting occurring in 5841089E with this enabled.
// physical_heap->QueryBaseAndSize(&upload_base, &upload_size);
assert(upload_base <= source_addr);
uint32_t source_offset = source_addr - upload_base;
// Allocate space in the buffer for our data.
offset = AllocateTransientData(upload_size, fence);
if (offset == VK_WHOLE_SIZE) {
// OOM.
XELOGW(
"Failed to allocate transient data for vertex buffer! Wanted to "
"allocate {} bytes.",
upload_size);
return {nullptr, VK_WHOLE_SIZE};
}
const void* upload_ptr = memory_->TranslatePhysical(upload_base);
// Copy data into the buffer.
// TODO(benvanik): memcpy then use compute shaders to swap?
if (endian == xenos::Endian::k8in32) {
// Endian::k8in32, swap words.
xe::copy_and_swap_32_unaligned(transient_buffer_->host_base() + offset,
upload_ptr, source_length / 4);
} else if (endian == xenos::Endian::k16in32) {
xe::copy_and_swap_16_in_32_unaligned(
transient_buffer_->host_base() + offset, upload_ptr, source_length / 4);
} else {
assert_always();
}
transient_buffer_->Flush(offset, upload_size);
// Append a barrier to the command buffer.
VkBufferMemoryBarrier barrier = {
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
nullptr,
VK_ACCESS_HOST_WRITE_BIT,
VK_ACCESS_SHADER_READ_BIT,
VK_QUEUE_FAMILY_IGNORED,
VK_QUEUE_FAMILY_IGNORED,
transient_buffer_->gpu_buffer(),
offset,
upload_size,
};
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
dfn.vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_HOST_BIT,
VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, 0, 0, nullptr,
1, &barrier, 0, nullptr);
CacheTransientData(upload_base, upload_size, offset);
return {transient_buffer_->gpu_buffer(), offset + source_offset};
}
void BufferCache::HashVertexBindings(
XXH3_state_t* hash_state,
const std::vector<Shader::VertexBinding>& vertex_bindings) {
auto& regs = *register_file_;
for (const auto& vertex_binding : vertex_bindings) {
#if 0
XXH3_64bits_update(hash_state, &vertex_binding.binding_index, sizeof(vertex_binding.binding_index));
XXH3_64bits_update(hash_state, &vertex_binding.fetch_constant, sizeof(vertex_binding.fetch_constant));
XXH3_64bits_update(hash_state, &vertex_binding.stride_words, sizeof(vertex_binding.stride_words));
#endif
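// Each fetch constant group is 6 dwords holding either one 6-dword texture
// fetch constant or three 2-dword vertex fetch constants - select the group,
// then the sub-constant within it below.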
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
(vertex_binding.fetch_constant / 3) * 6;
const auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
switch (vertex_binding.fetch_constant % 3) {
case 0: {
auto& fetch = group->vertex_fetch_0;
XXH3_64bits_update(hash_state, &fetch, sizeof(fetch));
} break;
case 1: {
auto& fetch = group->vertex_fetch_1;
XXH3_64bits_update(hash_state, &fetch, sizeof(fetch));
} break;
case 2: {
auto& fetch = group->vertex_fetch_2;
XXH3_64bits_update(hash_state, &fetch, sizeof(fetch));
} break;
}
}
}
VkDescriptorSet BufferCache::PrepareVertexSet(
VkCommandBuffer command_buffer, VkFence fence,
const std::vector<Shader::VertexBinding>& vertex_bindings) {
// (quickly) Generate a hash.
XXH3_state_t hash_state;
XXH3_64bits_reset(&hash_state);
HashVertexBindings(&hash_state, vertex_bindings);
uint64_t hash = XXH3_64bits_digest(&hash_state);
auto it = vertex_sets_.find(hash);
if (it != vertex_sets_.end()) {
// TODO(DrChat): We need to compare the bindings and ensure they're equal.
return it->second;
}
if (!vertex_descriptor_pool_->has_open_batch()) {
vertex_descriptor_pool_->BeginBatch(fence);
}
VkDescriptorSet set =
vertex_descriptor_pool_->AcquireEntry(vertex_descriptor_set_layout_);
if (!set) {
return nullptr;
}
// TODO(DrChat): Define magic number 32 as a constant somewhere.
VkDescriptorBufferInfo buffer_infos[32] = {};
VkWriteDescriptorSet descriptor_write = {
VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
nullptr,
set,
0,
0,
0,
VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
nullptr,
buffer_infos,
nullptr,
};
auto& regs = *register_file_;
for (const auto& vertex_binding : vertex_bindings) {
int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 +
(vertex_binding.fetch_constant / 3) * 6;
const auto group = reinterpret_cast<xe_gpu_fetch_group_t*>(&regs.values[r]);
const xe_gpu_vertex_fetch_t* fetch = nullptr;
switch (vertex_binding.fetch_constant % 3) {
case 0:
fetch = &group->vertex_fetch_0;
break;
case 1:
fetch = &group->vertex_fetch_1;
break;
case 2:
fetch = &group->vertex_fetch_2;
break;
}
// TODO(DrChat): Some games use type kInvalidTexture (with no data).
switch (fetch->type) {
case xenos::FetchConstantType::kVertex:
break;
case xenos::FetchConstantType::kInvalidVertex:
if (cvars::gpu_allow_invalid_fetch_constants) {
break;
}
XELOGW(
"Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! "
"This "
"is incorrect behavior, but you can try bypassing this by "
"launching Xenia with --gpu_allow_invalid_fetch_constants=true.",
vertex_binding.fetch_constant, fetch->dword_0, fetch->dword_1);
return nullptr;
default:
XELOGW(
"Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!",
vertex_binding.fetch_constant, fetch->dword_0, fetch->dword_1);
return nullptr;
}
// TODO(benvanik): compute based on indices or vertex count.
// THIS CAN BE MASSIVELY INCORRECT (too large).
// This may not be possible (with indexed vfetch).
uint32_t source_length = fetch->size * 4;
uint32_t physical_address = fetch->address << 2;
// TODO(DrChat): This needs to be put in gpu::CommandProcessor
// trace_writer_.WriteMemoryRead(physical_address, source_length);
// Upload (or get a cached copy of) the buffer.
auto buffer_ref = UploadVertexBuffer(command_buffer, physical_address,
source_length, fetch->endian, fence);
if (buffer_ref.second == VK_WHOLE_SIZE) {
// Failed to upload buffer.
XELOGW("Failed to upload vertex buffer!");
return nullptr;
}
// Stash the buffer reference for our bulk bind at the end.
buffer_infos[descriptor_write.descriptorCount++] = {
buffer_ref.first,
buffer_ref.second,
source_length,
};
}
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
dfn.vkUpdateDescriptorSets(device, 1, &descriptor_write, 0, nullptr);
vertex_sets_[hash] = set;
return set;
}
VkDeviceSize BufferCache::AllocateTransientData(VkDeviceSize length,
VkFence fence) {
// Try fast path (if we have space).
VkDeviceSize offset = TryAllocateTransientData(length, fence);
if (offset != VK_WHOLE_SIZE) {
return offset;
}
// Ran out of easy allocations.
// Try consuming fences before we panic.
transient_buffer_->Scavenge();
// Try again. It may still fail if we didn't get enough space back.
offset = TryAllocateTransientData(length, fence);
return offset;
}
VkDeviceSize BufferCache::TryAllocateTransientData(VkDeviceSize length,
VkFence fence) {
auto alloc = transient_buffer_->Acquire(length, fence);
if (alloc) {
return alloc->offset;
}
// No more space.
return VK_WHOLE_SIZE;
}
VkDeviceSize BufferCache::FindCachedTransientData(uint32_t guest_address,
uint32_t guest_length) {
if (transient_cache_.empty()) {
// Short-circuit exit.
return VK_WHOLE_SIZE;
}
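// Illustrative example: with a cached entry {guest 0x1000, length 0x200,
// offset 0x40}, a query for guest 0x1080 with length 0x80 is fully contained
// and returns 0x40 + (0x1080 - 0x1000) = 0xC0.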
// Find the first element > guest_address
auto it = transient_cache_.upper_bound(guest_address);
if (it != transient_cache_.begin()) {
// it = first element <= guest_address
--it;
if ((it->first + it->second.first) >= (guest_address + guest_length)) {
// This data is contained within some existing transient data.
auto source_offset = static_cast<VkDeviceSize>(guest_address - it->first);
return it->second.second + source_offset;
}
}
return VK_WHOLE_SIZE;
}
void BufferCache::CacheTransientData(uint32_t guest_address,
uint32_t guest_length,
VkDeviceSize offset) {
transient_cache_[guest_address] = {guest_length, offset};
// Erase any entries fully contained within the new range.
auto it = transient_cache_.upper_bound(guest_address);
while (it != transient_cache_.end()) {
if ((guest_address + guest_length) >= (it->first + it->second.first)) {
it = transient_cache_.erase(it);
} else {
break;
}
}
}
void BufferCache::Flush(VkCommandBuffer command_buffer) {
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
// If we are flushing a big enough chunk, queue up an event.
// We don't want to do this for everything, but often enough that we won't
// run out of space.
if (true) {
// VkEvent finish_event;
// dfn.vkCmdSetEvent(cmd_buffer, finish_event,
// VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
}
// Flush memory.
// TODO(benvanik): subrange.
VkMappedMemoryRange dirty_range;
dirty_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
dirty_range.pNext = nullptr;
dirty_range.memory = transient_buffer_->gpu_memory();
dirty_range.offset = 0;
dirty_range.size = transient_buffer_->capacity();
dfn.vkFlushMappedMemoryRanges(device, 1, &dirty_range);
}
void BufferCache::InvalidateCache() {
// Called by VulkanCommandProcessor::MakeCoherent()
// Discard everything?
transient_cache_.clear();
}
void BufferCache::ClearCache() { transient_cache_.clear(); }
void BufferCache::Scavenge() {
SCOPE_profile_cpu_f("gpu");
transient_cache_.clear();
transient_buffer_->Scavenge();
// TODO(DrChat): These could persist across frames, we just need a smart way
// to delete unused ones.
vertex_sets_.clear();
if (vertex_descriptor_pool_->has_open_batch()) {
vertex_descriptor_pool_->EndBatch();
}
vertex_descriptor_pool_->Scavenge();
}
} // namespace vulkan
} // namespace gpu
} // namespace xe

View File

@@ -1,175 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_BUFFER_CACHE_H_
#define XENIA_GPU_VULKAN_BUFFER_CACHE_H_
#include "xenia/base/xxhash.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/memory.h"
#include "xenia/ui/vulkan/circular_buffer.h"
#include "xenia/ui/vulkan/fenced_pools.h"
#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
#include <map>
#include <unordered_map>
namespace xe {
namespace gpu {
namespace vulkan {
// Efficiently manages buffers of various kinds.
// Used primarily for uploading index and vertex data from guest memory and
// transient data like shader constants.
class BufferCache {
public:
BufferCache(RegisterFile* register_file, Memory* memory,
const ui::vulkan::VulkanProvider& provider, size_t capacity);
~BufferCache();
VkResult Initialize();
void Shutdown();
// Descriptor set containing the dynamic uniform buffer used for constant
// uploads. Used in conjunction with a dynamic offset returned by
// UploadConstantRegisters.
// The set contains two bindings:
// binding = 0: for use in vertex shaders
// binding = 1: for use in fragment shaders
VkDescriptorSet constant_descriptor_set() const {
return constant_descriptor_set_;
}
VkDescriptorSetLayout constant_descriptor_set_layout() const {
return constant_descriptor_set_layout_;
}
// Descriptor set containing vertex buffers stored in storage buffers.
// This set contains one binding with an array of 32 storage buffers.
VkDescriptorSetLayout vertex_descriptor_set_layout() const {
return vertex_descriptor_set_layout_;
}
// Uploads the constants specified in the register maps to the transient
// uniform storage buffer.
// The registers are tightly packed in order as [floats, ints, bools].
// Returns an offset that can be used with the transient_descriptor_set or
// VK_WHOLE_SIZE if the constants could not be uploaded (OOM).
// The returned offsets may alias.
std::pair<VkDeviceSize, VkDeviceSize> UploadConstantRegisters(
VkCommandBuffer command_buffer,
const Shader::ConstantRegisterMap& vertex_constant_register_map,
const Shader::ConstantRegisterMap& pixel_constant_register_map,
VkFence fence);
// Uploads index buffer data from guest memory, possibly eliding with
// recently uploaded data or cached copies.
// Returns a buffer and offset that can be used with vkCmdBindIndexBuffer.
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
std::pair<VkBuffer, VkDeviceSize> UploadIndexBuffer(
VkCommandBuffer command_buffer, uint32_t source_addr,
uint32_t source_length, xenos::IndexFormat format, VkFence fence);
// Uploads vertex buffer data from guest memory, possibly eliding with
// recently uploaded data or cached copies.
// Returns a buffer and offset that can be used with vkCmdBindVertexBuffers.
// Size will be VK_WHOLE_SIZE if the data could not be uploaded (OOM).
std::pair<VkBuffer, VkDeviceSize> UploadVertexBuffer(
VkCommandBuffer command_buffer, uint32_t source_addr,
uint32_t source_length, xenos::Endian endian, VkFence fence);
// Prepares and returns a vertex descriptor set.
VkDescriptorSet PrepareVertexSet(
VkCommandBuffer setup_buffer, VkFence fence,
const std::vector<Shader::VertexBinding>& vertex_bindings);
// Flushes all pending data to the GPU.
// Until this is called the GPU is not guaranteed to see any data.
// The given command buffer will be used to queue up events so that the
// cache can determine when data has been consumed.
void Flush(VkCommandBuffer command_buffer);
// Marks the cache as potentially invalid.
// This is not as strong as ClearCache and is a hint that any and all data
// should be verified before being reused.
void InvalidateCache();
// Clears all cached content and prevents future elision with pending data.
void ClearCache();
// Wipes all data no longer needed.
void Scavenge();
private:
// This represents an uploaded vertex buffer.
struct VertexBuffer {
uint32_t guest_address;
uint32_t size;
VmaAllocation alloc;
VmaAllocationInfo alloc_info;
};
VkResult CreateVertexDescriptorPool();
void FreeVertexDescriptorPool();
VkResult CreateConstantDescriptorSet();
void FreeConstantDescriptorSet();
void HashVertexBindings(
XXH3_state_t* hash_state,
const std::vector<Shader::VertexBinding>& vertex_bindings);
// Allocates a block of memory in the transient buffer.
// When memory is not available fences are checked and space is reclaimed.
// Returns VK_WHOLE_SIZE if requested amount of memory is not available.
VkDeviceSize AllocateTransientData(VkDeviceSize length, VkFence fence);
// Tries to allocate a block of memory in the transient buffer.
// Returns VK_WHOLE_SIZE if requested amount of memory is not available.
VkDeviceSize TryAllocateTransientData(VkDeviceSize length, VkFence fence);
// Finds a block of data in the transient buffer sourced from the specified
// guest address and length.
VkDeviceSize FindCachedTransientData(uint32_t guest_address,
uint32_t guest_length);
// Adds a block of data to the frame cache.
void CacheTransientData(uint32_t guest_address, uint32_t guest_length,
VkDeviceSize offset);
RegisterFile* register_file_ = nullptr;
Memory* memory_ = nullptr;
const ui::vulkan::VulkanProvider& provider_;
VkDeviceMemory gpu_memory_pool_ = nullptr;
VmaAllocator mem_allocator_ = nullptr;
// Staging ringbuffer we cycle through fast. Used for data we don't
// plan on keeping past the current frame.
std::unique_ptr<ui::vulkan::CircularBuffer> transient_buffer_ = nullptr;
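// Maps guest base address -> {guest length, offset within transient_buffer_}.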
std::map<uint32_t, std::pair<uint32_t, VkDeviceSize>> transient_cache_;
// Vertex buffer descriptors
std::unique_ptr<ui::vulkan::DescriptorPool> vertex_descriptor_pool_ = nullptr;
VkDescriptorSetLayout vertex_descriptor_set_layout_ = nullptr;
// Current frame vertex sets.
std::unordered_map<uint64_t, VkDescriptorSet> vertex_sets_;
// Descriptor set used to hold vertex/pixel shader float constants
VkDescriptorPool constant_descriptor_pool_ = nullptr;
VkDescriptorSetLayout constant_descriptor_set_layout_ = nullptr;
VkDescriptorSet constant_descriptor_set_ = nullptr;
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_BUFFER_CACHE_H_

View File

@@ -0,0 +1,367 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
#include <cstddef>
#include <cstdint>
#include <cstring>
#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
namespace xe {
namespace gpu {
namespace vulkan {
DeferredCommandBuffer::DeferredCommandBuffer(
    const VulkanCommandProcessor& command_processor, size_t initial_size_bytes)
    : command_processor_(command_processor) {
  command_stream_.reserve(initial_size_bytes / sizeof(uintmax_t));
}
void DeferredCommandBuffer::Reset() { command_stream_.clear(); }
void DeferredCommandBuffer::Execute(VkCommandBuffer command_buffer) {
#if XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_UI_VULKAN_FINE_GRAINED_DRAW_SCOPES
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn =
command_processor_.GetVulkanProvider().dfn();
const uintmax_t* stream = command_stream_.data();
size_t stream_remaining = command_stream_.size();
while (stream_remaining) {
const CommandHeader& header =
*reinterpret_cast<const CommandHeader*>(stream);
stream += kCommandHeaderSizeElements;
stream_remaining -= kCommandHeaderSizeElements;
switch (header.command) {
case Command::kVkBeginRenderPass: {
auto& args = *reinterpret_cast<const ArgsVkBeginRenderPass*>(stream);
size_t offset_bytes = sizeof(ArgsVkBeginRenderPass);
VkRenderPassBeginInfo render_pass_begin_info;
render_pass_begin_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
render_pass_begin_info.pNext = nullptr;
render_pass_begin_info.renderPass = args.render_pass;
render_pass_begin_info.framebuffer = args.framebuffer;
render_pass_begin_info.renderArea = args.render_area;
render_pass_begin_info.clearValueCount = args.clear_value_count;
if (render_pass_begin_info.clearValueCount) {
offset_bytes = xe::align(offset_bytes, alignof(VkClearValue));
render_pass_begin_info.pClearValues =
reinterpret_cast<const VkClearValue*>(
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
offset_bytes +=
sizeof(VkClearValue) * render_pass_begin_info.clearValueCount;
} else {
render_pass_begin_info.pClearValues = nullptr;
}
dfn.vkCmdBeginRenderPass(command_buffer, &render_pass_begin_info,
args.contents);
} break;
case Command::kVkBindDescriptorSets: {
auto& args = *reinterpret_cast<const ArgsVkBindDescriptorSets*>(stream);
size_t offset_bytes = xe::align(sizeof(ArgsVkBindDescriptorSets),
alignof(VkDescriptorSet));
const VkDescriptorSet* descriptor_sets =
reinterpret_cast<const VkDescriptorSet*>(
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
offset_bytes += sizeof(VkDescriptorSet) * args.descriptor_set_count;
const uint32_t* dynamic_offsets = nullptr;
if (args.dynamic_offset_count) {
offset_bytes = xe::align(offset_bytes, alignof(uint32_t));
dynamic_offsets = reinterpret_cast<const uint32_t*>(
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
offset_bytes += sizeof(uint32_t) * args.dynamic_offset_count;
}
dfn.vkCmdBindDescriptorSets(command_buffer, args.pipeline_bind_point,
args.layout, args.first_set,
args.descriptor_set_count, descriptor_sets,
args.dynamic_offset_count, dynamic_offsets);
} break;
case Command::kVkBindIndexBuffer: {
auto& args = *reinterpret_cast<const ArgsVkBindIndexBuffer*>(stream);
dfn.vkCmdBindIndexBuffer(command_buffer, args.buffer, args.offset,
args.index_type);
} break;
case Command::kVkBindPipeline: {
auto& args = *reinterpret_cast<const ArgsVkBindPipeline*>(stream);
dfn.vkCmdBindPipeline(command_buffer, args.pipeline_bind_point,
args.pipeline);
} break;
case Command::kVkBindVertexBuffers: {
auto& args = *reinterpret_cast<const ArgsVkBindVertexBuffers*>(stream);
size_t offset_bytes =
xe::align(sizeof(ArgsVkBindVertexBuffers), alignof(VkBuffer));
const VkBuffer* buffers = reinterpret_cast<const VkBuffer*>(
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
offset_bytes =
xe::align(offset_bytes + sizeof(VkBuffer) * args.binding_count,
alignof(VkDeviceSize));
const VkDeviceSize* offsets = reinterpret_cast<const VkDeviceSize*>(
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
dfn.vkCmdBindVertexBuffers(command_buffer, args.first_binding,
args.binding_count, buffers, offsets);
} break;
case Command::kVkClearAttachments: {
auto& args = *reinterpret_cast<const ArgsVkClearAttachments*>(stream);
size_t offset_bytes = xe::align(sizeof(ArgsVkClearAttachments),
alignof(VkClearAttachment));
const VkClearAttachment* attachments =
reinterpret_cast<const VkClearAttachment*>(
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
offset_bytes = xe::align(
offset_bytes + sizeof(VkClearAttachment) * args.attachment_count,
alignof(VkClearRect));
const VkClearRect* rects = reinterpret_cast<const VkClearRect*>(
reinterpret_cast<const uint8_t*>(stream) + offset_bytes);
dfn.vkCmdClearAttachments(command_buffer, args.attachment_count,
attachments, args.rect_count, rects);
} break;
case Command::kVkClearColorImage: {
auto& args = *reinterpret_cast<const ArgsVkClearColorImage*>(stream);
dfn.vkCmdClearColorImage(
command_buffer, args.image, args.image_layout, &args.color,
args.range_count,
reinterpret_cast<const VkImageSubresourceRange*>(
reinterpret_cast<const uint8_t*>(stream) +
xe::align(sizeof(ArgsVkClearColorImage),
alignof(VkImageSubresourceRange))));
} break;
case Command::kVkCopyBuffer: {
auto& args = *reinterpret_cast<const ArgsVkCopyBuffer*>(stream);
dfn.vkCmdCopyBuffer(
command_buffer, args.src_buffer, args.dst_buffer, args.region_count,
reinterpret_cast<const VkBufferCopy*>(
reinterpret_cast<const uint8_t*>(stream) +
xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy))));
} break;
case Command::kVkCopyBufferToImage: {
auto& args = *reinterpret_cast<const ArgsVkCopyBufferToImage*>(stream);
dfn.vkCmdCopyBufferToImage(
command_buffer, args.src_buffer, args.dst_image,
args.dst_image_layout, args.region_count,
reinterpret_cast<const VkBufferImageCopy*>(
reinterpret_cast<const uint8_t*>(stream) +
xe::align(sizeof(ArgsVkCopyBufferToImage),
alignof(VkBufferImageCopy))));
} break;
case Command::kVkDispatch: {
auto& args = *reinterpret_cast<const ArgsVkDispatch*>(stream);
dfn.vkCmdDispatch(command_buffer, args.group_count_x,
args.group_count_y, args.group_count_z);
} break;
case Command::kVkDraw: {
auto& args = *reinterpret_cast<const ArgsVkDraw*>(stream);
dfn.vkCmdDraw(command_buffer, args.vertex_count, args.instance_count,
args.first_vertex, args.first_instance);
} break;
case Command::kVkDrawIndexed: {
auto& args = *reinterpret_cast<const ArgsVkDrawIndexed*>(stream);
dfn.vkCmdDrawIndexed(command_buffer, args.index_count,
args.instance_count, args.first_index,
args.vertex_offset, args.first_instance);
} break;
case Command::kVkEndRenderPass:
dfn.vkCmdEndRenderPass(command_buffer);
break;
case Command::kVkPipelineBarrier: {
auto& args = *reinterpret_cast<const ArgsVkPipelineBarrier*>(stream);
size_t barrier_offset_bytes = sizeof(ArgsVkPipelineBarrier);
const VkMemoryBarrier* memory_barriers = nullptr;
if (args.memory_barrier_count) {
barrier_offset_bytes =
xe::align(barrier_offset_bytes, alignof(VkMemoryBarrier));
memory_barriers = reinterpret_cast<const VkMemoryBarrier*>(
reinterpret_cast<const uint8_t*>(stream) + barrier_offset_bytes);
barrier_offset_bytes +=
sizeof(VkMemoryBarrier) * args.memory_barrier_count;
}
const VkBufferMemoryBarrier* buffer_memory_barriers = nullptr;
if (args.buffer_memory_barrier_count) {
barrier_offset_bytes =
xe::align(barrier_offset_bytes, alignof(VkBufferMemoryBarrier));
buffer_memory_barriers =
reinterpret_cast<const VkBufferMemoryBarrier*>(
reinterpret_cast<const uint8_t*>(stream) +
barrier_offset_bytes);
barrier_offset_bytes +=
sizeof(VkBufferMemoryBarrier) * args.buffer_memory_barrier_count;
}
const VkImageMemoryBarrier* image_memory_barriers = nullptr;
if (args.image_memory_barrier_count) {
barrier_offset_bytes =
xe::align(barrier_offset_bytes, alignof(VkImageMemoryBarrier));
image_memory_barriers = reinterpret_cast<const VkImageMemoryBarrier*>(
reinterpret_cast<const uint8_t*>(stream) + barrier_offset_bytes);
barrier_offset_bytes +=
sizeof(VkImageMemoryBarrier) * args.image_memory_barrier_count;
}
dfn.vkCmdPipelineBarrier(
command_buffer, args.src_stage_mask, args.dst_stage_mask,
args.dependency_flags, args.memory_barrier_count, memory_barriers,
args.buffer_memory_barrier_count, buffer_memory_barriers,
args.image_memory_barrier_count, image_memory_barriers);
} break;
case Command::kVkPushConstants: {
auto& args = *reinterpret_cast<const ArgsVkPushConstants*>(stream);
dfn.vkCmdPushConstants(command_buffer, args.layout, args.stage_flags,
args.offset, args.size,
reinterpret_cast<const uint8_t*>(stream) +
sizeof(ArgsVkPushConstants));
} break;
case Command::kVkSetBlendConstants: {
auto& args = *reinterpret_cast<const ArgsVkSetBlendConstants*>(stream);
dfn.vkCmdSetBlendConstants(command_buffer, args.blend_constants);
} break;
case Command::kVkSetDepthBias: {
auto& args = *reinterpret_cast<const ArgsVkSetDepthBias*>(stream);
dfn.vkCmdSetDepthBias(command_buffer, args.depth_bias_constant_factor,
args.depth_bias_clamp,
args.depth_bias_slope_factor);
} break;
case Command::kVkSetScissor: {
auto& args = *reinterpret_cast<const ArgsVkSetScissor*>(stream);
dfn.vkCmdSetScissor(
command_buffer, args.first_scissor, args.scissor_count,
reinterpret_cast<const VkRect2D*>(
reinterpret_cast<const uint8_t*>(stream) +
xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D))));
} break;
case Command::kVkSetStencilCompareMask: {
auto& args =
*reinterpret_cast<const ArgsSetStencilMaskReference*>(stream);
dfn.vkCmdSetStencilCompareMask(command_buffer, args.face_mask,
args.mask_reference);
} break;
case Command::kVkSetStencilReference: {
auto& args =
*reinterpret_cast<const ArgsSetStencilMaskReference*>(stream);
dfn.vkCmdSetStencilReference(command_buffer, args.face_mask,
args.mask_reference);
} break;
case Command::kVkSetStencilWriteMask: {
auto& args =
*reinterpret_cast<const ArgsSetStencilMaskReference*>(stream);
dfn.vkCmdSetStencilWriteMask(command_buffer, args.face_mask,
args.mask_reference);
} break;
case Command::kVkSetViewport: {
auto& args = *reinterpret_cast<const ArgsVkSetViewport*>(stream);
dfn.vkCmdSetViewport(
command_buffer, args.first_viewport, args.viewport_count,
reinterpret_cast<const VkViewport*>(
reinterpret_cast<const uint8_t*>(stream) +
xe::align(sizeof(ArgsVkSetViewport), alignof(VkViewport))));
} break;
default:
assert_unhandled_case(header.command);
break;
}
stream += header.arguments_size_elements;
stream_remaining -= header.arguments_size_elements;
}
}
void DeferredCommandBuffer::CmdVkPipelineBarrier(
VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
VkDependencyFlags dependency_flags, uint32_t memory_barrier_count,
const VkMemoryBarrier* memory_barriers,
uint32_t buffer_memory_barrier_count,
const VkBufferMemoryBarrier* buffer_memory_barriers,
uint32_t image_memory_barrier_count,
const VkImageMemoryBarrier* image_memory_barriers) {
size_t arguments_size = sizeof(ArgsVkPipelineBarrier);
size_t memory_barriers_offset = 0;
if (memory_barrier_count) {
arguments_size = xe::align(arguments_size, alignof(VkMemoryBarrier));
memory_barriers_offset = arguments_size;
arguments_size += sizeof(VkMemoryBarrier) * memory_barrier_count;
}
size_t buffer_memory_barriers_offset = 0;
if (buffer_memory_barrier_count) {
arguments_size = xe::align(arguments_size, alignof(VkBufferMemoryBarrier));
buffer_memory_barriers_offset = arguments_size;
arguments_size +=
sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count;
}
size_t image_memory_barriers_offset = 0;
if (image_memory_barrier_count) {
arguments_size = xe::align(arguments_size, alignof(VkImageMemoryBarrier));
image_memory_barriers_offset = arguments_size;
arguments_size += sizeof(VkImageMemoryBarrier) * image_memory_barrier_count;
}
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkPipelineBarrier, arguments_size));
auto& args = *reinterpret_cast<ArgsVkPipelineBarrier*>(args_ptr);
args.src_stage_mask = src_stage_mask;
args.dst_stage_mask = dst_stage_mask;
args.dependency_flags = dependency_flags;
args.memory_barrier_count = memory_barrier_count;
args.buffer_memory_barrier_count = buffer_memory_barrier_count;
args.image_memory_barrier_count = image_memory_barrier_count;
if (memory_barrier_count) {
std::memcpy(args_ptr + memory_barriers_offset, memory_barriers,
sizeof(VkMemoryBarrier) * memory_barrier_count);
}
if (buffer_memory_barrier_count) {
std::memcpy(args_ptr + buffer_memory_barriers_offset,
buffer_memory_barriers,
sizeof(VkBufferMemoryBarrier) * buffer_memory_barrier_count);
}
if (image_memory_barrier_count) {
std::memcpy(args_ptr + image_memory_barriers_offset, image_memory_barriers,
sizeof(VkImageMemoryBarrier) * image_memory_barrier_count);
}
}
void* DeferredCommandBuffer::WriteCommand(Command command,
size_t arguments_size_bytes) {
size_t arguments_size_elements =
(arguments_size_bytes + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);
size_t offset = command_stream_.size();
command_stream_.resize(offset + kCommandHeaderSizeElements +
arguments_size_elements);
CommandHeader& header =
*reinterpret_cast<CommandHeader*>(command_stream_.data() + offset);
header.command = command;
header.arguments_size_elements = uint32_t(arguments_size_elements);
return command_stream_.data() + (offset + kCommandHeaderSizeElements);
}
} // namespace vulkan
} // namespace gpu
} // namespace xe
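
The stream layout that Execute() decodes above is easier to see in isolation. The following standalone sketch (illustrative names, not Xenia's) shows the same technique: each recorded command is a header followed by its argument struct, both rounded up to whole uintmax_t elements so every argument struct stays uint64_t- and pointer-aligned.

#include <cstdint>
#include <iostream>
#include <vector>

enum class Cmd : uint32_t { kDraw };

struct Header {
  Cmd command;
  uint32_t size_elements;  // Payload size in uintmax_t elements.
};

constexpr size_t kHeaderElements =
    (sizeof(Header) + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);

std::vector<uintmax_t> stream;

// Mirrors WriteCommand(): append a header, reserve aligned payload space.
void* Write(Cmd command, size_t payload_bytes) {
  size_t payload_elements =
      (payload_bytes + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);
  size_t offset = stream.size();
  stream.resize(offset + kHeaderElements + payload_elements);
  auto* header = reinterpret_cast<Header*>(stream.data() + offset);
  header->command = command;
  header->size_elements = uint32_t(payload_elements);
  return stream.data() + offset + kHeaderElements;
}

struct DrawArgs {
  uint32_t vertex_count;
  uint32_t instance_count;
};

int main() {
  // Record one command.
  auto& draw =
      *reinterpret_cast<DrawArgs*>(Write(Cmd::kDraw, sizeof(DrawArgs)));
  draw = {3, 1};
  // Replay the stream the same way Execute() does: read a header, dispatch,
  // then skip past the aligned payload.
  const uintmax_t* p = stream.data();
  size_t remaining = stream.size();
  while (remaining) {
    const Header& h = *reinterpret_cast<const Header*>(p);
    p += kHeaderElements;
    remaining -= kHeaderElements;
    switch (h.command) {
      case Cmd::kDraw: {
        const auto& args = *reinterpret_cast<const DrawArgs*>(p);
        std::cout << "draw " << args.vertex_count << " vertices, "
                  << args.instance_count << " instance(s)\n";
      } break;
    }
    p += h.size_elements;
    remaining -= h.size_elements;
  }
  return 0;
}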

View File

@ -0,0 +1,550 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_
#define XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_
#include <cstddef>
#include <cstdint>
#include <cstring>
#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace gpu {
namespace vulkan {
class VulkanCommandProcessor;
class DeferredCommandBuffer {
public:
DeferredCommandBuffer(const VulkanCommandProcessor& command_processor,
size_t initial_size_bytes = 1024 * 1024);
void Reset();
void Execute(VkCommandBuffer command_buffer);
  // render_pass_begin->pNext must be null.
void CmdVkBeginRenderPass(const VkRenderPassBeginInfo* render_pass_begin,
VkSubpassContents contents) {
assert_null(render_pass_begin->pNext);
size_t arguments_size = sizeof(ArgsVkBeginRenderPass);
uint32_t clear_value_count = render_pass_begin->clearValueCount;
size_t clear_values_offset = 0;
if (clear_value_count) {
arguments_size = xe::align(arguments_size, alignof(VkClearValue));
clear_values_offset = arguments_size;
arguments_size += sizeof(VkClearValue) * clear_value_count;
}
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkBeginRenderPass, arguments_size));
auto& args = *reinterpret_cast<ArgsVkBeginRenderPass*>(args_ptr);
args.render_pass = render_pass_begin->renderPass;
args.framebuffer = render_pass_begin->framebuffer;
args.render_area = render_pass_begin->renderArea;
args.clear_value_count = clear_value_count;
args.contents = contents;
if (clear_value_count) {
std::memcpy(args_ptr + clear_values_offset,
render_pass_begin->pClearValues,
sizeof(VkClearValue) * clear_value_count);
}
}
void CmdVkBindDescriptorSets(VkPipelineBindPoint pipeline_bind_point,
VkPipelineLayout layout, uint32_t first_set,
uint32_t descriptor_set_count,
const VkDescriptorSet* descriptor_sets,
uint32_t dynamic_offset_count,
const uint32_t* dynamic_offsets) {
size_t arguments_size =
xe::align(sizeof(ArgsVkBindDescriptorSets), alignof(VkDescriptorSet));
size_t descriptor_sets_offset = arguments_size;
arguments_size += sizeof(VkDescriptorSet) * descriptor_set_count;
size_t dynamic_offsets_offset = 0;
if (dynamic_offset_count) {
arguments_size = xe::align(arguments_size, alignof(uint32_t));
dynamic_offsets_offset = arguments_size;
arguments_size += sizeof(uint32_t) * dynamic_offset_count;
}
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkBindDescriptorSets, arguments_size));
auto& args = *reinterpret_cast<ArgsVkBindDescriptorSets*>(args_ptr);
args.pipeline_bind_point = pipeline_bind_point;
args.layout = layout;
args.first_set = first_set;
args.descriptor_set_count = descriptor_set_count;
args.dynamic_offset_count = dynamic_offset_count;
std::memcpy(args_ptr + descriptor_sets_offset, descriptor_sets,
sizeof(VkDescriptorSet) * descriptor_set_count);
if (dynamic_offset_count) {
std::memcpy(args_ptr + dynamic_offsets_offset, dynamic_offsets,
sizeof(uint32_t) * dynamic_offset_count);
}
}
void CmdVkBindIndexBuffer(VkBuffer buffer, VkDeviceSize offset,
VkIndexType index_type) {
auto& args = *reinterpret_cast<ArgsVkBindIndexBuffer*>(WriteCommand(
Command::kVkBindIndexBuffer, sizeof(ArgsVkBindIndexBuffer)));
args.buffer = buffer;
args.offset = offset;
args.index_type = index_type;
}
void CmdVkBindPipeline(VkPipelineBindPoint pipeline_bind_point,
VkPipeline pipeline) {
auto& args = *reinterpret_cast<ArgsVkBindPipeline*>(
WriteCommand(Command::kVkBindPipeline, sizeof(ArgsVkBindPipeline)));
args.pipeline_bind_point = pipeline_bind_point;
args.pipeline = pipeline;
}
void CmdVkBindVertexBuffers(uint32_t first_binding, uint32_t binding_count,
const VkBuffer* buffers,
const VkDeviceSize* offsets) {
size_t arguments_size =
xe::align(sizeof(ArgsVkBindVertexBuffers), alignof(VkBuffer));
size_t buffers_offset = arguments_size;
arguments_size =
xe::align(arguments_size + sizeof(VkBuffer) * binding_count,
alignof(VkDeviceSize));
size_t offsets_offset = arguments_size;
arguments_size += sizeof(VkDeviceSize) * binding_count;
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkBindVertexBuffers, arguments_size));
auto& args = *reinterpret_cast<ArgsVkBindVertexBuffers*>(args_ptr);
args.first_binding = first_binding;
args.binding_count = binding_count;
std::memcpy(args_ptr + buffers_offset, buffers,
sizeof(VkBuffer) * binding_count);
std::memcpy(args_ptr + offsets_offset, offsets,
sizeof(VkDeviceSize) * binding_count);
}
void CmdClearAttachmentsEmplace(uint32_t attachment_count,
VkClearAttachment*& attachments_out,
uint32_t rect_count,
VkClearRect*& rects_out) {
size_t arguments_size =
xe::align(sizeof(ArgsVkClearAttachments), alignof(VkClearAttachment));
size_t attachments_offset = arguments_size;
arguments_size =
xe::align(arguments_size + sizeof(VkClearAttachment) * attachment_count,
alignof(VkClearRect));
size_t rects_offset = arguments_size;
arguments_size += sizeof(VkClearRect) * rect_count;
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkClearAttachments, arguments_size));
auto& args = *reinterpret_cast<ArgsVkClearAttachments*>(args_ptr);
args.attachment_count = attachment_count;
args.rect_count = rect_count;
attachments_out =
reinterpret_cast<VkClearAttachment*>(args_ptr + attachments_offset);
rects_out = reinterpret_cast<VkClearRect*>(args_ptr + rects_offset);
}
void CmdVkClearAttachments(uint32_t attachment_count,
const VkClearAttachment* attachments,
uint32_t rect_count, const VkClearRect* rects) {
VkClearAttachment* attachments_arg;
VkClearRect* rects_arg;
CmdClearAttachmentsEmplace(attachment_count, attachments_arg, rect_count,
rects_arg);
std::memcpy(attachments_arg, attachments,
sizeof(VkClearAttachment) * attachment_count);
std::memcpy(rects_arg, rects, sizeof(VkClearRect) * rect_count);
}
VkImageSubresourceRange* CmdClearColorImageEmplace(
VkImage image, VkImageLayout image_layout, const VkClearColorValue* color,
uint32_t range_count) {
const size_t header_size = xe::align(sizeof(ArgsVkClearColorImage),
alignof(VkImageSubresourceRange));
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(WriteCommand(
Command::kVkClearColorImage,
header_size + sizeof(VkImageSubresourceRange) * range_count));
auto& args = *reinterpret_cast<ArgsVkClearColorImage*>(args_ptr);
args.image = image;
args.image_layout = image_layout;
args.color = *color;
args.range_count = range_count;
return reinterpret_cast<VkImageSubresourceRange*>(args_ptr + header_size);
}
void CmdVkClearColorImage(VkImage image, VkImageLayout image_layout,
const VkClearColorValue* color,
uint32_t range_count,
const VkImageSubresourceRange* ranges) {
std::memcpy(
CmdClearColorImageEmplace(image, image_layout, color, range_count),
ranges, sizeof(VkImageSubresourceRange) * range_count);
}
VkBufferCopy* CmdCopyBufferEmplace(VkBuffer src_buffer, VkBuffer dst_buffer,
uint32_t region_count) {
const size_t header_size =
xe::align(sizeof(ArgsVkCopyBuffer), alignof(VkBufferCopy));
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkCopyBuffer,
header_size + sizeof(VkBufferCopy) * region_count));
auto& args = *reinterpret_cast<ArgsVkCopyBuffer*>(args_ptr);
args.src_buffer = src_buffer;
args.dst_buffer = dst_buffer;
args.region_count = region_count;
return reinterpret_cast<VkBufferCopy*>(args_ptr + header_size);
}
void CmdVkCopyBuffer(VkBuffer src_buffer, VkBuffer dst_buffer,
uint32_t region_count, const VkBufferCopy* regions) {
std::memcpy(CmdCopyBufferEmplace(src_buffer, dst_buffer, region_count),
regions, sizeof(VkBufferCopy) * region_count);
}
VkBufferImageCopy* CmdCopyBufferToImageEmplace(VkBuffer src_buffer,
VkImage dst_image,
VkImageLayout dst_image_layout,
uint32_t region_count) {
const size_t header_size =
xe::align(sizeof(ArgsVkCopyBufferToImage), alignof(VkBufferImageCopy));
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkCopyBufferToImage,
header_size + sizeof(VkBufferImageCopy) * region_count));
auto& args = *reinterpret_cast<ArgsVkCopyBufferToImage*>(args_ptr);
args.src_buffer = src_buffer;
args.dst_image = dst_image;
args.dst_image_layout = dst_image_layout;
args.region_count = region_count;
return reinterpret_cast<VkBufferImageCopy*>(args_ptr + header_size);
}
void CmdVkCopyBufferToImage(VkBuffer src_buffer, VkImage dst_image,
VkImageLayout dst_image_layout,
uint32_t region_count,
const VkBufferImageCopy* regions) {
std::memcpy(CmdCopyBufferToImageEmplace(src_buffer, dst_image,
dst_image_layout, region_count),
regions, sizeof(VkBufferImageCopy) * region_count);
}
void CmdVkDispatch(uint32_t group_count_x, uint32_t group_count_y,
uint32_t group_count_z) {
auto& args = *reinterpret_cast<ArgsVkDispatch*>(
WriteCommand(Command::kVkDispatch, sizeof(ArgsVkDispatch)));
args.group_count_x = group_count_x;
args.group_count_y = group_count_y;
args.group_count_z = group_count_z;
}
void CmdVkDraw(uint32_t vertex_count, uint32_t instance_count,
uint32_t first_vertex, uint32_t first_instance) {
auto& args = *reinterpret_cast<ArgsVkDraw*>(
WriteCommand(Command::kVkDraw, sizeof(ArgsVkDraw)));
args.vertex_count = vertex_count;
args.instance_count = instance_count;
args.first_vertex = first_vertex;
args.first_instance = first_instance;
}
void CmdVkDrawIndexed(uint32_t index_count, uint32_t instance_count,
uint32_t first_index, int32_t vertex_offset,
uint32_t first_instance) {
auto& args = *reinterpret_cast<ArgsVkDrawIndexed*>(
WriteCommand(Command::kVkDrawIndexed, sizeof(ArgsVkDrawIndexed)));
args.index_count = index_count;
args.instance_count = instance_count;
args.first_index = first_index;
args.vertex_offset = vertex_offset;
args.first_instance = first_instance;
}
void CmdVkEndRenderPass() { WriteCommand(Command::kVkEndRenderPass, 0); }
// pNext of all barriers must be null.
void CmdVkPipelineBarrier(VkPipelineStageFlags src_stage_mask,
VkPipelineStageFlags dst_stage_mask,
VkDependencyFlags dependency_flags,
uint32_t memory_barrier_count,
const VkMemoryBarrier* memory_barriers,
uint32_t buffer_memory_barrier_count,
const VkBufferMemoryBarrier* buffer_memory_barriers,
uint32_t image_memory_barrier_count,
const VkImageMemoryBarrier* image_memory_barriers);
void CmdVkPushConstants(VkPipelineLayout layout,
VkShaderStageFlags stage_flags, uint32_t offset,
uint32_t size, const void* values) {
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(WriteCommand(
Command::kVkPushConstants, sizeof(ArgsVkPushConstants) + size));
auto& args = *reinterpret_cast<ArgsVkPushConstants*>(args_ptr);
args.layout = layout;
args.stage_flags = stage_flags;
args.offset = offset;
args.size = size;
std::memcpy(args_ptr + sizeof(ArgsVkPushConstants), values, size);
}
void CmdVkSetBlendConstants(const float* blend_constants) {
auto& args = *reinterpret_cast<ArgsVkSetBlendConstants*>(WriteCommand(
Command::kVkSetBlendConstants, sizeof(ArgsVkSetBlendConstants)));
std::memcpy(args.blend_constants, blend_constants, sizeof(float) * 4);
}
void CmdVkSetDepthBias(float depth_bias_constant_factor,
float depth_bias_clamp,
float depth_bias_slope_factor) {
auto& args = *reinterpret_cast<ArgsVkSetDepthBias*>(
WriteCommand(Command::kVkSetDepthBias, sizeof(ArgsVkSetDepthBias)));
args.depth_bias_constant_factor = depth_bias_constant_factor;
args.depth_bias_clamp = depth_bias_clamp;
args.depth_bias_slope_factor = depth_bias_slope_factor;
}
void CmdVkSetScissor(uint32_t first_scissor, uint32_t scissor_count,
const VkRect2D* scissors) {
const size_t header_size =
xe::align(sizeof(ArgsVkSetScissor), alignof(VkRect2D));
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkSetScissor,
header_size + sizeof(VkRect2D) * scissor_count));
auto& args = *reinterpret_cast<ArgsVkSetScissor*>(args_ptr);
args.first_scissor = first_scissor;
args.scissor_count = scissor_count;
std::memcpy(args_ptr + header_size, scissors,
sizeof(VkRect2D) * scissor_count);
}
void CmdVkSetStencilCompareMask(VkStencilFaceFlags face_mask,
uint32_t compare_mask) {
auto& args = *reinterpret_cast<ArgsSetStencilMaskReference*>(
WriteCommand(Command::kVkSetStencilCompareMask,
sizeof(ArgsSetStencilMaskReference)));
args.face_mask = face_mask;
args.mask_reference = compare_mask;
}
void CmdVkSetStencilReference(VkStencilFaceFlags face_mask,
uint32_t reference) {
auto& args = *reinterpret_cast<ArgsSetStencilMaskReference*>(WriteCommand(
Command::kVkSetStencilReference, sizeof(ArgsSetStencilMaskReference)));
args.face_mask = face_mask;
args.mask_reference = reference;
}
void CmdVkSetStencilWriteMask(VkStencilFaceFlags face_mask,
uint32_t write_mask) {
auto& args = *reinterpret_cast<ArgsSetStencilMaskReference*>(WriteCommand(
Command::kVkSetStencilWriteMask, sizeof(ArgsSetStencilMaskReference)));
args.face_mask = face_mask;
args.mask_reference = write_mask;
}
void CmdVkSetViewport(uint32_t first_viewport, uint32_t viewport_count,
const VkViewport* viewports) {
const size_t header_size =
xe::align(sizeof(ArgsVkSetViewport), alignof(VkViewport));
uint8_t* args_ptr = reinterpret_cast<uint8_t*>(
WriteCommand(Command::kVkSetViewport,
header_size + sizeof(VkViewport) * viewport_count));
auto& args = *reinterpret_cast<ArgsVkSetViewport*>(args_ptr);
args.first_viewport = first_viewport;
args.viewport_count = viewport_count;
std::memcpy(args_ptr + header_size, viewports,
sizeof(VkViewport) * viewport_count);
}
private:
enum class Command {
kVkBeginRenderPass,
kVkBindDescriptorSets,
kVkBindIndexBuffer,
kVkBindPipeline,
kVkBindVertexBuffers,
kVkClearAttachments,
kVkClearColorImage,
kVkCopyBuffer,
kVkCopyBufferToImage,
kVkDispatch,
kVkDraw,
kVkDrawIndexed,
kVkEndRenderPass,
kVkPipelineBarrier,
kVkPushConstants,
kVkSetBlendConstants,
kVkSetDepthBias,
kVkSetScissor,
kVkSetStencilCompareMask,
kVkSetStencilReference,
kVkSetStencilWriteMask,
kVkSetViewport,
};
struct CommandHeader {
Command command;
uint32_t arguments_size_elements;
};
static constexpr size_t kCommandHeaderSizeElements =
(sizeof(CommandHeader) + sizeof(uintmax_t) - 1) / sizeof(uintmax_t);
struct ArgsVkBeginRenderPass {
VkRenderPass render_pass;
VkFramebuffer framebuffer;
VkRect2D render_area;
uint32_t clear_value_count;
VkSubpassContents contents;
// Followed by aligned optional VkClearValue[].
static_assert(alignof(VkClearValue) <= alignof(uintmax_t));
};
struct ArgsVkBindDescriptorSets {
VkPipelineBindPoint pipeline_bind_point;
VkPipelineLayout layout;
uint32_t first_set;
uint32_t descriptor_set_count;
uint32_t dynamic_offset_count;
// Followed by aligned VkDescriptorSet[], optional uint32_t[].
static_assert(alignof(VkDescriptorSet) <= alignof(uintmax_t));
};
struct ArgsVkBindIndexBuffer {
VkBuffer buffer;
VkDeviceSize offset;
VkIndexType index_type;
};
struct ArgsVkBindPipeline {
VkPipelineBindPoint pipeline_bind_point;
VkPipeline pipeline;
};
struct ArgsVkBindVertexBuffers {
uint32_t first_binding;
uint32_t binding_count;
// Followed by aligned VkBuffer[], VkDeviceSize[].
static_assert(alignof(VkBuffer) <= alignof(uintmax_t));
static_assert(alignof(VkDeviceSize) <= alignof(uintmax_t));
};
struct ArgsVkClearAttachments {
uint32_t attachment_count;
uint32_t rect_count;
// Followed by aligned VkClearAttachment[], VkClearRect[].
static_assert(alignof(VkClearAttachment) <= alignof(uintmax_t));
static_assert(alignof(VkClearRect) <= alignof(uintmax_t));
};
struct ArgsVkClearColorImage {
VkImage image;
VkImageLayout image_layout;
VkClearColorValue color;
uint32_t range_count;
// Followed by aligned VkImageSubresourceRange[].
static_assert(alignof(VkImageSubresourceRange) <= alignof(uintmax_t));
};
struct ArgsVkCopyBuffer {
VkBuffer src_buffer;
VkBuffer dst_buffer;
uint32_t region_count;
// Followed by aligned VkBufferCopy[].
static_assert(alignof(VkBufferCopy) <= alignof(uintmax_t));
};
struct ArgsVkCopyBufferToImage {
VkBuffer src_buffer;
VkImage dst_image;
VkImageLayout dst_image_layout;
uint32_t region_count;
// Followed by aligned VkBufferImageCopy[].
static_assert(alignof(VkBufferImageCopy) <= alignof(uintmax_t));
};
struct ArgsVkDispatch {
uint32_t group_count_x;
uint32_t group_count_y;
uint32_t group_count_z;
};
struct ArgsVkDraw {
uint32_t vertex_count;
uint32_t instance_count;
uint32_t first_vertex;
uint32_t first_instance;
};
struct ArgsVkDrawIndexed {
uint32_t index_count;
uint32_t instance_count;
uint32_t first_index;
int32_t vertex_offset;
uint32_t first_instance;
};
struct ArgsVkPipelineBarrier {
VkPipelineStageFlags src_stage_mask;
VkPipelineStageFlags dst_stage_mask;
VkDependencyFlags dependency_flags;
uint32_t memory_barrier_count;
uint32_t buffer_memory_barrier_count;
uint32_t image_memory_barrier_count;
// Followed by aligned optional VkMemoryBarrier[],
// optional VkBufferMemoryBarrier[], optional VkImageMemoryBarrier[].
static_assert(alignof(VkMemoryBarrier) <= alignof(uintmax_t));
static_assert(alignof(VkBufferMemoryBarrier) <= alignof(uintmax_t));
static_assert(alignof(VkImageMemoryBarrier) <= alignof(uintmax_t));
};
struct ArgsVkPushConstants {
VkPipelineLayout layout;
VkShaderStageFlags stage_flags;
uint32_t offset;
uint32_t size;
// Followed by `size` bytes of values.
};
struct ArgsVkSetBlendConstants {
float blend_constants[4];
};
struct ArgsVkSetDepthBias {
float depth_bias_constant_factor;
float depth_bias_clamp;
float depth_bias_slope_factor;
};
struct ArgsVkSetScissor {
uint32_t first_scissor;
uint32_t scissor_count;
// Followed by aligned VkRect2D[].
static_assert(alignof(VkRect2D) <= alignof(uintmax_t));
};
struct ArgsSetStencilMaskReference {
VkStencilFaceFlags face_mask;
uint32_t mask_reference;
};
struct ArgsVkSetViewport {
uint32_t first_viewport;
uint32_t viewport_count;
// Followed by aligned VkViewport[].
static_assert(alignof(VkViewport) <= alignof(uintmax_t));
};
void* WriteCommand(Command command, size_t arguments_size_bytes);
const VulkanCommandProcessor& command_processor_;
// uintmax_t to ensure uint64_t and pointer alignment of all structures.
std::vector<uintmax_t> command_stream_;
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_DEFERRED_COMMAND_BUFFER_H_
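
A hypothetical recording sequence, assuming the surrounding engine code supplies a VulkanCommandProcessor, a VkPipeline, and a VkCommandBuffer that is in the recording state when Execute() is called:

#include "xenia/gpu/vulkan/deferred_command_buffer.h"

void RecordAndReplay(const xe::gpu::vulkan::VulkanCommandProcessor& processor,
                     VkPipeline pipeline, VkCommandBuffer command_buffer) {
  // Commands are serialized into the CPU-side stream, nothing is submitted.
  xe::gpu::vulkan::DeferredCommandBuffer deferred(processor);
  deferred.CmdVkBindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
  deferred.CmdVkDraw(/* vertex_count */ 3, /* instance_count */ 1,
                     /* first_vertex */ 0, /* first_instance */ 0);
  // Replay everything into the real command buffer, then recycle the stream.
  deferred.Execute(command_buffer);
  deferred.Reset();
}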

View File

@ -8,10 +8,10 @@ project("xenia-gpu-vulkan")
language("C++")
links({
"fmt",
"glslang-spirv",
"xenia-base",
"xenia-gpu",
"xenia-ui",
"xenia-ui-spirv",
"xenia-ui-vulkan",
"xxhash",
})
@ -20,10 +20,9 @@ project("xenia-gpu-vulkan")
})
local_platform_files()
files({
"shaders/bytecode/vulkan_spirv/*.h",
"../shaders/bytecode/vulkan_spirv/*.h",
})
-- TODO(benvanik): kill this and move to the debugger UI.
group("src")
project("xenia-gpu-vulkan-trace-viewer")
uuid("86a1dddc-a26a-4885-8c55-cf745225d93e")
@ -43,7 +42,6 @@ project("xenia-gpu-vulkan-trace-viewer")
"xenia-kernel",
"xenia-patcher",
"xenia-ui",
"xenia-ui-spirv",
"xenia-ui-vulkan",
"xenia-vfs",
"xenia-patcher",
@ -58,7 +56,6 @@ project("xenia-gpu-vulkan-trace-viewer")
"libavutil",
"mspack",
"snappy",
"spirv-tools",
"xxhash",
})
includedirs({
@ -77,12 +74,6 @@ project("xenia-gpu-vulkan-trace-viewer")
})
filter("platforms:Windows")
links({
"xenia-apu-xaudio2",
"xenia-hid-winkey",
"xenia-hid-xinput",
})
-- Only create the .user file if it doesn't already exist.
local user_file = project_root.."/build/xenia-gpu-vulkan-trace-viewer.vcxproj.user"
if not os.isfile(user_file) then
@ -111,7 +102,6 @@ project("xenia-gpu-vulkan-trace-dump")
"xenia-hid-nop",
"xenia-kernel",
"xenia-ui",
"xenia-ui-spirv",
"xenia-ui-vulkan",
"xenia-vfs",
"xenia-patcher",
@ -126,7 +116,6 @@ project("xenia-gpu-vulkan-trace-dump")
"libavutil",
"mspack",
"snappy",
"spirv-tools",
"xxhash",
})
includedirs({

File diff suppressed because it is too large

View File

@ -1,406 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_RENDER_CACHE_H_
#define XENIA_GPU_VULKAN_RENDER_CACHE_H_
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/texture_info.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace gpu {
namespace vulkan {
// TODO(benvanik): make public API?
class CachedTileView;
class CachedFramebuffer;
class CachedRenderPass;
// Uniquely identifies EDRAM tiles.
struct TileViewKey {
// Offset into EDRAM in 5120b tiles.
uint16_t tile_offset;
// Tile width of the view in base 80x16 tiles.
uint16_t tile_width;
// Tile height of the view in base 80x16 tiles.
uint16_t tile_height;
// 1 if format is ColorRenderTargetFormat, else DepthRenderTargetFormat.
uint16_t color_or_depth : 1;
// Surface MSAA samples
uint16_t msaa_samples : 2;
// Either ColorRenderTargetFormat or DepthRenderTargetFormat.
uint16_t edram_format : 13;
};
static_assert(sizeof(TileViewKey) == 8, "Key must be tightly packed");
// Cached view representing EDRAM memory.
// TODO(benvanik): reuse VkImages with multiple VkImageViews for compatible
// formats?
class CachedTileView {
public:
// Key identifying the view in the cache.
TileViewKey key;
// Image
VkImage image = nullptr;
// Simple view on the image matching the format.
VkImageView image_view = nullptr;
// Image layout
VkImageLayout image_layout = VK_IMAGE_LAYOUT_UNDEFINED;
// Memory buffer
VkDeviceMemory memory = nullptr;
// Image sample count
VkSampleCountFlagBits sample_count = VK_SAMPLE_COUNT_1_BIT;
// (if a depth view) Image view of depth aspect
VkImageView image_view_depth = nullptr;
// (if a depth view) Image view of stencil aspect
VkImageView image_view_stencil = nullptr;
CachedTileView(const ui::vulkan::VulkanProvider& provider,
VkDeviceMemory edram_memory, TileViewKey view_key);
~CachedTileView();
VkResult Initialize(VkCommandBuffer command_buffer);
bool IsEqual(const TileViewKey& other_key) const {
auto a = reinterpret_cast<const uint64_t*>(&key);
auto b = reinterpret_cast<const uint64_t*>(&other_key);
return *a == *b;
}
bool operator<(const CachedTileView& other) const {
return key.tile_offset < other.key.tile_offset;
}
VkExtent2D GetSize() const {
return {key.tile_width * 80u, key.tile_height * 16u};
}
private:
const ui::vulkan::VulkanProvider& provider_;
};
// Parsed render configuration from the current render state.
struct RenderConfiguration {
// Render mode (color+depth, depth-only, etc).
xenos::ModeControl mode_control;
// Target surface pitch multiplied by MSAA, in pixels.
uint32_t surface_pitch_px;
// ESTIMATED target surface height multiplied by MSAA, in pixels.
uint32_t surface_height_px;
// Surface MSAA setting.
xenos::MsaaSamples surface_msaa;
// Color attachments for the 4 render targets.
struct {
bool used;
uint32_t edram_base;
xenos::ColorRenderTargetFormat format;
} color[4];
// Depth/stencil attachment.
struct {
bool used;
uint32_t edram_base;
xenos::DepthRenderTargetFormat format;
} depth_stencil;
};
// Current render state based on the register-specified configuration.
struct RenderState {
// Parsed configuration.
RenderConfiguration config;
// Render pass (to be used with pipelines/etc).
CachedRenderPass* render_pass = nullptr;
VkRenderPass render_pass_handle = nullptr;
// Target framebuffer bound to the render pass.
CachedFramebuffer* framebuffer = nullptr;
VkFramebuffer framebuffer_handle = nullptr;
bool color_attachment_written[4] = {false};
bool depth_attachment_written = false;
};
// Manages the virtualized EDRAM and the render target cache.
//
// On the 360 the render target is an opaque block of memory in EDRAM that's
// only accessible via resolves. We use this to our advantage to simulate
// something like it as best we can by having a shared backing memory with
// a multitude of views for each tile location in EDRAM.
//
// This allows us to have the same base address write to the same memory
// regardless of framebuffer format. Resolving then uses whatever format the
// resolve requests straight from the backing memory.
//
// EDRAM is a beast and we only approximate it as best we can. Basically,
// the 10MiB of EDRAM is composed of 2048 5120b tiles. Each tile is 80x16px.
// +-----+-----+-----+---
// |tile0|tile1|tile2|... 2048 times
// +-----+-----+-----+---
// Operations on EDRAM are expressed in tile offsets, so base 0x100 is tile
// offset 256, 256*5120=1310720b into the buffer. All rendering operations are
// aligned to tiles so trying to draw at 256px wide will have a real width of
// 320px by rounding up to the next tile.
//
// MSAA and other settings modify the effective pixel sizes (4X makes each
// tile effectively 40x8px, 2X makes it 80x8px), but tiles are still all
// 5120b. As we try to emulate this, we adjust our viewport when rendering to
// stretch pixels as needed.
//
// It appears that games also take advantage of MSAA stretching tiles when doing
// clears. Games will clear a view with 1/2X pitch/height and 4X MSAA and then
// later draw to that view with 1X pitch/height and 1X MSAA.
//
// The good news is that games cannot read EDRAM directly but must use a copy
// operation to get the data out. That gives us a chance to do whatever we
// need to (re-tile, etc) only when requested.
//
// To approximate the tiled EDRAM layout we use a single large chunk of memory.
// From this memory we create many VkImages (and VkImageViews) of various
// formats and dimensions as requested by the game. These are used as
// attachments during rendering and as sources during copies. They are also
// heavily aliased - lots of images will reference the same locations in the
// underlying EDRAM buffer. The only requirement is that there are no hazards
// with specific tiles (reading/writing the same tile through different images)
// and otherwise it should be ok *fingers crossed*.
//
// One complication is the copy/resolve process itself: we need to give back
// the data asked for in the format desired and where it goes is arbitrary
// (any address in physical memory). If the game is good we get resolves of
// EDRAM into fixed base addresses with scissored regions. If the game is bad
// we are broken.
//
// Resolves from EDRAM result in tiled textures - that's texture tiles, not
// EDRAM tiles. If we wanted to ensure byte-for-byte correctness we'd need to
// then tile the images as we wrote them out. For now, we just attempt to
// get the (X, Y) in linear space and do that. This really comes into play
// when multiple resolves write to the same texture or memory aliased by
// multiple textures - which is common due to predicated tiling. The examples
// below demonstrate what this looks like, but the important thing is that
// we are aware of partial textures and overlapping regions.
//
// TODO(benvanik): what, if any, barriers do we need? any transitions?
//
// Example with multiple render targets:
// Two color targets of 256x256px tightly packed in EDRAM:
// color target 0: base 0x0, pitch 320, scissor 0,0, 256x256
// starts at tile 0, buffer offset 0
// contains 64 tiles (320/80)*(256/16)
// color target 1: base 0x40, pitch 320, scissor 256,0, 256x256
// starts at tile 64 (after color target 0), buffer offset 327680b
// contains 64 tiles
// In EDRAM each set of 64 tiles is contiguous:
// +------+------+ +------+------+------+
// |ct0.0 |ct0.1 |...|ct0.63|ct1.0 |ct1.1 |...
// +------+------+ +------+------+------+
// To render into these, we setup two VkImages:
// image 0: bound to buffer offset 0, 320x256x4=327680b
// image 1: bound to buffer offset 327680b, 320x256x4=327680b
// So when we render to them:
// +------+-+ scissored to 256x256, actually 320x256
// | . | | <- . appears at some untiled offset in the buffer, but
// | | | consistent if aliased with the same format
// +------+-+
// In theory, this gives us proper aliasing in most cases.
//
// Example with horizontal predicated tiling:
// Trying to render 1024x576 @4X MSAA, splitting into two regions
// horizontally:
// +----------+
// | 1024x288 |
// +----------+
// | 1024x288 |
// +----------+
// EDRAM configured for 1056x288px with tile size 2112x567px (4X MSAA):
// color target 0: base 0x0, pitch 1080, 26x36 tiles
// First render (top):
// window offset 0,0
// scissor 0,0, 1024x288
// First resolve (top):
// RB_COPY_DEST_BASE 0x1F45D000
// RB_COPY_DEST_PITCH pitch=1024, height=576
// vertices: 0,0, 1024,0, 1024,288
// Second render (bottom):
// window offset 0,-288
// scissor 0,288, 1024x288
// Second resolve (bottom):
// RB_COPY_DEST_BASE 0x1F57D000 (+1179648b)
// RB_COPY_DEST_PITCH pitch=1024, height=576
// (exactly 1024x288*4b after first resolve)
// vertices: 0,288, 1024,288, 1024,576
// Resolving here is easy as the textures are contiguous in memory. We can
// snoop in the first resolve with the dest height to know the total size,
// and in the second resolve see that it overlaps and place it in the
// existing target.
//
// Example with vertical predicated tiling:
// Trying to render 1280x720 @2X MSAA, splitting into two regions
// vertically:
// +-----+-----+
// | 640 | 640 |
// | x | x |
// | 720 | 720 |
// +-----+-----+
// EDRAM configured for 640x736px with tile size 640x1472px (2X MSAA):
// color target 0: base 0x0, pitch 640, 8x92 tiles
// First render (left):
// window offset 0,0
// scissor 0,0, 640x720
// First resolve (left):
// RB_COPY_DEST_BASE 0x1BC6D000
// RB_COPY_DEST_PITCH pitch=1280, height=720
// vertices: 0,0, 640,0, 640,720
// Second render (right):
// window offset -640,0
// scissor 640,0, 640x720
// Second resolve (right):
// RB_COPY_DEST_BASE 0x1BC81000 (+81920b)
// RB_COPY_DEST_PITCH pitch=1280, height=720
// vertices: 640,0, 1280,0, 1280,720
// Resolving here is much more difficult as resolves are tiled and the right
// half of the texture is 81920b away:
// 81920/4bpp=20480px, /32 (texture tile size)=640px
// We know the texture size with the first resolve and with the second we
// must check for overlap then compute the offset (in both X and Y).
class RenderCache {
public:
RenderCache(RegisterFile* register_file,
const ui::vulkan::VulkanProvider& provider);
~RenderCache();
VkResult Initialize();
void Shutdown();
// Call this to determine if you should start a new render pass or continue
// with an already open pass.
bool dirty() const;
CachedTileView* FindTileView(uint32_t base, uint32_t pitch,
xenos::MsaaSamples samples, bool color_or_depth,
uint32_t format);
// Begins a render pass targeting the state-specified framebuffer formats.
// The command buffer will be transitioned into the render pass phase.
const RenderState* BeginRenderPass(VkCommandBuffer command_buffer,
VulkanShader* vertex_shader,
VulkanShader* pixel_shader);
// Ends the current render pass.
// The command buffer will be transitioned out of the render pass phase.
void EndRenderPass();
// Clears all cached content.
void ClearCache();
// Queues commands to copy EDRAM contents into an image.
// The command buffer must not be inside of a render pass when calling this.
void RawCopyToImage(VkCommandBuffer command_buffer, uint32_t edram_base,
VkImage image, VkImageLayout image_layout,
bool color_or_depth, VkOffset3D offset,
VkExtent3D extents);
// Queues commands to blit EDRAM contents into an image.
// The command buffer must not be inside of a render pass when calling this.
void BlitToImage(VkCommandBuffer command_buffer, uint32_t edram_base,
uint32_t pitch, uint32_t height,
xenos::MsaaSamples num_samples, VkImage image,
VkImageLayout image_layout, bool color_or_depth,
uint32_t format, VkFilter filter, VkOffset3D offset,
VkExtent3D extents);
// Queues commands to clear EDRAM contents with a solid color.
// The command buffer must not be inside of a render pass when calling this.
void ClearEDRAMColor(VkCommandBuffer command_buffer, uint32_t edram_base,
xenos::ColorRenderTargetFormat format, uint32_t pitch,
uint32_t height, xenos::MsaaSamples num_samples,
float* color);
// Queues commands to clear EDRAM contents with depth/stencil values.
// The command buffer must not be inside of a render pass when calling this.
void ClearEDRAMDepthStencil(VkCommandBuffer command_buffer,
uint32_t edram_base,
xenos::DepthRenderTargetFormat format,
uint32_t pitch, uint32_t height,
xenos::MsaaSamples num_samples, float depth,
uint32_t stencil);
// Queues commands to fill EDRAM contents with a constant value.
// The command buffer must not be inside of a render pass when calling this.
void FillEDRAM(VkCommandBuffer command_buffer, uint32_t value);
private:
// Parses the current state into a configuration object.
bool ParseConfiguration(RenderConfiguration* config);
// Finds a tile view. Returns nullptr if none found matching the key.
CachedTileView* FindTileView(const TileViewKey& view_key) const;
// Gets or creates a tile view with the given parameters.
CachedTileView* FindOrCreateTileView(VkCommandBuffer command_buffer,
const TileViewKey& view_key);
void UpdateTileView(VkCommandBuffer command_buffer, CachedTileView* view,
bool load, bool insert_barrier = true);
  // Gets or creates a render pass and framebuffer for the given configuration.
// This attempts to reuse as much as possible across render passes and
// framebuffers.
bool ConfigureRenderPass(VkCommandBuffer command_buffer,
RenderConfiguration* config,
CachedRenderPass** out_render_pass,
CachedFramebuffer** out_framebuffer);
RegisterFile* register_file_ = nullptr;
const ui::vulkan::VulkanProvider& provider_;
// Entire 10MiB of EDRAM.
VkDeviceMemory edram_memory_ = nullptr;
  // Buffer overlaid 1:1 with edram_memory_ to allow raw access.
VkBuffer edram_buffer_ = nullptr;
// Cache of VkImage and VkImageView's for all of our EDRAM tilings.
// TODO(benvanik): non-linear lookup? Should only be a small number of these.
std::vector<CachedTileView*> cached_tile_views_;
// Cache of render passes based on formats.
std::vector<CachedRenderPass*> cached_render_passes_;
// Shadows of the registers that impact the render pass we choose.
// If the registers don't change between passes we can quickly reuse the
// previous one.
struct ShadowRegisters {
reg::RB_MODECONTROL rb_modecontrol;
reg::RB_SURFACE_INFO rb_surface_info;
reg::RB_COLOR_INFO rb_color_info;
reg::RB_COLOR_INFO rb_color1_info;
reg::RB_COLOR_INFO rb_color2_info;
reg::RB_COLOR_INFO rb_color3_info;
reg::RB_DEPTH_INFO rb_depth_info;
uint32_t pa_sc_window_scissor_tl;
uint32_t pa_sc_window_scissor_br;
ShadowRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} shadow_registers_;
bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
// Configuration used for the current/previous Begin/End, representing the
// current shadow register state.
RenderState current_state_;
// Only valid during a BeginRenderPass/EndRenderPass block.
VkCommandBuffer current_command_buffer_ = nullptr;
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_RENDER_CACHE_H_
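
The tile arithmetic from the EDRAM comment above, written out as a small sketch. The constants (2048 tiles of 5120 bytes, 80x16px each) and the worked numbers come straight from that comment; the helper names are illustrative only.

#include <cstdint>
#include <iostream>

constexpr uint32_t kEdramTileCount = 2048;
constexpr uint32_t kEdramTileSizeBytes = 5120;
constexpr uint32_t kEdramTileWidthPx = 80;
constexpr uint32_t kEdramTileHeightPx = 16;
static_assert(kEdramTileCount * kEdramTileSizeBytes == 10 * 1024 * 1024,
              "EDRAM is 10MiB");

// Byte offset of a tile base: base 0x100 -> 256 * 5120 = 1310720b.
constexpr uint32_t TileOffsetToBytes(uint32_t tile_offset) {
  return tile_offset * kEdramTileSizeBytes;
}

// Draws are aligned to whole tiles: a 256px-wide draw really occupies 320px.
constexpr uint32_t AlignToTileWidth(uint32_t width_px) {
  return (width_px + kEdramTileWidthPx - 1) / kEdramTileWidthPx *
         kEdramTileWidthPx;
}

// Tiles covered by a target: a 320px-pitch, 256px-tall target covers
// (320/80)*(256/16) = 64 tiles, matching the multi-target example above.
constexpr uint32_t TilesForTarget(uint32_t pitch_px, uint32_t height_px) {
  return (pitch_px / kEdramTileWidthPx) * (height_px / kEdramTileHeightPx);
}

int main() {
  std::cout << TileOffsetToBytes(0x100) << "\n";  // 1310720
  std::cout << AlignToTileWidth(256) << "\n";     // 320
  std::cout << TilesForTarget(320, 256) << "\n";  // 64
  return 0;
}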

View File

@ -1,2 +0,0 @@
DisableFormat: true
SortIncludes: false

View File

@ -1,52 +0,0 @@
// Generated with `xb buildshaders`.
#if 0
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 10
; Bound: 16104
; Schema: 0
OpCapability Shader
OpCapability Sampled1D
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %5663 "main" %3302 %4841
OpExecutionMode %5663 OriginUpperLeft
OpDecorate %3302 Location 0
OpDecorate %4841 Location 0
%void = OpTypeVoid
%1282 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%uint = OpTypeInt 32 0
%uint_16 = OpConstant %uint 16
%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16
%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16
%3302 = OpVariable %_ptr_Input__arr_v4float_uint_16 Input
%uint_4 = OpConstant %uint 4
%_arr_v4float_uint_4 = OpTypeArray %v4float %uint_4
%_ptr_Output__arr_v4float_uint_4 = OpTypePointer Output %_arr_v4float_uint_4
%4841 = OpVariable %_ptr_Output__arr_v4float_uint_4 Output
%5663 = OpFunction %void None %1282
%16103 = OpLabel
OpReturn
OpFunctionEnd
#endif
const uint32_t dummy_ps[] = {
0x07230203, 0x00010000, 0x0008000A, 0x00003EE8, 0x00000000, 0x00020011,
0x00000001, 0x00020011, 0x0000002B, 0x0006000B, 0x00000001, 0x4C534C47,
0x6474732E, 0x3035342E, 0x00000000, 0x0003000E, 0x00000000, 0x00000001,
0x0007000F, 0x00000004, 0x0000161F, 0x6E69616D, 0x00000000, 0x00000CE6,
0x000012E9, 0x00030010, 0x0000161F, 0x00000007, 0x00040047, 0x00000CE6,
0x0000001E, 0x00000000, 0x00040047, 0x000012E9, 0x0000001E, 0x00000000,
0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, 0x00030016,
0x0000000D, 0x00000020, 0x00040017, 0x0000001D, 0x0000000D, 0x00000004,
0x00040015, 0x0000000B, 0x00000020, 0x00000000, 0x0004002B, 0x0000000B,
0x00000A3A, 0x00000010, 0x0004001C, 0x0000056F, 0x0000001D, 0x00000A3A,
0x00040020, 0x000007EC, 0x00000001, 0x0000056F, 0x0004003B, 0x000007EC,
0x00000CE6, 0x00000001, 0x0004002B, 0x0000000B, 0x00000A16, 0x00000004,
0x0004001C, 0x000005C3, 0x0000001D, 0x00000A16, 0x00040020, 0x00000840,
0x00000003, 0x000005C3, 0x0004003B, 0x00000840, 0x000012E9, 0x00000003,
0x00050036, 0x00000008, 0x0000161F, 0x00000000, 0x00000502, 0x000200F8,
0x00003EE7, 0x000100FD, 0x00010038,
};

View File

@ -1,193 +0,0 @@
// Generated with `xb buildshaders`.
#if 0
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 10
; Bound: 23916
; Schema: 0
OpCapability Geometry
OpCapability GeometryPointSize
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Geometry %5663 "main" %4930 %5305 %5430 %3302 %4044 %4656 %3736
OpExecutionMode %5663 InputLinesAdjacency
OpExecutionMode %5663 Invocations 1
OpExecutionMode %5663 OutputLineStrip
OpExecutionMode %5663 OutputVertices 5
OpMemberDecorate %_struct_1032 0 BuiltIn Position
OpMemberDecorate %_struct_1032 1 BuiltIn PointSize
OpDecorate %_struct_1032 Block
OpMemberDecorate %_struct_1033 0 BuiltIn Position
OpMemberDecorate %_struct_1033 1 BuiltIn PointSize
OpDecorate %_struct_1033 Block
OpDecorate %5430 Location 0
OpDecorate %3302 Location 0
OpDecorate %4044 Location 16
OpDecorate %4656 Location 17
OpDecorate %3736 Location 16
%void = OpTypeVoid
%1282 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%_struct_1032 = OpTypeStruct %v4float %float
%_ptr_Output__struct_1032 = OpTypePointer Output %_struct_1032
%4930 = OpVariable %_ptr_Output__struct_1032 Output
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%_struct_1033 = OpTypeStruct %v4float %float
%uint = OpTypeInt 32 0
%uint_4 = OpConstant %uint 4
%_arr__struct_1033_uint_4 = OpTypeArray %_struct_1033 %uint_4
%_ptr_Input__arr__struct_1033_uint_4 = OpTypePointer Input %_arr__struct_1033_uint_4
%5305 = OpVariable %_ptr_Input__arr__struct_1033_uint_4 Input
%_ptr_Input_v4float = OpTypePointer Input %v4float
%_ptr_Output_v4float = OpTypePointer Output %v4float
%int_1 = OpConstant %int 1
%_ptr_Input_float = OpTypePointer Input %float
%_ptr_Output_float = OpTypePointer Output %float
%uint_16 = OpConstant %uint 16
%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16
%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16
%5430 = OpVariable %_ptr_Output__arr_v4float_uint_16 Output
%_arr__arr_v4float_uint_16_uint_4 = OpTypeArray %_arr_v4float_uint_16 %uint_4
%_ptr_Input__arr__arr_v4float_uint_16_uint_4 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_4
%3302 = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_4 Input
%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16
%int_2 = OpConstant %int 2
%int_3 = OpConstant %int 3
%v2float = OpTypeVector %float 2
%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4
%_ptr_Input__arr_v2float_uint_4 = OpTypePointer Input %_arr_v2float_uint_4
%4044 = OpVariable %_ptr_Input__arr_v2float_uint_4 Input
%_arr_float_uint_4 = OpTypeArray %float %uint_4
%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4
%4656 = OpVariable %_ptr_Input__arr_float_uint_4 Input
%_ptr_Output_v2float = OpTypePointer Output %v2float
%3736 = OpVariable %_ptr_Output_v2float Output
%5663 = OpFunction %void None %1282
%23915 = OpLabel
%7129 = OpAccessChain %_ptr_Input_v4float %5305 %int_0 %int_0
%15646 = OpLoad %v4float %7129
%19981 = OpAccessChain %_ptr_Output_v4float %4930 %int_0
OpStore %19981 %15646
%19905 = OpAccessChain %_ptr_Input_float %5305 %int_0 %int_1
%7391 = OpLoad %float %19905
%19982 = OpAccessChain %_ptr_Output_float %4930 %int_1
OpStore %19982 %7391
%19848 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_0
%10874 = OpLoad %_arr_v4float_uint_16 %19848
OpStore %5430 %10874
OpEmitVertex
%22812 = OpAccessChain %_ptr_Input_v4float %5305 %int_1 %int_0
%11398 = OpLoad %v4float %22812
OpStore %19981 %11398
%16622 = OpAccessChain %_ptr_Input_float %5305 %int_1 %int_1
%7967 = OpLoad %float %16622
OpStore %19982 %7967
%16623 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_1
%10875 = OpLoad %_arr_v4float_uint_16 %16623
OpStore %5430 %10875
OpEmitVertex
%22813 = OpAccessChain %_ptr_Input_v4float %5305 %int_2 %int_0
%11399 = OpLoad %v4float %22813
OpStore %19981 %11399
%16624 = OpAccessChain %_ptr_Input_float %5305 %int_2 %int_1
%7968 = OpLoad %float %16624
OpStore %19982 %7968
%16625 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_2
%10876 = OpLoad %_arr_v4float_uint_16 %16625
OpStore %5430 %10876
OpEmitVertex
%22814 = OpAccessChain %_ptr_Input_v4float %5305 %int_3 %int_0
%11400 = OpLoad %v4float %22814
OpStore %19981 %11400
%16626 = OpAccessChain %_ptr_Input_float %5305 %int_3 %int_1
%7969 = OpLoad %float %16626
OpStore %19982 %7969
%16627 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_3
%10877 = OpLoad %_arr_v4float_uint_16 %16627
OpStore %5430 %10877
OpEmitVertex
OpStore %19981 %15646
OpStore %19982 %7391
OpStore %5430 %10874
OpEmitVertex
OpEndPrimitive
OpReturn
OpFunctionEnd
#endif
const uint32_t line_quad_list_gs[] = {
0x07230203, 0x00010000, 0x0008000A, 0x00005D6C, 0x00000000, 0x00020011,
0x00000002, 0x00020011, 0x00000018, 0x0006000B, 0x00000001, 0x4C534C47,
0x6474732E, 0x3035342E, 0x00000000, 0x0003000E, 0x00000000, 0x00000001,
0x000C000F, 0x00000003, 0x0000161F, 0x6E69616D, 0x00000000, 0x00001342,
0x000014B9, 0x00001536, 0x00000CE6, 0x00000FCC, 0x00001230, 0x00000E98,
0x00030010, 0x0000161F, 0x00000015, 0x00040010, 0x0000161F, 0x00000000,
0x00000001, 0x00030010, 0x0000161F, 0x0000001C, 0x00040010, 0x0000161F,
0x0000001A, 0x00000005, 0x00050048, 0x00000408, 0x00000000, 0x0000000B,
0x00000000, 0x00050048, 0x00000408, 0x00000001, 0x0000000B, 0x00000001,
0x00030047, 0x00000408, 0x00000002, 0x00050048, 0x00000409, 0x00000000,
0x0000000B, 0x00000000, 0x00050048, 0x00000409, 0x00000001, 0x0000000B,
0x00000001, 0x00030047, 0x00000409, 0x00000002, 0x00040047, 0x00001536,
0x0000001E, 0x00000000, 0x00040047, 0x00000CE6, 0x0000001E, 0x00000000,
0x00040047, 0x00000FCC, 0x0000001E, 0x00000010, 0x00040047, 0x00001230,
0x0000001E, 0x00000011, 0x00040047, 0x00000E98, 0x0000001E, 0x00000010,
0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, 0x00030016,
0x0000000D, 0x00000020, 0x00040017, 0x0000001D, 0x0000000D, 0x00000004,
0x0004001E, 0x00000408, 0x0000001D, 0x0000000D, 0x00040020, 0x00000685,
0x00000003, 0x00000408, 0x0004003B, 0x00000685, 0x00001342, 0x00000003,
0x00040015, 0x0000000C, 0x00000020, 0x00000001, 0x0004002B, 0x0000000C,
0x00000A0B, 0x00000000, 0x0004001E, 0x00000409, 0x0000001D, 0x0000000D,
0x00040015, 0x0000000B, 0x00000020, 0x00000000, 0x0004002B, 0x0000000B,
0x00000A16, 0x00000004, 0x0004001C, 0x0000032E, 0x00000409, 0x00000A16,
0x00040020, 0x000005AB, 0x00000001, 0x0000032E, 0x0004003B, 0x000005AB,
0x000014B9, 0x00000001, 0x00040020, 0x0000029A, 0x00000001, 0x0000001D,
0x00040020, 0x0000029B, 0x00000003, 0x0000001D, 0x0004002B, 0x0000000C,
0x00000A0E, 0x00000001, 0x00040020, 0x0000028A, 0x00000001, 0x0000000D,
0x00040020, 0x0000028B, 0x00000003, 0x0000000D, 0x0004002B, 0x0000000B,
0x00000A3A, 0x00000010, 0x0004001C, 0x00000473, 0x0000001D, 0x00000A3A,
0x00040020, 0x000006F0, 0x00000003, 0x00000473, 0x0004003B, 0x000006F0,
0x00001536, 0x00000003, 0x0004001C, 0x00000973, 0x00000473, 0x00000A16,
0x00040020, 0x0000002D, 0x00000001, 0x00000973, 0x0004003B, 0x0000002D,
0x00000CE6, 0x00000001, 0x00040020, 0x000006F1, 0x00000001, 0x00000473,
0x0004002B, 0x0000000C, 0x00000A11, 0x00000002, 0x0004002B, 0x0000000C,
0x00000A14, 0x00000003, 0x00040017, 0x00000013, 0x0000000D, 0x00000002,
0x0004001C, 0x000002A2, 0x00000013, 0x00000A16, 0x00040020, 0x0000051F,
0x00000001, 0x000002A2, 0x0004003B, 0x0000051F, 0x00000FCC, 0x00000001,
0x0004001C, 0x00000248, 0x0000000D, 0x00000A16, 0x00040020, 0x000004C5,
0x00000001, 0x00000248, 0x0004003B, 0x000004C5, 0x00001230, 0x00000001,
0x00040020, 0x00000290, 0x00000003, 0x00000013, 0x0004003B, 0x00000290,
0x00000E98, 0x00000003, 0x00050036, 0x00000008, 0x0000161F, 0x00000000,
0x00000502, 0x000200F8, 0x00005D6B, 0x00060041, 0x0000029A, 0x00001BD9,
0x000014B9, 0x00000A0B, 0x00000A0B, 0x0004003D, 0x0000001D, 0x00003D1E,
0x00001BD9, 0x00050041, 0x0000029B, 0x00004E0D, 0x00001342, 0x00000A0B,
0x0003003E, 0x00004E0D, 0x00003D1E, 0x00060041, 0x0000028A, 0x00004DC1,
0x000014B9, 0x00000A0B, 0x00000A0E, 0x0004003D, 0x0000000D, 0x00001CDF,
0x00004DC1, 0x00050041, 0x0000028B, 0x00004E0E, 0x00001342, 0x00000A0E,
0x0003003E, 0x00004E0E, 0x00001CDF, 0x00050041, 0x000006F1, 0x00004D88,
0x00000CE6, 0x00000A0B, 0x0004003D, 0x00000473, 0x00002A7A, 0x00004D88,
0x0003003E, 0x00001536, 0x00002A7A, 0x000100DA, 0x00060041, 0x0000029A,
0x0000591C, 0x000014B9, 0x00000A0E, 0x00000A0B, 0x0004003D, 0x0000001D,
0x00002C86, 0x0000591C, 0x0003003E, 0x00004E0D, 0x00002C86, 0x00060041,
0x0000028A, 0x000040EE, 0x000014B9, 0x00000A0E, 0x00000A0E, 0x0004003D,
0x0000000D, 0x00001F1F, 0x000040EE, 0x0003003E, 0x00004E0E, 0x00001F1F,
0x00050041, 0x000006F1, 0x000040EF, 0x00000CE6, 0x00000A0E, 0x0004003D,
0x00000473, 0x00002A7B, 0x000040EF, 0x0003003E, 0x00001536, 0x00002A7B,
0x000100DA, 0x00060041, 0x0000029A, 0x0000591D, 0x000014B9, 0x00000A11,
0x00000A0B, 0x0004003D, 0x0000001D, 0x00002C87, 0x0000591D, 0x0003003E,
0x00004E0D, 0x00002C87, 0x00060041, 0x0000028A, 0x000040F0, 0x000014B9,
0x00000A11, 0x00000A0E, 0x0004003D, 0x0000000D, 0x00001F20, 0x000040F0,
0x0003003E, 0x00004E0E, 0x00001F20, 0x00050041, 0x000006F1, 0x000040F1,
0x00000CE6, 0x00000A11, 0x0004003D, 0x00000473, 0x00002A7C, 0x000040F1,
0x0003003E, 0x00001536, 0x00002A7C, 0x000100DA, 0x00060041, 0x0000029A,
0x0000591E, 0x000014B9, 0x00000A14, 0x00000A0B, 0x0004003D, 0x0000001D,
0x00002C88, 0x0000591E, 0x0003003E, 0x00004E0D, 0x00002C88, 0x00060041,
0x0000028A, 0x000040F2, 0x000014B9, 0x00000A14, 0x00000A0E, 0x0004003D,
0x0000000D, 0x00001F21, 0x000040F2, 0x0003003E, 0x00004E0E, 0x00001F21,
0x00050041, 0x000006F1, 0x000040F3, 0x00000CE6, 0x00000A14, 0x0004003D,
0x00000473, 0x00002A7D, 0x000040F3, 0x0003003E, 0x00001536, 0x00002A7D,
0x000100DA, 0x0003003E, 0x00004E0D, 0x00003D1E, 0x0003003E, 0x00004E0E,
0x00001CDF, 0x0003003E, 0x00001536, 0x00002A7A, 0x000100DA, 0x000100DB,
0x000100FD, 0x00010038,
};
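
Each of these generated headers pairs a human-readable SPIR-V disassembly (kept under `#if 0` so the compiler skips it) with the same module encoded as little-endian 32-bit words; the leading word 0x07230203 is the SPIR-V magic number. A minimal sketch of the sanity check this enables (the function name is illustrative, not from the tree):

#include <cstddef>
#include <cstdint>

// Every valid SPIR-V module starts with the magic number 0x07230203, so a
// quick check on an embedded array catches truncated or misencoded blobs.
bool LooksLikeSpirv(const uint32_t* words, size_t word_count) {
  return word_count > 0 && words[0] == 0x07230203u;
}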

View File

@ -1,244 +0,0 @@
// Generated with `xb buildshaders`.
#if 0
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 10
; Bound: 24916
; Schema: 0
OpCapability Geometry
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Geometry %5663 "main" %5305 %4065 %4930 %5430 %3302 %5753 %5479
OpExecutionMode %5663 InputPoints
OpExecutionMode %5663 Invocations 1
OpExecutionMode %5663 OutputTriangleStrip
OpExecutionMode %5663 OutputVertices 4
OpMemberDecorate %_struct_1017 0 BuiltIn Position
OpDecorate %_struct_1017 Block
OpMemberDecorate %_struct_1287 0 Offset 0
OpMemberDecorate %_struct_1287 1 Offset 16
OpMemberDecorate %_struct_1287 2 Offset 32
OpMemberDecorate %_struct_1287 3 Offset 48
OpMemberDecorate %_struct_1287 4 Offset 64
OpDecorate %_struct_1287 Block
OpDecorate %4065 Location 17
OpMemberDecorate %_struct_1018 0 BuiltIn Position
OpDecorate %_struct_1018 Block
OpDecorate %5430 Location 0
OpDecorate %3302 Location 0
OpDecorate %5753 Location 16
OpDecorate %5479 Location 16
%void = OpTypeVoid
%1282 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%_struct_1017 = OpTypeStruct %v4float
%uint = OpTypeInt 32 0
%uint_1 = OpConstant %uint 1
%_arr__struct_1017_uint_1 = OpTypeArray %_struct_1017 %uint_1
%_ptr_Input__arr__struct_1017_uint_1 = OpTypePointer Input %_arr__struct_1017_uint_1
%5305 = OpVariable %_ptr_Input__arr__struct_1017_uint_1 Input
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%_ptr_Input_v4float = OpTypePointer Input %v4float
%v2float = OpTypeVector %float 2
%_ptr_Function_v2float = OpTypePointer Function %v2float
%_struct_1287 = OpTypeStruct %v4float %v4float %v4float %v4float %uint
%_ptr_PushConstant__struct_1287 = OpTypePointer PushConstant %_struct_1287
%3463 = OpVariable %_ptr_PushConstant__struct_1287 PushConstant
%int_2 = OpConstant %int 2
%_ptr_PushConstant_v4float = OpTypePointer PushConstant %v4float
%_arr_float_uint_1 = OpTypeArray %float %uint_1
%_ptr_Input__arr_float_uint_1 = OpTypePointer Input %_arr_float_uint_1
%4065 = OpVariable %_ptr_Input__arr_float_uint_1 Input
%_ptr_Input_float = OpTypePointer Input %float
%float_0 = OpConstant %float 0
%bool = OpTypeBool
%int_4 = OpConstant %int 4
%_struct_1018 = OpTypeStruct %v4float
%_ptr_Output__struct_1018 = OpTypePointer Output %_struct_1018
%4930 = OpVariable %_ptr_Output__struct_1018 Output
%uint_4 = OpConstant %uint 4
%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4
%float_n1 = OpConstant %float -1
%float_1 = OpConstant %float 1
%73 = OpConstantComposite %v2float %float_n1 %float_1
%768 = OpConstantComposite %v2float %float_1 %float_1
%74 = OpConstantComposite %v2float %float_n1 %float_n1
%769 = OpConstantComposite %v2float %float_1 %float_n1
%2941 = OpConstantComposite %_arr_v2float_uint_4 %73 %768 %74 %769
%_ptr_Function__arr_v2float_uint_4 = OpTypePointer Function %_arr_v2float_uint_4
%_ptr_Output_v4float = OpTypePointer Output %v4float
%uint_16 = OpConstant %uint 16
%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16
%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16
%5430 = OpVariable %_ptr_Output__arr_v4float_uint_16 Output
%_arr__arr_v4float_uint_16_uint_1 = OpTypeArray %_arr_v4float_uint_16 %uint_1
%_ptr_Input__arr__arr_v4float_uint_16_uint_1 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_1
%3302 = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_1 Input
%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16
%_ptr_Output_v2float = OpTypePointer Output %v2float
%5753 = OpVariable %_ptr_Output_v2float Output
%1823 = OpConstantComposite %v2float %float_0 %float_0
%int_1 = OpConstant %int 1
%_arr_v2float_uint_1 = OpTypeArray %v2float %uint_1
%_ptr_Input__arr_v2float_uint_1 = OpTypePointer Input %_arr_v2float_uint_1
%5479 = OpVariable %_ptr_Input__arr_v2float_uint_1 Input
%5663 = OpFunction %void None %1282
%24915 = OpLabel
%18491 = OpVariable %_ptr_Function__arr_v2float_uint_4 Function
%5238 = OpVariable %_ptr_Function__arr_v2float_uint_4 Function
%22270 = OpAccessChain %_ptr_Input_v4float %5305 %int_0 %int_0
%8181 = OpLoad %v4float %22270
%20420 = OpAccessChain %_ptr_PushConstant_v4float %3463 %int_2
%20062 = OpLoad %v4float %20420
%19110 = OpVectorShuffle %v2float %20062 %20062 0 1
%7988 = OpAccessChain %_ptr_Input_float %4065 %int_0
%13069 = OpLoad %float %7988
%23515 = OpFOrdGreaterThan %bool %13069 %float_0
OpSelectionMerge %16839 None
OpBranchConditional %23515 %13106 %16839
%13106 = OpLabel
%18836 = OpCompositeConstruct %v2float %13069 %13069
OpBranch %16839
%16839 = OpLabel
%19748 = OpPhi %v2float %19110 %24915 %18836 %13106
%24067 = OpAccessChain %_ptr_PushConstant_v4float %3463 %int_0
%15439 = OpLoad %v4float %24067
%10399 = OpVectorShuffle %v2float %15439 %15439 2 3
%24282 = OpFDiv %v2float %19748 %10399
OpBranch %6318
%6318 = OpLabel
%22958 = OpPhi %int %int_0 %16839 %11651 %12148
%24788 = OpSLessThan %bool %22958 %int_4
OpLoopMerge %12265 %12148 None
OpBranchConditional %24788 %12148 %12265
%12148 = OpLabel
%17761 = OpVectorShuffle %v2float %8181 %8181 0 1
OpStore %18491 %2941
%19574 = OpAccessChain %_ptr_Function_v2float %18491 %22958
%15971 = OpLoad %v2float %19574
%17243 = OpFMul %v2float %15971 %24282
%16594 = OpFAdd %v2float %17761 %17243
%10618 = OpCompositeExtract %float %16594 0
%14087 = OpCompositeExtract %float %16594 1
%7641 = OpCompositeExtract %float %8181 2
%7529 = OpCompositeExtract %float %8181 3
%18260 = OpCompositeConstruct %v4float %10618 %14087 %7641 %7529
%8483 = OpAccessChain %_ptr_Output_v4float %4930 %int_0
OpStore %8483 %18260
%19848 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_0
%7910 = OpLoad %_arr_v4float_uint_16 %19848
OpStore %5430 %7910
OpStore %5238 %2941
%13290 = OpAccessChain %_ptr_Function_v2float %5238 %22958
%19207 = OpLoad %v2float %13290
%8973 = OpExtInst %v2float %1 FMax %19207 %1823
OpStore %5753 %8973
OpEmitVertex
%11651 = OpIAdd %int %22958 %int_1
OpBranch %6318
%12265 = OpLabel
OpEndPrimitive
OpReturn
OpFunctionEnd
#endif
const uint32_t point_list_gs[] = {
0x07230203, 0x00010000, 0x0008000A, 0x00006154, 0x00000000, 0x00020011,
0x00000002, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E,
0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x000C000F, 0x00000003,
0x0000161F, 0x6E69616D, 0x00000000, 0x000014B9, 0x00000FE1, 0x00001342,
0x00001536, 0x00000CE6, 0x00001679, 0x00001567, 0x00030010, 0x0000161F,
0x00000013, 0x00040010, 0x0000161F, 0x00000000, 0x00000001, 0x00030010,
0x0000161F, 0x0000001D, 0x00040010, 0x0000161F, 0x0000001A, 0x00000004,
0x00050048, 0x000003F9, 0x00000000, 0x0000000B, 0x00000000, 0x00030047,
0x000003F9, 0x00000002, 0x00050048, 0x00000507, 0x00000000, 0x00000023,
0x00000000, 0x00050048, 0x00000507, 0x00000001, 0x00000023, 0x00000010,
0x00050048, 0x00000507, 0x00000002, 0x00000023, 0x00000020, 0x00050048,
0x00000507, 0x00000003, 0x00000023, 0x00000030, 0x00050048, 0x00000507,
0x00000004, 0x00000023, 0x00000040, 0x00030047, 0x00000507, 0x00000002,
0x00040047, 0x00000FE1, 0x0000001E, 0x00000011, 0x00050048, 0x000003FA,
0x00000000, 0x0000000B, 0x00000000, 0x00030047, 0x000003FA, 0x00000002,
0x00040047, 0x00001536, 0x0000001E, 0x00000000, 0x00040047, 0x00000CE6,
0x0000001E, 0x00000000, 0x00040047, 0x00001679, 0x0000001E, 0x00000010,
0x00040047, 0x00001567, 0x0000001E, 0x00000010, 0x00020013, 0x00000008,
0x00030021, 0x00000502, 0x00000008, 0x00030016, 0x0000000D, 0x00000020,
0x00040017, 0x0000001D, 0x0000000D, 0x00000004, 0x0003001E, 0x000003F9,
0x0000001D, 0x00040015, 0x0000000B, 0x00000020, 0x00000000, 0x0004002B,
0x0000000B, 0x00000A0D, 0x00000001, 0x0004001C, 0x0000023D, 0x000003F9,
0x00000A0D, 0x00040020, 0x000004BA, 0x00000001, 0x0000023D, 0x0004003B,
0x000004BA, 0x000014B9, 0x00000001, 0x00040015, 0x0000000C, 0x00000020,
0x00000001, 0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000, 0x00040020,
0x0000029A, 0x00000001, 0x0000001D, 0x00040017, 0x00000013, 0x0000000D,
0x00000002, 0x00040020, 0x00000290, 0x00000007, 0x00000013, 0x0007001E,
0x00000507, 0x0000001D, 0x0000001D, 0x0000001D, 0x0000001D, 0x0000000B,
0x00040020, 0x00000784, 0x00000009, 0x00000507, 0x0004003B, 0x00000784,
0x00000D87, 0x00000009, 0x0004002B, 0x0000000C, 0x00000A11, 0x00000002,
0x00040020, 0x0000029B, 0x00000009, 0x0000001D, 0x0004001C, 0x00000239,
0x0000000D, 0x00000A0D, 0x00040020, 0x000004B6, 0x00000001, 0x00000239,
0x0004003B, 0x000004B6, 0x00000FE1, 0x00000001, 0x00040020, 0x0000028A,
0x00000001, 0x0000000D, 0x0004002B, 0x0000000D, 0x00000A0C, 0x00000000,
0x00020014, 0x00000009, 0x0004002B, 0x0000000C, 0x00000A17, 0x00000004,
0x0003001E, 0x000003FA, 0x0000001D, 0x00040020, 0x00000676, 0x00000003,
0x000003FA, 0x0004003B, 0x00000676, 0x00001342, 0x00000003, 0x0004002B,
0x0000000B, 0x00000A16, 0x00000004, 0x0004001C, 0x000004D3, 0x00000013,
0x00000A16, 0x0004002B, 0x0000000D, 0x00000341, 0xBF800000, 0x0004002B,
0x0000000D, 0x0000008A, 0x3F800000, 0x0005002C, 0x00000013, 0x00000049,
0x00000341, 0x0000008A, 0x0005002C, 0x00000013, 0x00000300, 0x0000008A,
0x0000008A, 0x0005002C, 0x00000013, 0x0000004A, 0x00000341, 0x00000341,
0x0005002C, 0x00000013, 0x00000301, 0x0000008A, 0x00000341, 0x0007002C,
0x000004D3, 0x00000B7D, 0x00000049, 0x00000300, 0x0000004A, 0x00000301,
0x00040020, 0x00000750, 0x00000007, 0x000004D3, 0x00040020, 0x0000029C,
0x00000003, 0x0000001D, 0x0004002B, 0x0000000B, 0x00000A3A, 0x00000010,
0x0004001C, 0x00000989, 0x0000001D, 0x00000A3A, 0x00040020, 0x00000043,
0x00000003, 0x00000989, 0x0004003B, 0x00000043, 0x00001536, 0x00000003,
0x0004001C, 0x00000A2E, 0x00000989, 0x00000A0D, 0x00040020, 0x000000E8,
0x00000001, 0x00000A2E, 0x0004003B, 0x000000E8, 0x00000CE6, 0x00000001,
0x00040020, 0x00000044, 0x00000001, 0x00000989, 0x00040020, 0x00000291,
0x00000003, 0x00000013, 0x0004003B, 0x00000291, 0x00001679, 0x00000003,
0x0005002C, 0x00000013, 0x0000071F, 0x00000A0C, 0x00000A0C, 0x0004002B,
0x0000000C, 0x00000A0E, 0x00000001, 0x0004001C, 0x00000281, 0x00000013,
0x00000A0D, 0x00040020, 0x000004FE, 0x00000001, 0x00000281, 0x0004003B,
0x000004FE, 0x00001567, 0x00000001, 0x00050036, 0x00000008, 0x0000161F,
0x00000000, 0x00000502, 0x000200F8, 0x00006153, 0x0004003B, 0x00000750,
0x0000483B, 0x00000007, 0x0004003B, 0x00000750, 0x00001476, 0x00000007,
0x00060041, 0x0000029A, 0x000056FE, 0x000014B9, 0x00000A0B, 0x00000A0B,
0x0004003D, 0x0000001D, 0x00001FF5, 0x000056FE, 0x00050041, 0x0000029B,
0x00004FC4, 0x00000D87, 0x00000A11, 0x0004003D, 0x0000001D, 0x00004E5E,
0x00004FC4, 0x0007004F, 0x00000013, 0x00004AA6, 0x00004E5E, 0x00004E5E,
0x00000000, 0x00000001, 0x00050041, 0x0000028A, 0x00001F34, 0x00000FE1,
0x00000A0B, 0x0004003D, 0x0000000D, 0x0000330D, 0x00001F34, 0x000500BA,
0x00000009, 0x00005BDB, 0x0000330D, 0x00000A0C, 0x000300F7, 0x000041C7,
0x00000000, 0x000400FA, 0x00005BDB, 0x00003332, 0x000041C7, 0x000200F8,
0x00003332, 0x00050050, 0x00000013, 0x00004994, 0x0000330D, 0x0000330D,
0x000200F9, 0x000041C7, 0x000200F8, 0x000041C7, 0x000700F5, 0x00000013,
0x00004D24, 0x00004AA6, 0x00006153, 0x00004994, 0x00003332, 0x00050041,
0x0000029B, 0x00005E03, 0x00000D87, 0x00000A0B, 0x0004003D, 0x0000001D,
0x00003C4F, 0x00005E03, 0x0007004F, 0x00000013, 0x0000289F, 0x00003C4F,
0x00003C4F, 0x00000002, 0x00000003, 0x00050088, 0x00000013, 0x00005EDA,
0x00004D24, 0x0000289F, 0x000200F9, 0x000018AE, 0x000200F8, 0x000018AE,
0x000700F5, 0x0000000C, 0x000059AE, 0x00000A0B, 0x000041C7, 0x00002D83,
0x00002F74, 0x000500B1, 0x00000009, 0x000060D4, 0x000059AE, 0x00000A17,
0x000400F6, 0x00002FE9, 0x00002F74, 0x00000000, 0x000400FA, 0x000060D4,
0x00002F74, 0x00002FE9, 0x000200F8, 0x00002F74, 0x0007004F, 0x00000013,
0x00004561, 0x00001FF5, 0x00001FF5, 0x00000000, 0x00000001, 0x0003003E,
0x0000483B, 0x00000B7D, 0x00050041, 0x00000290, 0x00004C76, 0x0000483B,
0x000059AE, 0x0004003D, 0x00000013, 0x00003E63, 0x00004C76, 0x00050085,
0x00000013, 0x0000435B, 0x00003E63, 0x00005EDA, 0x00050081, 0x00000013,
0x000040D2, 0x00004561, 0x0000435B, 0x00050051, 0x0000000D, 0x0000297A,
0x000040D2, 0x00000000, 0x00050051, 0x0000000D, 0x00003707, 0x000040D2,
0x00000001, 0x00050051, 0x0000000D, 0x00001DD9, 0x00001FF5, 0x00000002,
0x00050051, 0x0000000D, 0x00001D69, 0x00001FF5, 0x00000003, 0x00070050,
0x0000001D, 0x00004754, 0x0000297A, 0x00003707, 0x00001DD9, 0x00001D69,
0x00050041, 0x0000029C, 0x00002123, 0x00001342, 0x00000A0B, 0x0003003E,
0x00002123, 0x00004754, 0x00050041, 0x00000044, 0x00004D88, 0x00000CE6,
0x00000A0B, 0x0004003D, 0x00000989, 0x00001EE6, 0x00004D88, 0x0003003E,
0x00001536, 0x00001EE6, 0x0003003E, 0x00001476, 0x00000B7D, 0x00050041,
0x00000290, 0x000033EA, 0x00001476, 0x000059AE, 0x0004003D, 0x00000013,
0x00004B07, 0x000033EA, 0x0007000C, 0x00000013, 0x0000230D, 0x00000001,
0x00000028, 0x00004B07, 0x0000071F, 0x0003003E, 0x00001679, 0x0000230D,
0x000100DA, 0x00050080, 0x0000000C, 0x00002D83, 0x000059AE, 0x00000A0E,
0x000200F9, 0x000018AE, 0x000200F8, 0x00002FE9, 0x000100DB, 0x000100FD,
0x00010038,
};
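
Arrays such as point_list_gs are what shader module creation consumes: codeSize is given in bytes while pCode points at the 32-bit words. A hedged sketch of the standard Vulkan call (device management and error handling assumed; the exact call site in Xenia may differ):

#include <vulkan/vulkan.h>

// Wrap an embedded SPIR-V word array in a VkShaderModule.
VkShaderModule CreateModule(VkDevice device, const uint32_t* words,
                            size_t word_count) {
  VkShaderModuleCreateInfo info = {};
  info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
  info.codeSize = word_count * sizeof(uint32_t);  // bytes, not words
  info.pCode = words;
  VkShaderModule module = VK_NULL_HANDLE;
  vkCreateShaderModule(device, &info, nullptr, &module);
  return module;
}

For the array above that would be CreateModule(device, point_list_gs, sizeof(point_list_gs) / sizeof(uint32_t)).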

View File

@ -1,170 +0,0 @@
// Generated with `xb buildshaders`.
#if 0
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 10
; Bound: 24789
; Schema: 0
OpCapability Geometry
OpCapability GeometryPointSize
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Geometry %5663 "main" %4930 %5305 %5430 %3302 %4044 %4656 %3736
OpExecutionMode %5663 InputLinesAdjacency
OpExecutionMode %5663 Invocations 1
OpExecutionMode %5663 OutputTriangleStrip
OpExecutionMode %5663 OutputVertices 4
OpMemberDecorate %_struct_1032 0 BuiltIn Position
OpMemberDecorate %_struct_1032 1 BuiltIn PointSize
OpDecorate %_struct_1032 Block
OpMemberDecorate %_struct_1033 0 BuiltIn Position
OpMemberDecorate %_struct_1033 1 BuiltIn PointSize
OpDecorate %_struct_1033 Block
OpDecorate %5430 Location 0
OpDecorate %3302 Location 0
OpDecorate %4044 Location 16
OpDecorate %4656 Location 17
OpDecorate %3736 Location 16
%void = OpTypeVoid
%1282 = OpTypeFunction %void
%int = OpTypeInt 32 1
%_ptr_Function_int = OpTypePointer Function %int
%int_0 = OpConstant %int 0
%int_4 = OpConstant %int 4
%bool = OpTypeBool
%uint = OpTypeInt 32 0
%uint_4 = OpConstant %uint 4
%_arr_int_uint_4 = OpTypeArray %int %uint_4
%int_1 = OpConstant %int 1
%int_3 = OpConstant %int 3
%int_2 = OpConstant %int 2
%566 = OpConstantComposite %_arr_int_uint_4 %int_0 %int_1 %int_3 %int_2
%_ptr_Function__arr_int_uint_4 = OpTypePointer Function %_arr_int_uint_4
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%_struct_1032 = OpTypeStruct %v4float %float
%_ptr_Output__struct_1032 = OpTypePointer Output %_struct_1032
%4930 = OpVariable %_ptr_Output__struct_1032 Output
%_struct_1033 = OpTypeStruct %v4float %float
%_arr__struct_1033_uint_4 = OpTypeArray %_struct_1033 %uint_4
%_ptr_Input__arr__struct_1033_uint_4 = OpTypePointer Input %_arr__struct_1033_uint_4
%5305 = OpVariable %_ptr_Input__arr__struct_1033_uint_4 Input
%_ptr_Input_v4float = OpTypePointer Input %v4float
%_ptr_Output_v4float = OpTypePointer Output %v4float
%_ptr_Input_float = OpTypePointer Input %float
%_ptr_Output_float = OpTypePointer Output %float
%uint_16 = OpConstant %uint 16
%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16
%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16
%5430 = OpVariable %_ptr_Output__arr_v4float_uint_16 Output
%_arr__arr_v4float_uint_16_uint_4 = OpTypeArray %_arr_v4float_uint_16 %uint_4
%_ptr_Input__arr__arr_v4float_uint_16_uint_4 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_4
%3302 = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_4 Input
%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16
%v2float = OpTypeVector %float 2
%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4
%_ptr_Input__arr_v2float_uint_4 = OpTypePointer Input %_arr_v2float_uint_4
%4044 = OpVariable %_ptr_Input__arr_v2float_uint_4 Input
%_arr_float_uint_4 = OpTypeArray %float %uint_4
%_ptr_Input__arr_float_uint_4 = OpTypePointer Input %_arr_float_uint_4
%4656 = OpVariable %_ptr_Input__arr_float_uint_4 Input
%_ptr_Output_v2float = OpTypePointer Output %v2float
%3736 = OpVariable %_ptr_Output_v2float Output
%5663 = OpFunction %void None %1282
%9454 = OpLabel
%5238 = OpVariable %_ptr_Function__arr_int_uint_4 Function
OpBranch %18173
%18173 = OpLabel
%22958 = OpPhi %int %int_0 %9454 %11651 %15146
%24788 = OpSLessThan %bool %22958 %int_4
OpLoopMerge %12265 %15146 None
OpBranchConditional %24788 %15146 %12265
%15146 = OpLabel
OpStore %5238 %566
%22512 = OpAccessChain %_ptr_Function_int %5238 %22958
%7372 = OpLoad %int %22512
%20154 = OpAccessChain %_ptr_Input_v4float %5305 %7372 %int_0
%22427 = OpLoad %v4float %20154
%19981 = OpAccessChain %_ptr_Output_v4float %4930 %int_0
OpStore %19981 %22427
%19905 = OpAccessChain %_ptr_Input_float %5305 %7372 %int_1
%7391 = OpLoad %float %19905
%19982 = OpAccessChain %_ptr_Output_float %4930 %int_1
OpStore %19982 %7391
%19848 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %7372
%10874 = OpLoad %_arr_v4float_uint_16 %19848
OpStore %5430 %10874
OpEmitVertex
%11651 = OpIAdd %int %22958 %int_1
OpBranch %18173
%12265 = OpLabel
OpEndPrimitive
OpReturn
OpFunctionEnd
#endif
const uint32_t quad_list_gs[] = {
0x07230203, 0x00010000, 0x0008000A, 0x000060D5, 0x00000000, 0x00020011,
0x00000002, 0x00020011, 0x00000018, 0x0006000B, 0x00000001, 0x4C534C47,
0x6474732E, 0x3035342E, 0x00000000, 0x0003000E, 0x00000000, 0x00000001,
0x000C000F, 0x00000003, 0x0000161F, 0x6E69616D, 0x00000000, 0x00001342,
0x000014B9, 0x00001536, 0x00000CE6, 0x00000FCC, 0x00001230, 0x00000E98,
0x00030010, 0x0000161F, 0x00000015, 0x00040010, 0x0000161F, 0x00000000,
0x00000001, 0x00030010, 0x0000161F, 0x0000001D, 0x00040010, 0x0000161F,
0x0000001A, 0x00000004, 0x00050048, 0x00000408, 0x00000000, 0x0000000B,
0x00000000, 0x00050048, 0x00000408, 0x00000001, 0x0000000B, 0x00000001,
0x00030047, 0x00000408, 0x00000002, 0x00050048, 0x00000409, 0x00000000,
0x0000000B, 0x00000000, 0x00050048, 0x00000409, 0x00000001, 0x0000000B,
0x00000001, 0x00030047, 0x00000409, 0x00000002, 0x00040047, 0x00001536,
0x0000001E, 0x00000000, 0x00040047, 0x00000CE6, 0x0000001E, 0x00000000,
0x00040047, 0x00000FCC, 0x0000001E, 0x00000010, 0x00040047, 0x00001230,
0x0000001E, 0x00000011, 0x00040047, 0x00000E98, 0x0000001E, 0x00000010,
0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, 0x00040015,
0x0000000C, 0x00000020, 0x00000001, 0x00040020, 0x00000289, 0x00000007,
0x0000000C, 0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000, 0x0004002B,
0x0000000C, 0x00000A17, 0x00000004, 0x00020014, 0x00000009, 0x00040015,
0x0000000B, 0x00000020, 0x00000000, 0x0004002B, 0x0000000B, 0x00000A16,
0x00000004, 0x0004001C, 0x00000251, 0x0000000C, 0x00000A16, 0x0004002B,
0x0000000C, 0x00000A0E, 0x00000001, 0x0004002B, 0x0000000C, 0x00000A14,
0x00000003, 0x0004002B, 0x0000000C, 0x00000A11, 0x00000002, 0x0007002C,
0x00000251, 0x00000236, 0x00000A0B, 0x00000A0E, 0x00000A14, 0x00000A11,
0x00040020, 0x000004CE, 0x00000007, 0x00000251, 0x00030016, 0x0000000D,
0x00000020, 0x00040017, 0x0000001D, 0x0000000D, 0x00000004, 0x0004001E,
0x00000408, 0x0000001D, 0x0000000D, 0x00040020, 0x00000685, 0x00000003,
0x00000408, 0x0004003B, 0x00000685, 0x00001342, 0x00000003, 0x0004001E,
0x00000409, 0x0000001D, 0x0000000D, 0x0004001C, 0x000003A8, 0x00000409,
0x00000A16, 0x00040020, 0x00000625, 0x00000001, 0x000003A8, 0x0004003B,
0x00000625, 0x000014B9, 0x00000001, 0x00040020, 0x0000029A, 0x00000001,
0x0000001D, 0x00040020, 0x0000029B, 0x00000003, 0x0000001D, 0x00040020,
0x0000028A, 0x00000001, 0x0000000D, 0x00040020, 0x0000028B, 0x00000003,
0x0000000D, 0x0004002B, 0x0000000B, 0x00000A3A, 0x00000010, 0x0004001C,
0x00000656, 0x0000001D, 0x00000A3A, 0x00040020, 0x000008D3, 0x00000003,
0x00000656, 0x0004003B, 0x000008D3, 0x00001536, 0x00000003, 0x0004001C,
0x00000503, 0x00000656, 0x00000A16, 0x00040020, 0x0000077F, 0x00000001,
0x00000503, 0x0004003B, 0x0000077F, 0x00000CE6, 0x00000001, 0x00040020,
0x000008D4, 0x00000001, 0x00000656, 0x00040017, 0x00000013, 0x0000000D,
0x00000002, 0x0004001C, 0x000002E4, 0x00000013, 0x00000A16, 0x00040020,
0x00000561, 0x00000001, 0x000002E4, 0x0004003B, 0x00000561, 0x00000FCC,
0x00000001, 0x0004001C, 0x00000266, 0x0000000D, 0x00000A16, 0x00040020,
0x000004E3, 0x00000001, 0x00000266, 0x0004003B, 0x000004E3, 0x00001230,
0x00000001, 0x00040020, 0x00000290, 0x00000003, 0x00000013, 0x0004003B,
0x00000290, 0x00000E98, 0x00000003, 0x00050036, 0x00000008, 0x0000161F,
0x00000000, 0x00000502, 0x000200F8, 0x000024EE, 0x0004003B, 0x000004CE,
0x00001476, 0x00000007, 0x000200F9, 0x000046FD, 0x000200F8, 0x000046FD,
0x000700F5, 0x0000000C, 0x000059AE, 0x00000A0B, 0x000024EE, 0x00002D83,
0x00003B2A, 0x000500B1, 0x00000009, 0x000060D4, 0x000059AE, 0x00000A17,
0x000400F6, 0x00002FE9, 0x00003B2A, 0x00000000, 0x000400FA, 0x000060D4,
0x00003B2A, 0x00002FE9, 0x000200F8, 0x00003B2A, 0x0003003E, 0x00001476,
0x00000236, 0x00050041, 0x00000289, 0x000057F0, 0x00001476, 0x000059AE,
0x0004003D, 0x0000000C, 0x00001CCC, 0x000057F0, 0x00060041, 0x0000029A,
0x00004EBA, 0x000014B9, 0x00001CCC, 0x00000A0B, 0x0004003D, 0x0000001D,
0x0000579B, 0x00004EBA, 0x00050041, 0x0000029B, 0x00004E0D, 0x00001342,
0x00000A0B, 0x0003003E, 0x00004E0D, 0x0000579B, 0x00060041, 0x0000028A,
0x00004DC1, 0x000014B9, 0x00001CCC, 0x00000A0E, 0x0004003D, 0x0000000D,
0x00001CDF, 0x00004DC1, 0x00050041, 0x0000028B, 0x00004E0E, 0x00001342,
0x00000A0E, 0x0003003E, 0x00004E0E, 0x00001CDF, 0x00050041, 0x000008D4,
0x00004D88, 0x00000CE6, 0x00001CCC, 0x0004003D, 0x00000656, 0x00002A7A,
0x00004D88, 0x0003003E, 0x00001536, 0x00002A7A, 0x000100DA, 0x00050080,
0x0000000C, 0x00002D83, 0x000059AE, 0x00000A0E, 0x000200F9, 0x000046FD,
0x000200F8, 0x00002FE9, 0x000100DB, 0x000100FD, 0x00010038,
};

View File

@ -1,430 +0,0 @@
// Generated with `xb buildshaders`.
#if 0
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 10
; Bound: 24790
; Schema: 0
OpCapability Geometry
OpCapability GeometryPointSize
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Geometry %5663 "main" %5305 %4930 %5430 %3302 %4044 %4656 %3736
OpExecutionMode %5663 Triangles
OpExecutionMode %5663 Invocations 1
OpExecutionMode %5663 OutputTriangleStrip
OpExecutionMode %5663 OutputVertices 6
OpMemberDecorate %_struct_1032 0 BuiltIn Position
OpMemberDecorate %_struct_1032 1 BuiltIn PointSize
OpDecorate %_struct_1032 Block
OpMemberDecorate %_struct_1033 0 BuiltIn Position
OpMemberDecorate %_struct_1033 1 BuiltIn PointSize
OpDecorate %_struct_1033 Block
OpDecorate %5430 Location 0
OpDecorate %3302 Location 0
OpDecorate %4044 Location 16
OpDecorate %4656 Location 17
OpDecorate %3736 Location 16
%void = OpTypeVoid
%1282 = OpTypeFunction %void
%float = OpTypeFloat 32
%v2float = OpTypeVector %float 2
%bool = OpTypeBool
%v2bool = OpTypeVector %bool 2
%v4float = OpTypeVector %float 4
%_struct_1032 = OpTypeStruct %v4float %float
%uint = OpTypeInt 32 0
%uint_3 = OpConstant %uint 3
%_arr__struct_1032_uint_3 = OpTypeArray %_struct_1032 %uint_3
%_ptr_Input__arr__struct_1032_uint_3 = OpTypePointer Input %_arr__struct_1032_uint_3
%5305 = OpVariable %_ptr_Input__arr__struct_1032_uint_3 Input
%int = OpTypeInt 32 1
%int_0 = OpConstant %int 0
%int_2 = OpConstant %int 2
%uint_0 = OpConstant %uint 0
%_ptr_Input_float = OpTypePointer Input %float
%int_1 = OpConstant %int 1
%uint_1 = OpConstant %uint 1
%float_0_00100000005 = OpConstant %float 0.00100000005
%_ptr_Input_v4float = OpTypePointer Input %v4float
%_struct_1033 = OpTypeStruct %v4float %float
%_ptr_Output__struct_1033 = OpTypePointer Output %_struct_1033
%4930 = OpVariable %_ptr_Output__struct_1033 Output
%_ptr_Output_v4float = OpTypePointer Output %v4float
%_ptr_Output_float = OpTypePointer Output %float
%uint_16 = OpConstant %uint 16
%_arr_v4float_uint_16 = OpTypeArray %v4float %uint_16
%_ptr_Output__arr_v4float_uint_16 = OpTypePointer Output %_arr_v4float_uint_16
%5430 = OpVariable %_ptr_Output__arr_v4float_uint_16 Output
%_arr__arr_v4float_uint_16_uint_3 = OpTypeArray %_arr_v4float_uint_16 %uint_3
%_ptr_Input__arr__arr_v4float_uint_16_uint_3 = OpTypePointer Input %_arr__arr_v4float_uint_16_uint_3
%3302 = OpVariable %_ptr_Input__arr__arr_v4float_uint_16_uint_3 Input
%_ptr_Input__arr_v4float_uint_16 = OpTypePointer Input %_arr_v4float_uint_16
%int_16 = OpConstant %int 16
%_arr_v2float_uint_3 = OpTypeArray %v2float %uint_3
%_ptr_Input__arr_v2float_uint_3 = OpTypePointer Input %_arr_v2float_uint_3
%4044 = OpVariable %_ptr_Input__arr_v2float_uint_3 Input
%_arr_float_uint_3 = OpTypeArray %float %uint_3
%_ptr_Input__arr_float_uint_3 = OpTypePointer Input %_arr_float_uint_3
%4656 = OpVariable %_ptr_Input__arr_float_uint_3 Input
%_ptr_Output_v2float = OpTypePointer Output %v2float
%3736 = OpVariable %_ptr_Output_v2float Output
%1759 = OpConstantComposite %v2float %float_0_00100000005 %float_0_00100000005
%5663 = OpFunction %void None %1282
%23915 = OpLabel
%7129 = OpAccessChain %_ptr_Input_float %5305 %int_2 %int_0 %uint_0
%15627 = OpLoad %float %7129
%20439 = OpAccessChain %_ptr_Input_float %5305 %int_1 %int_0 %uint_1
%19889 = OpLoad %float %20439
%10917 = OpCompositeConstruct %v2float %15627 %19889
%24777 = OpAccessChain %_ptr_Input_v4float %5305 %int_0 %int_0
%7883 = OpLoad %v4float %24777
%6765 = OpVectorShuffle %v2float %7883 %7883 0 1
%15739 = OpFSub %v2float %6765 %10917
%7757 = OpExtInst %v2float %1 FAbs %15739
%19021 = OpFOrdLessThanEqual %v2bool %7757 %1759
%15711 = OpAll %bool %19021
%11402 = OpLogicalNot %bool %15711
OpSelectionMerge %13286 None
OpBranchConditional %11402 %12129 %13286
%12129 = OpLabel
%18210 = OpAccessChain %_ptr_Input_float %5305 %int_1 %int_0 %uint_0
%15628 = OpLoad %float %18210
%20440 = OpAccessChain %_ptr_Input_float %5305 %int_2 %int_0 %uint_1
%21143 = OpLoad %float %20440
%17643 = OpCompositeConstruct %v2float %15628 %21143
%15490 = OpFSub %v2float %6765 %17643
%24406 = OpExtInst %v2float %1 FAbs %15490
%20560 = OpFOrdLessThanEqual %v2bool %24406 %1759
%20788 = OpAll %bool %20560
OpBranch %13286
%13286 = OpLabel
%10924 = OpPhi %bool %15711 %23915 %20788 %12129
OpSelectionMerge %23648 None
OpBranchConditional %10924 %12148 %9186
%12148 = OpLabel
%18037 = OpAccessChain %_ptr_Output_v4float %4930 %int_0
OpStore %18037 %7883
%19905 = OpAccessChain %_ptr_Input_float %5305 %int_0 %int_1
%7391 = OpLoad %float %19905
%19981 = OpAccessChain %_ptr_Output_float %4930 %int_1
OpStore %19981 %7391
%19848 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_0
%10874 = OpLoad %_arr_v4float_uint_16 %19848
OpStore %5430 %10874
OpEmitVertex
%22812 = OpAccessChain %_ptr_Input_v4float %5305 %int_1 %int_0
%11398 = OpLoad %v4float %22812
OpStore %18037 %11398
%16622 = OpAccessChain %_ptr_Input_float %5305 %int_1 %int_1
%7967 = OpLoad %float %16622
OpStore %19981 %7967
%16623 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_1
%10875 = OpLoad %_arr_v4float_uint_16 %16623
OpStore %5430 %10875
OpEmitVertex
%22813 = OpAccessChain %_ptr_Input_v4float %5305 %int_2 %int_0
%11399 = OpLoad %v4float %22813
OpStore %18037 %11399
%16624 = OpAccessChain %_ptr_Input_float %5305 %int_2 %int_1
%7968 = OpLoad %float %16624
OpStore %19981 %7968
%16625 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_2
%10876 = OpLoad %_arr_v4float_uint_16 %16625
OpStore %5430 %10876
OpEmitVertex
OpEndPrimitive
OpStore %18037 %11399
OpStore %19981 %7968
OpStore %5430 %10876
OpEmitVertex
OpStore %18037 %11398
OpStore %19981 %7967
OpStore %5430 %10875
OpEmitVertex
%8851 = OpFNegate %v2float %6765
%13757 = OpVectorShuffle %v2float %11398 %11398 0 1
%21457 = OpFAdd %v2float %8851 %13757
%7434 = OpVectorShuffle %v2float %11399 %11399 0 1
%21812 = OpFAdd %v2float %21457 %7434
%18423 = OpCompositeExtract %float %21812 0
%14087 = OpCompositeExtract %float %21812 1
%7641 = OpCompositeExtract %float %11399 2
%7472 = OpCompositeExtract %float %11399 3
%18779 = OpCompositeConstruct %v4float %18423 %14087 %7641 %7472
OpStore %18037 %18779
OpStore %19981 %7968
OpBranch %17364
%17364 = OpLabel
%22958 = OpPhi %int %int_0 %12148 %21301 %14551
%24788 = OpSLessThan %bool %22958 %int_16
OpLoopMerge %11792 %14551 None
OpBranchConditional %24788 %14551 %11792
%14551 = OpLabel
%19388 = OpAccessChain %_ptr_Input_v4float %3302 %int_0 %22958
%24048 = OpLoad %v4float %19388
%19880 = OpFNegate %v4float %24048
%6667 = OpAccessChain %_ptr_Input_v4float %3302 %int_1 %22958
%6828 = OpLoad %v4float %6667
%22565 = OpFAdd %v4float %19880 %6828
%18783 = OpAccessChain %_ptr_Input_v4float %3302 %int_2 %22958
%21055 = OpLoad %v4float %18783
%22584 = OpFAdd %v4float %22565 %21055
%18591 = OpAccessChain %_ptr_Output_v4float %5430 %22958
OpStore %18591 %22584
%21301 = OpIAdd %int %22958 %int_1
OpBranch %17364
%11792 = OpLabel
OpEmitVertex
OpEndPrimitive
OpBranch %23648
%9186 = OpLabel
%20459 = OpAccessChain %_ptr_Output_v4float %4930 %int_0
OpStore %20459 %7883
%19906 = OpAccessChain %_ptr_Input_float %5305 %int_0 %int_1
%7392 = OpLoad %float %19906
%19982 = OpAccessChain %_ptr_Output_float %4930 %int_1
OpStore %19982 %7392
%19849 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_0
%10877 = OpLoad %_arr_v4float_uint_16 %19849
OpStore %5430 %10877
OpEmitVertex
%22814 = OpAccessChain %_ptr_Input_v4float %5305 %int_1 %int_0
%11400 = OpLoad %v4float %22814
OpStore %20459 %11400
%16626 = OpAccessChain %_ptr_Input_float %5305 %int_1 %int_1
%7969 = OpLoad %float %16626
OpStore %19982 %7969
%16627 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_1
%10878 = OpLoad %_arr_v4float_uint_16 %16627
OpStore %5430 %10878
OpEmitVertex
%22815 = OpAccessChain %_ptr_Input_v4float %5305 %int_2 %int_0
%11401 = OpLoad %v4float %22815
OpStore %20459 %11401
%16628 = OpAccessChain %_ptr_Input_float %5305 %int_2 %int_1
%7970 = OpLoad %float %16628
OpStore %19982 %7970
%16629 = OpAccessChain %_ptr_Input__arr_v4float_uint_16 %3302 %int_2
%10879 = OpLoad %_arr_v4float_uint_16 %16629
OpStore %5430 %10879
OpEmitVertex
OpEndPrimitive
OpStore %20459 %7883
OpStore %19982 %7392
OpStore %5430 %10877
OpEmitVertex
OpStore %20459 %11401
OpStore %19982 %7970
OpStore %5430 %10879
OpEmitVertex
%12391 = OpVectorShuffle %v2float %11400 %11400 0 1
%21222 = OpFNegate %v2float %12391
%8335 = OpFAdd %v2float %6765 %21222
%13861 = OpVectorShuffle %v2float %11401 %11401 0 1
%21813 = OpFAdd %v2float %8335 %13861
%18424 = OpCompositeExtract %float %21813 0
%14088 = OpCompositeExtract %float %21813 1
%7642 = OpCompositeExtract %float %11401 2
%7473 = OpCompositeExtract %float %11401 3
%18780 = OpCompositeConstruct %v4float %18424 %14088 %7642 %7473
OpStore %20459 %18780
OpStore %19982 %7970
OpBranch %17365
%17365 = OpLabel
%22959 = OpPhi %int %int_0 %9186 %21302 %14552
%24789 = OpSLessThan %bool %22959 %int_16
OpLoopMerge %11793 %14552 None
OpBranchConditional %24789 %14552 %11793
%14552 = OpLabel
%18211 = OpAccessChain %_ptr_Input_v4float %3302 %int_0 %22959
%15629 = OpLoad %v4float %18211
%21332 = OpAccessChain %_ptr_Input_v4float %3302 %int_1 %22959
%12974 = OpLoad %v4float %21332
%8884 = OpFNegate %v4float %12974
%7862 = OpFAdd %v4float %15629 %8884
%14199 = OpAccessChain %_ptr_Input_v4float %3302 %int_2 %22959
%21056 = OpLoad %v4float %14199
%22585 = OpFAdd %v4float %7862 %21056
%18592 = OpAccessChain %_ptr_Output_v4float %5430 %22959
OpStore %18592 %22585
%21302 = OpIAdd %int %22959 %int_1
OpBranch %17365
%11793 = OpLabel
OpEmitVertex
OpEndPrimitive
OpBranch %23648
%23648 = OpLabel
OpReturn
OpFunctionEnd
#endif
const uint32_t rect_list_gs[] = {
0x07230203, 0x00010000, 0x0008000A, 0x000060D6, 0x00000000, 0x00020011,
0x00000002, 0x00020011, 0x00000018, 0x0006000B, 0x00000001, 0x4C534C47,
0x6474732E, 0x3035342E, 0x00000000, 0x0003000E, 0x00000000, 0x00000001,
0x000C000F, 0x00000003, 0x0000161F, 0x6E69616D, 0x00000000, 0x000014B9,
0x00001342, 0x00001536, 0x00000CE6, 0x00000FCC, 0x00001230, 0x00000E98,
0x00030010, 0x0000161F, 0x00000016, 0x00040010, 0x0000161F, 0x00000000,
0x00000001, 0x00030010, 0x0000161F, 0x0000001D, 0x00040010, 0x0000161F,
0x0000001A, 0x00000006, 0x00050048, 0x00000408, 0x00000000, 0x0000000B,
0x00000000, 0x00050048, 0x00000408, 0x00000001, 0x0000000B, 0x00000001,
0x00030047, 0x00000408, 0x00000002, 0x00050048, 0x00000409, 0x00000000,
0x0000000B, 0x00000000, 0x00050048, 0x00000409, 0x00000001, 0x0000000B,
0x00000001, 0x00030047, 0x00000409, 0x00000002, 0x00040047, 0x00001536,
0x0000001E, 0x00000000, 0x00040047, 0x00000CE6, 0x0000001E, 0x00000000,
0x00040047, 0x00000FCC, 0x0000001E, 0x00000010, 0x00040047, 0x00001230,
0x0000001E, 0x00000011, 0x00040047, 0x00000E98, 0x0000001E, 0x00000010,
0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008, 0x00030016,
0x0000000D, 0x00000020, 0x00040017, 0x00000013, 0x0000000D, 0x00000002,
0x00020014, 0x00000009, 0x00040017, 0x0000000F, 0x00000009, 0x00000002,
0x00040017, 0x0000001D, 0x0000000D, 0x00000004, 0x0004001E, 0x00000408,
0x0000001D, 0x0000000D, 0x00040015, 0x0000000B, 0x00000020, 0x00000000,
0x0004002B, 0x0000000B, 0x00000A13, 0x00000003, 0x0004001C, 0x0000085F,
0x00000408, 0x00000A13, 0x00040020, 0x00000ADC, 0x00000001, 0x0000085F,
0x0004003B, 0x00000ADC, 0x000014B9, 0x00000001, 0x00040015, 0x0000000C,
0x00000020, 0x00000001, 0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000,
0x0004002B, 0x0000000C, 0x00000A11, 0x00000002, 0x0004002B, 0x0000000B,
0x00000A0A, 0x00000000, 0x00040020, 0x0000028A, 0x00000001, 0x0000000D,
0x0004002B, 0x0000000C, 0x00000A0E, 0x00000001, 0x0004002B, 0x0000000B,
0x00000A0D, 0x00000001, 0x0004002B, 0x0000000D, 0x00000030, 0x3A83126F,
0x00040020, 0x0000029A, 0x00000001, 0x0000001D, 0x0004001E, 0x00000409,
0x0000001D, 0x0000000D, 0x00040020, 0x00000685, 0x00000003, 0x00000409,
0x0004003B, 0x00000685, 0x00001342, 0x00000003, 0x00040020, 0x0000029B,
0x00000003, 0x0000001D, 0x00040020, 0x0000028B, 0x00000003, 0x0000000D,
0x0004002B, 0x0000000B, 0x00000A3A, 0x00000010, 0x0004001C, 0x000008F6,
0x0000001D, 0x00000A3A, 0x00040020, 0x00000B73, 0x00000003, 0x000008F6,
0x0004003B, 0x00000B73, 0x00001536, 0x00000003, 0x0004001C, 0x0000084A,
0x000008F6, 0x00000A13, 0x00040020, 0x00000AC7, 0x00000001, 0x0000084A,
0x0004003B, 0x00000AC7, 0x00000CE6, 0x00000001, 0x00040020, 0x00000B74,
0x00000001, 0x000008F6, 0x0004002B, 0x0000000C, 0x00000A3B, 0x00000010,
0x0004001C, 0x00000352, 0x00000013, 0x00000A13, 0x00040020, 0x000005CF,
0x00000001, 0x00000352, 0x0004003B, 0x000005CF, 0x00000FCC, 0x00000001,
0x0004001C, 0x00000298, 0x0000000D, 0x00000A13, 0x00040020, 0x00000515,
0x00000001, 0x00000298, 0x0004003B, 0x00000515, 0x00001230, 0x00000001,
0x00040020, 0x00000290, 0x00000003, 0x00000013, 0x0004003B, 0x00000290,
0x00000E98, 0x00000003, 0x0005002C, 0x00000013, 0x000006DF, 0x00000030,
0x00000030, 0x00050036, 0x00000008, 0x0000161F, 0x00000000, 0x00000502,
0x000200F8, 0x00005D6B, 0x00070041, 0x0000028A, 0x00001BD9, 0x000014B9,
0x00000A11, 0x00000A0B, 0x00000A0A, 0x0004003D, 0x0000000D, 0x00003D0B,
0x00001BD9, 0x00070041, 0x0000028A, 0x00004FD7, 0x000014B9, 0x00000A0E,
0x00000A0B, 0x00000A0D, 0x0004003D, 0x0000000D, 0x00004DB1, 0x00004FD7,
0x00050050, 0x00000013, 0x00002AA5, 0x00003D0B, 0x00004DB1, 0x00060041,
0x0000029A, 0x000060C9, 0x000014B9, 0x00000A0B, 0x00000A0B, 0x0004003D,
0x0000001D, 0x00001ECB, 0x000060C9, 0x0007004F, 0x00000013, 0x00001A6D,
0x00001ECB, 0x00001ECB, 0x00000000, 0x00000001, 0x00050083, 0x00000013,
0x00003D7B, 0x00001A6D, 0x00002AA5, 0x0006000C, 0x00000013, 0x00001E4D,
0x00000001, 0x00000004, 0x00003D7B, 0x000500BC, 0x0000000F, 0x00004A4D,
0x00001E4D, 0x000006DF, 0x0004009B, 0x00000009, 0x00003D5F, 0x00004A4D,
0x000400A8, 0x00000009, 0x00002C8A, 0x00003D5F, 0x000300F7, 0x000033E6,
0x00000000, 0x000400FA, 0x00002C8A, 0x00002F61, 0x000033E6, 0x000200F8,
0x00002F61, 0x00070041, 0x0000028A, 0x00004722, 0x000014B9, 0x00000A0E,
0x00000A0B, 0x00000A0A, 0x0004003D, 0x0000000D, 0x00003D0C, 0x00004722,
0x00070041, 0x0000028A, 0x00004FD8, 0x000014B9, 0x00000A11, 0x00000A0B,
0x00000A0D, 0x0004003D, 0x0000000D, 0x00005297, 0x00004FD8, 0x00050050,
0x00000013, 0x000044EB, 0x00003D0C, 0x00005297, 0x00050083, 0x00000013,
0x00003C82, 0x00001A6D, 0x000044EB, 0x0006000C, 0x00000013, 0x00005F56,
0x00000001, 0x00000004, 0x00003C82, 0x000500BC, 0x0000000F, 0x00005050,
0x00005F56, 0x000006DF, 0x0004009B, 0x00000009, 0x00005134, 0x00005050,
0x000200F9, 0x000033E6, 0x000200F8, 0x000033E6, 0x000700F5, 0x00000009,
0x00002AAC, 0x00003D5F, 0x00005D6B, 0x00005134, 0x00002F61, 0x000300F7,
0x00005C60, 0x00000000, 0x000400FA, 0x00002AAC, 0x00002F74, 0x000023E2,
0x000200F8, 0x00002F74, 0x00050041, 0x0000029B, 0x00004675, 0x00001342,
0x00000A0B, 0x0003003E, 0x00004675, 0x00001ECB, 0x00060041, 0x0000028A,
0x00004DC1, 0x000014B9, 0x00000A0B, 0x00000A0E, 0x0004003D, 0x0000000D,
0x00001CDF, 0x00004DC1, 0x00050041, 0x0000028B, 0x00004E0D, 0x00001342,
0x00000A0E, 0x0003003E, 0x00004E0D, 0x00001CDF, 0x00050041, 0x00000B74,
0x00004D88, 0x00000CE6, 0x00000A0B, 0x0004003D, 0x000008F6, 0x00002A7A,
0x00004D88, 0x0003003E, 0x00001536, 0x00002A7A, 0x000100DA, 0x00060041,
0x0000029A, 0x0000591C, 0x000014B9, 0x00000A0E, 0x00000A0B, 0x0004003D,
0x0000001D, 0x00002C86, 0x0000591C, 0x0003003E, 0x00004675, 0x00002C86,
0x00060041, 0x0000028A, 0x000040EE, 0x000014B9, 0x00000A0E, 0x00000A0E,
0x0004003D, 0x0000000D, 0x00001F1F, 0x000040EE, 0x0003003E, 0x00004E0D,
0x00001F1F, 0x00050041, 0x00000B74, 0x000040EF, 0x00000CE6, 0x00000A0E,
0x0004003D, 0x000008F6, 0x00002A7B, 0x000040EF, 0x0003003E, 0x00001536,
0x00002A7B, 0x000100DA, 0x00060041, 0x0000029A, 0x0000591D, 0x000014B9,
0x00000A11, 0x00000A0B, 0x0004003D, 0x0000001D, 0x00002C87, 0x0000591D,
0x0003003E, 0x00004675, 0x00002C87, 0x00060041, 0x0000028A, 0x000040F0,
0x000014B9, 0x00000A11, 0x00000A0E, 0x0004003D, 0x0000000D, 0x00001F20,
0x000040F0, 0x0003003E, 0x00004E0D, 0x00001F20, 0x00050041, 0x00000B74,
0x000040F1, 0x00000CE6, 0x00000A11, 0x0004003D, 0x000008F6, 0x00002A7C,
0x000040F1, 0x0003003E, 0x00001536, 0x00002A7C, 0x000100DA, 0x000100DB,
0x0003003E, 0x00004675, 0x00002C87, 0x0003003E, 0x00004E0D, 0x00001F20,
0x0003003E, 0x00001536, 0x00002A7C, 0x000100DA, 0x0003003E, 0x00004675,
0x00002C86, 0x0003003E, 0x00004E0D, 0x00001F1F, 0x0003003E, 0x00001536,
0x00002A7B, 0x000100DA, 0x0004007F, 0x00000013, 0x00002293, 0x00001A6D,
0x0007004F, 0x00000013, 0x000035BD, 0x00002C86, 0x00002C86, 0x00000000,
0x00000001, 0x00050081, 0x00000013, 0x000053D1, 0x00002293, 0x000035BD,
0x0007004F, 0x00000013, 0x00001D0A, 0x00002C87, 0x00002C87, 0x00000000,
0x00000001, 0x00050081, 0x00000013, 0x00005534, 0x000053D1, 0x00001D0A,
0x00050051, 0x0000000D, 0x000047F7, 0x00005534, 0x00000000, 0x00050051,
0x0000000D, 0x00003707, 0x00005534, 0x00000001, 0x00050051, 0x0000000D,
0x00001DD9, 0x00002C87, 0x00000002, 0x00050051, 0x0000000D, 0x00001D30,
0x00002C87, 0x00000003, 0x00070050, 0x0000001D, 0x0000495B, 0x000047F7,
0x00003707, 0x00001DD9, 0x00001D30, 0x0003003E, 0x00004675, 0x0000495B,
0x0003003E, 0x00004E0D, 0x00001F20, 0x000200F9, 0x000043D4, 0x000200F8,
0x000043D4, 0x000700F5, 0x0000000C, 0x000059AE, 0x00000A0B, 0x00002F74,
0x00005335, 0x000038D7, 0x000500B1, 0x00000009, 0x000060D4, 0x000059AE,
0x00000A3B, 0x000400F6, 0x00002E10, 0x000038D7, 0x00000000, 0x000400FA,
0x000060D4, 0x000038D7, 0x00002E10, 0x000200F8, 0x000038D7, 0x00060041,
0x0000029A, 0x00004BBC, 0x00000CE6, 0x00000A0B, 0x000059AE, 0x0004003D,
0x0000001D, 0x00005DF0, 0x00004BBC, 0x0004007F, 0x0000001D, 0x00004DA8,
0x00005DF0, 0x00060041, 0x0000029A, 0x00001A0B, 0x00000CE6, 0x00000A0E,
0x000059AE, 0x0004003D, 0x0000001D, 0x00001AAC, 0x00001A0B, 0x00050081,
0x0000001D, 0x00005825, 0x00004DA8, 0x00001AAC, 0x00060041, 0x0000029A,
0x0000495F, 0x00000CE6, 0x00000A11, 0x000059AE, 0x0004003D, 0x0000001D,
0x0000523F, 0x0000495F, 0x00050081, 0x0000001D, 0x00005838, 0x00005825,
0x0000523F, 0x00050041, 0x0000029B, 0x0000489F, 0x00001536, 0x000059AE,
0x0003003E, 0x0000489F, 0x00005838, 0x00050080, 0x0000000C, 0x00005335,
0x000059AE, 0x00000A0E, 0x000200F9, 0x000043D4, 0x000200F8, 0x00002E10,
0x000100DA, 0x000100DB, 0x000200F9, 0x00005C60, 0x000200F8, 0x000023E2,
0x00050041, 0x0000029B, 0x00004FEB, 0x00001342, 0x00000A0B, 0x0003003E,
0x00004FEB, 0x00001ECB, 0x00060041, 0x0000028A, 0x00004DC2, 0x000014B9,
0x00000A0B, 0x00000A0E, 0x0004003D, 0x0000000D, 0x00001CE0, 0x00004DC2,
0x00050041, 0x0000028B, 0x00004E0E, 0x00001342, 0x00000A0E, 0x0003003E,
0x00004E0E, 0x00001CE0, 0x00050041, 0x00000B74, 0x00004D89, 0x00000CE6,
0x00000A0B, 0x0004003D, 0x000008F6, 0x00002A7D, 0x00004D89, 0x0003003E,
0x00001536, 0x00002A7D, 0x000100DA, 0x00060041, 0x0000029A, 0x0000591E,
0x000014B9, 0x00000A0E, 0x00000A0B, 0x0004003D, 0x0000001D, 0x00002C88,
0x0000591E, 0x0003003E, 0x00004FEB, 0x00002C88, 0x00060041, 0x0000028A,
0x000040F2, 0x000014B9, 0x00000A0E, 0x00000A0E, 0x0004003D, 0x0000000D,
0x00001F21, 0x000040F2, 0x0003003E, 0x00004E0E, 0x00001F21, 0x00050041,
0x00000B74, 0x000040F3, 0x00000CE6, 0x00000A0E, 0x0004003D, 0x000008F6,
0x00002A7E, 0x000040F3, 0x0003003E, 0x00001536, 0x00002A7E, 0x000100DA,
0x00060041, 0x0000029A, 0x0000591F, 0x000014B9, 0x00000A11, 0x00000A0B,
0x0004003D, 0x0000001D, 0x00002C89, 0x0000591F, 0x0003003E, 0x00004FEB,
0x00002C89, 0x00060041, 0x0000028A, 0x000040F4, 0x000014B9, 0x00000A11,
0x00000A0E, 0x0004003D, 0x0000000D, 0x00001F22, 0x000040F4, 0x0003003E,
0x00004E0E, 0x00001F22, 0x00050041, 0x00000B74, 0x000040F5, 0x00000CE6,
0x00000A11, 0x0004003D, 0x000008F6, 0x00002A7F, 0x000040F5, 0x0003003E,
0x00001536, 0x00002A7F, 0x000100DA, 0x000100DB, 0x0003003E, 0x00004FEB,
0x00001ECB, 0x0003003E, 0x00004E0E, 0x00001CE0, 0x0003003E, 0x00001536,
0x00002A7D, 0x000100DA, 0x0003003E, 0x00004FEB, 0x00002C89, 0x0003003E,
0x00004E0E, 0x00001F22, 0x0003003E, 0x00001536, 0x00002A7F, 0x000100DA,
0x0007004F, 0x00000013, 0x00003067, 0x00002C88, 0x00002C88, 0x00000000,
0x00000001, 0x0004007F, 0x00000013, 0x000052E6, 0x00003067, 0x00050081,
0x00000013, 0x0000208F, 0x00001A6D, 0x000052E6, 0x0007004F, 0x00000013,
0x00003625, 0x00002C89, 0x00002C89, 0x00000000, 0x00000001, 0x00050081,
0x00000013, 0x00005535, 0x0000208F, 0x00003625, 0x00050051, 0x0000000D,
0x000047F8, 0x00005535, 0x00000000, 0x00050051, 0x0000000D, 0x00003708,
0x00005535, 0x00000001, 0x00050051, 0x0000000D, 0x00001DDA, 0x00002C89,
0x00000002, 0x00050051, 0x0000000D, 0x00001D31, 0x00002C89, 0x00000003,
0x00070050, 0x0000001D, 0x0000495C, 0x000047F8, 0x00003708, 0x00001DDA,
0x00001D31, 0x0003003E, 0x00004FEB, 0x0000495C, 0x0003003E, 0x00004E0E,
0x00001F22, 0x000200F9, 0x000043D5, 0x000200F8, 0x000043D5, 0x000700F5,
0x0000000C, 0x000059AF, 0x00000A0B, 0x000023E2, 0x00005336, 0x000038D8,
0x000500B1, 0x00000009, 0x000060D5, 0x000059AF, 0x00000A3B, 0x000400F6,
0x00002E11, 0x000038D8, 0x00000000, 0x000400FA, 0x000060D5, 0x000038D8,
0x00002E11, 0x000200F8, 0x000038D8, 0x00060041, 0x0000029A, 0x00004723,
0x00000CE6, 0x00000A0B, 0x000059AF, 0x0004003D, 0x0000001D, 0x00003D0D,
0x00004723, 0x00060041, 0x0000029A, 0x00005354, 0x00000CE6, 0x00000A0E,
0x000059AF, 0x0004003D, 0x0000001D, 0x000032AE, 0x00005354, 0x0004007F,
0x0000001D, 0x000022B4, 0x000032AE, 0x00050081, 0x0000001D, 0x00001EB6,
0x00003D0D, 0x000022B4, 0x00060041, 0x0000029A, 0x00003777, 0x00000CE6,
0x00000A11, 0x000059AF, 0x0004003D, 0x0000001D, 0x00005240, 0x00003777,
0x00050081, 0x0000001D, 0x00005839, 0x00001EB6, 0x00005240, 0x00050041,
0x0000029B, 0x000048A0, 0x00001536, 0x000059AF, 0x0003003E, 0x000048A0,
0x00005839, 0x00050080, 0x0000000C, 0x00005336, 0x000059AF, 0x00000A0E,
0x000200F9, 0x000043D5, 0x000200F8, 0x00002E11, 0x000100DA, 0x000100DB,
0x000200F9, 0x00005C60, 0x000200F8, 0x00005C60, 0x000100FD, 0x00010038,
};

View File

@ -1,35 +0,0 @@
// NOTE: This file is compiled and embedded into the exe.
// Use `xenia-build genspirv` and check in any changes under bin/.
#version 450 core
#extension all : warn
#extension GL_ARB_shading_language_420pack : require
#extension GL_ARB_separate_shader_objects : require
#extension GL_ARB_explicit_attrib_location : require
layout(set = 0, binding = 1) uniform consts_type {
vec4 float_consts[512];
uint loop_consts[32];
uint bool_consts[8];
} consts;
layout(push_constant) uniform push_consts_type {
vec4 window_scale;
vec4 vtx_fmt;
vec4 point_size;
vec4 alpha_test;
uint ps_param_gen;
} push_constants;
layout(set = 1, binding = 0) uniform sampler1D textures1D[32];
layout(set = 1, binding = 1) uniform sampler2D textures2D[32];
layout(set = 1, binding = 2) uniform sampler3D textures3D[32];
layout(set = 1, binding = 3) uniform samplerCube textures4D[32];
layout(location = 0) in vec4 in_interpolators[16];
layout(location = 0) out vec4 oC[4];
void main() {
// This shader does absolutely nothing!
return;
}

View File

@ -1,53 +0,0 @@
// NOTE: This file is compiled and embedded into the exe.
// Use `xenia-build genspirv` and check in any changes under bin/.
#version 450 core
#extension all : warn
#extension GL_ARB_separate_shader_objects : require
#extension GL_ARB_explicit_attrib_location : require
in gl_PerVertex {
vec4 gl_Position;
float gl_PointSize;
// float gl_ClipDistance[];
} gl_in[];
out gl_PerVertex {
vec4 gl_Position;
float gl_PointSize;
// float gl_ClipDistance[];
};
layout(location = 0) in vec4 in_interpolators[][16];
layout(location = 0) out vec4 out_interpolators[16];
layout(location = 16) in vec2 _in_point_coord_unused[];
layout(location = 17) in float _in_point_size_unused[];
layout(location = 16) out vec2 _out_point_coord_unused;
layout(lines_adjacency) in;
layout(line_strip, max_vertices = 5) out;
void main() {
gl_Position = gl_in[0].gl_Position;
gl_PointSize = gl_in[0].gl_PointSize;
out_interpolators = in_interpolators[0];
EmitVertex();
gl_Position = gl_in[1].gl_Position;
gl_PointSize = gl_in[1].gl_PointSize;
out_interpolators = in_interpolators[1];
EmitVertex();
gl_Position = gl_in[2].gl_Position;
gl_PointSize = gl_in[2].gl_PointSize;
out_interpolators = in_interpolators[2];
EmitVertex();
gl_Position = gl_in[3].gl_Position;
gl_PointSize = gl_in[3].gl_PointSize;
out_interpolators = in_interpolators[3];
EmitVertex();
gl_Position = gl_in[0].gl_Position;
gl_PointSize = gl_in[0].gl_PointSize;
out_interpolators = in_interpolators[0];
EmitVertex();
EndPrimitive();
}
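
The shader above converts one Xenos "line quad" (four vertices arriving as lines_adjacency) into a closed outline: it emits vertices 0 through 3 and then vertex 0 again, and a line strip of five vertices draws four segments. A one-line sketch of the emission order (illustrative only):

// Strip order used above; an N-vertex line strip draws N-1 segments,
// here the quad's four edges (0,1), (1,2), (2,3), (3,0).
constexpr int kEmitOrder[5] = {0, 1, 2, 3, 0};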

View File

@ -1,63 +0,0 @@
// NOTE: This file is compiled and embedded into the exe.
// Use `xenia-build genspirv` and check in any changes under bin/.
#version 450 core
#extension all : warn
#extension GL_ARB_shading_language_420pack : require
#extension GL_ARB_separate_shader_objects : require
#extension GL_ARB_explicit_attrib_location : require
layout(push_constant) uniform push_consts_type {
vec4 window_scale;
vec4 vtx_fmt;
vec4 point_size;
vec4 alpha_test;
uint ps_param_gen;
} push_constants;
in gl_PerVertex {
vec4 gl_Position;
// float gl_ClipDistance[];
} gl_in[];
out gl_PerVertex {
vec4 gl_Position;
// float gl_ClipDistance[];
};
layout(location = 0) in vec4 in_interpolators[][16];
layout(location = 16) in vec2 in_point_coord_unused[];
layout(location = 17) in float point_size[];
layout(location = 0) out vec4 out_interpolators[16];
layout(location = 16) out vec2 point_coord;
// TODO(benvanik): clamp to min/max.
// TODO(benvanik): figure out how to see which interpolator gets adjusted.
layout(points) in;
layout(triangle_strip, max_vertices = 4) out;
void main() {
const vec2 offsets[4] = {
vec2(-1.0, 1.0),
vec2( 1.0, 1.0),
vec2(-1.0, -1.0),
vec2( 1.0, -1.0),
};
vec4 pos = gl_in[0].gl_Position;
vec2 window_scaled_psize = push_constants.point_size.xy;
// Shader header writes -1.0f to pointSize by default, so any positive value
// means that it was overwritten by the translated vertex shader.
if (point_size[0] > 0.0f) {
window_scaled_psize = vec2(point_size[0]);
}
window_scaled_psize /= push_constants.window_scale.zw;
for (int i = 0; i < 4; ++i) {
gl_Position = vec4(pos.xy + (offsets[i] * window_scaled_psize), pos.zw);
out_interpolators = in_interpolators[0];
point_coord = max(offsets[i], vec2(0.0f));
EmitVertex();
}
EndPrimitive();
}
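
The loop above expands each point into a screen-aligned sprite: the ±1 corner offsets are scaled by the point size (the translated vertex shader's value when positive, otherwise the default from the push constants) divided by the window scale, and max(offsets[i], vec2(0.0)) maps the four corners to sprite coordinates (0,0) through (1,1). A plain-float sketch of the per-corner position math (names are illustrative):

// corner = center + offset * scaled_size, with offset in {-1, +1} per axis,
// so the sprite spans 2 * scaled_size in clip space on each axis.
struct Vec2 { float x, y; };
Vec2 SpriteCorner(Vec2 center, Vec2 offset, Vec2 scaled_size) {
  return {center.x + offset.x * scaled_size.x,
          center.y + offset.y * scaled_size.y};
}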

View File

@ -1,42 +0,0 @@
// NOTE: This file is compiled and embedded into the exe.
// Use `xenia-build genspirv` and check in any changes under bin/.
#version 450 core
#extension all : warn
#extension GL_ARB_shading_language_420pack : require
#extension GL_ARB_separate_shader_objects : require
#extension GL_ARB_explicit_attrib_location : require
in gl_PerVertex {
vec4 gl_Position;
float gl_PointSize;
// float gl_ClipDistance[];
} gl_in[];
out gl_PerVertex {
vec4 gl_Position;
float gl_PointSize;
// float gl_ClipDistance[];
};
layout(location = 0) in vec4 in_interpolators[][16];
layout(location = 0) out vec4 out_interpolators[16];
layout(location = 16) in vec2 _in_point_coord_unused[];
layout(location = 17) in float _in_point_size_unused[];
layout(location = 16) out vec2 _out_point_coord_unused;
layout(lines_adjacency) in;
layout(triangle_strip, max_vertices = 4) out;
void main() {
const int order[4] = { 0, 1, 3, 2 };
for (int i = 0; i < 4; ++i) {
int input_index = order[i];
gl_Position = gl_in[input_index].gl_Position;
gl_PointSize = gl_in[input_index].gl_PointSize;
out_interpolators = in_interpolators[input_index];
EmitVertex();
}
EndPrimitive();
}
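
The {0, 1, 3, 2} table above reindexes a quad whose vertices run around the perimeter (0→1→2→3) into triangle-strip order, so the strip covers it with two triangles that share the 1-3 diagonal. Derived directly from the emission order (sketch):

// Strip vertices 0,1,3,2 yield triangles (0,1,3) and (3,1,2), splitting the
// perimeter-ordered quad along its 1-3 diagonal.
constexpr int kStripOrder[4] = {0, 1, 3, 2};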

View File

@ -1,124 +0,0 @@
// NOTE: This file is compiled and embedded into the exe.
// Use `xenia-build genspirv` and check in any changes under bin/.
#version 450 core
#extension all : warn
#extension GL_ARB_separate_shader_objects : require
#extension GL_ARB_explicit_attrib_location : require
in gl_PerVertex {
vec4 gl_Position;
float gl_PointSize;
// float gl_ClipDistance[];
} gl_in[];
out gl_PerVertex {
vec4 gl_Position;
float gl_PointSize;
// float gl_ClipDistance[];
};
layout(location = 0) in vec4 in_interpolators[][16];
layout(location = 0) out vec4 out_interpolators[16];
layout(location = 16) in vec2 _in_point_coord_unused[];
layout(location = 17) in float _in_point_size_unused[];
layout(location = 16) out vec2 _out_point_coord_unused;
layout(triangles) in;
layout(triangle_strip, max_vertices = 6) out;
bool equalsEpsilon(vec2 left, vec2 right, float epsilon) {
return all(lessThanEqual(abs(left - right), vec2(epsilon)));
}
void main() {
// Most games use a left-aligned form.
if (equalsEpsilon(gl_in[0].gl_Position.xy, vec2(gl_in[2].gl_Position.x, gl_in[1].gl_Position.y), 0.001) ||
equalsEpsilon(gl_in[0].gl_Position.xy, vec2(gl_in[1].gl_Position.x, gl_in[2].gl_Position.y), 0.001)) {
// 0 ------ 1 0: -1,-1
// | - | 1: 1,-1
// | // | 2: -1, 1
// | - | 3: [ 1, 1 ]
// 2 ----- [3]
//
// 0 ------ 2 0: -1,-1
// | - | 1: -1, 1
// | // | 2: 1,-1
// | - | 3: [ 1, 1 ]
// 1 ------[3]
gl_Position = gl_in[0].gl_Position;
gl_PointSize = gl_in[0].gl_PointSize;
out_interpolators = in_interpolators[0];
EmitVertex();
gl_Position = gl_in[1].gl_Position;
gl_PointSize = gl_in[1].gl_PointSize;
out_interpolators = in_interpolators[1];
EmitVertex();
gl_Position = gl_in[2].gl_Position;
gl_PointSize = gl_in[2].gl_PointSize;
out_interpolators = in_interpolators[2];
EmitVertex();
EndPrimitive();
gl_Position = gl_in[2].gl_Position;
gl_PointSize = gl_in[2].gl_PointSize;
out_interpolators = in_interpolators[2];
EmitVertex();
gl_Position = gl_in[1].gl_Position;
gl_PointSize = gl_in[1].gl_PointSize;
out_interpolators = in_interpolators[1];
EmitVertex();
gl_Position = vec4((-gl_in[0].gl_Position.xy) +
gl_in[1].gl_Position.xy +
gl_in[2].gl_Position.xy,
gl_in[2].gl_Position.zw);
gl_PointSize = gl_in[2].gl_PointSize;
for (int i = 0; i < 16; ++i) {
out_interpolators[i] = (-in_interpolators[0][i]) +
in_interpolators[1][i] +
in_interpolators[2][i];
}
EmitVertex();
EndPrimitive();
} else {
// 0 ------ 1 0: -1,-1
// | - | 1: 1,-1
// | \\ | 2: 1, 1
// | - | 3: [-1, 1 ]
// [3] ----- 2
gl_Position = gl_in[0].gl_Position;
gl_PointSize = gl_in[0].gl_PointSize;
out_interpolators = in_interpolators[0];
EmitVertex();
gl_Position = gl_in[1].gl_Position;
gl_PointSize = gl_in[1].gl_PointSize;
out_interpolators = in_interpolators[1];
EmitVertex();
gl_Position = gl_in[2].gl_Position;
gl_PointSize = gl_in[2].gl_PointSize;
out_interpolators = in_interpolators[2];
EmitVertex();
EndPrimitive();
gl_Position = gl_in[0].gl_Position;
gl_PointSize = gl_in[0].gl_PointSize;
out_interpolators = in_interpolators[0];
EmitVertex();
gl_Position = gl_in[2].gl_Position;
gl_PointSize = gl_in[2].gl_PointSize;
out_interpolators = in_interpolators[2];
EmitVertex();
gl_Position = vec4( gl_in[0].gl_Position.xy +
(-gl_in[1].gl_Position.xy) +
gl_in[2].gl_Position.xy,
gl_in[2].gl_Position.zw);
gl_PointSize = gl_in[2].gl_PointSize;
for (int i = 0; i < 16; ++i) {
out_interpolators[i] = in_interpolators[0][i] +
(-in_interpolators[1][i]) +
in_interpolators[2][i];
}
EmitVertex();
EndPrimitive();
}
}
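
Both branches above recover the rectangle's missing fourth corner with the parallelogram rule: the known vertex diagonal to the missing corner enters with a minus sign (v3 = -v0 + v1 + v2 in the first branch, v3 = v0 - v1 + v2 in the second), and the same signed sum is applied to all 16 interpolators so attributes extrapolate linearly across the full rectangle. A minimal sketch of the rule (illustrative types):

// With v0 diagonal to the missing corner, the parallelogram rule gives the
// fourth corner; the shader applies the identical sum per interpolator.
struct Vec2 { float x, y; };
Vec2 FourthCorner(Vec2 v0, Vec2 v1, Vec2 v2) {
  return {v1.x + v2.x - v0.x, v1.y + v2.y - v0.y};
}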

View File

@ -1,146 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/texture_config.h"
namespace xe {
namespace gpu {
namespace vulkan {
#define COMP_SWIZ(r, g, b, a) \
{ \
VK_COMPONENT_SWIZZLE_##r, VK_COMPONENT_SWIZZLE_##g, \
VK_COMPONENT_SWIZZLE_##b, VK_COMPONENT_SWIZZLE_##a \
}
#define VEC_SWIZ(x, y, z, w) \
{ \
VECTOR_SWIZZLE_##x, VECTOR_SWIZZLE_##y, VECTOR_SWIZZLE_##z, \
VECTOR_SWIZZLE_##w \
}
#define RGBA COMP_SWIZ(R, G, B, A)
#define ___R COMP_SWIZ(IDENTITY, IDENTITY, IDENTITY, R)
#define RRRR COMP_SWIZ(R, R, R, R)
#define XYZW VEC_SWIZ(X, Y, Z, W)
#define YXWZ VEC_SWIZ(Y, X, W, Z)
#define ZYXW VEC_SWIZ(Z, Y, X, W)
#define ___(format) \
{ VK_FORMAT_##format }
#define _c_(format, component_swizzle) \
{ VK_FORMAT_##format, component_swizzle, XYZW }
#define __v(format, vector_swizzle) \
{ VK_FORMAT_##format, RGBA, vector_swizzle }
#define _cv(format, component_swizzle, vector_swizzle) \
{ VK_FORMAT_##format, component_swizzle, vector_swizzle }
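// Expansion sketch (derived from the macros above, for illustration): the
// k_4_4_4_4 entry below, __v(R4G4B4A4_UNORM_PACK16, YXWZ), expands to
//   { VK_FORMAT_R4G4B4A4_UNORM_PACK16,
//     { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G,
//       VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A },
//     { VECTOR_SWIZZLE_Y, VECTOR_SWIZZLE_X, VECTOR_SWIZZLE_W,
//       VECTOR_SWIZZLE_Z } }.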
// https://www.khronos.org/registry/vulkan/specs/1.1-extensions/man/html/VkFormat.html
const TextureConfig texture_configs[64] = {
/* k_1_REVERSE */ ___(UNDEFINED),
/* k_1 */ ___(UNDEFINED),
/* k_8 */ ___(R8_UNORM),
/* k_1_5_5_5 */ __v(A1R5G5B5_UNORM_PACK16, ZYXW),
/* k_5_6_5 */ __v(R5G6B5_UNORM_PACK16, ZYXW),
/* k_6_5_5 */ ___(UNDEFINED),
/* k_8_8_8_8 */ ___(R8G8B8A8_UNORM),
/* k_2_10_10_10 */ ___(A2R10G10B10_UNORM_PACK32),
/* k_8_A */ ___(R8_UNORM),
/* k_8_B */ ___(UNDEFINED),
/* k_8_8 */ ___(R8G8_UNORM),
/* k_Cr_Y1_Cb_Y0_REP */ ___(UNDEFINED),
/* k_Y1_Cr_Y0_Cb_REP */ ___(UNDEFINED),
/* k_16_16_EDRAM */ ___(UNDEFINED),
/* k_8_8_8_8_A */ ___(UNDEFINED),
/* k_4_4_4_4 */ __v(R4G4B4A4_UNORM_PACK16, YXWZ),
// TODO: Verify if these two are correct (I think not).
/* k_10_11_11 */ ___(B10G11R11_UFLOAT_PACK32),
/* k_11_11_10 */ ___(B10G11R11_UFLOAT_PACK32),
/* k_DXT1 */ ___(BC1_RGBA_UNORM_BLOCK),
/* k_DXT2_3 */ ___(BC2_UNORM_BLOCK),
/* k_DXT4_5 */ ___(BC3_UNORM_BLOCK),
/* k_16_16_16_16_EDRAM */ ___(UNDEFINED),
// TODO: D24 unsupported on AMD.
/* k_24_8 */ ___(D24_UNORM_S8_UINT),
/* k_24_8_FLOAT */ ___(D32_SFLOAT_S8_UINT),
/* k_16 */ ___(R16_UNORM),
/* k_16_16 */ ___(R16G16_UNORM),
/* k_16_16_16_16 */ ___(R16G16B16A16_UNORM),
/* k_16_EXPAND */ ___(R16_SFLOAT),
/* k_16_16_EXPAND */ ___(R16G16_SFLOAT),
/* k_16_16_16_16_EXPAND */ ___(R16G16B16A16_SFLOAT),
/* k_16_FLOAT */ ___(R16_SFLOAT),
/* k_16_16_FLOAT */ ___(R16G16_SFLOAT),
/* k_16_16_16_16_FLOAT */ ___(R16G16B16A16_SFLOAT),
// ! These are UNORM formats, not SINT.
/* k_32 */ ___(R32_SINT),
/* k_32_32 */ ___(R32G32_SINT),
/* k_32_32_32_32 */ ___(R32G32B32A32_SINT),
/* k_32_FLOAT */ ___(R32_SFLOAT),
/* k_32_32_FLOAT */ ___(R32G32_SFLOAT),
/* k_32_32_32_32_FLOAT */ ___(R32G32B32A32_SFLOAT),
/* k_32_AS_8 */ ___(UNDEFINED),
/* k_32_AS_8_8 */ ___(UNDEFINED),
/* k_16_MPEG */ ___(UNDEFINED),
/* k_16_16_MPEG */ ___(UNDEFINED),
/* k_8_INTERLACED */ ___(UNDEFINED),
/* k_32_AS_8_INTERLACED */ ___(UNDEFINED),
/* k_32_AS_8_8_INTERLACED */ ___(UNDEFINED),
/* k_16_INTERLACED */ ___(UNDEFINED),
/* k_16_MPEG_INTERLACED */ ___(UNDEFINED),
/* k_16_16_MPEG_INTERLACED */ ___(UNDEFINED),
// https://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
/* k_DXN */ ___(BC5_UNORM_BLOCK), // ?
/* k_8_8_8_8_AS_16_16_16_16 */ ___(R8G8B8A8_UNORM),
/* k_DXT1_AS_16_16_16_16 */ ___(BC1_RGBA_UNORM_BLOCK),
/* k_DXT2_3_AS_16_16_16_16 */ ___(BC2_UNORM_BLOCK),
/* k_DXT4_5_AS_16_16_16_16 */ ___(BC3_UNORM_BLOCK),
/* k_2_10_10_10_AS_16_16_16_16 */ ___(A2R10G10B10_UNORM_PACK32),
// TODO: Verify if these two are correct (I think not).
/* k_10_11_11_AS_16_16_16_16 */ ___(B10G11R11_UFLOAT_PACK32), // ?
/* k_11_11_10_AS_16_16_16_16 */ ___(B10G11R11_UFLOAT_PACK32), // ?
/* k_32_32_32_FLOAT */ ___(R32G32B32_SFLOAT),
/* k_DXT3A */ _c_(BC2_UNORM_BLOCK, ___R),
/* k_DXT5A */ _c_(BC4_UNORM_BLOCK, RRRR), // ATI1N
// https://fileadmin.cs.lth.se/cs/Personal/Michael_Doggett/talks/unc-xenos-doggett.pdf
/* k_CTX1 */ ___(R8G8_UINT),
/* k_DXT3A_AS_1_1_1_1 */ ___(UNDEFINED),
/* k_8_8_8_8_GAMMA_EDRAM */ ___(UNDEFINED),
/* k_2_10_10_10_FLOAT_EDRAM */ ___(UNDEFINED),
};
#undef _cv
#undef __v
#undef _c_
#undef ___
#undef ZYXW
#undef YXWZ
#undef XYZW
#undef RRRR
#undef ___R
#undef RGBA
#undef VEC_SWIZ
#undef COMP_SWIZ
} // namespace vulkan
} // namespace gpu
} // namespace xe

View File

@ -1,50 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_TEXTURE_CONFIG_H_
#define XENIA_GPU_VULKAN_TEXTURE_CONFIG_H_
#include "xenia/gpu/texture_info.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace gpu {
namespace vulkan {
typedef enum VectorSwizzle {
VECTOR_SWIZZLE_X = 0,
VECTOR_SWIZZLE_Y = 1,
VECTOR_SWIZZLE_Z = 2,
VECTOR_SWIZZLE_W = 3,
} VectorSwizzle;
struct TextureConfig {
VkFormat host_format;
struct {
VkComponentSwizzle r = VK_COMPONENT_SWIZZLE_R;
VkComponentSwizzle g = VK_COMPONENT_SWIZZLE_G;
VkComponentSwizzle b = VK_COMPONENT_SWIZZLE_B;
VkComponentSwizzle a = VK_COMPONENT_SWIZZLE_A;
} component_swizzle;
struct {
VectorSwizzle x = VECTOR_SWIZZLE_X;
VectorSwizzle y = VECTOR_SWIZZLE_Y;
VectorSwizzle z = VECTOR_SWIZZLE_Z;
VectorSwizzle w = VECTOR_SWIZZLE_W;
} vector_swizzle;
};
extern const TextureConfig texture_configs[64];
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_TEXTURE_CONFIG_H_

File diff suppressed because it is too large

View File

@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@ -10,69 +10,264 @@
#ifndef XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_
#define XENIA_GPU_VULKAN_VULKAN_COMMAND_PROCESSOR_H_
#include <atomic>
#include <array>
#include <climits>
#include <cstdint>
#include <cstring>
#include <deque>
#include <functional>
#include <memory>
#include <mutex>
#include <queue>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "xenia/base/threading.h"
#include "xenia/base/assert.h"
#include "xenia/base/hash.h"
#include "xenia/gpu/command_processor.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/vulkan/buffer_cache.h"
#include "xenia/gpu/vulkan/render_cache.h"
#include "xenia/gpu/draw_util.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
#include "xenia/gpu/vulkan/vulkan_graphics_system.h"
#include "xenia/gpu/vulkan/vulkan_pipeline_cache.h"
#include "xenia/gpu/vulkan/vulkan_primitive_processor.h"
#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
#include "xenia/gpu/vulkan/vulkan_texture_cache.h"
#include "xenia/gpu/xenos.h"
#include "xenia/kernel/xthread.h"
#include "xenia/memory.h"
#include "xenia/ui/vulkan/blitter.h"
#include "xenia/ui/vulkan/fenced_pools.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/ui/vulkan/single_type_descriptor_set_allocator.h"
#include "xenia/ui/vulkan/vulkan_presenter.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
#include "xenia/ui/vulkan/vulkan_submission_tracker.h"
#include "xenia/ui/vulkan/vulkan_util.h"
#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
namespace xe {
namespace gpu {
namespace vulkan {
class VulkanTextureCache;
class VulkanCommandProcessor : public CommandProcessor {
public:
// Single-descriptor layouts for use within a single frame.
enum class SingleTransientDescriptorLayout {
kUniformBufferGuestVertex,
kUniformBufferFragment,
kUniformBufferGuestShader,
kUniformBufferSystemConstants,
kUniformBufferCompute,
kStorageBufferCompute,
kCount,
};
class ScratchBufferAcquisition {
public:
explicit ScratchBufferAcquisition() = default;
explicit ScratchBufferAcquisition(VulkanCommandProcessor& command_processor,
VkBuffer buffer,
VkPipelineStageFlags stage_mask,
VkAccessFlags access_mask)
: command_processor_(&command_processor),
buffer_(buffer),
stage_mask_(stage_mask),
access_mask_(access_mask) {}
ScratchBufferAcquisition(const ScratchBufferAcquisition& acquisition) =
delete;
ScratchBufferAcquisition& operator=(
const ScratchBufferAcquisition& acquisition) = delete;
ScratchBufferAcquisition(ScratchBufferAcquisition&& acquisition) {
command_processor_ = acquisition.command_processor_;
buffer_ = acquisition.buffer_;
stage_mask_ = acquisition.stage_mask_;
access_mask_ = acquisition.access_mask_;
acquisition.command_processor_ = nullptr;
acquisition.buffer_ = VK_NULL_HANDLE;
acquisition.stage_mask_ = 0;
acquisition.access_mask_ = 0;
}
ScratchBufferAcquisition& operator=(
ScratchBufferAcquisition&& acquisition) {
if (this == &acquisition) {
return *this;
}
command_processor_ = acquisition.command_processor_;
buffer_ = acquisition.buffer_;
stage_mask_ = acquisition.stage_mask_;
access_mask_ = acquisition.access_mask_;
acquisition.command_processor_ = nullptr;
acquisition.buffer_ = VK_NULL_HANDLE;
acquisition.stage_mask_ = 0;
acquisition.access_mask_ = 0;
return *this;
}
~ScratchBufferAcquisition() {
if (buffer_ != VK_NULL_HANDLE) {
assert_true(command_processor_->scratch_buffer_used_);
assert_true(command_processor_->scratch_buffer_ == buffer_);
command_processor_->scratch_buffer_last_stage_mask_ = stage_mask_;
command_processor_->scratch_buffer_last_access_mask_ = access_mask_;
command_processor_->scratch_buffer_last_usage_submission_ =
command_processor_->GetCurrentSubmission();
command_processor_->scratch_buffer_used_ = false;
}
}
    // VK_NULL_HANDLE if the acquisition failed or after a move.
VkBuffer buffer() const { return buffer_; }
VkPipelineStageFlags GetStageMask() const { return stage_mask_; }
VkPipelineStageFlags SetStageMask(VkPipelineStageFlags new_stage_mask) {
VkPipelineStageFlags old_stage_mask = stage_mask_;
stage_mask_ = new_stage_mask;
return old_stage_mask;
}
VkAccessFlags GetAccessMask() const { return access_mask_; }
VkAccessFlags SetAccessMask(VkAccessFlags new_access_mask) {
VkAccessFlags old_access_mask = access_mask_;
access_mask_ = new_access_mask;
return old_access_mask;
}
private:
VulkanCommandProcessor* command_processor_ = nullptr;
VkBuffer buffer_ = VK_NULL_HANDLE;
VkPipelineStageFlags stage_mask_ = 0;
VkAccessFlags access_mask_ = 0;
};
VulkanCommandProcessor(VulkanGraphicsSystem* graphics_system,
kernel::KernelState* kernel_state);
~VulkanCommandProcessor() override;
~VulkanCommandProcessor();
void RequestFrameTrace(const std::filesystem::path& root_path) override;
void TracePlaybackWroteMemory(uint32_t base_ptr, uint32_t length) override;
void RestoreEdramSnapshot(const void* snapshot) override;
void ClearCaches() override;
ui::vulkan::VulkanProvider& GetVulkanProvider() const {
return *static_cast<ui::vulkan::VulkanProvider*>(
graphics_system_->provider());
}
RenderCache* render_cache() { return render_cache_.get(); }
// Returns the deferred drawing command list for the currently open
// submission.
DeferredCommandBuffer& deferred_command_buffer() {
assert_true(submission_open_);
return deferred_command_buffer_;
}
private:
bool submission_open() const { return submission_open_; }
uint64_t GetCurrentSubmission() const {
return submission_completed_ +
uint64_t(submissions_in_flight_fences_.size()) + 1;
}
uint64_t GetCompletedSubmission() const { return submission_completed_; }
// Sparse binds are:
// - In a single submission, all submitted in one vkQueueBindSparse.
// - Sent to the queue without waiting for a semaphore.
// Thus, multiple sparse binds between the completed and the current
// submission, and within one submission, must not touch any overlapping
// memory regions.
void SparseBindBuffer(VkBuffer buffer, uint32_t bind_count,
const VkSparseMemoryBind* binds,
VkPipelineStageFlags wait_stage_mask);
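  // Usage sketch (hypothetical handles and sizes, for illustration only):
  //   VkSparseMemoryBind bind = {};
  //   bind.resourceOffset = 0;
  //   bind.size = 64 * 1024;
  //   bind.memory = device_memory;  // Hypothetical allocation.
  //   bind.memoryOffset = 0;
  //   SparseBindBuffer(sparse_buffer, 1, &bind,
  //                    VK_PIPELINE_STAGE_TRANSFER_BIT);
  // The bind is queued and sent in one vkQueueBindSparse together with any
  // other binds of the current submission, as described above.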
uint64_t GetCurrentFrame() const { return frame_current_; }
uint64_t GetCompletedFrame() const { return frame_completed_; }
// Submission must be open to insert barriers. If no pipeline stages access
// the resource in a synchronization scope, the stage masks should be 0 (top /
  // bottom of pipe should be specified only if explicitly needed). Returns
  // true if the barrier has actually been inserted and not dropped.
bool PushBufferMemoryBarrier(
VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size,
VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask,
uint32_t src_queue_family_index = VK_QUEUE_FAMILY_IGNORED,
uint32_t dst_queue_family_index = VK_QUEUE_FAMILY_IGNORED,
bool skip_if_equal = true);
bool PushImageMemoryBarrier(
VkImage image, const VkImageSubresourceRange& subresource_range,
VkPipelineStageFlags src_stage_mask, VkPipelineStageFlags dst_stage_mask,
VkAccessFlags src_access_mask, VkAccessFlags dst_access_mask,
VkImageLayout old_layout, VkImageLayout new_layout,
uint32_t src_queue_family_index = VK_QUEUE_FAMILY_IGNORED,
uint32_t dst_queue_family_index = VK_QUEUE_FAMILY_IGNORED,
bool skip_if_equal = true);
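  // Example barrier push (a sketch with hypothetical handles): making a
  // transfer write visible to fragment shader reads -
  //   PushImageMemoryBarrier(image, subresource_range,
  //                          VK_PIPELINE_STAGE_TRANSFER_BIT,
  //                          VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
  //                          VK_ACCESS_TRANSFER_WRITE_BIT,
  //                          VK_ACCESS_SHADER_READ_BIT,
  //                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
  //                          VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL);
  // The barrier stays pending until SubmitBarriers is called, directly or via
  // SubmitBarriersAndEnterRenderTargetCacheRenderPass below.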
// Returns whether any barriers have been submitted - if true is returned, the
// render pass will also be closed.
bool SubmitBarriers(bool force_end_render_pass);
// If not started yet, begins a render pass from the render target cache.
// Submission must be open.
void SubmitBarriersAndEnterRenderTargetCacheRenderPass(
VkRenderPass render_pass,
const VulkanRenderTargetCache::Framebuffer* framebuffer);
// Must be called before doing anything outside the render pass scope,
// including adding pipeline barriers that are not a part of the render pass
// scope. Submission must be open.
void EndRenderPass();
VkDescriptorSetLayout GetSingleTransientDescriptorLayout(
SingleTransientDescriptorLayout transient_descriptor_layout) const {
return descriptor_set_layouts_single_transient_[size_t(
transient_descriptor_layout)];
}
// A frame must be open.
VkDescriptorSet AllocateSingleTransientDescriptor(
SingleTransientDescriptorLayout transient_descriptor_layout);
// Allocates a descriptor, space in the uniform buffer pool, and fills the
// VkWriteDescriptorSet structure and VkDescriptorBufferInfo referenced by it.
// Returns null in case of failure.
uint8_t* WriteTransientUniformBufferBinding(
size_t size, SingleTransientDescriptorLayout transient_descriptor_layout,
VkDescriptorBufferInfo& descriptor_buffer_info_out,
VkWriteDescriptorSet& write_descriptor_set_out);
uint8_t* WriteTransientUniformBufferBinding(
size_t size, SingleTransientDescriptorLayout transient_descriptor_layout,
VkDescriptorSet& descriptor_set_out);
// The returned reference is valid until a cache clear.
VkDescriptorSetLayout GetTextureDescriptorSetLayout(bool is_samplers,
bool is_vertex,
size_t binding_count);
// The returned reference is valid until a cache clear.
const VulkanPipelineCache::PipelineLayoutProvider* GetPipelineLayout(
size_t texture_count_pixel, size_t sampler_count_pixel,
size_t texture_count_vertex, size_t sampler_count_vertex);
// Returns a single temporary GPU-side buffer within a submission for tasks
// like texture untiling and resolving. May push a buffer memory barrier into
// the initial usage. Submission must be open.
ScratchBufferAcquisition AcquireScratchGpuBuffer(
VkDeviceSize size, VkPipelineStageFlags initial_stage_mask,
VkAccessFlags initial_access_mask);
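  // Usage sketch (hypothetical size, for illustration):
  //   ScratchBufferAcquisition scratch = AcquireScratchGpuBuffer(
  //       1024 * 1024, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
  //       VK_ACCESS_SHADER_WRITE_BIT);
  //   if (scratch.buffer() != VK_NULL_HANDLE) {
  //     // ... record work using scratch.buffer() ...
  //   }  // The destructor releases the buffer, recording its last usage
  //      // stage, access mask and submission for later synchronization.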
// Binds a graphics pipeline for host-specific purposes, invalidating the
// affected state. keep_dynamic_* must be false (to invalidate the dynamic
// state after binding the pipeline with the same state being static, or if
// the caller changes the dynamic state bypassing the VulkanCommandProcessor)
// unless the caller has these state variables as dynamic and uses the
// tracking in VulkanCommandProcessor to modify them.
void BindExternalGraphicsPipeline(VkPipeline pipeline,
bool keep_dynamic_depth_bias = false,
bool keep_dynamic_blend_constants = false,
bool keep_dynamic_stencil_mask_ref = false);
void BindExternalComputePipeline(VkPipeline pipeline);
void SetViewport(const VkViewport& viewport);
void SetScissor(const VkRect2D& scissor);
protected:
bool SetupContext() override;
void ShutdownContext() override;
void MakeCoherent() override;
void WriteRegister(uint32_t index, uint32_t value) override;
void BeginFrame();
void EndFrame();
void OnGammaRamp256EntryTableValueWritten() override;
void OnGammaRampPWLValueWritten() override;
void IssueSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width,
uint32_t frontbuffer_height) override;
@ -81,52 +276,459 @@ class VulkanCommandProcessor : public CommandProcessor {
const uint32_t* host_address,
uint32_t dword_count) override;
bool IssueDraw(xenos::PrimitiveType primitive_type, uint32_t index_count,
bool IssueDraw(xenos::PrimitiveType prim_type, uint32_t index_count,
IndexBufferInfo* index_buffer_info,
bool major_mode_explicit) override;
bool PopulateConstants(VkCommandBuffer command_buffer,
VulkanShader* vertex_shader,
VulkanShader* pixel_shader);
bool PopulateIndexBuffer(VkCommandBuffer command_buffer,
IndexBufferInfo* index_buffer_info);
bool PopulateVertexBuffers(VkCommandBuffer command_buffer,
VkCommandBuffer setup_buffer,
VulkanShader* vertex_shader);
bool PopulateSamplers(VkCommandBuffer command_buffer,
VkCommandBuffer setup_buffer,
VulkanShader* vertex_shader,
VulkanShader* pixel_shader);
bool IssueCopy() override;
uint64_t dirty_float_constants_ = 0; // Dirty float constants in blocks of 4
uint8_t dirty_bool_constants_ = 0;
uint32_t dirty_loop_constants_ = 0;
uint8_t dirty_gamma_constants_ = 0;
void InitializeTrace() override;
uint32_t coher_base_vc_ = 0;
uint32_t coher_size_vc_ = 0;
private:
struct CommandBuffer {
VkCommandPool pool;
VkCommandBuffer buffer;
};
struct SparseBufferBind {
VkBuffer buffer;
size_t bind_offset;
uint32_t bind_count;
};
union TextureDescriptorSetLayoutKey {
uint32_t key;
struct {
// 0 - sampled image descriptors, 1 - sampler descriptors.
uint32_t is_samplers : 1;
uint32_t is_vertex : 1;
// For 0, use descriptor_set_layout_empty_ instead as these are owning
// references.
uint32_t binding_count : 30;
};
TextureDescriptorSetLayoutKey() : key(0) {
static_assert_size(*this, sizeof(key));
}
struct Hasher {
size_t operator()(const TextureDescriptorSetLayoutKey& key) const {
return std::hash<decltype(key.key)>{}(key.key);
}
};
bool operator==(const TextureDescriptorSetLayoutKey& other_key) const {
return key == other_key.key;
}
bool operator!=(const TextureDescriptorSetLayoutKey& other_key) const {
return !(*this == other_key);
}
};
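  // Packing example (a derivation assuming the common LSB-first bit-field
  // layout, which is not guaranteed by the C++ standard): a pixel-shader
  // sampler layout with 5 bindings has is_samplers = 1, is_vertex = 0,
  // binding_count = 5, giving key = 1 | (0 << 1) | (5 << 2) = 0x15.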
union PipelineLayoutKey {
uint64_t key;
struct {
// Pixel textures in the low bits since those are varied much more
// commonly.
uint16_t texture_count_pixel;
uint16_t sampler_count_pixel;
uint16_t texture_count_vertex;
uint16_t sampler_count_vertex;
};
PipelineLayoutKey() : key(0) { static_assert_size(*this, sizeof(key)); }
struct Hasher {
size_t operator()(const PipelineLayoutKey& key) const {
return std::hash<decltype(key.key)>{}(key.key);
}
};
bool operator==(const PipelineLayoutKey& other_key) const {
return key == other_key.key;
}
bool operator!=(const PipelineLayoutKey& other_key) const {
return !(*this == other_key);
}
};
class PipelineLayout : public VulkanPipelineCache::PipelineLayoutProvider {
public:
explicit PipelineLayout(
VkPipelineLayout pipeline_layout,
VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref,
VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref,
VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref,
VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref)
: pipeline_layout_(pipeline_layout),
descriptor_set_layout_textures_vertex_ref_(
descriptor_set_layout_textures_vertex_ref),
descriptor_set_layout_samplers_vertex_ref_(
descriptor_set_layout_samplers_vertex_ref),
descriptor_set_layout_textures_pixel_ref_(
descriptor_set_layout_textures_pixel_ref),
descriptor_set_layout_samplers_pixel_ref_(
descriptor_set_layout_samplers_pixel_ref) {}
VkPipelineLayout GetPipelineLayout() const override {
return pipeline_layout_;
}
VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref() const {
return descriptor_set_layout_textures_vertex_ref_;
}
VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref() const {
return descriptor_set_layout_samplers_vertex_ref_;
}
VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref() const {
return descriptor_set_layout_textures_pixel_ref_;
}
VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref() const {
return descriptor_set_layout_samplers_pixel_ref_;
}
private:
VkPipelineLayout pipeline_layout_;
VkDescriptorSetLayout descriptor_set_layout_textures_vertex_ref_;
VkDescriptorSetLayout descriptor_set_layout_samplers_vertex_ref_;
VkDescriptorSetLayout descriptor_set_layout_textures_pixel_ref_;
VkDescriptorSetLayout descriptor_set_layout_samplers_pixel_ref_;
};
struct UsedSingleTransientDescriptor {
uint64_t frame;
SingleTransientDescriptorLayout layout;
VkDescriptorSet set;
};
struct UsedTextureTransientDescriptorSet {
uint64_t frame;
TextureDescriptorSetLayoutKey layout;
VkDescriptorSet set;
};
enum SwapApplyGammaDescriptorSet : uint32_t {
kSwapApplyGammaDescriptorSetRamp,
kSwapApplyGammaDescriptorSetSource,
kSwapApplyGammaDescriptorSetCount,
};
// Framebuffer for the current presenter's guest output image revision, and
// its usage tracking.
struct SwapFramebuffer {
VkFramebuffer framebuffer = VK_NULL_HANDLE;
uint64_t version = UINT64_MAX;
uint64_t last_submission = 0;
};
// BeginSubmission and EndSubmission may be called at any time. If there's an
// open non-frame submission, BeginSubmission(true) will promote it to a
// frame. EndSubmission(true) will close the frame no matter whether the
// submission has already been closed.
// Unlike on Direct3D 12, submission boundaries do not imply any memory
// barriers aside from an incoming host write (but not outgoing host read)
// dependency.
// Rechecks submission number and reclaims per-submission resources. Pass 0 as
// the submission to await to simply check status, or pass
// GetCurrentSubmission() to wait for all queue operations to be completed.
void CheckSubmissionFenceAndDeviceLoss(uint64_t await_submission);
// If is_guest_command is true, a new full frame - with full cleanup of
// resources and, if needed, starting capturing - is opened if pending (as
// opposed to simply resuming after mid-frame synchronization). Returns
// whether a submission is open currently and the device is not lost.
bool BeginSubmission(bool is_guest_command);
// If is_swap is true, a full frame is closed - with, if needed, cache
// clearing and stopping capturing. Returns whether the submission was done
  // successfully; if it has failed, the submission is left open.
bool EndSubmission(bool is_swap);
bool AwaitAllQueueOperationsCompletion() {
CheckSubmissionFenceAndDeviceLoss(GetCurrentSubmission());
return !submission_open_ && submissions_in_flight_fences_.empty();
}
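  // Lifecycle sketch (illustrative only): a guest draw would typically run
  //   if (BeginSubmission(true)) {  // Opens or resumes a frame submission.
  //     // ... record deferred commands ...
  //   }
  // and a swap eventually performs
  //   EndSubmission(true);  // Closes the frame, with cache clearing if
  //                         // requested and stopping capturing.
  // Mid-frame synchronization instead uses EndSubmission(false), which closes
  // the submission but leaves the frame open for the next BeginSubmission.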
void ClearTransientDescriptorPools();
void SplitPendingBarrier();
void DestroyScratchBuffer();
void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info,
bool primitive_polygonal,
reg::RB_DEPTHCONTROL normalized_depth_control);
void UpdateSystemConstantValues(bool primitive_polygonal,
xenos::Endian index_endian,
const draw_util::ViewportInfo& viewport_info,
uint32_t used_texture_mask);
bool UpdateBindings(const VulkanShader* vertex_shader,
const VulkanShader* pixel_shader);
// Allocates a descriptor set and fills the VkWriteDescriptorSet structure.
// The descriptor set layout must be the one for the given is_samplers,
// is_vertex, binding_count (from GetTextureDescriptorSetLayout - may be
// already available at the moment of the call, no need to locate it again).
// Returns whether the allocation was successful.
bool WriteTransientTextureBindings(
bool is_samplers, bool is_vertex, uint32_t binding_count,
VkDescriptorSetLayout descriptor_set_layout,
const VkDescriptorImageInfo* image_info,
VkWriteDescriptorSet& write_descriptor_set_out);
bool device_lost_ = false;
bool capturing_ = false;
bool trace_requested_ = false;
bool cache_clear_requested_ = false;
std::unique_ptr<BufferCache> buffer_cache_;
// Host shader types that guest shaders can be translated into - they can
// access the shared memory (via vertex fetch, memory export, or manual index
// buffer reading) and textures.
VkPipelineStageFlags guest_shader_pipeline_stages_ = 0;
VkShaderStageFlags guest_shader_vertex_stages_ = 0;
std::vector<VkFence> fences_free_;
std::vector<VkSemaphore> semaphores_free_;
bool submission_open_ = false;
uint64_t submission_completed_ = 0;
// In case vkQueueSubmit fails after something like a successful
// vkQueueBindSparse, to wait correctly on the next attempt.
std::vector<VkSemaphore> current_submission_wait_semaphores_;
std::vector<VkPipelineStageFlags> current_submission_wait_stage_masks_;
std::vector<VkFence> submissions_in_flight_fences_;
std::deque<std::pair<uint64_t, VkSemaphore>>
submissions_in_flight_semaphores_;
static constexpr uint32_t kMaxFramesInFlight = 3;
bool frame_open_ = false;
// Guest frame index, since some transient resources can be reused across
  // submissions. Values are updated at the beginning of a frame.
uint64_t frame_current_ = 1;
uint64_t frame_completed_ = 0;
// Submission indices of frames that have already been submitted.
uint64_t closed_frame_submissions_[kMaxFramesInFlight] = {};
// <Submission where last used, resource>, sorted by the submission number.
std::deque<std::pair<uint64_t, VkDeviceMemory>> destroy_memory_;
std::deque<std::pair<uint64_t, VkBuffer>> destroy_buffers_;
std::deque<std::pair<uint64_t, VkFramebuffer>> destroy_framebuffers_;
std::vector<CommandBuffer> command_buffers_writable_;
std::deque<std::pair<uint64_t, CommandBuffer>> command_buffers_submitted_;
DeferredCommandBuffer deferred_command_buffer_;
std::vector<VkSparseMemoryBind> sparse_memory_binds_;
std::vector<SparseBufferBind> sparse_buffer_binds_;
  // SparseBufferBinds are converted to VkSparseBufferMemoryBindInfo entries in
  // this vector on submission (pBinds must point into sparse_memory_binds_,
  // which may be reallocated while binds are still being added, so offsets are
  // stored instead and resolved only at submit time).
std::vector<VkSparseBufferMemoryBindInfo> sparse_buffer_bind_infos_temp_;
VkPipelineStageFlags sparse_bind_wait_stage_mask_ = 0;
// Temporary storage with reusable memory for creating descriptor set layouts.
std::vector<VkDescriptorSetLayoutBinding> descriptor_set_layout_bindings_;
// Temporary storage with reusable memory for writing image and sampler
// descriptors.
std::vector<VkDescriptorImageInfo> descriptor_write_image_info_;
std::unique_ptr<ui::vulkan::VulkanUploadBufferPool> uniform_buffer_pool_;
// Descriptor set layouts used by different shaders.
VkDescriptorSetLayout descriptor_set_layout_empty_ = VK_NULL_HANDLE;
VkDescriptorSetLayout descriptor_set_layout_shared_memory_and_edram_ =
VK_NULL_HANDLE;
std::array<VkDescriptorSetLayout,
size_t(SingleTransientDescriptorLayout::kCount)>
descriptor_set_layouts_single_transient_{};
// Descriptor set layouts are referenced by pipeline_layouts_.
std::unordered_map<TextureDescriptorSetLayoutKey, VkDescriptorSetLayout,
TextureDescriptorSetLayoutKey::Hasher>
descriptor_set_layouts_textures_;
// Pipeline layouts are referenced by VulkanPipelineCache.
std::unordered_map<PipelineLayoutKey, PipelineLayout,
PipelineLayoutKey::Hasher>
pipeline_layouts_;
ui::vulkan::SingleTypeDescriptorSetAllocator
transient_descriptor_allocator_uniform_buffer_;
ui::vulkan::SingleTypeDescriptorSetAllocator
transient_descriptor_allocator_storage_buffer_;
std::deque<UsedSingleTransientDescriptor> single_transient_descriptors_used_;
std::array<std::vector<VkDescriptorSet>,
size_t(SingleTransientDescriptorLayout::kCount)>
single_transient_descriptors_free_;
ui::vulkan::SingleTypeDescriptorSetAllocator
transient_descriptor_allocator_sampled_image_;
ui::vulkan::SingleTypeDescriptorSetAllocator
transient_descriptor_allocator_sampler_;
std::deque<UsedTextureTransientDescriptorSet>
texture_transient_descriptor_sets_used_;
std::unordered_map<TextureDescriptorSetLayoutKey,
std::vector<VkDescriptorSet>,
TextureDescriptorSetLayoutKey::Hasher>
texture_transient_descriptor_sets_free_;
std::unique_ptr<VulkanSharedMemory> shared_memory_;
std::unique_ptr<VulkanPrimitiveProcessor> primitive_processor_;
std::unique_ptr<VulkanRenderTargetCache> render_target_cache_;
std::unique_ptr<VulkanPipelineCache> pipeline_cache_;
std::unique_ptr<RenderCache> render_cache_;
std::unique_ptr<VulkanTextureCache> texture_cache_;
std::unique_ptr<ui::vulkan::Blitter> blitter_;
std::unique_ptr<ui::vulkan::CommandBufferPool> command_buffer_pool_;
VkDescriptorPool shared_memory_and_edram_descriptor_pool_ = VK_NULL_HANDLE;
VkDescriptorSet shared_memory_and_edram_descriptor_set_;
bool frame_open_ = false;
const RenderState* current_render_state_ = nullptr;
VkCommandBuffer current_command_buffer_ = nullptr;
VkCommandBuffer current_setup_buffer_ = nullptr;
VkFence current_batch_fence_;
// Bytes 0x0...0x3FF - 256-entry gamma ramp table with B10G10R10X2 data (read
// as R10G10B10X2 with swizzle).
// Bytes 0x400...0x9FF - 128-entry PWL R16G16 gamma ramp (R - base, G - delta,
// low 6 bits of each are zero, 3 elements per entry).
// kMaxFramesInFlight pairs of gamma ramps if in host-visible memory and
// uploaded directly, one otherwise.
VkDeviceMemory gamma_ramp_buffer_memory_ = VK_NULL_HANDLE;
VkBuffer gamma_ramp_buffer_ = VK_NULL_HANDLE;
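  // Size derivation for the layout described above (for illustration): the
  // 256-entry table is 256 * 4 bytes = 0x400 bytes, and the PWL ramp is
  // 128 entries * 3 R16G16 elements * 4 bytes = 0x600 bytes, hence bytes
  // 0x400...0x9FF.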
// kMaxFramesInFlight pairs, only when the gamma ramp buffer is not
// host-visible.
VkDeviceMemory gamma_ramp_upload_buffer_memory_ = VK_NULL_HANDLE;
VkBuffer gamma_ramp_upload_buffer_ = VK_NULL_HANDLE;
VkDeviceSize gamma_ramp_upload_memory_size_;
uint32_t gamma_ramp_upload_memory_type_;
// Mapping of either gamma_ramp_buffer_memory_ (if it's host-visible) or
// gamma_ramp_upload_buffer_memory_ (otherwise).
void* gamma_ramp_upload_mapping_;
std::array<VkBufferView, 2 * kMaxFramesInFlight> gamma_ramp_buffer_views_{};
// UINT32_MAX if outdated.
uint32_t gamma_ramp_256_entry_table_current_frame_ = UINT32_MAX;
uint32_t gamma_ramp_pwl_current_frame_ = UINT32_MAX;
ui::vulkan::VulkanSubmissionTracker swap_submission_tracker_;
VkFramebuffer swap_framebuffer_ = VK_NULL_HANDLE;
uint64_t swap_framebuffer_version_ = UINT64_MAX;
VkDescriptorSetLayout swap_descriptor_set_layout_sampled_image_ =
VK_NULL_HANDLE;
VkDescriptorSetLayout swap_descriptor_set_layout_uniform_texel_buffer_ =
VK_NULL_HANDLE;
// Descriptor pool for allocating descriptors needed for presentation, such as
// the destination images and the gamma ramps.
VkDescriptorPool swap_descriptor_pool_ = VK_NULL_HANDLE;
// Interleaved 256-entry table and PWL texel buffer descriptors.
// kMaxFramesInFlight pairs of gamma ramps if in host-visible memory and
// uploaded directly, one otherwise.
std::array<VkDescriptorSet, 2 * kMaxFramesInFlight>
swap_descriptors_gamma_ramp_;
// Sampled images.
std::array<VkDescriptorSet, kMaxFramesInFlight> swap_descriptors_source_;
VkPipelineLayout swap_apply_gamma_pipeline_layout_ = VK_NULL_HANDLE;
  // Has no dependencies on specific pipeline stages on either end, to simplify
// use in different scenarios with different pipelines - use explicit barriers
// for synchronization.
VkRenderPass swap_apply_gamma_render_pass_ = VK_NULL_HANDLE;
VkPipeline swap_apply_gamma_256_entry_table_pipeline_ = VK_NULL_HANDLE;
VkPipeline swap_apply_gamma_pwl_pipeline_ = VK_NULL_HANDLE;
std::array<SwapFramebuffer,
ui::vulkan::VulkanPresenter::kMaxActiveGuestOutputImageVersions>
swap_framebuffers_;
// Pending pipeline barriers.
std::vector<VkBufferMemoryBarrier> pending_barriers_buffer_memory_barriers_;
std::vector<VkImageMemoryBarrier> pending_barriers_image_memory_barriers_;
struct PendingBarrier {
VkPipelineStageFlags src_stage_mask = 0;
VkPipelineStageFlags dst_stage_mask = 0;
size_t buffer_memory_barriers_offset = 0;
size_t image_memory_barriers_offset = 0;
};
std::vector<PendingBarrier> pending_barriers_;
PendingBarrier current_pending_barrier_;
// GPU-local scratch buffer.
static constexpr VkDeviceSize kScratchBufferSizeIncrement = 16 * 1024 * 1024;
VkDeviceMemory scratch_buffer_memory_ = VK_NULL_HANDLE;
VkBuffer scratch_buffer_ = VK_NULL_HANDLE;
VkDeviceSize scratch_buffer_size_ = 0;
VkPipelineStageFlags scratch_buffer_last_stage_mask_ = 0;
VkAccessFlags scratch_buffer_last_access_mask_ = 0;
uint64_t scratch_buffer_last_usage_submission_ = 0;
bool scratch_buffer_used_ = false;
// The current dynamic state of the graphics pipeline bind point. Note that
// binding any pipeline to the bind point with static state (even if it's
// unused, like depth bias being disabled, but the values themselves still not
// declared as dynamic in the pipeline) invalidates such dynamic state.
VkViewport dynamic_viewport_;
VkRect2D dynamic_scissor_;
float dynamic_depth_bias_constant_factor_;
float dynamic_depth_bias_slope_factor_;
float dynamic_blend_constants_[4];
// The stencil values are pre-initialized (to D3D11_DEFAULT_STENCIL_*, and the
// initial values for front and back are the same for portability subset
// safety) because they're updated conditionally to avoid changing the back
// face values when stencil is disabled and the primitive type is changed
// between polygonal and non-polygonal.
uint32_t dynamic_stencil_compare_mask_front_ = UINT8_MAX;
uint32_t dynamic_stencil_compare_mask_back_ = UINT8_MAX;
uint32_t dynamic_stencil_write_mask_front_ = UINT8_MAX;
uint32_t dynamic_stencil_write_mask_back_ = UINT8_MAX;
uint32_t dynamic_stencil_reference_front_ = 0;
uint32_t dynamic_stencil_reference_back_ = 0;
bool dynamic_viewport_update_needed_;
bool dynamic_scissor_update_needed_;
bool dynamic_depth_bias_update_needed_;
bool dynamic_blend_constants_update_needed_;
bool dynamic_stencil_compare_mask_front_update_needed_;
bool dynamic_stencil_compare_mask_back_update_needed_;
bool dynamic_stencil_write_mask_front_update_needed_;
bool dynamic_stencil_write_mask_back_update_needed_;
bool dynamic_stencil_reference_front_update_needed_;
bool dynamic_stencil_reference_back_update_needed_;
// Currently used samplers.
std::vector<std::pair<VulkanTextureCache::SamplerParameters, VkSampler>>
current_samplers_vertex_;
std::vector<std::pair<VulkanTextureCache::SamplerParameters, VkSampler>>
current_samplers_pixel_;
  // Cached render pass currently started in the command buffer, and its
  // framebuffer.
VkRenderPass current_render_pass_;
const VulkanRenderTargetCache::Framebuffer* current_framebuffer_;
// Currently bound graphics pipeline, either from the pipeline cache (with
// potentially deferred creation - current_external_graphics_pipeline_ is
// VK_NULL_HANDLE in this case) or a non-Xenos one
// (current_guest_graphics_pipeline_ is VK_NULL_HANDLE in this case).
// TODO(Triang3l): Change to a deferred compilation handle.
VkPipeline current_guest_graphics_pipeline_;
VkPipeline current_external_graphics_pipeline_;
VkPipeline current_external_compute_pipeline_;
// Pipeline layout of the current guest graphics pipeline.
const PipelineLayout* current_guest_graphics_pipeline_layout_;
VkDescriptorSet current_graphics_descriptor_sets_
[SpirvShaderTranslator::kDescriptorSetCount];
// Whether descriptor sets in current_graphics_descriptor_sets_ point to
// up-to-date data.
uint32_t current_graphics_descriptor_set_values_up_to_date_;
  // Whether the descriptor sets currently bound to the command buffer are up
  // to date - only low bits for the descriptor set layouts that remained the
  // same are kept
// when changing the pipeline layout. May be out of sync with
// current_graphics_descriptor_set_values_up_to_date_, but should be ensured
// to be a subset of it at some point when it becomes important; bits for
// non-existent descriptor set layouts may also be set, but need to be ignored
// when they start to matter.
uint32_t current_graphics_descriptor_sets_bound_up_to_date_;
static_assert(
SpirvShaderTranslator::kDescriptorSetCount <=
sizeof(current_graphics_descriptor_set_values_up_to_date_) * CHAR_BIT,
"Bit fields storing descriptor set validity must be large enough");
static_assert(
SpirvShaderTranslator::kDescriptorSetCount <=
sizeof(current_graphics_descriptor_sets_bound_up_to_date_) * CHAR_BIT,
"Bit fields storing descriptor set validity must be large enough");
// Float constant usage masks of the last draw call.
uint64_t current_float_constant_map_vertex_[4];
uint64_t current_float_constant_map_pixel_[4];
// System shader constants.
SpirvShaderTranslator::SystemConstants system_constants_;
};
} // namespace vulkan

View File

@ -1,16 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/vulkan_gpu_flags.h"
DEFINE_bool(vulkan_renderdoc_capture_all, false,
"Capture everything with RenderDoc.", "Vulkan");
DEFINE_bool(vulkan_native_msaa, false, "Use native MSAA", "Vulkan");
DEFINE_bool(vulkan_dump_disasm, false,
"Dump shader disassembly. NVIDIA only supported.", "Vulkan");

View File

@ -1,20 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_
#define XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_
#define FINE_GRAINED_DRAW_SCOPES 1
#include "xenia/base/cvar.h"
DECLARE_bool(vulkan_renderdoc_capture_all);
DECLARE_bool(vulkan_native_msaa);
DECLARE_bool(vulkan_dump_disasm);
#endif // XENIA_GPU_VULKAN_VULKAN_GPU_FLAGS_H_

View File

@ -26,7 +26,9 @@ class VulkanGraphicsSystem : public GraphicsSystem {
static bool IsAvailable() { return true; }
std::string name() const override { return "Vulkan - obsolete"; }
std::string name() const override {
return "Vulkan - HEAVILY INCOMPLETE, early development";
}
X_STATUS Setup(cpu::Processor* processor, kernel::KernelState* kernel_state,
ui::WindowedAppContext* app_context,

File diff suppressed because it is too large

View File

@ -2,312 +2,322 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_VULKAN_PIPELINE_CACHE_H_
#define XENIA_GPU_VULKAN_VULKAN_PIPELINE_CACHE_H_
#ifndef XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_
#define XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_
#include <cstddef>
#include <cstring>
#include <functional>
#include <memory>
#include <unordered_map>
#include <utility>
#include "xenia/base/string_buffer.h"
#include "xenia/base/hash.h"
#include "xenia/base/platform.h"
#include "xenia/base/xxhash.h"
#include "xenia/gpu/primitive_processor.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/registers.h"
#include "xenia/gpu/spirv_shader_translator.h"
#include "xenia/gpu/vulkan/render_cache.h"
#include "xenia/gpu/vulkan/vulkan_render_target_cache.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/spirv/spirv_disassembler.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace gpu {
namespace vulkan {
// Configures and caches pipelines based on render state.
// This is responsible for properly setting all state required for a draw
// including shaders, various blend/etc options, and input configuration.
class VulkanCommandProcessor;
// TODO(Triang3l): Create a common base for both the Vulkan and the Direct3D
// implementations.
class VulkanPipelineCache {
public:
enum class UpdateStatus {
kCompatible,
kMismatch,
kError,
static constexpr size_t kLayoutUIDEmpty = 0;
class PipelineLayoutProvider {
public:
virtual ~PipelineLayoutProvider() {}
virtual VkPipelineLayout GetPipelineLayout() const = 0;
protected:
PipelineLayoutProvider() = default;
};
VulkanPipelineCache(RegisterFile* register_file,
const ui::vulkan::VulkanProvider& provider);
VulkanPipelineCache(VulkanCommandProcessor& command_processor,
const RegisterFile& register_file,
VulkanRenderTargetCache& render_target_cache,
VkShaderStageFlags guest_shader_vertex_stages);
~VulkanPipelineCache();
VkResult Initialize(VkDescriptorSetLayout uniform_descriptor_set_layout,
VkDescriptorSetLayout texture_descriptor_set_layout,
VkDescriptorSetLayout vertex_descriptor_set_layout);
bool Initialize();
void Shutdown();
// Loads a shader from the cache, possibly translating it.
VulkanShader* LoadShader(xenos::ShaderType shader_type,
uint32_t guest_address, const uint32_t* host_address,
uint32_t dword_count);
const uint32_t* host_address, uint32_t dword_count);
// Analyze shader microcode on the translator thread.
void AnalyzeShaderUcode(Shader& shader) {
shader.AnalyzeUcode(ucode_disasm_buffer_);
}
// Configures a pipeline using the current render state and the given render
// pass. If a previously created pipeline is available, it will be used;
// otherwise a new one may be created. Any state that can be set dynamically
// in the command buffer is issued at this time.
// Returns whether the pipeline could be successfully created.
UpdateStatus ConfigurePipeline(VkCommandBuffer command_buffer,
const RenderState* render_state,
VulkanShader* vertex_shader,
VulkanShader* pixel_shader,
xenos::PrimitiveType primitive_type,
VkPipeline* pipeline_out);
// Retrieves the shader modification for the current state. The shader must
// have microcode analyzed.
SpirvShaderTranslator::Modification GetCurrentVertexShaderModification(
const Shader& shader,
Shader::HostVertexShaderType host_vertex_shader_type) const;
SpirvShaderTranslator::Modification GetCurrentPixelShaderModification(
const Shader& shader, uint32_t normalized_color_mask) const;
// Sets required dynamic state on the command buffer.
// Only state that has changed since the last call will be set unless
// full_update is true.
bool SetDynamicState(VkCommandBuffer command_buffer, bool full_update);
// Pipeline layout shared by all pipelines.
VkPipelineLayout pipeline_layout() const { return pipeline_layout_; }
// Clears all cached content.
void ClearCache();
bool EnsureShadersTranslated(VulkanShader::VulkanTranslation* vertex_shader,
VulkanShader::VulkanTranslation* pixel_shader);
// TODO(Triang3l): Return a deferred creation handle.
bool ConfigurePipeline(
VulkanShader::VulkanTranslation* vertex_shader,
VulkanShader::VulkanTranslation* pixel_shader,
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
reg::RB_DEPTHCONTROL normalized_depth_control,
uint32_t normalized_color_mask,
VulkanRenderTargetCache::RenderPassKey render_pass_key,
VkPipeline& pipeline_out,
const PipelineLayoutProvider*& pipeline_layout_out);
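  // Call-flow sketch (illustrative only, with hypothetical local names): after
  // the shader translations and the render pass key have been obtained, the
  // command processor may do roughly
  //   VkPipeline pipeline;
  //   const PipelineLayoutProvider* pipeline_layout;
  //   if (pipeline_cache.ConfigurePipeline(
  //           vertex_translation, pixel_translation, primitive_result,
  //           normalized_depth_control, normalized_color_mask,
  //           render_pass_key, pipeline, pipeline_layout)) {
  //     // Bind `pipeline` and the layout's VkPipelineLayout for the draw.
  //   }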
private:
// Creates or retrieves an existing pipeline for the currently configured
// state.
VkPipeline GetPipeline(const RenderState* render_state, uint64_t hash_key);
enum class PipelineGeometryShader : uint32_t {
kNone,
kRectangleList,
kQuadList,
};
bool TranslateShader(VulkanShader::VulkanTranslation& translation);
enum class PipelinePrimitiveTopology : uint32_t {
kPointList,
kLineList,
kLineStrip,
kTriangleList,
kTriangleStrip,
kTriangleFan,
kLineListWithAdjacency,
kPatchList,
};
void DumpShaderDisasmAMD(VkPipeline pipeline);
void DumpShaderDisasmNV(const VkGraphicsPipelineCreateInfo& info);
enum class PipelinePolygonMode : uint32_t {
kFill,
kLine,
kPoint,
};
// Gets a geometry shader used to emulate the given primitive type.
// Returns nullptr if the primitive doesn't need to be emulated.
VkShaderModule GetGeometryShader(xenos::PrimitiveType primitive_type,
bool is_line_mode);
enum class PipelineBlendFactor : uint32_t {
kZero,
kOne,
kSrcColor,
kOneMinusSrcColor,
kDstColor,
kOneMinusDstColor,
kSrcAlpha,
kOneMinusSrcAlpha,
kDstAlpha,
kOneMinusDstAlpha,
kConstantColor,
kOneMinusConstantColor,
kConstantAlpha,
kOneMinusConstantAlpha,
kSrcAlphaSaturate,
};
RegisterFile* register_file_ = nullptr;
const ui::vulkan::VulkanProvider& provider_;
// Update PipelineDescription::kVersion if anything is changed!
XEPACKEDSTRUCT(PipelineRenderTarget, {
PipelineBlendFactor src_color_blend_factor : 4; // 4
PipelineBlendFactor dst_color_blend_factor : 4; // 8
xenos::BlendOp color_blend_op : 3; // 11
PipelineBlendFactor src_alpha_blend_factor : 4; // 15
PipelineBlendFactor dst_alpha_blend_factor : 4; // 19
xenos::BlendOp alpha_blend_op : 3; // 22
uint32_t color_write_mask : 4; // 26
});
// Temporary storage for AnalyzeUcode calls.
XEPACKEDSTRUCT(PipelineDescription, {
uint64_t vertex_shader_hash;
uint64_t vertex_shader_modification;
// 0 if no pixel shader.
uint64_t pixel_shader_hash;
uint64_t pixel_shader_modification;
VulkanRenderTargetCache::RenderPassKey render_pass_key;
// Shader stages.
PipelineGeometryShader geometry_shader : 2; // 2
// Input assembly.
PipelinePrimitiveTopology primitive_topology : 3; // 5
uint32_t primitive_restart : 1; // 6
// Rasterization.
uint32_t depth_clamp_enable : 1; // 7
PipelinePolygonMode polygon_mode : 2; // 9
uint32_t cull_front : 1; // 10
uint32_t cull_back : 1; // 11
uint32_t front_face_clockwise : 1; // 12
// Depth / stencil.
uint32_t depth_write_enable : 1; // 13
xenos::CompareFunction depth_compare_op : 3; // 15
uint32_t stencil_test_enable : 1; // 17
xenos::StencilOp stencil_front_fail_op : 3; // 20
xenos::StencilOp stencil_front_pass_op : 3; // 23
xenos::StencilOp stencil_front_depth_fail_op : 3; // 26
xenos::CompareFunction stencil_front_compare_op : 3; // 29
xenos::StencilOp stencil_back_fail_op : 3; // 32
xenos::StencilOp stencil_back_pass_op : 3; // 3
xenos::StencilOp stencil_back_depth_fail_op : 3; // 6
xenos::CompareFunction stencil_back_compare_op : 3; // 9
// Filled only for the attachments present in the render pass object.
PipelineRenderTarget render_targets[xenos::kMaxColorRenderTargets];
// Including all the padding, for a stable hash.
PipelineDescription() { Reset(); }
PipelineDescription(const PipelineDescription& description) {
std::memcpy(this, &description, sizeof(*this));
}
PipelineDescription& operator=(const PipelineDescription& description) {
std::memcpy(this, &description, sizeof(*this));
return *this;
}
bool operator==(const PipelineDescription& description) const {
return std::memcmp(this, &description, sizeof(*this)) == 0;
}
void Reset() { std::memset(this, 0, sizeof(*this)); }
uint64_t GetHash() const { return XXH3_64bits(this, sizeof(*this)); }
struct Hasher {
size_t operator()(const PipelineDescription& description) const {
return size_t(description.GetHash());
}
};
});
struct Pipeline {
VkPipeline pipeline = VK_NULL_HANDLE;
// The layouts are owned by the VulkanCommandProcessor, and must not be
// destroyed by it while the pipeline cache is active.
const PipelineLayoutProvider* pipeline_layout;
Pipeline(const PipelineLayoutProvider* pipeline_layout_provider)
: pipeline_layout(pipeline_layout_provider) {}
};
// Description that can be passed from the command processor thread to the
// creation threads, with everything needed from caches pre-looked-up.
struct PipelineCreationArguments {
std::pair<const PipelineDescription, Pipeline>* pipeline;
const VulkanShader::VulkanTranslation* vertex_shader;
const VulkanShader::VulkanTranslation* pixel_shader;
VkShaderModule geometry_shader;
VkRenderPass render_pass;
};
union GeometryShaderKey {
uint32_t key;
struct {
PipelineGeometryShader type : 2;
uint32_t interpolator_count : 5;
uint32_t user_clip_plane_count : 3;
uint32_t user_clip_plane_cull : 1;
uint32_t has_vertex_kill_and : 1;
uint32_t has_point_size : 1;
uint32_t has_point_coordinates : 1;
};
GeometryShaderKey() : key(0) { static_assert_size(*this, sizeof(key)); }
struct Hasher {
size_t operator()(const GeometryShaderKey& key) const {
return std::hash<uint32_t>{}(key.key);
}
};
bool operator==(const GeometryShaderKey& other_key) const {
return key == other_key.key;
}
bool operator!=(const GeometryShaderKey& other_key) const {
return !(*this == other_key);
}
};
// Can be called from multiple threads.
bool TranslateAnalyzedShader(SpirvShaderTranslator& translator,
VulkanShader::VulkanTranslation& translation);
void WritePipelineRenderTargetDescription(
reg::RB_BLENDCONTROL blend_control, uint32_t write_mask,
PipelineRenderTarget& render_target_out) const;
bool GetCurrentStateDescription(
const VulkanShader::VulkanTranslation* vertex_shader,
const VulkanShader::VulkanTranslation* pixel_shader,
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
reg::RB_DEPTHCONTROL normalized_depth_control,
uint32_t normalized_color_mask,
VulkanRenderTargetCache::RenderPassKey render_pass_key,
PipelineDescription& description_out) const;
// Whether the pipeline for the given description is supported by the device.
bool ArePipelineRequirementsMet(const PipelineDescription& description) const;
static bool GetGeometryShaderKey(PipelineGeometryShader geometry_shader_type,
GeometryShaderKey& key_out);
VkShaderModule GetGeometryShader(GeometryShaderKey key);
// Can be called from creation threads - all needed data must be fully set up
// at the point of the call: shaders must be translated, pipeline layout and
// render pass objects must be available.
bool EnsurePipelineCreated(
const PipelineCreationArguments& creation_arguments);
VulkanCommandProcessor& command_processor_;
const RegisterFile& register_file_;
VulkanRenderTargetCache& render_target_cache_;
VkShaderStageFlags guest_shader_vertex_stages_;
// Temporary storage for AnalyzeUcode calls on the processor thread.
StringBuffer ucode_disasm_buffer_;
// Reusable shader translator.
std::unique_ptr<ShaderTranslator> shader_translator_ = nullptr;
// Disassembler used to get the SPIRV disasm. Only used in debug.
xe::ui::spirv::SpirvDisassembler disassembler_;
// All loaded shaders mapped by their guest hash key.
std::unordered_map<uint64_t, VulkanShader*> shader_map_;
// Reusable shader translator on the command processor thread.
std::unique_ptr<SpirvShaderTranslator> shader_translator_;
// Vulkan pipeline cache, which in theory helps us out.
// This can be serialized to disk and reused, if we want.
VkPipelineCache pipeline_cache_ = nullptr;
// Layout used for all pipelines describing our uniforms, textures, and push
// constants.
VkPipelineLayout pipeline_layout_ = nullptr;
struct LayoutUID {
size_t uid;
size_t vector_span_offset;
size_t vector_span_length;
};
std::mutex layouts_mutex_;
// Texture binding layouts of different shaders, for obtaining layout UIDs.
std::vector<VulkanShader::TextureBinding> texture_binding_layouts_;
// Map of texture binding layouts used by shaders, for obtaining UIDs. Keys
// are XXH3 hashes of layouts, values need manual collision resolution using
// layout_vector_offset:layout_length of texture_binding_layouts_.
std::unordered_multimap<uint64_t, LayoutUID,
xe::hash::IdentityHasher<uint64_t>>
texture_binding_layout_map_;
// Shared geometry shaders.
struct {
VkShaderModule line_quad_list;
VkShaderModule point_list;
VkShaderModule quad_list;
VkShaderModule rect_list;
} geometry_shaders_;
// Ucode hash -> shader.
std::unordered_map<uint64_t, VulkanShader*,
xe::hash::IdentityHasher<uint64_t>>
shaders_;
// Shared dummy pixel shader.
VkShaderModule dummy_pixel_shader_;
// Geometry shaders for Xenos primitive types not supported by Vulkan.
// Stores VK_NULL_HANDLE if failed to create.
std::unordered_map<GeometryShaderKey, VkShaderModule,
GeometryShaderKey::Hasher>
geometry_shaders_;
// Hash state used to incrementally produce pipeline hashes during update.
// By the time the full update pass has run the hash will represent the
// current state in a way that can uniquely identify the produced VkPipeline.
XXH3_state_t hash_state_;
// All previously generated pipelines mapped by hash.
std::unordered_map<uint64_t, VkPipeline> cached_pipelines_;
std::unordered_map<PipelineDescription, Pipeline, PipelineDescription::Hasher>
pipelines_;
// Previously used pipeline. This matches our current state settings
// and allows us to quickly(ish) reuse the pipeline if no registers have
// changed.
VkPipeline current_pipeline_ = nullptr;
private:
UpdateStatus UpdateState(VulkanShader* vertex_shader,
VulkanShader* pixel_shader,
xenos::PrimitiveType primitive_type);
UpdateStatus UpdateRenderTargetState();
UpdateStatus UpdateShaderStages(VulkanShader* vertex_shader,
VulkanShader* pixel_shader,
xenos::PrimitiveType primitive_type);
UpdateStatus UpdateVertexInputState(VulkanShader* vertex_shader);
UpdateStatus UpdateInputAssemblyState(xenos::PrimitiveType primitive_type);
UpdateStatus UpdateViewportState();
UpdateStatus UpdateRasterizationState(xenos::PrimitiveType primitive_type);
UpdateStatus UpdateMultisampleState();
UpdateStatus UpdateDepthStencilState();
UpdateStatus UpdateColorBlendState();
bool SetShadowRegister(uint32_t* dest, uint32_t register_name);
bool SetShadowRegister(float* dest, uint32_t register_name);
bool SetShadowRegisterArray(uint32_t* dest, uint32_t num,
uint32_t register_name);
struct UpdateRenderTargetsRegisters {
uint32_t rb_modecontrol;
reg::RB_SURFACE_INFO rb_surface_info;
reg::RB_COLOR_INFO rb_color_info;
reg::RB_DEPTH_INFO rb_depth_info;
reg::RB_COLOR_INFO rb_color1_info;
reg::RB_COLOR_INFO rb_color2_info;
reg::RB_COLOR_INFO rb_color3_info;
uint32_t rb_color_mask;
uint32_t rb_depthcontrol;
uint32_t rb_stencilrefmask;
UpdateRenderTargetsRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_render_targets_regs_;
struct UpdateShaderStagesRegisters {
xenos::PrimitiveType primitive_type;
uint32_t pa_su_sc_mode_cntl;
reg::SQ_PROGRAM_CNTL sq_program_cntl;
VulkanShader* vertex_shader;
VulkanShader* pixel_shader;
UpdateShaderStagesRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_shader_stages_regs_;
VkPipelineShaderStageCreateInfo update_shader_stages_info_[3];
uint32_t update_shader_stages_stage_count_ = 0;
struct UpdateVertexInputStateRegisters {
VulkanShader* vertex_shader;
UpdateVertexInputStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_vertex_input_state_regs_;
VkPipelineVertexInputStateCreateInfo update_vertex_input_state_info_;
VkVertexInputBindingDescription update_vertex_input_state_binding_descrs_[32];
VkVertexInputAttributeDescription
update_vertex_input_state_attrib_descrs_[96];
struct UpdateInputAssemblyStateRegisters {
xenos::PrimitiveType primitive_type;
uint32_t pa_su_sc_mode_cntl;
uint32_t multi_prim_ib_reset_index;
UpdateInputAssemblyStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_input_assembly_state_regs_;
VkPipelineInputAssemblyStateCreateInfo update_input_assembly_state_info_;
struct UpdateViewportStateRegisters {
// uint32_t pa_cl_clip_cntl;
uint32_t rb_surface_info;
uint32_t pa_cl_vte_cntl;
uint32_t pa_su_sc_mode_cntl;
uint32_t pa_sc_window_offset;
uint32_t pa_sc_window_scissor_tl;
uint32_t pa_sc_window_scissor_br;
float pa_cl_vport_xoffset;
float pa_cl_vport_yoffset;
float pa_cl_vport_zoffset;
float pa_cl_vport_xscale;
float pa_cl_vport_yscale;
float pa_cl_vport_zscale;
UpdateViewportStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_viewport_state_regs_;
VkPipelineViewportStateCreateInfo update_viewport_state_info_;
struct UpdateRasterizationStateRegisters {
xenos::PrimitiveType primitive_type;
uint32_t pa_cl_clip_cntl;
uint32_t pa_su_sc_mode_cntl;
uint32_t pa_sc_screen_scissor_tl;
uint32_t pa_sc_screen_scissor_br;
uint32_t pa_sc_viz_query;
uint32_t pa_su_poly_offset_enable;
uint32_t multi_prim_ib_reset_index;
UpdateRasterizationStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_rasterization_state_regs_;
VkPipelineRasterizationStateCreateInfo update_rasterization_state_info_;
struct UpdateMultisampleStateRegisters {
uint32_t pa_sc_aa_config;
uint32_t pa_su_sc_mode_cntl;
uint32_t rb_surface_info;
UpdateMultisampleStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_multisample_state_regs_;
VkPipelineMultisampleStateCreateInfo update_multisample_state_info_;
struct UpdateDepthStencilStateRegisters {
uint32_t rb_depthcontrol;
uint32_t rb_stencilrefmask;
UpdateDepthStencilStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_depth_stencil_state_regs_;
VkPipelineDepthStencilStateCreateInfo update_depth_stencil_state_info_;
struct UpdateColorBlendStateRegisters {
uint32_t rb_color_mask;
uint32_t rb_blendcontrol[4];
uint32_t rb_modecontrol;
UpdateColorBlendStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} update_color_blend_state_regs_;
VkPipelineColorBlendStateCreateInfo update_color_blend_state_info_;
VkPipelineColorBlendAttachmentState update_color_blend_attachment_states_[4];
struct SetDynamicStateRegisters {
uint32_t pa_sc_window_offset;
uint32_t pa_su_sc_mode_cntl;
uint32_t pa_sc_window_scissor_tl;
uint32_t pa_sc_window_scissor_br;
uint32_t rb_surface_info;
uint32_t pa_su_sc_vtx_cntl;
// Bias is in Vulkan units because the depth format may potentially affect it.
float pa_su_poly_offset_scale;
float pa_su_poly_offset_offset;
uint32_t pa_cl_vte_cntl;
float pa_cl_vport_xoffset;
float pa_cl_vport_yoffset;
float pa_cl_vport_zoffset;
float pa_cl_vport_xscale;
float pa_cl_vport_yscale;
float pa_cl_vport_zscale;
float rb_blend_rgba[4];
uint32_t rb_stencilrefmask;
reg::SQ_PROGRAM_CNTL sq_program_cntl;
uint32_t sq_context_misc;
uint32_t rb_colorcontrol;
reg::RB_COLOR_INFO rb_color_info;
reg::RB_COLOR_INFO rb_color1_info;
reg::RB_COLOR_INFO rb_color2_info;
reg::RB_COLOR_INFO rb_color3_info;
float rb_alpha_ref;
uint32_t pa_su_point_size;
SetDynamicStateRegisters() { Reset(); }
void Reset() { std::memset(this, 0, sizeof(*this)); }
} set_dynamic_state_registers_;
// Previously used pipeline, to avoid lookups if the state wasn't changed.
const std::pair<const PipelineDescription, Pipeline>* last_pipeline_ =
nullptr;
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_VULKAN_PIPELINE_CACHE_H_
#endif // XENIA_GPU_VULKAN_VULKAN_PIPELINE_STATE_CACHE_H_

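The SetShadowRegister overloads declared above drive the dirty tracking for all of the Update* stages: each copies the current guest register value into the CPU-side shadow structs and reports whether it changed, so pipeline state is only rebuilt when the relevant registers actually differ. A minimal sketch of that pattern, assuming a register_file_ member with a values array as used elsewhere in the GPU code (the class and member names here are illustrative, not taken from this diff):

bool PipelineCache::SetShadowRegister(uint32_t* dest, uint32_t register_name) {
  uint32_t value = register_file_->values[register_name].u32;
  if (*dest == value) {
    // Unchanged - state derived from this register is still valid.
    return false;
  }
  *dest = value;
  // Changed - the calling Update* stage must mark its state block dirty.
  return true;
}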
View File

@ -0,0 +1,229 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2021 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/vulkan_primitive_processor.h"
#include <algorithm>
#include <cstdint>
#include <memory>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
#include "xenia/ui/vulkan/vulkan_util.h"
namespace xe {
namespace gpu {
namespace vulkan {
VulkanPrimitiveProcessor::~VulkanPrimitiveProcessor() { Shutdown(true); }
bool VulkanPrimitiveProcessor::Initialize() {
// TODO(Triang3l): fullDrawIndexUint32 feature check and indirect index fetch.
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
const VkPhysicalDevicePortabilitySubsetFeaturesKHR*
device_portability_subset_features =
provider.device_portability_subset_features();
if (!InitializeCommon(true,
!device_portability_subset_features ||
device_portability_subset_features->triangleFans,
false, device_features.geometryShader)) {
Shutdown();
return false;
}
frame_index_buffer_pool_ =
std::make_unique<ui::vulkan::VulkanUploadBufferPool>(
command_processor_.GetVulkanProvider(),
VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
std::max(size_t(kMinRequiredConvertedIndexBufferSize),
ui::GraphicsUploadBufferPool::kDefaultPageSize));
return true;
}
void VulkanPrimitiveProcessor::Shutdown(bool from_destructor) {
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
frame_index_buffers_.clear();
frame_index_buffer_pool_.reset();
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
builtin_index_buffer_upload_);
ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
builtin_index_buffer_upload_memory_);
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
builtin_index_buffer_);
ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
builtin_index_buffer_memory_);
if (!from_destructor) {
ShutdownCommon();
}
}
void VulkanPrimitiveProcessor::CompletedSubmissionUpdated() {
if (builtin_index_buffer_upload_ != VK_NULL_HANDLE &&
command_processor_.GetCompletedSubmission() >=
builtin_index_buffer_upload_submission_) {
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
builtin_index_buffer_upload_);
ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
builtin_index_buffer_upload_memory_);
}
}
void VulkanPrimitiveProcessor::BeginSubmission() {
if (builtin_index_buffer_upload_ != VK_NULL_HANDLE &&
builtin_index_buffer_upload_submission_ == UINT64_MAX) {
// No need to submit deferred barriers - builtin_index_buffer_ has never
// been used yet, and builtin_index_buffer_upload_ is written before
// submitting commands reading it.
command_processor_.EndRenderPass();
DeferredCommandBuffer& command_buffer =
command_processor_.deferred_command_buffer();
VkBufferCopy* copy_region = command_buffer.CmdCopyBufferEmplace(
builtin_index_buffer_upload_, builtin_index_buffer_, 1);
copy_region->srcOffset = 0;
copy_region->dstOffset = 0;
copy_region->size = builtin_index_buffer_size_;
command_processor_.PushBufferMemoryBarrier(
builtin_index_buffer_, 0, VK_WHOLE_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
VK_ACCESS_INDEX_READ_BIT);
builtin_index_buffer_upload_submission_ =
command_processor_.GetCurrentSubmission();
}
}
void VulkanPrimitiveProcessor::BeginFrame() {
frame_index_buffer_pool_->Reclaim(command_processor_.GetCompletedFrame());
}
void VulkanPrimitiveProcessor::EndSubmission() {
frame_index_buffer_pool_->FlushWrites();
}
void VulkanPrimitiveProcessor::EndFrame() {
ClearPerFrameCache();
frame_index_buffers_.clear();
}
bool VulkanPrimitiveProcessor::InitializeBuiltin16BitIndexBuffer(
uint32_t index_count, std::function<void(uint16_t*)> fill_callback) {
assert_not_zero(index_count);
assert_true(builtin_index_buffer_ == VK_NULL_HANDLE);
assert_true(builtin_index_buffer_memory_ == VK_NULL_HANDLE);
assert_true(builtin_index_buffer_upload_ == VK_NULL_HANDLE);
assert_true(builtin_index_buffer_upload_memory_ == VK_NULL_HANDLE);
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
builtin_index_buffer_size_ = VkDeviceSize(sizeof(uint16_t) * index_count);
if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
provider, builtin_index_buffer_size_,
VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
ui::vulkan::util::MemoryPurpose::kDeviceLocal, builtin_index_buffer_,
builtin_index_buffer_memory_)) {
XELOGE(
"Vulkan primitive processor: Failed to create the built-in index "
"buffer GPU resource with {} 16-bit indices",
index_count);
return false;
}
uint32_t upload_memory_type;
if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
provider, builtin_index_buffer_size_,
VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
ui::vulkan::util::MemoryPurpose::kUpload,
builtin_index_buffer_upload_, builtin_index_buffer_upload_memory_,
&upload_memory_type)) {
XELOGE(
"Vulkan primitive processor: Failed to create the built-in index "
"buffer upload resource with {} 16-bit indices",
index_count);
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
builtin_index_buffer_);
ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
builtin_index_buffer_memory_);
return false;
}
void* mapping;
if (dfn.vkMapMemory(device, builtin_index_buffer_upload_memory_, 0,
VK_WHOLE_SIZE, 0, &mapping) != VK_SUCCESS) {
XELOGE(
"Vulkan primitive processor: Failed to map the built-in index buffer "
"upload resource with {} 16-bit indices",
index_count);
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
builtin_index_buffer_upload_);
ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
builtin_index_buffer_upload_memory_);
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
builtin_index_buffer_);
ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
builtin_index_buffer_memory_);
return false;
}
fill_callback(reinterpret_cast<uint16_t*>(mapping));
ui::vulkan::util::FlushMappedMemoryRange(
provider, builtin_index_buffer_upload_memory_, upload_memory_type);
dfn.vkUnmapMemory(device, builtin_index_buffer_upload_memory_);
// Schedule uploading in the first submission.
builtin_index_buffer_upload_submission_ = UINT64_MAX;
return true;
}
void* VulkanPrimitiveProcessor::RequestHostConvertedIndexBufferForCurrentFrame(
xenos::IndexFormat format, uint32_t index_count, bool coalign_for_simd,
uint32_t coalignment_original_address, size_t& backend_handle_out) {
size_t index_size = format == xenos::IndexFormat::kInt16 ? sizeof(uint16_t)
: sizeof(uint32_t);
VkBuffer buffer;
VkDeviceSize offset;
uint8_t* mapping = frame_index_buffer_pool_->Request(
command_processor_.GetCurrentFrame(),
index_size * index_count +
(coalign_for_simd ? XE_GPU_PRIMITIVE_PROCESSOR_SIMD_SIZE : 0),
index_size, buffer, offset);
if (!mapping) {
return nullptr;
}
if (coalign_for_simd) {
ptrdiff_t coalignment_offset =
GetSimdCoalignmentOffset(mapping, coalignment_original_address);
mapping += coalignment_offset;
offset = VkDeviceSize(offset + coalignment_offset);
}
backend_handle_out = frame_index_buffers_.size();
frame_index_buffers_.emplace_back(buffer, offset);
return mapping;
}
} // namespace vulkan
} // namespace gpu
} // namespace xe

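When coalign_for_simd is set, RequestHostConvertedIndexBufferForCurrentFrame over-allocates by one SIMD vector and shifts the returned mapping so the host address is congruent to the guest address modulo the SIMD width, letting the conversion code use the same aligned vector loads it would on the guest layout. A sketch of what a coalignment helper like GetSimdCoalignmentOffset (presumably provided by the shared PrimitiveProcessor base) plausibly computes, for illustration only:

ptrdiff_t GetSimdCoalignmentOffsetSketch(const uint8_t* host_mapping,
                                         uint32_t guest_original_address) {
  constexpr size_t kSimdSize = XE_GPU_PRIMITIVE_PROCESSOR_SIMD_SIZE;
  size_t host_misalignment =
      size_t(reinterpret_cast<uintptr_t>(host_mapping)) % kSimdSize;
  size_t guest_misalignment = size_t(guest_original_address) % kSimdSize;
  // Shift forward into the over-allocated tail so both addresses end up with
  // the same misalignment relative to the SIMD vector size.
  return ptrdiff_t((guest_misalignment + kSimdSize - host_misalignment) %
                   kSimdSize);
}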
View File

@ -0,0 +1,92 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2021 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_VULKAN_PRIMITIVE_PROCESSOR_H_
#define XENIA_GPU_VULKAN_VULKAN_PRIMITIVE_PROCESSOR_H_
#include <cstdint>
#include <deque>
#include <functional>
#include <memory>
#include <utility>
#include "xenia/base/assert.h"
#include "xenia/gpu/primitive_processor.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
namespace xe {
namespace gpu {
namespace vulkan {
class VulkanCommandProcessor;
class VulkanPrimitiveProcessor final : public PrimitiveProcessor {
public:
VulkanPrimitiveProcessor(const RegisterFile& register_file, Memory& memory,
TraceWriter& trace_writer,
SharedMemory& shared_memory,
VulkanCommandProcessor& command_processor)
: PrimitiveProcessor(register_file, memory, trace_writer, shared_memory),
command_processor_(command_processor) {}
~VulkanPrimitiveProcessor();
bool Initialize();
void Shutdown(bool from_destructor = false);
void ClearCache() { frame_index_buffer_pool_->ClearCache(); }
void CompletedSubmissionUpdated();
void BeginSubmission();
void BeginFrame();
void EndSubmission();
void EndFrame();
std::pair<VkBuffer, VkDeviceSize> GetBuiltinIndexBuffer(size_t handle) const {
assert_not_null(builtin_index_buffer_);
return std::make_pair(
builtin_index_buffer_,
VkDeviceSize(GetBuiltinIndexBufferOffsetBytes(handle)));
}
std::pair<VkBuffer, VkDeviceSize> GetConvertedIndexBuffer(
size_t handle) const {
return frame_index_buffers_[handle];
}
protected:
bool InitializeBuiltin16BitIndexBuffer(
uint32_t index_count,
std::function<void(uint16_t*)> fill_callback) override;
void* RequestHostConvertedIndexBufferForCurrentFrame(
xenos::IndexFormat format, uint32_t index_count, bool coalign_for_simd,
uint32_t coalignment_original_address,
size_t& backend_handle_out) override;
private:
VulkanCommandProcessor& command_processor_;
VkDeviceSize builtin_index_buffer_size_ = 0;
VkBuffer builtin_index_buffer_ = VK_NULL_HANDLE;
VkDeviceMemory builtin_index_buffer_memory_ = VK_NULL_HANDLE;
// Temporary buffer, copied at the beginning of the first submission for
// uploading to builtin_index_buffer_, and destroyed once the submission in
// which it was uploaded has completed.
VkBuffer builtin_index_buffer_upload_ = VK_NULL_HANDLE;
VkDeviceMemory builtin_index_buffer_upload_memory_ = VK_NULL_HANDLE;
// UINT64_MAX means not uploaded yet and needs uploading in the first
// submission (if the upload buffer exists at all).
uint64_t builtin_index_buffer_upload_submission_ = UINT64_MAX;
std::unique_ptr<ui::vulkan::VulkanUploadBufferPool> frame_index_buffer_pool_;
// Indexed by the backend handles.
std::deque<std::pair<VkBuffer, VkDeviceSize>> frame_index_buffers_;
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_VULKAN_PRIMITIVE_PROCESSOR_H_

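At draw time, the pairs returned by these accessors map directly onto index buffer binding; a hypothetical call site in the command processor (using the dfn device-function table seen elsewhere in the backend) could look like:

std::pair<VkBuffer, VkDeviceSize> index_buffer =
    primitive_processor.GetConvertedIndexBuffer(backend_handle);
dfn.vkCmdBindIndexBuffer(command_buffer, index_buffer.first,
                         index_buffer.second, VK_INDEX_TYPE_UINT16);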
File diff suppressed because it is too large

View File

@ -0,0 +1,905 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_
#define XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_
#include <array>
#include <cstdint>
#include <cstring>
#include <functional>
#include <memory>
#include <unordered_map>
#include "xenia/base/hash.h"
#include "xenia/base/xxhash.h"
#include "xenia/gpu/render_target_cache.h"
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
#include "xenia/gpu/vulkan/vulkan_texture_cache.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/single_layout_descriptor_set_pool.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
namespace xe {
namespace gpu {
namespace vulkan {
class VulkanCommandProcessor;
class VulkanRenderTargetCache final : public RenderTargetCache {
public:
union RenderPassKey {
struct {
// If emulating 2x as 4x, this is still 2x for simplicity of using this
// field to make guest-related decisions. Render pass objects are not very
// expensive, and their dependencies can't be shared between 2x-as-4x and
// true 4x MSAA passes (framebuffers because render target cache render
// targets are different for 2x and 4x guest MSAA, pipelines because the
// sample mask will have 2 samples excluded for 2x-as-4x).
xenos::MsaaSamples msaa_samples : xenos::kMsaaSamplesBits; // 2
// << 0 is depth, << 1...4 is color.
uint32_t depth_and_color_used : 1 + xenos::kMaxColorRenderTargets; // 7
// 0 for unused attachments.
// If VK_FORMAT_D24_UNORM_S8_UINT is not supported, this must be kD24FS8
// even for kD24S8.
xenos::DepthRenderTargetFormat depth_format
: xenos::kDepthRenderTargetFormatBits; // 8
// Linear or sRGB included if host sRGB is used.
xenos::ColorRenderTargetFormat color_0_view_format
: xenos::kColorRenderTargetFormatBits; // 12
xenos::ColorRenderTargetFormat color_1_view_format
: xenos::kColorRenderTargetFormatBits; // 16
xenos::ColorRenderTargetFormat color_2_view_format
: xenos::kColorRenderTargetFormatBits; // 20
xenos::ColorRenderTargetFormat color_3_view_format
: xenos::kColorRenderTargetFormatBits; // 24
uint32_t color_rts_use_transfer_formats : 1; // 25
};
uint32_t key = 0;
struct Hasher {
size_t operator()(const RenderPassKey& key) const {
return std::hash<uint32_t>{}(key.key);
}
};
bool operator==(const RenderPassKey& other_key) const {
return key == other_key.key;
}
bool operator!=(const RenderPassKey& other_key) const {
return !(*this == other_key);
}
bool operator<(const RenderPassKey& other_key) const {
return key < other_key.key;
}
};
static_assert_size(RenderPassKey, sizeof(uint32_t));
struct Framebuffer {
VkFramebuffer framebuffer;
VkExtent2D host_extent;
Framebuffer(VkFramebuffer framebuffer, const VkExtent2D& host_extent)
: framebuffer(framebuffer), host_extent(host_extent) {}
};
VulkanRenderTargetCache(const RegisterFile& register_file,
const Memory& memory, TraceWriter& trace_writer,
uint32_t draw_resolution_scale_x,
uint32_t draw_resolution_scale_y,
VulkanCommandProcessor& command_processor);
~VulkanRenderTargetCache();
// Transient descriptor set layouts must be initialized in the command
// processor.
bool Initialize();
void Shutdown(bool from_destructor = false);
void ClearCache() override;
void CompletedSubmissionUpdated();
void EndSubmission();
// TODO(Triang3l): Fragment shader interlock.
Path GetPath() const override { return Path::kHostRenderTargets; }
// Performs the resolve to a shared memory area according to the current
// register values, and also clears the render targets if needed. Must be
// called within a frame.
bool Resolve(const Memory& memory, VulkanSharedMemory& shared_memory,
VulkanTextureCache& texture_cache, uint32_t& written_address_out,
uint32_t& written_length_out);
bool Update(bool is_rasterization_done,
reg::RB_DEPTHCONTROL normalized_depth_control,
uint32_t normalized_color_mask,
const Shader& vertex_shader) override;
// Binding information for the last successful update.
RenderPassKey last_update_render_pass_key() const {
return last_update_render_pass_key_;
}
VkRenderPass last_update_render_pass() const {
return last_update_render_pass_;
}
const Framebuffer* last_update_framebuffer() const {
return last_update_framebuffer_;
}
// Using R16G16[B16A16]_SNORM, which are -1...1, not the needed -32...32.
// Persistent data doesn't depend on this, so it can be overridden by
// per-game configuration.
bool IsFixedRG16TruncatedToMinus1To1() const {
// TODO(Triang3l): Not float16 condition.
return GetPath() == Path::kHostRenderTargets &&
!cvars::snorm16_render_target_full_range;
}
bool IsFixedRGBA16TruncatedToMinus1To1() const {
// TODO(Triang3l): Not float16 condition.
return GetPath() == Path::kHostRenderTargets &&
!cvars::snorm16_render_target_full_range;
}
bool depth_unorm24_vulkan_format_supported() const {
return depth_unorm24_vulkan_format_supported_;
}
bool depth_float24_round() const { return depth_float24_round_; }
bool msaa_2x_attachments_supported() const {
return msaa_2x_attachments_supported_;
}
bool msaa_2x_no_attachments_supported() const {
return msaa_2x_no_attachments_supported_;
}
bool IsMsaa2xSupported(bool subpass_has_attachments) const {
return subpass_has_attachments ? msaa_2x_attachments_supported_
: msaa_2x_no_attachments_supported_;
}
// Returns the render pass object, or VK_NULL_HANDLE if failed to create.
// A render pass managed by the render target cache may be ended and resumed
// at any time (to allow for things like copying and texture loading).
VkRenderPass GetRenderPass(RenderPassKey key);
VkFormat GetDepthVulkanFormat(xenos::DepthRenderTargetFormat format) const;
VkFormat GetColorVulkanFormat(xenos::ColorRenderTargetFormat format) const;
VkFormat GetColorOwnershipTransferVulkanFormat(
xenos::ColorRenderTargetFormat format,
bool* is_integer_out = nullptr) const;
protected:
uint32_t GetMaxRenderTargetWidth() const override;
uint32_t GetMaxRenderTargetHeight() const override;
RenderTarget* CreateRenderTarget(RenderTargetKey key) override;
bool IsHostDepthEncodingDifferent(
xenos::DepthRenderTargetFormat format) const override;
private:
enum class EdramBufferUsage {
// There's no need for combined fragment and compute usages.
// With host render targets, the usual usage sequence is as follows:
// - Optionally compute writes - host depth copy storing for EDRAM range
// ownership transfers.
// - Optionally fragment reads - host depth copy storing for EDRAM range
// ownership transfers.
// - Compute writes - copying from host render targets during resolving.
// - Compute reads - writing to the shared memory during resolving.
// With the render backend implementation based on fragment shader
// interlocks, it's:
// - Fragment reads and writes - depth / stencil and color operations.
// - Compute reads - writing to the shared memory during resolving.
// So, fragment reads and compute reads normally don't follow each other,
// and there's no need to amortize the cost of a read > read barrier in an
// exceptional situation by using a wider barrier in the normal scenario.
// Host depth copy storing.
kFragmentRead,
// Fragment shader interlock depth / stencil and color operations.
kFragmentReadWrite,
// Resolve - copying to the shared memory.
kComputeRead,
// Resolve - copying from host render targets.
kComputeWrite,
// Trace recording.
kTransferRead,
// Trace playback.
kTransferWrite,
};
enum class EdramBufferModificationStatus {
// The values are ordered by how strong the barrier conditions are.
// No uncommitted shader writes.
kUnmodified,
// Need to commit before the next fragment shader interlock usage with
// overlap.
kViaFragmentShaderInterlock,
// Need to commit before any next fragment shader interlock usage.
kViaUnordered,
};
enum ResolveCopyDescriptorSet : uint32_t {
// Never changes.
kResolveCopyDescriptorSetEdram,
// Shared memory or a region in it.
kResolveCopyDescriptorSetDest,
kResolveCopyDescriptorSetCount,
};
struct ResolveCopyShaderCode {
const uint32_t* unscaled;
size_t unscaled_size_bytes;
const uint32_t* scaled;
size_t scaled_size_bytes;
};
static void GetEdramBufferUsageMasks(EdramBufferUsage usage,
VkPipelineStageFlags& stage_mask_out,
VkAccessFlags& access_mask_out);
void UseEdramBuffer(EdramBufferUsage new_usage);
void MarkEdramBufferModified(
EdramBufferModificationStatus modification_status =
EdramBufferModificationStatus::kViaUnordered);
void CommitEdramBufferShaderWrites(
EdramBufferModificationStatus commit_status =
EdramBufferModificationStatus::kViaFragmentShaderInterlock);
VulkanCommandProcessor& command_processor_;
TraceWriter& trace_writer_;
// Accessible in fragment and compute shaders.
VkDescriptorSetLayout descriptor_set_layout_storage_buffer_ = VK_NULL_HANDLE;
VkDescriptorSetLayout descriptor_set_layout_sampled_image_ = VK_NULL_HANDLE;
VkDescriptorSetLayout descriptor_set_layout_sampled_image_x2_ =
VK_NULL_HANDLE;
std::unique_ptr<ui::vulkan::SingleLayoutDescriptorSetPool>
descriptor_set_pool_sampled_image_;
std::unique_ptr<ui::vulkan::SingleLayoutDescriptorSetPool>
descriptor_set_pool_sampled_image_x2_;
VkDeviceMemory edram_buffer_memory_ = VK_NULL_HANDLE;
VkBuffer edram_buffer_ = VK_NULL_HANDLE;
EdramBufferUsage edram_buffer_usage_;
EdramBufferModificationStatus edram_buffer_modification_status_ =
EdramBufferModificationStatus::kUnmodified;
VkDescriptorPool edram_storage_buffer_descriptor_pool_ = VK_NULL_HANDLE;
VkDescriptorSet edram_storage_buffer_descriptor_set_;
VkPipelineLayout resolve_copy_pipeline_layout_ = VK_NULL_HANDLE;
static const ResolveCopyShaderCode
kResolveCopyShaders[size_t(draw_util::ResolveCopyShaderIndex::kCount)];
std::array<VkPipeline, size_t(draw_util::ResolveCopyShaderIndex::kCount)>
resolve_copy_pipelines_{};
// RenderPassKey::key -> VkRenderPass.
// VK_NULL_HANDLE if failed to create.
std::unordered_map<uint32_t, VkRenderPass> render_passes_;
// For host render targets.
// Can only be destroyed when framebuffers referencing it are destroyed!
class VulkanRenderTarget final : public RenderTarget {
public:
static constexpr VkPipelineStageFlags kColorDrawStageMask =
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
static constexpr VkAccessFlags kColorDrawAccessMask =
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
static constexpr VkImageLayout kColorDrawLayout =
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
static constexpr VkPipelineStageFlags kDepthDrawStageMask =
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
static constexpr VkAccessFlags kDepthDrawAccessMask =
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
static constexpr VkImageLayout kDepthDrawLayout =
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
// Takes ownership of the Vulkan objects passed to the constructor.
VulkanRenderTarget(RenderTargetKey key,
VulkanRenderTargetCache& render_target_cache,
VkImage image, VkDeviceMemory memory,
VkImageView view_depth_color,
VkImageView view_depth_stencil, VkImageView view_stencil,
VkImageView view_srgb,
VkImageView view_color_transfer_separate,
size_t descriptor_set_index_transfer_source)
: RenderTarget(key),
render_target_cache_(render_target_cache),
image_(image),
memory_(memory),
view_depth_color_(view_depth_color),
view_depth_stencil_(view_depth_stencil),
view_stencil_(view_stencil),
view_srgb_(view_srgb),
view_color_transfer_separate_(view_color_transfer_separate),
descriptor_set_index_transfer_source_(
descriptor_set_index_transfer_source) {}
~VulkanRenderTarget();
VkImage image() const { return image_; }
VkImageView view_depth_color() const { return view_depth_color_; }
VkImageView view_depth_stencil() const { return view_depth_stencil_; }
VkImageView view_color_transfer_separate() const {
return view_color_transfer_separate_;
}
VkImageView view_color_transfer() const {
return view_color_transfer_separate_ != VK_NULL_HANDLE
? view_color_transfer_separate_
: view_depth_color_;
}
VkDescriptorSet GetDescriptorSetTransferSource() const {
ui::vulkan::SingleLayoutDescriptorSetPool& descriptor_set_pool =
key().is_depth
? *render_target_cache_.descriptor_set_pool_sampled_image_x2_
: *render_target_cache_.descriptor_set_pool_sampled_image_;
return descriptor_set_pool.Get(descriptor_set_index_transfer_source_);
}
static void GetDrawUsage(bool is_depth,
VkPipelineStageFlags* stage_mask_out,
VkAccessFlags* access_mask_out,
VkImageLayout* layout_out) {
if (stage_mask_out) {
*stage_mask_out = is_depth ? kDepthDrawStageMask : kColorDrawStageMask;
}
if (access_mask_out) {
*access_mask_out =
is_depth ? kDepthDrawAccessMask : kColorDrawAccessMask;
}
if (layout_out) {
*layout_out = is_depth ? kDepthDrawLayout : kColorDrawLayout;
}
}
void GetDrawUsage(VkPipelineStageFlags* stage_mask_out,
VkAccessFlags* access_mask_out,
VkImageLayout* layout_out) const {
GetDrawUsage(key().is_depth, stage_mask_out, access_mask_out, layout_out);
}
VkPipelineStageFlags current_stage_mask() const {
return current_stage_mask_;
}
VkAccessFlags current_access_mask() const { return current_access_mask_; }
VkImageLayout current_layout() const { return current_layout_; }
void SetUsage(VkPipelineStageFlags stage_mask, VkAccessFlags access_mask,
VkImageLayout layout) {
current_stage_mask_ = stage_mask;
current_access_mask_ = access_mask;
current_layout_ = layout;
}
uint32_t temporary_sort_index() const { return temporary_sort_index_; }
void SetTemporarySortIndex(uint32_t index) {
temporary_sort_index_ = index;
}
private:
VulkanRenderTargetCache& render_target_cache_;
VkImage image_;
VkDeviceMemory memory_;
// TODO(Triang3l): Per-format drawing views for mutable formats with EDRAM
// aliasing without transfers.
VkImageView view_depth_color_;
// Optional views.
VkImageView view_depth_stencil_;
VkImageView view_stencil_;
VkImageView view_srgb_;
VkImageView view_color_transfer_separate_;
// 2 sampled images for depth / stencil, 1 sampled image for color.
size_t descriptor_set_index_transfer_source_;
VkPipelineStageFlags current_stage_mask_ = 0;
VkAccessFlags current_access_mask_ = 0;
VkImageLayout current_layout_ = VK_IMAGE_LAYOUT_UNDEFINED;
// Temporary storage for indices in operations like transfers and dumps.
uint32_t temporary_sort_index_ = 0;
};
struct FramebufferKey {
RenderPassKey render_pass_key;
// Same as RenderTargetKey::pitch_tiles_at_32bpp.
uint32_t pitch_tiles_at_32bpp : 8; // 8
// [0, 2047].
uint32_t depth_base_tiles : xenos::kEdramBaseTilesBits - 1; // 19
uint32_t color_0_base_tiles : xenos::kEdramBaseTilesBits - 1; // 30
uint32_t color_1_base_tiles : xenos::kEdramBaseTilesBits - 1; // 41
uint32_t color_2_base_tiles : xenos::kEdramBaseTilesBits - 1; // 52
uint32_t color_3_base_tiles : xenos::kEdramBaseTilesBits - 1; // 63
// Including all the padding, for a stable hash.
FramebufferKey() { Reset(); }
FramebufferKey(const FramebufferKey& key) {
std::memcpy(this, &key, sizeof(*this));
}
FramebufferKey& operator=(const FramebufferKey& key) {
std::memcpy(this, &key, sizeof(*this));
return *this;
}
bool operator==(const FramebufferKey& key) const {
return std::memcmp(this, &key, sizeof(*this)) == 0;
}
using Hasher = xe::hash::XXHasher<FramebufferKey>;
void Reset() { std::memset(this, 0, sizeof(*this)); }
};
enum TransferUsedDescriptorSet : uint32_t {
// Ordered from the least to the most frequently changed.
kTransferUsedDescriptorSetHostDepthBuffer,
kTransferUsedDescriptorSetHostDepthStencilTextures,
kTransferUsedDescriptorSetDepthStencilTextures,
// Mutually exclusive with kTransferUsedDescriptorSetDepthStencilTextures.
kTransferUsedDescriptorSetColorTexture,
kTransferUsedDescriptorSetCount,
kTransferUsedDescriptorSetHostDepthBufferBit =
uint32_t(1) << kTransferUsedDescriptorSetHostDepthBuffer,
kTransferUsedDescriptorSetHostDepthStencilTexturesBit =
uint32_t(1) << kTransferUsedDescriptorSetHostDepthStencilTextures,
kTransferUsedDescriptorSetDepthStencilTexturesBit =
uint32_t(1) << kTransferUsedDescriptorSetDepthStencilTextures,
kTransferUsedDescriptorSetColorTextureBit =
uint32_t(1) << kTransferUsedDescriptorSetColorTexture,
};
// 32-bit push constants (for simplicity of size calculation and to avoid
// std140 packing issues).
enum TransferUsedPushConstantDword : uint32_t {
kTransferUsedPushConstantDwordHostDepthAddress,
kTransferUsedPushConstantDwordAddress,
// Changed 8 times per transfer.
kTransferUsedPushConstantDwordStencilMask,
kTransferUsedPushConstantDwordCount,
kTransferUsedPushConstantDwordHostDepthAddressBit =
uint32_t(1) << kTransferUsedPushConstantDwordHostDepthAddress,
kTransferUsedPushConstantDwordAddressBit =
uint32_t(1) << kTransferUsedPushConstantDwordAddress,
kTransferUsedPushConstantDwordStencilMaskBit =
uint32_t(1) << kTransferUsedPushConstantDwordStencilMask,
};
enum class TransferPipelineLayoutIndex {
kColor,
kDepth,
kColorToStencilBit,
kDepthToStencilBit,
kColorAndHostDepthTexture,
kColorAndHostDepthBuffer,
kDepthAndHostDepthTexture,
kDepthAndHostDepthBuffer,
kCount,
};
struct TransferPipelineLayoutInfo {
uint32_t used_descriptor_sets;
uint32_t used_push_constant_dwords;
};
static const TransferPipelineLayoutInfo
kTransferPipelineLayoutInfos[size_t(TransferPipelineLayoutIndex::kCount)];
enum class TransferMode : uint32_t {
kColorToDepth,
kColorToColor,
kDepthToDepth,
kDepthToColor,
kColorToStencilBit,
kDepthToStencilBit,
// Two-source modes, using the host depth if it, when converted to the guest
// format, matches what's in the owner source (not modified, keep host
// precision), or the guest data otherwise (significantly modified, possibly
// cleared). Stencil for FragStencilRef is always taken from the guest
// source.
kColorAndHostDepthToDepth,
// When using different source and destination depth formats.
kDepthAndHostDepthToDepth,
// If host depth is fetched, but it's the same image as the destination,
// it's copied to the EDRAM buffer (but since it's just a scratch buffer,
// with tiles laid out linearly with the same pitch as in the original
// render target; also no swapping of 40-sample columns as opposed to the
// host render target - this is done only for the color source) and fetched
// from there instead of the host depth texture.
kColorAndHostDepthCopyToDepth,
kDepthAndHostDepthCopyToDepth,
kCount,
};
enum class TransferOutput {
kColor,
kDepth,
kStencilBit,
};
struct TransferModeInfo {
TransferOutput output;
TransferPipelineLayoutIndex pipeline_layout;
};
static const TransferModeInfo kTransferModes[size_t(TransferMode::kCount)];
union TransferShaderKey {
uint32_t key;
struct {
xenos::MsaaSamples dest_msaa_samples : xenos::kMsaaSamplesBits;
uint32_t dest_color_rt_index : xenos::kColorRenderTargetIndexBits;
uint32_t dest_resource_format : xenos::kRenderTargetFormatBits;
xenos::MsaaSamples source_msaa_samples : xenos::kMsaaSamplesBits;
// Always 1x when the host depth is a copy from a buffer rather than an
// image, so the same pipeline isn't created for different MSAA sample
// counts, as they don't matter in this case.
xenos::MsaaSamples host_depth_source_msaa_samples
: xenos::kMsaaSamplesBits;
uint32_t source_resource_format : xenos::kRenderTargetFormatBits;
// Last bits because this affects the pipeline layout - after sorting,
// only change it as few times as possible. Depth buffers have an
// additional stencil texture.
static_assert(size_t(TransferMode::kCount) <= (size_t(1) << 4));
TransferMode mode : 4;
};
TransferShaderKey() : key(0) { static_assert_size(*this, sizeof(key)); }
struct Hasher {
size_t operator()(const TransferShaderKey& key) const {
return std::hash<uint32_t>{}(key.key);
}
};
bool operator==(const TransferShaderKey& other_key) const {
return key == other_key.key;
}
bool operator!=(const TransferShaderKey& other_key) const {
return !(*this == other_key);
}
bool operator<(const TransferShaderKey& other_key) const {
return key < other_key.key;
}
};
struct TransferPipelineKey {
RenderPassKey render_pass_key;
TransferShaderKey shader_key;
TransferPipelineKey(RenderPassKey render_pass_key,
TransferShaderKey shader_key)
: render_pass_key(render_pass_key), shader_key(shader_key) {}
struct Hasher {
size_t operator()(const TransferPipelineKey& key) const {
XXH3_state_t hash_state;
XXH3_64bits_reset(&hash_state);
XXH3_64bits_update(&hash_state, &key.render_pass_key,
sizeof(key.render_pass_key));
XXH3_64bits_update(&hash_state, &key.shader_key,
sizeof(key.shader_key));
return static_cast<size_t>(XXH3_64bits_digest(&hash_state));
}
};
bool operator==(const TransferPipelineKey& other_key) const {
return render_pass_key == other_key.render_pass_key &&
shader_key == other_key.shader_key;
}
bool operator!=(const TransferPipelineKey& other_key) const {
return !(*this == other_key);
}
bool operator<(const TransferPipelineKey& other_key) const {
if (render_pass_key != other_key.render_pass_key) {
return render_pass_key < other_key.render_pass_key;
}
return shader_key < other_key.shader_key;
}
};
union TransferAddressConstant {
uint32_t constant;
struct {
// All in tiles.
uint32_t dest_pitch : xenos::kEdramPitchTilesBits;
uint32_t source_pitch : xenos::kEdramPitchTilesBits;
// Safe to use 12 bits for signed difference - no ownership transfer can
// ever occur between render targets with EDRAM base >= 2048 as this would
// result in 0-length spans. 10 + 10 + 12 is exactly 32, any more bits,
// and more root 32-bit constants will be used.
// Destination base in tiles minus source base in tiles (not vice versa
// because this is a transform of the coordinate system, not addresses
// themselves).
// 0 for host_depth_source_is_copy (ignored in this case, as
// destination == source anyway).
int32_t source_to_dest : xenos::kEdramBaseTilesBits;
};
TransferAddressConstant() : constant(0) {
static_assert_size(*this, sizeof(constant));
}
bool operator==(const TransferAddressConstant& other_constant) const {
return constant == other_constant.constant;
}
bool operator!=(const TransferAddressConstant& other_constant) const {
return !(*this == other_constant);
}
};
struct TransferInvocation {
Transfer transfer;
TransferShaderKey shader_key;
TransferInvocation(const Transfer& transfer,
const TransferShaderKey& shader_key)
: transfer(transfer), shader_key(shader_key) {}
bool operator<(const TransferInvocation& other_invocation) const {
// TODO(Triang3l): See if it may be better to sort by the source in the
// first place, especially when reading the same data multiple times (like
// to write the stencil bits after depth) for better read locality.
// Sort by the shader key primarily to reduce pipeline state (context)
// switches.
if (shader_key != other_invocation.shader_key) {
return shader_key < other_invocation.shader_key;
}
// Host depth render targets are changed rarely if they exist, won't save
// many binding changes, ignore them for simplicity (their existence is
// caught by the shader key change).
assert_not_null(transfer.source);
assert_not_null(other_invocation.transfer.source);
uint32_t source_index =
static_cast<const VulkanRenderTarget*>(transfer.source)
->temporary_sort_index();
uint32_t other_source_index = static_cast<const VulkanRenderTarget*>(
other_invocation.transfer.source)
->temporary_sort_index();
if (source_index != other_source_index) {
return source_index < other_source_index;
}
return transfer.start_tiles < other_invocation.transfer.start_tiles;
}
bool CanBeMergedIntoOneDraw(
const TransferInvocation& other_invocation) const {
return shader_key == other_invocation.shader_key &&
transfer.AreSourcesSame(other_invocation.transfer);
}
};
union DumpPipelineKey {
uint32_t key;
struct {
xenos::MsaaSamples msaa_samples : 2;
uint32_t resource_format : 4;
// Last bit because this affects the pipeline - after sorting, only change
// it at most once. Depth buffers have an additional stencil SRV.
uint32_t is_depth : 1;
};
DumpPipelineKey() : key(0) { static_assert_size(*this, sizeof(key)); }
struct Hasher {
size_t operator()(const DumpPipelineKey& key) const {
return std::hash<uint32_t>{}(key.key);
}
};
bool operator==(const DumpPipelineKey& other_key) const {
return key == other_key.key;
}
bool operator!=(const DumpPipelineKey& other_key) const {
return !(*this == other_key);
}
bool operator<(const DumpPipelineKey& other_key) const {
return key < other_key.key;
}
xenos::ColorRenderTargetFormat GetColorFormat() const {
assert_false(is_depth);
return xenos::ColorRenderTargetFormat(resource_format);
}
xenos::DepthRenderTargetFormat GetDepthFormat() const {
assert_true(is_depth);
return xenos::DepthRenderTargetFormat(resource_format);
}
};
// There's no strict dependency on the group size in dumping; for simplicity
// of the calculations, especially with resolution scaling, the division is
// done manually (as the group size is not unlimited). The only restriction
// is that an integer
// multiple of it must be 80x16 samples (and no larger than that) for 32bpp,
// or 40x16 samples for 64bpp (because only half of the pair of tiles may
// need to be dumped). Using 8x16 since that's 128 - the minimum required
// group size on Vulkan, and the maximum number of lanes in a subgroup on
// Vulkan.
static constexpr uint32_t kDumpSamplesPerGroupX = 8;
static constexpr uint32_t kDumpSamplesPerGroupY = 16;
union DumpPitches {
uint32_t pitches;
struct {
// Both in tiles.
uint32_t dest_pitch : xenos::kEdramPitchTilesBits;
uint32_t source_pitch : xenos::kEdramPitchTilesBits;
};
DumpPitches() : pitches(0) { static_assert_size(*this, sizeof(pitches)); }
bool operator==(const DumpPitches& other_pitches) const {
return pitches == other_pitches.pitches;
}
bool operator!=(const DumpPitches& other_pitches) const {
return !(*this == other_pitches);
}
};
union DumpOffsets {
uint32_t offsets;
struct {
uint32_t dispatch_first_tile : xenos::kEdramBaseTilesBits;
uint32_t source_base_tiles : xenos::kEdramBaseTilesBits;
};
DumpOffsets() : offsets(0) { static_assert_size(*this, sizeof(offsets)); }
bool operator==(const DumpOffsets& other_offsets) const {
return offsets == other_offsets.offsets;
}
bool operator!=(const DumpOffsets& other_offsets) const {
return !(*this == other_offsets);
}
};
enum DumpDescriptorSet : uint32_t {
// Never changes. Same in both color and depth pipeline layouts; kept as the
// first set for pipeline layout compatibility, so it only has to be bound
// once.
kDumpDescriptorSetEdram,
// One resolve may need multiple sources. Different descriptor set layouts
// for color and depth.
kDumpDescriptorSetSource,
kDumpDescriptorSetCount,
};
enum DumpPushConstant : uint32_t {
// May be different for different sources.
kDumpPushConstantPitches,
// May be changed multiple times for the same source.
kDumpPushConstantOffsets,
kDumpPushConstantCount,
};
struct DumpInvocation {
ResolveCopyDumpRectangle rectangle;
DumpPipelineKey pipeline_key;
DumpInvocation(const ResolveCopyDumpRectangle& rectangle,
const DumpPipelineKey& pipeline_key)
: rectangle(rectangle), pipeline_key(pipeline_key) {}
bool operator<(const DumpInvocation& other_invocation) const {
// Sort by the pipeline key primarily to reduce pipeline state (context)
// switches.
if (pipeline_key != other_invocation.pipeline_key) {
return pipeline_key < other_invocation.pipeline_key;
}
assert_not_null(rectangle.render_target);
uint32_t render_target_index =
static_cast<const VulkanRenderTarget*>(rectangle.render_target)
->temporary_sort_index();
const ResolveCopyDumpRectangle& other_rectangle =
other_invocation.rectangle;
uint32_t other_render_target_index =
static_cast<const VulkanRenderTarget*>(other_rectangle.render_target)
->temporary_sort_index();
if (render_target_index != other_render_target_index) {
return render_target_index < other_render_target_index;
}
if (rectangle.row_first != other_rectangle.row_first) {
return rectangle.row_first < other_rectangle.row_first;
}
return rectangle.row_first_start < other_rectangle.row_first_start;
}
};
// Returns the framebuffer object, or nullptr if creation failed.
const Framebuffer* GetFramebuffer(
RenderPassKey render_pass_key, uint32_t pitch_tiles_at_32bpp,
const RenderTarget* const* depth_and_color_render_targets);
VkShaderModule GetTransferShader(TransferShaderKey key);
// With sample-rate shading, returns a pointer to one pipeline. Without
// sample-rate shading, returns a pointer to as many pipelines as there are
// samples. If there was a failure to create a pipeline, returns nullptr.
VkPipeline const* GetTransferPipelines(TransferPipelineKey key);
// Do ownership transfers for render targets - each render target / vector may
// be null / empty in case there's nothing to do for them.
// resolve_clear_rectangle is expected to be provided by
// PrepareHostRenderTargetsResolveClear which should do all the needed size
// bound checks.
void PerformTransfersAndResolveClears(
uint32_t render_target_count, RenderTarget* const* render_targets,
const std::vector<Transfer>* render_target_transfers,
const uint64_t* render_target_resolve_clear_values = nullptr,
const Transfer::Rectangle* resolve_clear_rectangle = nullptr);
VkPipeline GetDumpPipeline(DumpPipelineKey key);
// Writes contents of host render targets within rectangles from
// ResolveInfo::GetCopyEdramTileSpan to edram_buffer_.
void DumpRenderTargets(uint32_t dump_base, uint32_t dump_row_length_used,
uint32_t dump_rows, uint32_t dump_pitch);
bool gamma_render_target_as_srgb_ = false;
bool depth_unorm24_vulkan_format_supported_ = false;
bool depth_float24_round_ = false;
bool msaa_2x_attachments_supported_ = false;
bool msaa_2x_no_attachments_supported_ = false;
std::unordered_map<FramebufferKey, Framebuffer, FramebufferKey::Hasher>
framebuffers_;
RenderPassKey last_update_render_pass_key_;
VkRenderPass last_update_render_pass_ = VK_NULL_HANDLE;
uint32_t last_update_framebuffer_pitch_tiles_at_32bpp_ = 0;
const RenderTarget*
last_update_framebuffer_attachments_[1 + xenos::kMaxColorRenderTargets] =
{};
const Framebuffer* last_update_framebuffer_ = nullptr;
// Set 0 - EDRAM storage buffer, set 1 - source depth sampled image (and
// unused stencil from the transfer descriptor set), HostDepthStoreConstants
// passed via push constants.
VkPipelineLayout host_depth_store_pipeline_layout_ = VK_NULL_HANDLE;
VkPipeline host_depth_store_pipelines_[size_t(xenos::MsaaSamples::k4X) + 1] =
{};
std::unique_ptr<ui::vulkan::VulkanUploadBufferPool>
transfer_vertex_buffer_pool_;
VkShaderModule transfer_passthrough_vertex_shader_ = VK_NULL_HANDLE;
VkPipelineLayout transfer_pipeline_layouts_[size_t(
TransferPipelineLayoutIndex::kCount)] = {};
// VK_NULL_HANDLE if failed to create.
std::unordered_map<TransferShaderKey, VkShaderModule,
TransferShaderKey::Hasher>
transfer_shaders_;
// With sample-rate shading, one pipeline per entry. Without sample-rate
// shading, one pipeline per sample per entry. VK_NULL_HANDLE if failed to
// create.
std::unordered_map<TransferPipelineKey, std::array<VkPipeline, 4>,
TransferPipelineKey::Hasher>
transfer_pipelines_;
VkPipelineLayout dump_pipeline_layout_color_ = VK_NULL_HANDLE;
VkPipelineLayout dump_pipeline_layout_depth_ = VK_NULL_HANDLE;
// Compute pipelines for copying host render target contents to the EDRAM
// buffer. VK_NULL_HANDLE if failed to create.
std::unordered_map<DumpPipelineKey, VkPipeline, DumpPipelineKey::Hasher>
dump_pipelines_;
// Temporary storage for Resolve.
std::vector<Transfer> clear_transfers_[2];
// Temporary storage for PerformTransfersAndResolveClears.
std::vector<TransferInvocation> current_transfer_invocations_;
// Temporary storage for DumpRenderTargets.
std::vector<ResolveCopyDumpRectangle> dump_rectangles_;
std::vector<DumpInvocation> dump_invocations_;
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_VULKAN_RENDER_TARGET_CACHE_H_

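render_passes_ caches one VkRenderPass per packed RenderPassKey::key, storing VK_NULL_HANDLE on failure so a bad key is not retried on every draw. The lookup in GetRenderPass plausibly follows the usual lazy-cache pattern (a sketch; CreateRenderPassForKey stands in for the real creation code, whose .cc diff is suppressed above as too large):

VkRenderPass VulkanRenderTargetCache::GetRenderPass(RenderPassKey key) {
  auto it = render_passes_.find(key.key);
  if (it != render_passes_.end()) {
    // May be VK_NULL_HANDLE if an earlier creation attempt failed.
    return it->second;
  }
  VkRenderPass render_pass = CreateRenderPassForKey(key);  // Assumed helper.
  render_passes_.emplace(key.key, render_pass);
  return render_pass;
}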
View File

@ -2,24 +2,59 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/assert.h"
#include <cstdint>
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/ui/vulkan/vulkan_util.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace gpu {
namespace vulkan {
using xe::ui::vulkan::util::CheckResult;
VulkanShader::VulkanTranslation::~VulkanTranslation() {
if (shader_module_) {
const ui::vulkan::VulkanProvider& provider =
static_cast<const VulkanShader&>(shader()).provider_;
provider.dfn().vkDestroyShaderModule(provider.device(), shader_module_,
nullptr);
}
}
VkShaderModule VulkanShader::VulkanTranslation::GetOrCreateShaderModule() {
if (!is_valid()) {
return VK_NULL_HANDLE;
}
if (shader_module_ != VK_NULL_HANDLE) {
return shader_module_;
}
const ui::vulkan::VulkanProvider& provider =
static_cast<const VulkanShader&>(shader()).provider_;
VkShaderModuleCreateInfo shader_module_create_info;
shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
shader_module_create_info.pNext = nullptr;
shader_module_create_info.flags = 0;
shader_module_create_info.codeSize = translated_binary().size();
shader_module_create_info.pCode =
reinterpret_cast<const uint32_t*>(translated_binary().data());
if (provider.dfn().vkCreateShaderModule(provider.device(),
&shader_module_create_info, nullptr,
&shader_module_) != VK_SUCCESS) {
XELOGE(
"VulkanShader::VulkanTranslation: Failed to create a Vulkan shader "
"module for shader {:016X} modification {:016X}",
shader().ucode_data_hash(), modification());
MakeInvalid();
return VK_NULL_HANDLE;
}
return shader_module_;
}
VulkanShader::VulkanShader(const ui::vulkan::VulkanProvider& provider,
xenos::ShaderType shader_type,
@ -27,60 +62,10 @@ VulkanShader::VulkanShader(const ui::vulkan::VulkanProvider& provider,
const uint32_t* ucode_dwords,
size_t ucode_dword_count,
std::endian ucode_source_endian)
: Shader(shader_type, ucode_data_hash, ucode_dwords, ucode_dword_count,
ucode_source_endian),
: SpirvShader(shader_type, ucode_data_hash, ucode_dwords, ucode_dword_count,
ucode_source_endian),
provider_(provider) {}
VulkanShader::VulkanTranslation::~VulkanTranslation() {
if (shader_module_) {
const ui::vulkan::VulkanProvider& provider =
static_cast<VulkanShader&>(shader()).provider_;
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
dfn.vkDestroyShaderModule(device, shader_module_, nullptr);
shader_module_ = nullptr;
}
}
bool VulkanShader::VulkanTranslation::Prepare() {
assert_null(shader_module_);
assert_true(is_valid());
const VulkanShader& vulkan_shader = static_cast<VulkanShader&>(shader());
const ui::vulkan::VulkanProvider& provider = vulkan_shader.provider_;
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
// Create the shader module.
VkShaderModuleCreateInfo shader_info;
shader_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
shader_info.pNext = nullptr;
shader_info.flags = 0;
shader_info.codeSize = translated_binary().size();
shader_info.pCode =
reinterpret_cast<const uint32_t*>(translated_binary().data());
auto status =
dfn.vkCreateShaderModule(device, &shader_info, nullptr, &shader_module_);
CheckResult(status, "vkCreateShaderModule");
char type_char;
switch (vulkan_shader.type()) {
case xenos::ShaderType::kVertex:
type_char = 'v';
break;
case xenos::ShaderType::kPixel:
type_char = 'p';
break;
default:
type_char = 'u';
}
provider.SetDeviceObjectName(
VK_OBJECT_TYPE_SHADER_MODULE, uint64_t(shader_module_),
fmt::format("S({}): {:016X}", type_char, vulkan_shader.ucode_data_hash())
.c_str());
return status == VK_SUCCESS;
}
Shader::Translation* VulkanShader::CreateTranslationInstance(
uint64_t modification) {
return new VulkanTranslation(*this, modification);

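GetOrCreateShaderModule builds the VkShaderModule lazily from the translated SPIR-V and caches it on the translation, invalidating the translation if creation fails. A hypothetical consumer when filling a pipeline stage (illustrative, not from this diff):

VkPipelineShaderStageCreateInfo stage_info = {};
stage_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stage_info.stage = VK_SHADER_STAGE_VERTEX_BIT;
stage_info.module = vertex_translation->GetOrCreateShaderModule();
stage_info.pName = "main";
// A VK_NULL_HANDLE module means translation or creation failed; the caller
// must skip pipeline creation in that case.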
View File

@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@ -10,42 +10,68 @@
#ifndef XENIA_GPU_VULKAN_VULKAN_SHADER_H_
#define XENIA_GPU_VULKAN_VULKAN_SHADER_H_
#include <string>
#include <cstdint>
#include "xenia/gpu/shader.h"
#include "xenia/gpu/spirv_shader.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace gpu {
namespace vulkan {
class VulkanShader : public Shader {
class VulkanShader : public SpirvShader {
public:
class VulkanTranslation : public Translation {
class VulkanTranslation : public SpirvTranslation {
public:
VulkanTranslation(VulkanShader& shader, uint64_t modification)
: Translation(shader, modification) {}
explicit VulkanTranslation(VulkanShader& shader, uint64_t modification)
: SpirvTranslation(shader, modification) {}
~VulkanTranslation() override;
bool Prepare();
// Available only if the translation is_valid and has been prepared.
VkShaderModule GetOrCreateShaderModule();
VkShaderModule shader_module() const { return shader_module_; }
private:
VkShaderModule shader_module_ = nullptr;
VkShaderModule shader_module_ = VK_NULL_HANDLE;
};
VulkanShader(const ui::vulkan::VulkanProvider& provider,
xenos::ShaderType shader_type, uint64_t ucode_data_hash,
const uint32_t* ucode_dwords, size_t ucode_dword_count,
std::endian ucode_source_endian = std::endian::big);
explicit VulkanShader(const ui::vulkan::VulkanProvider& provider,
xenos::ShaderType shader_type, uint64_t ucode_data_hash,
const uint32_t* ucode_dwords, size_t ucode_dword_count,
std::endian ucode_source_endian = std::endian::big);
// For the owning subsystem (such as the pipeline cache): accessors for unique
// identifiers (used instead of hashes to make sure collisions can't happen)
// of binding layouts used by the shader, for invalidation if a shader with an
// incompatible layout has been bound.
size_t GetTextureBindingLayoutUserUID() const {
return texture_binding_layout_user_uid_;
}
size_t GetSamplerBindingLayoutUserUID() const {
return sampler_binding_layout_user_uid_;
}
// Modifications of the same shader can be translated on different threads.
// The "set" function must only be called if "enter" returned true - these are
// set up only once.
bool EnterBindingLayoutUserUIDSetup() {
return !binding_layout_user_uids_set_up_.test_and_set();
}
void SetTextureBindingLayoutUserUID(size_t uid) {
texture_binding_layout_user_uid_ = uid;
}
void SetSamplerBindingLayoutUserUID(size_t uid) {
sampler_binding_layout_user_uid_ = uid;
}
protected:
Translation* CreateTranslationInstance(uint64_t modification) override;
private:
const ui::vulkan::VulkanProvider& provider_;
std::atomic_flag binding_layout_user_uids_set_up_ = ATOMIC_FLAG_INIT;
size_t texture_binding_layout_user_uid_ = 0;
size_t sampler_binding_layout_user_uid_ = 0;
};
} // namespace vulkan

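The atomic test_and_set makes the UID assignment race-free when several modifications of one shader are translated on different threads: only the first caller performs the setup. A sketch of the intended caller-side protocol (the UID variables are assumed to come from the owning subsystem's layout registry):

if (shader.EnterBindingLayoutUserUIDSetup()) {
  // First caller for this shader - publish the layout identifiers once.
  shader.SetTextureBindingLayoutUserUID(texture_binding_layout_uid);
  shader.SetSamplerBindingLayoutUserUID(sampler_binding_layout_uid);
}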
View File

@ -0,0 +1,499 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
#include <algorithm>
#include <cstring>
#include <utility>
#include <vector>
#include "xenia/base/assert.h"
#include "xenia/base/cvar.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/gpu/vulkan/deferred_command_buffer.h"
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
#include "xenia/ui/vulkan/vulkan_util.h"
DEFINE_bool(vulkan_sparse_shared_memory, true,
"Enable sparse binding for shared memory emulation. Disabling it "
"increases video memory usage - a 512 MB buffer is created - but "
"allows graphics debuggers that don't support sparse binding to "
"work.",
"Vulkan");
namespace xe {
namespace gpu {
namespace vulkan {
VulkanSharedMemory::VulkanSharedMemory(
VulkanCommandProcessor& command_processor, Memory& memory,
TraceWriter& trace_writer,
VkPipelineStageFlags guest_shader_pipeline_stages)
: SharedMemory(memory),
command_processor_(command_processor),
trace_writer_(trace_writer),
guest_shader_pipeline_stages_(guest_shader_pipeline_stages) {}
VulkanSharedMemory::~VulkanSharedMemory() { Shutdown(true); }
bool VulkanSharedMemory::Initialize() {
InitializeCommon();
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
const VkPhysicalDeviceFeatures& device_features = provider.device_features();
const VkBufferCreateFlags sparse_flags =
VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
// Try to create a sparse buffer.
VkBufferCreateInfo buffer_create_info;
buffer_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
buffer_create_info.pNext = nullptr;
buffer_create_info.flags = sparse_flags;
buffer_create_info.size = kBufferSize;
buffer_create_info.usage =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT;
buffer_create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
buffer_create_info.queueFamilyIndexCount = 0;
buffer_create_info.pQueueFamilyIndices = nullptr;
if (cvars::vulkan_sparse_shared_memory &&
provider.IsSparseBindingSupported() &&
device_features.sparseResidencyBuffer) {
if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) ==
VK_SUCCESS) {
VkMemoryRequirements buffer_memory_requirements;
dfn.vkGetBufferMemoryRequirements(device, buffer_,
&buffer_memory_requirements);
if (xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
provider.memory_types_device_local(),
&buffer_memory_type_)) {
uint32_t allocation_size_log2;
xe::bit_scan_forward(
std::max(uint64_t(buffer_memory_requirements.alignment),
uint64_t(1)),
&allocation_size_log2);
if (allocation_size_log2 < kBufferSizeLog2) {
// Maximum of 1024 allocations in the worst case for all of the
// buffer because of the overall 4096 allocation count limit on
// Windows drivers.
InitializeSparseHostGpuMemory(
std::max(allocation_size_log2,
std::max(kHostGpuMemoryOptimalSparseAllocationLog2,
kBufferSizeLog2 - uint32_t(10))));
} else {
// Shouldn't happen on any real platform, but no point allocating the
// buffer sparsely.
dfn.vkDestroyBuffer(device, buffer_, nullptr);
buffer_ = VK_NULL_HANDLE;
}
} else {
XELOGE(
"Shared memory: Failed to get a device-local Vulkan memory type "
"for the sparse buffer");
dfn.vkDestroyBuffer(device, buffer_, nullptr);
buffer_ = VK_NULL_HANDLE;
}
} else {
XELOGE("Shared memory: Failed to create the {} MB Vulkan sparse buffer",
kBufferSize >> 20);
}
}
// Create a non-sparse buffer if there were issues with the sparse buffer.
if (buffer_ == VK_NULL_HANDLE) {
XELOGGPU(
"Vulkan sparse binding is not used for shared memory emulation - video "
"memory usage may increase significantly because a full {} MB buffer "
"will be created",
kBufferSize >> 20);
buffer_create_info.flags &= ~sparse_flags;
if (dfn.vkCreateBuffer(device, &buffer_create_info, nullptr, &buffer_) !=
VK_SUCCESS) {
XELOGE("Shared memory: Failed to create the {} MB Vulkan buffer",
kBufferSize >> 20);
Shutdown();
return false;
}
VkMemoryRequirements buffer_memory_requirements;
dfn.vkGetBufferMemoryRequirements(device, buffer_,
&buffer_memory_requirements);
if (!xe::bit_scan_forward(buffer_memory_requirements.memoryTypeBits &
provider.memory_types_device_local(),
&buffer_memory_type_)) {
XELOGE(
"Shared memory: Failed to get a device-local Vulkan memory type for "
"the buffer");
Shutdown();
return false;
}
VkMemoryAllocateInfo buffer_memory_allocate_info;
VkMemoryAllocateInfo* buffer_memory_allocate_info_last =
&buffer_memory_allocate_info;
buffer_memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
buffer_memory_allocate_info.pNext = nullptr;
buffer_memory_allocate_info.allocationSize =
buffer_memory_requirements.size;
buffer_memory_allocate_info.memoryTypeIndex = buffer_memory_type_;
VkMemoryDedicatedAllocateInfoKHR buffer_memory_dedicated_allocate_info;
if (provider.device_extensions().khr_dedicated_allocation) {
buffer_memory_allocate_info_last->pNext =
&buffer_memory_dedicated_allocate_info;
buffer_memory_allocate_info_last =
reinterpret_cast<VkMemoryAllocateInfo*>(
&buffer_memory_dedicated_allocate_info);
buffer_memory_dedicated_allocate_info.sType =
VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR;
buffer_memory_dedicated_allocate_info.pNext = nullptr;
buffer_memory_dedicated_allocate_info.image = VK_NULL_HANDLE;
buffer_memory_dedicated_allocate_info.buffer = buffer_;
}
VkDeviceMemory buffer_memory;
if (dfn.vkAllocateMemory(device, &buffer_memory_allocate_info, nullptr,
&buffer_memory) != VK_SUCCESS) {
XELOGE(
"Shared memory: Failed to allocate {} MB of memory for the Vulkan "
"buffer",
kBufferSize >> 20);
Shutdown();
return false;
}
buffer_memory_.push_back(buffer_memory);
if (dfn.vkBindBufferMemory(device, buffer_, buffer_memory, 0) !=
VK_SUCCESS) {
XELOGE("Shared memory: Failed to bind memory to the Vulkan buffer");
Shutdown();
return false;
}
}
// The first usage will likely be uploading.
last_usage_ = Usage::kTransferDestination;
last_written_range_ = std::make_pair<uint32_t, uint32_t>(0, 0);
upload_buffer_pool_ = std::make_unique<ui::vulkan::VulkanUploadBufferPool>(
provider, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
xe::align(ui::vulkan::VulkanUploadBufferPool::kDefaultPageSize,
size_t(1) << page_size_log2()));
return true;
}
void VulkanSharedMemory::Shutdown(bool from_destructor) {
ResetTraceDownload();
upload_buffer_pool_.reset();
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device, buffer_);
for (VkDeviceMemory memory : buffer_memory_) {
dfn.vkFreeMemory(device, memory, nullptr);
}
buffer_memory_.clear();
// If calling from the destructor, the SharedMemory destructor will call
// ShutdownCommon.
if (!from_destructor) {
ShutdownCommon();
}
}
void VulkanSharedMemory::CompletedSubmissionUpdated() {
upload_buffer_pool_->Reclaim(command_processor_.GetCompletedSubmission());
}
void VulkanSharedMemory::EndSubmission() { upload_buffer_pool_->FlushWrites(); }
void VulkanSharedMemory::Use(Usage usage,
std::pair<uint32_t, uint32_t> written_range) {
written_range.first = std::min(written_range.first, kBufferSize);
written_range.second =
std::min(written_range.second, kBufferSize - written_range.first);
assert_true(usage != Usage::kRead || !written_range.second);
if (last_usage_ != usage || last_written_range_.second) {
VkPipelineStageFlags src_stage_mask, dst_stage_mask;
VkAccessFlags src_access_mask, dst_access_mask;
GetUsageMasks(last_usage_, src_stage_mask, src_access_mask);
GetUsageMasks(usage, dst_stage_mask, dst_access_mask);
VkDeviceSize offset, size;
if (last_usage_ == usage) {
// Committing the previous write without changing the stage and access masks
// (hence false is passed below for whether to skip the barrier when no masks
// have changed).
offset = VkDeviceSize(last_written_range_.first);
size = VkDeviceSize(last_written_range_.second);
} else {
// Changing the stage and access mask - all preceding writes must be
// available not only to the source stage, but to the destination as well.
offset = 0;
size = VK_WHOLE_SIZE;
last_usage_ = usage;
}
command_processor_.PushBufferMemoryBarrier(
buffer_, offset, size, src_stage_mask, dst_stage_mask, src_access_mask,
dst_access_mask, VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
false);
}
last_written_range_ = written_range;
}
bool VulkanSharedMemory::InitializeTraceSubmitDownloads() {
ResetTraceDownload();
PrepareForTraceDownload();
uint32_t download_page_count = trace_download_page_count();
if (!download_page_count) {
return false;
}
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
if (!ui::vulkan::util::CreateDedicatedAllocationBuffer(
provider, download_page_count << page_size_log2(),
VK_BUFFER_USAGE_TRANSFER_DST_BIT,
ui::vulkan::util::MemoryPurpose::kReadback, trace_download_buffer_,
trace_download_buffer_memory_)) {
XELOGE(
"Shared memory: Failed to create a {} KB GPU-written memory download "
"buffer for frame tracing",
download_page_count << page_size_log2() >> 10);
ResetTraceDownload();
return false;
}
Use(Usage::kRead);
command_processor_.SubmitBarriers(true);
DeferredCommandBuffer& command_buffer =
command_processor_.deferred_command_buffer();
size_t download_range_count = trace_download_ranges().size();
VkBufferCopy* download_regions = command_buffer.CmdCopyBufferEmplace(
buffer_, trace_download_buffer_, uint32_t(download_range_count));
VkDeviceSize download_buffer_offset = 0;
for (size_t i = 0; i < download_range_count; ++i) {
VkBufferCopy& download_region = download_regions[i];
const std::pair<uint32_t, uint32_t>& download_range =
trace_download_ranges()[i];
download_region.srcOffset = download_range.first;
download_region.dstOffset = download_buffer_offset;
download_region.size = download_range.second;
download_buffer_offset += download_range.second;
}
command_processor_.PushBufferMemoryBarrier(
trace_download_buffer_, 0, VK_WHOLE_SIZE, VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_HOST_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
VK_ACCESS_HOST_READ_BIT);
return true;
}
void VulkanSharedMemory::InitializeTraceCompleteDownloads() {
if (!trace_download_buffer_memory_) {
return;
}
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
void* download_mapping;
if (dfn.vkMapMemory(device, trace_download_buffer_memory_, 0, VK_WHOLE_SIZE,
0, &download_mapping) == VK_SUCCESS) {
uint32_t download_buffer_offset = 0;
for (const auto& download_range : trace_download_ranges()) {
trace_writer_.WriteMemoryRead(
download_range.first, download_range.second,
reinterpret_cast<const uint8_t*>(download_mapping) +
download_buffer_offset);
// The ranges are tightly packed one after another in the download buffer.
download_buffer_offset += download_range.second;
}
dfn.vkUnmapMemory(device, trace_download_buffer_memory_);
} else {
XELOGE(
"Shared memory: Failed to map the GPU-written memory download buffer "
"for frame tracing");
}
ResetTraceDownload();
}
bool VulkanSharedMemory::AllocateSparseHostGpuMemoryRange(
uint32_t offset_allocations, uint32_t length_allocations) {
if (!length_allocations) {
return true;
}
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
VkMemoryAllocateInfo memory_allocate_info;
memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
memory_allocate_info.pNext = nullptr;
memory_allocate_info.allocationSize =
length_allocations << host_gpu_memory_sparse_granularity_log2();
memory_allocate_info.memoryTypeIndex = buffer_memory_type_;
VkDeviceMemory memory;
if (dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr, &memory) !=
VK_SUCCESS) {
XELOGE("Shared memory: Failed to allocate sparse buffer memory");
return false;
}
buffer_memory_.push_back(memory);
VkSparseMemoryBind bind;
bind.resourceOffset = offset_allocations
<< host_gpu_memory_sparse_granularity_log2();
bind.size = memory_allocate_info.allocationSize;
bind.memory = memory;
bind.memoryOffset = 0;
bind.flags = 0;
VkPipelineStageFlags bind_wait_stage_mask =
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
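// The tessellation evaluation stage may only appear in a stage mask when the
// tessellationShader device feature is enabled, hence the check below.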
if (provider.device_features().tessellationShader) {
bind_wait_stage_mask |=
VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT;
}
command_processor_.SparseBindBuffer(buffer_, 1, &bind, bind_wait_stage_mask);
return true;
}
bool VulkanSharedMemory::UploadRanges(
const std::vector<std::pair<uint32_t, uint32_t>>& upload_page_ranges) {
if (upload_page_ranges.empty()) {
return true;
}
// upload_page_ranges are sorted, use them to determine the range for the
// ordering barrier.
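// The conservative range spans from the first page of the first range to the
// last page of the last range.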
Use(Usage::kTransferDestination,
std::make_pair(
upload_page_ranges.front().first << page_size_log2(),
(upload_page_ranges.back().first + upload_page_ranges.back().second -
upload_page_ranges.front().first)
<< page_size_log2()));
command_processor_.SubmitBarriers(true);
DeferredCommandBuffer& command_buffer =
command_processor_.deferred_command_buffer();
uint64_t submission_current = command_processor_.GetCurrentSubmission();
bool successful = true;
upload_regions_.clear();
VkBuffer upload_buffer_previous = VK_NULL_HANDLE;
for (auto upload_range : upload_page_ranges) {
uint32_t upload_range_start = upload_range.first;
uint32_t upload_range_length = upload_range.second;
trace_writer_.WriteMemoryRead(upload_range_start << page_size_log2(),
upload_range_length << page_size_log2());
while (upload_range_length) {
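// RequestPartial may return less space than requested, so the loop continues
// until the whole range has gone through (possibly multiple) upload buffers.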
VkBuffer upload_buffer;
VkDeviceSize upload_buffer_offset, upload_buffer_size;
uint8_t* upload_buffer_mapping = upload_buffer_pool_->RequestPartial(
submission_current, upload_range_length << page_size_log2(),
size_t(1) << page_size_log2(), upload_buffer, upload_buffer_offset,
upload_buffer_size);
if (upload_buffer_mapping == nullptr) {
XELOGE("Shared memory: Failed to get a Vulkan upload buffer");
successful = false;
break;
}
MakeRangeValid(upload_range_start << page_size_log2(),
uint32_t(upload_buffer_size), false, false);
std::memcpy(
upload_buffer_mapping,
memory().TranslatePhysical(upload_range_start << page_size_log2()),
upload_buffer_size);
if (upload_buffer_previous != upload_buffer && !upload_regions_.empty()) {
assert_true(upload_buffer_previous != VK_NULL_HANDLE);
command_buffer.CmdVkCopyBuffer(upload_buffer_previous, buffer_,
uint32_t(upload_regions_.size()),
upload_regions_.data());
upload_regions_.clear();
}
upload_buffer_previous = upload_buffer;
VkBufferCopy& upload_region = upload_regions_.emplace_back();
upload_region.srcOffset = upload_buffer_offset;
upload_region.dstOffset =
VkDeviceSize(upload_range_start << page_size_log2());
upload_region.size = upload_buffer_size;
uint32_t upload_buffer_pages =
uint32_t(upload_buffer_size >> page_size_log2());
upload_range_start += upload_buffer_pages;
upload_range_length -= upload_buffer_pages;
}
if (!successful) {
break;
}
}
if (!upload_regions_.empty()) {
assert_true(upload_buffer_previous != VK_NULL_HANDLE);
command_buffer.CmdVkCopyBuffer(upload_buffer_previous, buffer_,
uint32_t(upload_regions_.size()),
upload_regions_.data());
upload_regions_.clear();
}
return successful;
}
void VulkanSharedMemory::GetUsageMasks(Usage usage,
VkPipelineStageFlags& stage_mask,
VkAccessFlags& access_mask) const {
switch (usage) {
case Usage::kComputeWrite:
stage_mask = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
// Compute shaders both read and write the buffer in this usage.
access_mask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
return;
case Usage::kTransferDestination:
stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT;
access_mask = VK_ACCESS_TRANSFER_WRITE_BIT;
return;
default:
break;
}
stage_mask =
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | guest_shader_pipeline_stages_;
access_mask = VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_SHADER_READ_BIT;
switch (usage) {
case Usage::kRead:
stage_mask |=
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;
access_mask |= VK_ACCESS_TRANSFER_READ_BIT;
break;
case Usage::kGuestDrawReadWrite:
access_mask |= VK_ACCESS_SHADER_WRITE_BIT;
break;
default:
assert_unhandled_case(usage);
}
}
void VulkanSharedMemory::ResetTraceDownload() {
const ui::vulkan::VulkanProvider& provider =
command_processor_.GetVulkanProvider();
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyBuffer, device,
trace_download_buffer_);
ui::vulkan::util::DestroyAndNullHandle(dfn.vkFreeMemory, device,
trace_download_buffer_memory_);
ReleaseTraceDownloadRanges();
}
} // namespace vulkan
} // namespace gpu
} // namespace xe

View File

@ -0,0 +1,97 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_
#define XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_
#include <algorithm>
#include <memory>
#include <utility>
#include <vector>
#include "xenia/gpu/shared_memory.h"
#include "xenia/gpu/trace_writer.h"
#include "xenia/memory.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
#include "xenia/ui/vulkan/vulkan_upload_buffer_pool.h"
namespace xe {
namespace gpu {
namespace vulkan {
class VulkanCommandProcessor;
class VulkanSharedMemory : public SharedMemory {
public:
VulkanSharedMemory(VulkanCommandProcessor& command_processor, Memory& memory,
TraceWriter& trace_writer,
VkPipelineStageFlags guest_shader_pipeline_stages);
~VulkanSharedMemory() override;
bool Initialize();
void Shutdown(bool from_destructor = false);
void CompletedSubmissionUpdated();
void EndSubmission();
enum class Usage {
// Index buffer, vfetch, compute read, transfer source.
kRead,
// Index buffer, vfetch, memexport.
kGuestDrawReadWrite,
kComputeWrite,
kTransferDestination,
};
// Inserts a pipeline barrier for the target usage, also ensuring consecutive
// read-write accesses are ordered with each other.
void Use(Usage usage, std::pair<uint32_t, uint32_t> written_range = {});
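// A hypothetical call before a draw that may write through memexport (names
// are illustrative, not actual call sites):
//   shared_memory.Use(VulkanSharedMemory::Usage::kGuestDrawReadWrite,
//                     std::make_pair(export_offset_bytes, export_size_bytes));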
VkBuffer buffer() const { return buffer_; }
// Returns true if any downloads were submitted to the command processor.
bool InitializeTraceSubmitDownloads();
void InitializeTraceCompleteDownloads();
protected:
bool AllocateSparseHostGpuMemoryRange(uint32_t offset_allocations,
uint32_t length_allocations) override;
bool UploadRanges(const std::vector<std::pair<uint32_t, uint32_t>>&
upload_page_ranges) override;
private:
void GetUsageMasks(Usage usage, VkPipelineStageFlags& stage_mask,
VkAccessFlags& access_mask) const;
VulkanCommandProcessor& command_processor_;
TraceWriter& trace_writer_;
VkPipelineStageFlags guest_shader_pipeline_stages_;
VkBuffer buffer_ = VK_NULL_HANDLE;
uint32_t buffer_memory_type_;
// Single for non-sparse, every allocation so far for sparse.
std::vector<VkDeviceMemory> buffer_memory_;
Usage last_usage_;
std::pair<uint32_t, uint32_t> last_written_range_;
std::unique_ptr<ui::vulkan::VulkanUploadBufferPool> upload_buffer_pool_;
std::vector<VkBufferCopy> upload_regions_;
// Created temporarily, only for downloading.
VkBuffer trace_download_buffer_ = VK_NULL_HANDLE;
VkDeviceMemory trace_download_buffer_memory_ = VK_NULL_HANDLE;
void ResetTraceDownload();
};
} // namespace vulkan
} // namespace gpu
} // namespace xe
#endif // XENIA_GPU_VULKAN_VULKAN_SHARED_MEMORY_H_

File diff suppressed because it is too large

View File

@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@ -10,22 +10,15 @@
#ifndef XENIA_GPU_VULKAN_VULKAN_TEXTURE_CACHE_H_
#define XENIA_GPU_VULKAN_VULKAN_TEXTURE_CACHE_H_
#include <algorithm>
#include <list>
#include <array>
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include "xenia/base/mutex.h"
#include "xenia/gpu/register_file.h"
#include "xenia/gpu/sampler_info.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/texture_conversion.h"
#include "xenia/gpu/texture_info.h"
#include "xenia/gpu/trace_writer.h"
#include "xenia/gpu/vulkan/vulkan_command_processor.h"
#include "xenia/gpu/xenos.h"
#include "xenia/ui/vulkan/circular_buffer.h"
#include "xenia/ui/vulkan/fenced_pools.h"
#include "xenia/base/hash.h"
#include "xenia/gpu/texture_cache.h"
#include "xenia/gpu/vulkan/vulkan_shader.h"
#include "xenia/gpu/vulkan/vulkan_shared_memory.h"
#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
@ -33,205 +26,334 @@ namespace xe {
namespace gpu {
namespace vulkan {
//
class VulkanTextureCache {
class VulkanCommandProcessor;
class VulkanTextureCache final : public TextureCache {
public:
struct TextureView;
// This represents an uploaded Vulkan texture.
struct Texture {
TextureInfo texture_info;
std::vector<std::unique_ptr<TextureView>> views;
VkFormat format;
VkImage image;
VkImageLayout image_layout;
VmaAllocation alloc;
VmaAllocationInfo alloc_info;
VkFramebuffer framebuffer; // Blit target frame buffer.
VkImageUsageFlags usage_flags;
bool is_watched;
bool pending_invalidation;
// Pointer to the latest usage fence.
VkFence in_flight_fence;
};
struct TextureView {
Texture* texture;
VkImageView view;
union {
uint16_t swizzle;
struct {
// FIXME: This only applies on little-endian platforms!
uint16_t swiz_x : 3;
uint16_t swiz_y : 3;
uint16_t swiz_z : 3;
uint16_t swiz_w : 3;
uint16_t : 4;
};
// Sampler parameters that can be directly converted to a host sampler, or
// used for checking whether sampler bindings are up to date.
union SamplerParameters {
uint32_t value;
struct {
xenos::ClampMode clamp_x : 3; // 3
xenos::ClampMode clamp_y : 3; // 6
xenos::ClampMode clamp_z : 3; // 9
xenos::BorderColor border_color : 2; // 11
uint32_t mag_linear : 1; // 12
uint32_t min_linear : 1; // 13
uint32_t mip_linear : 1; // 14
xenos::AnisoFilter aniso_filter : 3; // 17
uint32_t mip_min_level : 4; // 21
uint32_t mip_base_map : 1; // 22
// Maximum mip level is in the texture resource itself, but mip_base_map
// can be used to limit fetching to mip_min_level.
};
SamplerParameters() : value(0) { static_assert_size(*this, sizeof(value)); }
struct Hasher {
size_t operator()(const SamplerParameters& parameters) const {
return std::hash<uint32_t>{}(parameters.value);
}
};
bool operator==(const SamplerParameters& parameters) const {
return value == parameters.value;
}
bool operator!=(const SamplerParameters& parameters) const {
return value != parameters.value;
}
};
VulkanTextureCache(Memory* memory, RegisterFile* register_file,
TraceWriter* trace_writer,
ui::vulkan::VulkanProvider& provider);
~VulkanTextureCache();
VkResult Initialize();
void Shutdown();
// Descriptor set layout containing all possible texture bindings.
// The set contains one descriptor for each texture sampler [0-31].
VkDescriptorSetLayout texture_descriptor_set_layout() const {
return texture_descriptor_set_layout_;
// Transient descriptor set layouts must be initialized in the command
// processor.
static std::unique_ptr<VulkanTextureCache> Create(
const RegisterFile& register_file, VulkanSharedMemory& shared_memory,
uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y,
VulkanCommandProcessor& command_processor,
VkPipelineStageFlags guest_shader_pipeline_stages) {
std::unique_ptr<VulkanTextureCache> texture_cache(new VulkanTextureCache(
register_file, shared_memory, draw_resolution_scale_x,
draw_resolution_scale_y, command_processor,
guest_shader_pipeline_stages));
if (!texture_cache->Initialize()) {
return nullptr;
}
return std::move(texture_cache);
}
// Prepares a descriptor set containing the samplers and images for all
// bindings. The textures will be uploaded/converted/etc as needed.
// Requires a fence to be provided that will be signaled when finished
// using the returned descriptor set.
VkDescriptorSet PrepareTextureSet(
VkCommandBuffer setup_command_buffer, VkFence completion_fence,
const std::vector<Shader::TextureBinding>& vertex_bindings,
const std::vector<Shader::TextureBinding>& pixel_bindings);
~VulkanTextureCache();
// TODO(benvanik): ReadTexture.
void BeginSubmission(uint64_t new_submission_index) override;
Texture* Lookup(const TextureInfo& texture_info);
// Must be called within a frame - creates and untiles textures needed by
// shaders, and enqueues transitioning them into the sampled usage. This may
// bind compute pipelines (notifying the command processor about that), and
// also, since it may insert deferred barriers, it must be called before
// flushing the barriers preceding host GPU work.
void RequestTextures(uint32_t used_texture_mask) override;
// Looks for a texture either containing or matching these parameters.
// Caller is responsible for checking if the texture returned is an exact
// match or just contains the texture given by the parameters.
// If offset_x and offset_y are not null, this may return a texture that
// contains this address at an offset.
Texture* LookupAddress(uint32_t guest_address, uint32_t width,
uint32_t height, xenos::TextureFormat format,
VkOffset2D* out_offset = nullptr);
VkImageView GetActiveBindingOrNullImageView(uint32_t fetch_constant_index,
xenos::FetchOpDimension dimension,
bool is_signed) const;
TextureView* DemandView(Texture* texture, uint16_t swizzle);
SamplerParameters GetSamplerParameters(
const VulkanShader::SamplerBinding& binding) const;
// Demands a texture for the purpose of resolving from EDRAM. This either
// creates a new texture or returns a previously created texture.
Texture* DemandResolveTexture(const TextureInfo& texture_info);
// Must be called for every used sampler at least once in a single submission,
// and a submission must be open for this to be callable.
// Returns:
// - The sampler, if obtained successfully - and increases its last usage
// submission index - and has_overflown_out = false.
// - VK_NULL_HANDLE and has_overflown_out = true if there's a total sampler
// count overflow in a submission that potentially hasn't completed yet.
// - VK_NULL_HANDLE and has_overflown_out = false in case of a general failure
// to create a sampler.
VkSampler UseSampler(SamplerParameters parameters, bool& has_overflown_out);
// Returns the submission index to await in case of sampler count overflow, so
// that samplers may be freed and UseSampler may take their slots. The result
// may be the current submission if the overflow happened within a single
// submission - in that case, it must be ended and a new one must be started.
uint64_t GetSubmissionToAwaitOnSamplerOverflow(
uint32_t overflowed_sampler_count) const;
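// A sketch of the expected caller loop (illustrative only):
//   bool has_overflown;
//   VkSampler sampler = UseSampler(parameters, has_overflown);
//   if (sampler == VK_NULL_HANDLE && has_overflown) {
//     // Await GetSubmissionToAwaitOnSamplerOverflow(...), ending and
//     // restarting the current submission if it's the one returned, then
//     // retry UseSampler.
//   }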
// Clears all cached content.
void ClearCache();
// Returns the 2D view of the front buffer texture (for fragment shader
// reading - the barrier will be pushed in the command processor if needed),
// or VK_NULL_HANDLE in case of failure. May call LoadTextureData.
VkImageView RequestSwapTexture(uint32_t& width_scaled_out,
uint32_t& height_scaled_out,
xenos::TextureFormat& format_out);
// Frees any unused resources
void Scavenge();
protected:
bool IsSignedVersionSeparateForFormat(TextureKey key) const override;
uint32_t GetHostFormatSwizzle(TextureKey key) const override;
uint32_t GetMaxHostTextureWidthHeight(
xenos::DataDimension dimension) const override;
uint32_t GetMaxHostTextureDepthOrArraySize(
xenos::DataDimension dimension) const override;
std::unique_ptr<Texture> CreateTexture(TextureKey key) override;
bool LoadTextureDataFromResidentMemoryImpl(Texture& texture, bool load_base,
bool load_mips) override;
void UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) override;
private:
struct UpdateSetInfo;
enum LoadDescriptorSetIndex {
kLoadDescriptorSetIndexDestination,
kLoadDescriptorSetIndexSource,
kLoadDescriptorSetIndexConstants,
kLoadDescriptorSetCount,
};
struct HostFormat {
LoadShaderIndex load_shader;
// Do NOT add integer formats to this - they are not filterable, can only be
// read with ImageFetch, not ImageSample! If any game is seen using
// num_format 1 for fixed-point formats (for floating-point, it's normally
// set to 1 though), add a constant buffer containing multipliers for the
// textures and multiplication to the tfetch implementation.
VkFormat format;
// Whether the format is block-compressed on the host (the host block size
// matches the guest format block size in this case), and isn't decompressed
// on load.
bool block_compressed;
// Set up dynamically based on what's supported by the device.
bool linear_filterable;
};
struct HostFormatPair {
HostFormat format_unsigned;
HostFormat format_signed;
// Mapping of Xenos swizzle components to Vulkan format components.
uint32_t swizzle;
// Whether the unsigned and the signed formats are compatible for one image
// and the same image data (on a portability subset device, this should also
// take imageViewFormatReinterpretation into account).
bool unsigned_signed_compatible;
};
class VulkanTexture final : public Texture {
public:
enum class Usage {
kUndefined,
kTransferDestination,
kGuestShaderSampled,
kSwapSampled,
};
// Takes ownership of the image and its memory.
explicit VulkanTexture(VulkanTextureCache& texture_cache,
const TextureKey& key, VkImage image,
VmaAllocation allocation);
~VulkanTexture();
VkImage image() const { return image_; }
// Doesn't transition (the caller must insert the barrier).
Usage SetUsage(Usage new_usage) {
Usage old_usage = usage_;
usage_ = new_usage;
return old_usage;
}
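// A hypothetical transition (illustrative):
//   VulkanTexture::Usage old_usage =
//       texture.SetUsage(VulkanTexture::Usage::kGuestShaderSampled);
//   if (old_usage != VulkanTexture::Usage::kGuestShaderSampled) {
//     // Push an image memory barrier from the old to the new usage here.
//   }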
VkImageView GetView(bool is_signed, uint32_t host_swizzle,
bool is_array = true);
private:
union ViewKey {
uint32_t key;
struct {
uint32_t is_signed_separate_view : 1;
uint32_t host_swizzle : 12;
uint32_t is_array : 1;
};
ViewKey() : key(0) { static_assert_size(*this, sizeof(key)); }
struct Hasher {
size_t operator()(const ViewKey& key) const {
return std::hash<decltype(key.key)>{}(key.key);
}
};
bool operator==(const ViewKey& other_key) const {
return key == other_key.key;
}
bool operator!=(const ViewKey& other_key) const {
return !(*this == other_key);
}
};
static constexpr VkComponentSwizzle GetComponentSwizzle(
uint32_t texture_swizzle, uint32_t component_index) {
xenos::XE_GPU_TEXTURE_SWIZZLE texture_component_swizzle =
xenos::XE_GPU_TEXTURE_SWIZZLE(
(texture_swizzle >> (3 * component_index)) & 0b111);
if (texture_component_swizzle ==
xenos::XE_GPU_TEXTURE_SWIZZLE(component_index)) {
// The portability subset requires all swizzles to be IDENTITY, return
// IDENTITY specifically, not R, G, B, A.
return VK_COMPONENT_SWIZZLE_IDENTITY;
}
switch (texture_component_swizzle) {
case xenos::XE_GPU_TEXTURE_SWIZZLE_R:
return VK_COMPONENT_SWIZZLE_R;
case xenos::XE_GPU_TEXTURE_SWIZZLE_G:
return VK_COMPONENT_SWIZZLE_G;
case xenos::XE_GPU_TEXTURE_SWIZZLE_B:
return VK_COMPONENT_SWIZZLE_B;
case xenos::XE_GPU_TEXTURE_SWIZZLE_A:
return VK_COMPONENT_SWIZZLE_A;
case xenos::XE_GPU_TEXTURE_SWIZZLE_0:
return VK_COMPONENT_SWIZZLE_ZERO;
case xenos::XE_GPU_TEXTURE_SWIZZLE_1:
return VK_COMPONENT_SWIZZLE_ONE;
default:
// An invalid value.
return VK_COMPONENT_SWIZZLE_IDENTITY;
}
}
VkImage image_;
VmaAllocation allocation_;
Usage usage_ = Usage::kUndefined;
std::unordered_map<ViewKey, VkImageView, ViewKey::Hasher> views_;
};
struct VulkanTextureBinding {
VkImageView image_view_unsigned;
VkImageView image_view_signed;
VulkanTextureBinding() { Reset(); }
void Reset() {
image_view_unsigned = VK_NULL_HANDLE;
image_view_signed = VK_NULL_HANDLE;
}
};
// Cached Vulkan sampler.
struct Sampler {
SamplerInfo sampler_info;
VkSampler sampler;
uint64_t last_usage_submission;
std::pair<const SamplerParameters, Sampler>* used_previous;
std::pair<const SamplerParameters, Sampler>* used_next;
};
struct WatchedTexture {
Texture* texture;
bool is_mip;
};
static constexpr bool AreDimensionsCompatible(
xenos::FetchOpDimension binding_dimension,
xenos::DataDimension resource_dimension) {
switch (binding_dimension) {
case xenos::FetchOpDimension::k1D:
case xenos::FetchOpDimension::k2D:
return resource_dimension == xenos::DataDimension::k1D ||
resource_dimension == xenos::DataDimension::k2DOrStacked;
case xenos::FetchOpDimension::k3DOrStacked:
return resource_dimension == xenos::DataDimension::k3D;
case xenos::FetchOpDimension::kCube:
return resource_dimension == xenos::DataDimension::kCube;
default:
return false;
}
}
// Allocates a new texture and memory to back it on the GPU.
Texture* AllocateTexture(const TextureInfo& texture_info,
VkFormatFeatureFlags required_flags =
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT);
bool FreeTexture(Texture* texture);
explicit VulkanTextureCache(
const RegisterFile& register_file, VulkanSharedMemory& shared_memory,
uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y,
VulkanCommandProcessor& command_processor,
VkPipelineStageFlags guest_shader_pipeline_stages);
void WatchTexture(Texture* texture);
void TextureTouched(Texture* texture);
std::pair<uint32_t, uint32_t> MemoryInvalidationCallback(
uint32_t physical_address_start, uint32_t length, bool exact_range);
static std::pair<uint32_t, uint32_t> MemoryInvalidationCallbackThunk(
void* context_ptr, uint32_t physical_address_start, uint32_t length,
bool exact_range);
bool Initialize();
// Demands a texture. If command_buffer is null and the texture hasn't been
// uploaded to graphics memory already, we will return null and bail.
Texture* Demand(const TextureInfo& texture_info,
VkCommandBuffer command_buffer = nullptr,
VkFence completion_fence = nullptr);
Sampler* Demand(const SamplerInfo& sampler_info);
const HostFormatPair& GetHostFormatPair(TextureKey key) const;
void FlushPendingCommands(VkCommandBuffer command_buffer,
VkFence completion_fence);
void GetTextureUsageMasks(VulkanTexture::Usage usage,
VkPipelineStageFlags& stage_mask,
VkAccessFlags& access_mask, VkImageLayout& layout);
bool ConvertTexture(uint8_t* dest, VkBufferImageCopy* copy_region,
uint32_t mip, const TextureInfo& src);
xenos::ClampMode NormalizeClampMode(xenos::ClampMode clamp_mode) const;
static const FormatInfo* GetFormatInfo(xenos::TextureFormat format);
static texture_conversion::CopyBlockCallback GetFormatCopyBlock(
xenos::TextureFormat format);
static TextureExtent GetMipExtent(const TextureInfo& src, uint32_t mip);
static uint32_t ComputeMipStorage(const FormatInfo* format_info,
uint32_t width, uint32_t height,
uint32_t depth, uint32_t mip);
static uint32_t ComputeMipStorage(const TextureInfo& src, uint32_t mip);
static uint32_t ComputeTextureStorage(const TextureInfo& src);
VulkanCommandProcessor& command_processor_;
VkPipelineStageFlags guest_shader_pipeline_stages_;
// Writes a texture back into guest memory. This call is (mostly) asynchronous
// but the texture must not be flagged for destruction.
void WritebackTexture(Texture* texture);
// Using the Vulkan Memory Allocator because texture count in games is
// naturally pretty much unbounded, while Vulkan implementations, especially
// on Windows versions before 10, may have an allocation count limit as low as
// 4096.
VmaAllocator vma_allocator_ = VK_NULL_HANDLE;
// Queues commands to upload a texture from system memory, applying any
// conversions necessary. This may flush the command buffer to the GPU if we
// run out of staging memory.
bool UploadTexture(VkCommandBuffer command_buffer, VkFence completion_fence,
Texture* dest, const TextureInfo& src);
static const HostFormatPair kBestHostFormats[64];
static const HostFormatPair kHostFormatGBGRUnaligned;
static const HostFormatPair kHostFormatBGRGUnaligned;
HostFormatPair host_formats_[64];
void HashTextureBindings(XXH3_state_t* hash_state, uint32_t& fetch_mask,
const std::vector<Shader::TextureBinding>& bindings);
bool SetupTextureBindings(
VkCommandBuffer command_buffer, VkFence completion_fence,
UpdateSetInfo* update_set_info,
const std::vector<Shader::TextureBinding>& bindings);
bool SetupTextureBinding(VkCommandBuffer command_buffer,
VkFence completion_fence,
UpdateSetInfo* update_set_info,
const Shader::TextureBinding& binding);
VkPipelineLayout load_pipeline_layout_ = VK_NULL_HANDLE;
std::array<VkPipeline, kLoadShaderCount> load_pipelines_{};
std::array<VkPipeline, kLoadShaderCount> load_pipelines_scaled_{};
// Removes invalidated textures from the cache, queues them for delete.
void RemoveInvalidatedTextures();
// If both images can be placed in the same allocation, one allocation is
// used; otherwise, two separate ones are.
std::array<VkDeviceMemory, 2> null_images_memory_{};
VkImage null_image_2d_array_cube_ = VK_NULL_HANDLE;
VkImage null_image_3d_ = VK_NULL_HANDLE;
VkImageView null_image_view_2d_array_ = VK_NULL_HANDLE;
VkImageView null_image_view_cube_ = VK_NULL_HANDLE;
VkImageView null_image_view_3d_ = VK_NULL_HANDLE;
bool null_images_cleared_ = false;
Memory* memory_ = nullptr;
std::array<VulkanTextureBinding, xenos::kTextureFetchConstantCount>
vulkan_texture_bindings_;
RegisterFile* register_file_ = nullptr;
TraceWriter* trace_writer_ = nullptr;
ui::vulkan::VulkanProvider& provider_;
uint32_t sampler_max_count_;
std::unique_ptr<xe::ui::vulkan::CommandBufferPool> wb_command_pool_ = nullptr;
std::unique_ptr<xe::ui::vulkan::DescriptorPool> descriptor_pool_ = nullptr;
std::unordered_map<uint64_t, VkDescriptorSet> texture_sets_;
VkDescriptorSetLayout texture_descriptor_set_layout_ = nullptr;
xenos::AnisoFilter max_anisotropy_;
VmaAllocator mem_allocator_ = nullptr;
ui::vulkan::CircularBuffer staging_buffer_;
ui::vulkan::CircularBuffer wb_staging_buffer_;
std::unordered_map<uint64_t, Texture*> textures_;
std::unordered_map<uint64_t, Sampler*> samplers_;
std::list<Texture*> pending_delete_textures_;
void* memory_invalidation_callback_handle_ = nullptr;
xe::global_critical_region global_critical_region_;
std::list<WatchedTexture> watched_textures_;
std::unordered_set<Texture*>* invalidated_textures_;
std::unordered_set<Texture*> invalidated_textures_sets_[2];
struct UpdateSetInfo {
// Bitmap of all 32 fetch constants and whether they have been setup yet.
// This prevents duplication across the vertex and pixel shader.
uint32_t has_setup_fetch_mask;
uint32_t image_write_count = 0;
VkWriteDescriptorSet image_writes[32];
VkDescriptorImageInfo image_infos[32];
} update_set_info_;
std::unordered_map<SamplerParameters, Sampler, SamplerParameters::Hasher>
samplers_;
std::pair<const SamplerParameters, Sampler>* sampler_used_first_ = nullptr;
std::pair<const SamplerParameters, Sampler>* sampler_used_last_ = nullptr;
};
} // namespace vulkan

View File

@ -2,7 +2,7 @@
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2021 Ben Vanik. All rights reserved. *
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
@ -19,8 +19,6 @@ namespace xe {
namespace gpu {
namespace vulkan {
using namespace xe::gpu::xenos;
class VulkanTraceViewer final : public TraceViewer {
public:
static std::unique_ptr<WindowedApp> Create(
@ -35,36 +33,21 @@ class VulkanTraceViewer final : public TraceViewer {
uintptr_t GetColorRenderTarget(
uint32_t pitch, xenos::MsaaSamples samples, uint32_t base,
xenos::ColorRenderTargetFormat format) override {
auto command_processor = static_cast<VulkanCommandProcessor*>(
graphics_system()->command_processor());
// return command_processor->GetColorRenderTarget(pitch, samples, base,
// format);
// TODO(Triang3l): EDRAM viewer.
return 0;
}
uintptr_t GetDepthRenderTarget(
uint32_t pitch, xenos::MsaaSamples samples, uint32_t base,
xenos::DepthRenderTargetFormat format) override {
auto command_processor = static_cast<VulkanCommandProcessor*>(
graphics_system()->command_processor());
// return command_processor->GetDepthRenderTarget(pitch, samples, base,
// format);
// TODO(Triang3l): EDRAM viewer.
return 0;
}
uintptr_t GetTextureEntry(const TextureInfo& texture_info,
const SamplerInfo& sampler_info) override {
auto command_processor = static_cast<VulkanCommandProcessor*>(
graphics_system()->command_processor());
// auto entry_view =
// command_processor->texture_cache()->Demand(texture_info,
// sampler_info);
// if (!entry_view) {
// return 0;
//}
// auto texture = entry_view->texture;
// return static_cast<uintptr_t>(texture->handle);
// TODO(Triang3l): Textures, but from a fetch constant rather than
// TextureInfo/SamplerInfo which are going away.
return 0;
}

View File

@ -1,19 +0,0 @@
project_root = "../../../.."
include(project_root.."/tools/build")
group("src")
project("xenia-ui-spirv")
uuid("2323a069-5b29-44a3-b524-f35451a81978")
kind("StaticLib")
language("C++")
links({
"glslang-spirv",
"spirv-tools",
"xenia-base",
})
defines({
})
includedirs({
project_root.."/third_party/spirv-tools/external/include",
})
local_platform_files()

View File

@ -1,78 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/spirv/spirv_assembler.h"
#include "third_party/spirv-tools/include/spirv-tools/libspirv.h"
#include "xenia/base/logging.h"
namespace xe {
namespace ui {
namespace spirv {
SpirvAssembler::Result::Result(spv_binary binary, spv_diagnostic diagnostic)
: binary_(binary), diagnostic_(diagnostic) {}
SpirvAssembler::Result::~Result() {
if (binary_) {
spvBinaryDestroy(binary_);
}
if (diagnostic_) {
spvDiagnosticDestroy(diagnostic_);
}
}
bool SpirvAssembler::Result::has_error() const { return !!diagnostic_; }
size_t SpirvAssembler::Result::error_source_line() const {
return diagnostic_ ? diagnostic_->position.line : 0;
}
size_t SpirvAssembler::Result::error_source_column() const {
return diagnostic_ ? diagnostic_->position.column : 0;
}
const char* SpirvAssembler::Result::error_string() const {
return diagnostic_ ? diagnostic_->error : "";
}
const uint32_t* SpirvAssembler::Result::words() const {
return binary_ ? binary_->code : nullptr;
}
size_t SpirvAssembler::Result::word_count() const {
return binary_ ? binary_->wordCount : 0;
}
SpirvAssembler::SpirvAssembler()
: spv_context_(spvContextCreate(SPV_ENV_VULKAN_1_0)) {}
SpirvAssembler::~SpirvAssembler() { spvContextDestroy(spv_context_); }
std::unique_ptr<SpirvAssembler::Result> SpirvAssembler::Assemble(
const char* source_text, size_t source_text_length) {
spv_binary binary = nullptr;
spv_diagnostic diagnostic = nullptr;
auto result_code = spvTextToBinary(spv_context_, source_text,
source_text_length, &binary, &diagnostic);
std::unique_ptr<Result> result(new Result(binary, diagnostic));
if (result_code) {
XELOGE("Failed to assemble spv: {}", result_code);
if (result->has_error()) {
return result;
} else {
return nullptr;
}
}
return result;
}
} // namespace spirv
} // namespace ui
} // namespace xe

View File

@ -1,69 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_SPIRV_SPIRV_ASSEMBLER_H_
#define XENIA_UI_SPIRV_SPIRV_ASSEMBLER_H_
#include <memory>
#include <string>
#include "xenia/ui/spirv/spirv_util.h"
namespace xe {
namespace ui {
namespace spirv {
class SpirvAssembler {
public:
class Result {
public:
Result(spv_binary binary, spv_diagnostic diagnostic);
~Result();
// True if the result has an error associated with it.
bool has_error() const;
// Line of the error in the provided source text.
size_t error_source_line() const;
// Column of the error in the provided source text.
size_t error_source_column() const;
// Human-readable description of the error.
const char* error_string() const;
// Assembled SPIRV binary.
// Returned pointer lifetime is tied to this Result instance.
const uint32_t* words() const;
// Size of the SPIRV binary, in words.
size_t word_count() const;
private:
spv_binary binary_ = nullptr;
spv_diagnostic diagnostic_ = nullptr;
};
SpirvAssembler();
~SpirvAssembler();
// Assembles the given source text into a SPIRV binary.
// The return will be nullptr if assembly fails due to a library error.
// The return may have an error set on it if the source text is malformed.
std::unique_ptr<Result> Assemble(const char* source_text,
size_t source_text_length);
std::unique_ptr<Result> Assemble(const std::string_view source_text) {
return Assemble(source_text.data(), source_text.size());
}
private:
spv_context spv_context_ = nullptr;
};
} // namespace spirv
} // namespace ui
} // namespace xe
#endif // XENIA_UI_SPIRV_SPIRV_ASSEMBLER_H_

View File

@ -1,82 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/spirv/spirv_disassembler.h"
#include "third_party/spirv-tools/include/spirv-tools/libspirv.h"
#include "xenia/base/logging.h"
namespace xe {
namespace ui {
namespace spirv {
SpirvDisassembler::Result::Result(spv_text text, spv_diagnostic diagnostic)
: text_(text), diagnostic_(diagnostic) {}
SpirvDisassembler::Result::~Result() {
if (text_) {
spvTextDestroy(text_);
}
if (diagnostic_) {
spvDiagnosticDestroy(diagnostic_);
}
}
bool SpirvDisassembler::Result::has_error() const { return !!diagnostic_; }
size_t SpirvDisassembler::Result::error_word_index() const {
return diagnostic_ ? diagnostic_->position.index : 0;
}
const char* SpirvDisassembler::Result::error_string() const {
return diagnostic_ ? diagnostic_->error : "";
}
const char* SpirvDisassembler::Result::text() const {
return text_ ? text_->str : "";
}
std::string SpirvDisassembler::Result::to_string() const {
return text_ ? std::string(text_->str, text_->length) : "";
}
void SpirvDisassembler::Result::AppendText(StringBuffer* target_buffer) const {
if (text_) {
target_buffer->AppendBytes(reinterpret_cast<const uint8_t*>(text_->str),
text_->length);
}
}
SpirvDisassembler::SpirvDisassembler()
: spv_context_(spvContextCreate(SPV_ENV_VULKAN_1_0)) {}
SpirvDisassembler::~SpirvDisassembler() { spvContextDestroy(spv_context_); }
std::unique_ptr<SpirvDisassembler::Result> SpirvDisassembler::Disassemble(
const uint32_t* words, size_t word_count) {
spv_text text = nullptr;
spv_diagnostic diagnostic = nullptr;
auto result_code =
spvBinaryToText(spv_context_, words, word_count,
SPV_BINARY_TO_TEXT_OPTION_INDENT, &text, &diagnostic);
std::unique_ptr<Result> result(new Result(text, diagnostic));
if (result_code) {
XELOGE("Failed to disassemble spv: {}", result_code);
if (result->has_error()) {
return result;
} else {
return nullptr;
}
}
return result;
}
} // namespace spirv
} // namespace ui
} // namespace xe

View File

@ -1,66 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_SPIRV_SPIRV_DISASSEMBLER_H_
#define XENIA_UI_SPIRV_SPIRV_DISASSEMBLER_H_
#include <memory>
#include <string>
#include "xenia/base/string_buffer.h"
#include "xenia/ui/spirv/spirv_util.h"
namespace xe {
namespace ui {
namespace spirv {
class SpirvDisassembler {
public:
class Result {
public:
Result(spv_text text, spv_diagnostic diagnostic);
~Result();
// True if the result has an error associated with it.
bool has_error() const;
// Index of the error in the provided binary word data.
size_t error_word_index() const;
// Human-readable description of the error.
const char* error_string() const;
// Disassembled source text.
// Returned pointer lifetime is tied to this Result instance.
const char* text() const;
// Converts the disassembled source text to a string.
std::string to_string() const;
// Appends the disassembled source text to the given buffer.
void AppendText(StringBuffer* target_buffer) const;
private:
spv_text text_ = nullptr;
spv_diagnostic diagnostic_ = nullptr;
};
SpirvDisassembler();
~SpirvDisassembler();
// Disassembles the given SPIRV binary.
// The return will be nullptr if disassembly fails due to a library error.
// The return may have an error set on it if the SPIRV binary is malformed.
std::unique_ptr<Result> Disassemble(const uint32_t* words, size_t word_count);
private:
spv_context spv_context_ = nullptr;
};
} // namespace spirv
} // namespace ui
} // namespace xe
#endif // XENIA_UI_SPIRV_SPIRV_DISASSEMBLER_H_

View File

@ -1,20 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/spirv/spirv_util.h"
namespace xe {
namespace ui {
namespace spirv {
//
} // namespace spirv
} // namespace ui
} // namespace xe

View File

@ -1,36 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_SPIRV_SPIRV_UTIL_H_
#define XENIA_UI_SPIRV_SPIRV_UTIL_H_
#include "third_party/spirv-headers/include/spirv/1.1/spirv.hpp11"
#include "third_party/spirv/GLSL.std.450.hpp11"
// Forward declarations from SPIRV-Tools so we don't pollute /so/ much.
struct spv_binary_t;
typedef spv_binary_t* spv_binary;
struct spv_context_t;
typedef spv_context_t* spv_context;
struct spv_diagnostic_t;
typedef spv_diagnostic_t* spv_diagnostic;
struct spv_text_t;
typedef spv_text_t* spv_text;
namespace xe {
namespace ui {
namespace spirv {
//
} // namespace spirv
} // namespace ui
} // namespace xe
#endif // XENIA_UI_SPIRV_SPIRV_UTIL_H_

View File

@ -1,80 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/spirv/spirv_validator.h"
#include "third_party/spirv-tools/include/spirv-tools/libspirv.h"
#include "xenia/base/logging.h"
namespace xe {
namespace ui {
namespace spirv {
SpirvValidator::Result::Result(spv_text text, spv_diagnostic diagnostic)
: text_(text), diagnostic_(diagnostic) {}
SpirvValidator::Result::~Result() {
if (text_) {
spvTextDestroy(text_);
}
if (diagnostic_) {
spvDiagnosticDestroy(diagnostic_);
}
}
bool SpirvValidator::Result::has_error() const { return !!diagnostic_; }
size_t SpirvValidator::Result::error_word_index() const {
return diagnostic_ ? diagnostic_->position.index : 0;
}
const char* SpirvValidator::Result::error_string() const {
return diagnostic_ ? diagnostic_->error : "";
}
const char* SpirvValidator::Result::text() const {
return text_ ? text_->str : "";
}
std::string SpirvValidator::Result::to_string() const {
return text_ ? std::string(text_->str, text_->length) : "";
}
void SpirvValidator::Result::AppendText(StringBuffer* target_buffer) const {
if (text_) {
target_buffer->AppendBytes(reinterpret_cast<const uint8_t*>(text_->str),
text_->length);
}
}
SpirvValidator::SpirvValidator()
: spv_context_(spvContextCreate(SPV_ENV_UNIVERSAL_1_1)) {}
SpirvValidator::~SpirvValidator() { spvContextDestroy(spv_context_); }
std::unique_ptr<SpirvValidator::Result> SpirvValidator::Validate(
const uint32_t* words, size_t word_count) {
spv_text text = nullptr;
spv_diagnostic diagnostic = nullptr;
spv_const_binary_t binary = {words, word_count};
auto result_code = spvValidate(spv_context_, &binary, &diagnostic);
std::unique_ptr<Result> result(new Result(text, diagnostic));
if (result_code) {
XELOGE("Failed to validate spv: {}", result_code);
if (result->has_error()) {
return result;
} else {
return nullptr;
}
}
return result;
}
} // namespace spirv
} // namespace ui
} // namespace xe

View File

@ -1,66 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_
#define XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_
#include <memory>
#include <string>
#include "xenia/base/string_buffer.h"
#include "xenia/ui/spirv/spirv_util.h"
namespace xe {
namespace ui {
namespace spirv {
class SpirvValidator {
public:
class Result {
public:
Result(spv_text text, spv_diagnostic diagnostic);
~Result();
// True if the result has an error associated with it.
bool has_error() const;
// Index of the error in the provided binary word data.
size_t error_word_index() const;
// Human-readable description of the error.
const char* error_string() const;
// Disassembled source text.
// Returned pointer lifetime is tied to this Result instance.
const char* text() const;
// Converts the disassembled source text to a string.
std::string to_string() const;
// Appends the disassembled source text to the given buffer.
void AppendText(StringBuffer* target_buffer) const;
private:
spv_text text_ = nullptr;
spv_diagnostic diagnostic_ = nullptr;
};
SpirvValidator();
~SpirvValidator();
// Validates the given SPIRV binary.
// The return will be nullptr if validation fails due to a library error.
// The return may have an error set on it if the SPIRV binary is malformed.
std::unique_ptr<Result> Validate(const uint32_t* words, size_t word_count);
private:
spv_context spv_context_ = nullptr;
};
} // namespace spirv
} // namespace ui
} // namespace xe
#endif // XENIA_UI_SPIRV_SPIRV_VALIDATOR_H_

View File

@ -1,574 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/vulkan/blitter.h"
#include "xenia/base/math.h"
#include "xenia/ui/vulkan/fenced_pools.h"
#include "xenia/ui/vulkan/vulkan_util.h"
namespace xe {
namespace ui {
namespace vulkan {
using util::CheckResult;
// Generated with `xb buildshaders`.
namespace shaders {
#include "xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_color_ps.h"
#include "xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_depth_ps.h"
#include "xenia/ui/vulkan/shaders/bytecode/vulkan_spirv/blit_vs.h"
} // namespace shaders
Blitter::Blitter(const VulkanProvider& provider) : provider_(provider) {}
Blitter::~Blitter() { Shutdown(); }
VkResult Blitter::Initialize() {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
VkResult status = VK_SUCCESS;
// Shaders
VkShaderModuleCreateInfo shader_create_info;
std::memset(&shader_create_info, 0, sizeof(shader_create_info));
shader_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
shader_create_info.codeSize = sizeof(shaders::blit_vs);
shader_create_info.pCode = shaders::blit_vs;
status = dfn.vkCreateShaderModule(device, &shader_create_info, nullptr,
&blit_vertex_);
CheckResult(status, "vkCreateShaderModule");
if (status != VK_SUCCESS) {
return status;
}
provider_.SetDeviceObjectName(VK_OBJECT_TYPE_SHADER_MODULE,
uint64_t(blit_vertex_), "S(B): Vertex");
shader_create_info.codeSize = sizeof(shaders::blit_color_ps);
shader_create_info.pCode = shaders::blit_color_ps;
status = dfn.vkCreateShaderModule(device, &shader_create_info, nullptr,
&blit_color_);
CheckResult(status, "vkCreateShaderModule");
if (status != VK_SUCCESS) {
return status;
}
provider_.SetDeviceObjectName(VK_OBJECT_TYPE_SHADER_MODULE,
uint64_t(blit_color_), "S(B): Color");
shader_create_info.codeSize = sizeof(shaders::blit_depth_ps);
shader_create_info.pCode = shaders::blit_depth_ps;
status = dfn.vkCreateShaderModule(device, &shader_create_info, nullptr,
&blit_depth_);
CheckResult(status, "vkCreateShaderModule");
if (status != VK_SUCCESS) {
return status;
}
provider_.SetDeviceObjectName(VK_OBJECT_TYPE_SHADER_MODULE,
uint64_t(blit_depth_), "S(B): Depth");
// Create the descriptor set layout used for our texture sampler.
// As it changes almost every draw we cache it per texture.
VkDescriptorSetLayoutCreateInfo texture_set_layout_info;
texture_set_layout_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
texture_set_layout_info.pNext = nullptr;
texture_set_layout_info.flags = 0;
texture_set_layout_info.bindingCount = 1;
VkDescriptorSetLayoutBinding texture_binding;
texture_binding.binding = 0;
texture_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
texture_binding.descriptorCount = 1;
texture_binding.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
texture_binding.pImmutableSamplers = nullptr;
texture_set_layout_info.pBindings = &texture_binding;
status = dfn.vkCreateDescriptorSetLayout(device, &texture_set_layout_info,
nullptr, &descriptor_set_layout_);
CheckResult(status, "vkCreateDescriptorSetLayout");
if (status != VK_SUCCESS) {
return status;
}
// Create a descriptor pool
VkDescriptorPoolSize pool_sizes[1];
pool_sizes[0].descriptorCount = 4096;
pool_sizes[0].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
descriptor_pool_ = std::make_unique<DescriptorPool>(
provider_, 4096,
std::vector<VkDescriptorPoolSize>(pool_sizes, std::end(pool_sizes)));
// Create the pipeline layout used for our pipeline.
VkPipelineLayoutCreateInfo pipeline_layout_info;
pipeline_layout_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
pipeline_layout_info.pNext = nullptr;
pipeline_layout_info.flags = 0;
VkDescriptorSetLayout set_layouts[] = {descriptor_set_layout_};
pipeline_layout_info.setLayoutCount =
static_cast<uint32_t>(xe::countof(set_layouts));
pipeline_layout_info.pSetLayouts = set_layouts;
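// Push constant layout shared with the shaders: VtxPushConstants occupies
// bytes [0x00, 0x20) for the vertex stage and PixPushConstants occupies
// bytes [0x20, 0x30) for the fragment stage, matching the
// layout(offset = ...) qualifiers in the GLSL push_constant blocks.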
VkPushConstantRange push_constant_ranges[2];
push_constant_ranges[0].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
push_constant_ranges[0].offset = 0;
push_constant_ranges[0].size = sizeof(VtxPushConstants);
push_constant_ranges[1].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
push_constant_ranges[1].offset = sizeof(VtxPushConstants);
push_constant_ranges[1].size = sizeof(PixPushConstants);
pipeline_layout_info.pushConstantRangeCount =
static_cast<uint32_t>(xe::countof(push_constant_ranges));
pipeline_layout_info.pPushConstantRanges = push_constant_ranges;
status = dfn.vkCreatePipelineLayout(device, &pipeline_layout_info, nullptr,
&pipeline_layout_);
CheckResult(status, "vkCreatePipelineLayout");
if (status != VK_SUCCESS) {
return status;
}
// Create two samplers.
VkSamplerCreateInfo sampler_create_info = {
VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
nullptr,
0,
VK_FILTER_NEAREST,
VK_FILTER_NEAREST,
VK_SAMPLER_MIPMAP_MODE_NEAREST,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
0.f,
VK_FALSE,
1.f,
VK_FALSE,
VK_COMPARE_OP_NEVER,
0.f,
0.f,
VK_BORDER_COLOR_INT_TRANSPARENT_BLACK,
VK_FALSE,
};
status = dfn.vkCreateSampler(device, &sampler_create_info, nullptr,
&samp_nearest_);
CheckResult(status, "vkCreateSampler");
if (status != VK_SUCCESS) {
return status;
}
sampler_create_info.minFilter = VK_FILTER_LINEAR;
sampler_create_info.magFilter = VK_FILTER_LINEAR;
sampler_create_info.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
status =
dfn.vkCreateSampler(device, &sampler_create_info, nullptr, &samp_linear_);
CheckResult(status, "vkCreateSampler");
if (status != VK_SUCCESS) {
return status;
}
return VK_SUCCESS;
}
void Blitter::Shutdown() {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
util::DestroyAndNullHandle(dfn.vkDestroySampler, device, samp_nearest_);
util::DestroyAndNullHandle(dfn.vkDestroySampler, device, samp_linear_);
util::DestroyAndNullHandle(dfn.vkDestroyShaderModule, device, blit_vertex_);
util::DestroyAndNullHandle(dfn.vkDestroyShaderModule, device, blit_color_);
util::DestroyAndNullHandle(dfn.vkDestroyShaderModule, device, blit_depth_);
util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, pipeline_color_);
util::DestroyAndNullHandle(dfn.vkDestroyPipeline, device, pipeline_depth_);
util::DestroyAndNullHandle(dfn.vkDestroyPipelineLayout, device,
pipeline_layout_);
util::DestroyAndNullHandle(dfn.vkDestroyDescriptorSetLayout, device,
descriptor_set_layout_);
for (auto& pipeline : pipelines_) {
dfn.vkDestroyPipeline(device, pipeline.second, nullptr);
}
pipelines_.clear();
for (auto& pass : render_passes_) {
dfn.vkDestroyRenderPass(device, pass.second, nullptr);
}
render_passes_.clear();
}
void Blitter::Scavenge() {
if (descriptor_pool_->has_open_batch()) {
descriptor_pool_->EndBatch();
}
descriptor_pool_->Scavenge();
}
void Blitter::BlitTexture2D(VkCommandBuffer command_buffer, VkFence fence,
VkImageView src_image_view, VkRect2D src_rect,
VkExtent2D src_extents, VkFormat dst_image_format,
VkRect2D dst_rect, VkExtent2D dst_extents,
VkFramebuffer dst_framebuffer, VkViewport viewport,
VkRect2D scissor, VkFilter filter,
bool color_or_depth, bool swap_channels) {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
// A full draw is always used here; a cheaper vkCmdBlitImage fast path could
// cover the unswizzled case but is not implemented.
bool full_draw = true;
if (full_draw) {
if (!descriptor_pool_->has_open_batch()) {
descriptor_pool_->BeginBatch(fence);
}
// Acquire a render pass.
auto render_pass = GetRenderPass(dst_image_format, color_or_depth);
VkRenderPassBeginInfo render_pass_info = {
VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
nullptr,
render_pass,
dst_framebuffer,
{{0, 0}, dst_extents},
0,
nullptr,
};
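// clearValueCount is zero because the render pass loads the existing
// attachment contents (VK_ATTACHMENT_LOAD_OP_LOAD) instead of clearing them.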
dfn.vkCmdBeginRenderPass(command_buffer, &render_pass_info,
VK_SUBPASS_CONTENTS_INLINE);
dfn.vkCmdSetViewport(command_buffer, 0, 1, &viewport);
dfn.vkCmdSetScissor(command_buffer, 0, 1, &scissor);
// Acquire a pipeline.
auto pipeline =
GetPipeline(render_pass, color_or_depth ? blit_color_ : blit_depth_,
color_or_depth);
dfn.vkCmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
pipeline);
// Acquire and update a descriptor set for this image.
auto set = descriptor_pool_->AcquireEntry(descriptor_set_layout_);
if (!set) {
assert_always();
descriptor_pool_->CancelBatch();
return;
}
VkWriteDescriptorSet write;
write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
write.pNext = nullptr;
write.dstSet = set;
write.dstBinding = 0;
write.dstArrayElement = 0;
write.descriptorCount = 1;
write.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
VkDescriptorImageInfo image;
image.sampler = filter == VK_FILTER_NEAREST ? samp_nearest_ : samp_linear_;
image.imageView = src_image_view;
image.imageLayout = VK_IMAGE_LAYOUT_GENERAL;
write.pImageInfo = &image;
write.pBufferInfo = nullptr;
write.pTexelBufferView = nullptr;
dfn.vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);
dfn.vkCmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
pipeline_layout_, 0, 1, &set, 0, nullptr);
VtxPushConstants vtx_constants = {
{
float(src_rect.offset.x) / src_extents.width,
float(src_rect.offset.y) / src_extents.height,
float(src_rect.extent.width) / src_extents.width,
float(src_rect.extent.height) / src_extents.height,
},
{
float(dst_rect.offset.x) / dst_extents.width,
float(dst_rect.offset.y) / dst_extents.height,
float(dst_rect.extent.width) / dst_extents.width,
float(dst_rect.extent.height) / dst_extents.height,
},
};
dfn.vkCmdPushConstants(command_buffer, pipeline_layout_,
VK_SHADER_STAGE_VERTEX_BIT, 0,
sizeof(VtxPushConstants), &vtx_constants);
PixPushConstants pix_constants = {
0,
0,
0,
swap_channels ? 1 : 0,
};
dfn.vkCmdPushConstants(
command_buffer, pipeline_layout_, VK_SHADER_STAGE_FRAGMENT_BIT,
sizeof(VtxPushConstants), sizeof(PixPushConstants), &pix_constants);
dfn.vkCmdDraw(command_buffer, 4, 1, 0, 0);
dfn.vkCmdEndRenderPass(command_buffer);
}
}
void Blitter::CopyColorTexture2D(VkCommandBuffer command_buffer, VkFence fence,
                                 VkImage src_image, VkImageView src_image_view,
                                 VkOffset2D src_offset, VkImage dst_image,
                                 VkImageView dst_image_view, VkExtent2D extents,
                                 VkFilter filter, bool swap_channels) {
  // Intentionally left unimplemented.
}
void Blitter::CopyDepthTexture(VkCommandBuffer command_buffer, VkFence fence,
                               VkImage src_image, VkImageView src_image_view,
                               VkOffset2D src_offset, VkImage dst_image,
                               VkImageView dst_image_view, VkExtent2D extents) {
  // Intentionally left unimplemented.
}
VkRenderPass Blitter::GetRenderPass(VkFormat format, bool color_or_depth) {
auto pass = render_passes_.find(format);
if (pass != render_passes_.end()) {
return pass->second;
}
// Create and cache the render pass.
VkRenderPass render_pass = CreateRenderPass(format, color_or_depth);
if (render_pass) {
render_passes_[format] = render_pass;
}
return render_pass;
}
VkPipeline Blitter::GetPipeline(VkRenderPass render_pass,
VkShaderModule frag_shader,
bool color_or_depth) {
auto it = pipelines_.find(std::make_pair(render_pass, frag_shader));
if (it != pipelines_.end()) {
return it->second;
}
// Create and cache the pipeline.
VkPipeline pipeline =
CreatePipeline(render_pass, frag_shader, color_or_depth);
if (pipeline) {
pipelines_[std::make_pair(render_pass, frag_shader)] = pipeline;
}
return pipeline;
}
VkRenderPass Blitter::CreateRenderPass(VkFormat output_format,
bool color_or_depth) {
VkAttachmentDescription attachments[1];
std::memset(attachments, 0, sizeof(attachments));
// Output attachment
attachments[0].flags = 0;
attachments[0].format = output_format;
attachments[0].samples = VK_SAMPLE_COUNT_1_BIT;
attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
attachments[0].initialLayout =
color_or_depth ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
: VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
attachments[0].finalLayout = attachments[0].initialLayout;
VkAttachmentReference attach_refs[1];
attach_refs[0].attachment = 0;
attach_refs[0].layout =
color_or_depth ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
: VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
VkSubpassDescription subpass = {
0, VK_PIPELINE_BIND_POINT_GRAPHICS,
0, nullptr,
0, nullptr,
nullptr, nullptr,
0, nullptr,
};
if (color_or_depth) {
subpass.colorAttachmentCount = 1;
subpass.pColorAttachments = attach_refs;
} else {
subpass.pDepthStencilAttachment = attach_refs;
}
VkRenderPassCreateInfo renderpass_info = {
VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
nullptr,
0,
1,
attachments,
1,
&subpass,
0,
nullptr,
};
VkRenderPass renderpass = nullptr;
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
VkResult result =
dfn.vkCreateRenderPass(device, &renderpass_info, nullptr, &renderpass);
CheckResult(result, "vkCreateRenderPass");
return renderpass;
}
VkPipeline Blitter::CreatePipeline(VkRenderPass render_pass,
VkShaderModule frag_shader,
bool color_or_depth) {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
VkResult result = VK_SUCCESS;
// Pipeline
VkGraphicsPipelineCreateInfo pipeline_info;
std::memset(&pipeline_info, 0, sizeof(VkGraphicsPipelineCreateInfo));
pipeline_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
// Shaders
pipeline_info.stageCount = 2;
VkPipelineShaderStageCreateInfo stages[2];
std::memset(stages, 0, sizeof(stages));
stages[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stages[0].stage = VK_SHADER_STAGE_VERTEX_BIT;
stages[0].module = blit_vertex_;
stages[0].pName = "main";
stages[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
stages[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
stages[1].module = frag_shader;
stages[1].pName = "main";
pipeline_info.pStages = stages;
// Vertex input
VkPipelineVertexInputStateCreateInfo vtx_state;
std::memset(&vtx_state, 0, sizeof(vtx_state));
vtx_state.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
vtx_state.flags = 0;
vtx_state.vertexAttributeDescriptionCount = 0;
vtx_state.pVertexAttributeDescriptions = nullptr;
vtx_state.vertexBindingDescriptionCount = 0;
vtx_state.pVertexBindingDescriptions = nullptr;
pipeline_info.pVertexInputState = &vtx_state;
// Input Assembly
VkPipelineInputAssemblyStateCreateInfo input_info;
input_info.sType =
VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
input_info.pNext = nullptr;
input_info.flags = 0;
input_info.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP;
input_info.primitiveRestartEnable = VK_FALSE;
pipeline_info.pInputAssemblyState = &input_info;
pipeline_info.pTessellationState = nullptr;
VkPipelineViewportStateCreateInfo viewport_state_info;
viewport_state_info.sType =
VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
viewport_state_info.pNext = nullptr;
viewport_state_info.flags = 0;
viewport_state_info.viewportCount = 1;
viewport_state_info.pViewports = nullptr;
viewport_state_info.scissorCount = 1;
viewport_state_info.pScissors = nullptr;
pipeline_info.pViewportState = &viewport_state_info;
VkPipelineRasterizationStateCreateInfo rasterization_info;
rasterization_info.sType =
VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
rasterization_info.pNext = nullptr;
rasterization_info.flags = 0;
rasterization_info.depthClampEnable = VK_FALSE;
rasterization_info.rasterizerDiscardEnable = VK_FALSE;
rasterization_info.polygonMode = VK_POLYGON_MODE_FILL;
rasterization_info.cullMode = VK_CULL_MODE_NONE;
rasterization_info.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE;
rasterization_info.depthBiasEnable = VK_FALSE;
rasterization_info.depthBiasConstantFactor = 0;
rasterization_info.depthBiasClamp = 0;
rasterization_info.depthBiasSlopeFactor = 0;
rasterization_info.lineWidth = 1.0f;
pipeline_info.pRasterizationState = &rasterization_info;
VkPipelineMultisampleStateCreateInfo multisample_info;
multisample_info.sType =
VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
multisample_info.pNext = nullptr;
multisample_info.flags = 0;
multisample_info.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT;
multisample_info.sampleShadingEnable = VK_FALSE;
multisample_info.minSampleShading = 0;
multisample_info.pSampleMask = nullptr;
multisample_info.alphaToCoverageEnable = VK_FALSE;
multisample_info.alphaToOneEnable = VK_FALSE;
pipeline_info.pMultisampleState = &multisample_info;
VkPipelineDepthStencilStateCreateInfo depth_info = {
VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
nullptr,
0,
VK_TRUE,
VK_TRUE,
VK_COMPARE_OP_ALWAYS,
VK_FALSE,
VK_FALSE,
{},
{},
0.f,
1.f,
};
pipeline_info.pDepthStencilState = &depth_info;
VkPipelineColorBlendStateCreateInfo blend_info;
blend_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
blend_info.pNext = nullptr;
blend_info.flags = 0;
blend_info.logicOpEnable = VK_FALSE;
blend_info.logicOp = VK_LOGIC_OP_NO_OP;
VkPipelineColorBlendAttachmentState blend_attachments[1];
if (color_or_depth) {
blend_attachments[0].blendEnable = VK_FALSE;
blend_attachments[0].srcColorBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA;
blend_attachments[0].dstColorBlendFactor = VK_BLEND_FACTOR_ZERO;
blend_attachments[0].colorBlendOp = VK_BLEND_OP_ADD;
blend_attachments[0].srcAlphaBlendFactor = VK_BLEND_FACTOR_SRC_ALPHA;
blend_attachments[0].dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO;
blend_attachments[0].alphaBlendOp = VK_BLEND_OP_ADD;
blend_attachments[0].colorWriteMask = 0xF;
blend_info.attachmentCount =
static_cast<uint32_t>(xe::countof(blend_attachments));
blend_info.pAttachments = blend_attachments;
} else {
blend_info.attachmentCount = 0;
blend_info.pAttachments = nullptr;
}
std::memset(blend_info.blendConstants, 0, sizeof(blend_info.blendConstants));
pipeline_info.pColorBlendState = &blend_info;
VkPipelineDynamicStateCreateInfo dynamic_state_info;
dynamic_state_info.sType =
VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;
dynamic_state_info.pNext = nullptr;
dynamic_state_info.flags = 0;
VkDynamicState dynamic_states[] = {
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
};
dynamic_state_info.dynamicStateCount =
static_cast<uint32_t>(xe::countof(dynamic_states));
dynamic_state_info.pDynamicStates = dynamic_states;
pipeline_info.pDynamicState = &dynamic_state_info;
pipeline_info.layout = pipeline_layout_;
pipeline_info.renderPass = render_pass;
pipeline_info.subpass = 0;
pipeline_info.basePipelineHandle = nullptr;
pipeline_info.basePipelineIndex = -1;
VkPipeline pipeline = nullptr;
result = dfn.vkCreateGraphicsPipelines(device, nullptr, 1, &pipeline_info,
nullptr, &pipeline);
CheckResult(result, "vkCreateGraphicsPipelines");
return pipeline;
}
} // namespace vulkan
} // namespace ui
} // namespace xe

@ -1,100 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_VULKAN_BLITTER_H_
#define XENIA_UI_VULKAN_BLITTER_H_
#include <map>
#include <memory>
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace ui {
namespace vulkan {
class DescriptorPool;
class Blitter {
public:
Blitter(const VulkanProvider& provider);
~Blitter();
VkResult Initialize();
void Scavenge();
void Shutdown();
// Queues commands to blit a texture to another texture.
//
// src_rect is the rectangle of pixels to copy from the source
// src_extents is the actual size of the source image
// dst_rect is the rectangle of pixels that are replaced with the source
// dst_extents is the actual size of the destination image
// dst_framebuffer must only have one attachment, the target texture.
// viewport is the viewport rect (set to {0, 0, dst_w, dst_h} if unsure)
// scissor is the scissor rect for the dest (set to dst size if unsure)
void BlitTexture2D(VkCommandBuffer command_buffer, VkFence fence,
VkImageView src_image_view, VkRect2D src_rect,
VkExtent2D src_extents, VkFormat dst_image_format,
VkRect2D dst_rect, VkExtent2D dst_extents,
VkFramebuffer dst_framebuffer, VkViewport viewport,
VkRect2D scissor, VkFilter filter, bool color_or_depth,
bool swap_channels);
void CopyColorTexture2D(VkCommandBuffer command_buffer, VkFence fence,
VkImage src_image, VkImageView src_image_view,
VkOffset2D src_offset, VkImage dst_image,
VkImageView dst_image_view, VkExtent2D extents,
VkFilter filter, bool swap_channels);
void CopyDepthTexture(VkCommandBuffer command_buffer, VkFence fence,
VkImage src_image, VkImageView src_image_view,
VkOffset2D src_offset, VkImage dst_image,
VkImageView dst_image_view, VkExtent2D extents);
// For framebuffer creation.
VkRenderPass GetRenderPass(VkFormat format, bool color_or_depth);
private:
struct VtxPushConstants {
float src_uv[4]; // 0x00
float dst_uv[4]; // 0x10
};
struct PixPushConstants {
int _pad[3]; // 0x20
int swap; // 0x2C
};
VkPipeline GetPipeline(VkRenderPass render_pass, VkShaderModule frag_shader,
bool color_or_depth);
VkRenderPass CreateRenderPass(VkFormat output_format, bool color_or_depth);
VkPipeline CreatePipeline(VkRenderPass render_pass,
VkShaderModule frag_shader, bool color_or_depth);
std::unique_ptr<DescriptorPool> descriptor_pool_ = nullptr;
const VulkanProvider& provider_;
VkPipeline pipeline_color_ = nullptr;
VkPipeline pipeline_depth_ = nullptr;
VkPipelineLayout pipeline_layout_ = nullptr;
VkShaderModule blit_vertex_ = nullptr;
VkShaderModule blit_color_ = nullptr;
VkShaderModule blit_depth_ = nullptr;
VkSampler samp_linear_ = nullptr;
VkSampler samp_nearest_ = nullptr;
VkDescriptorSetLayout descriptor_set_layout_ = nullptr;
std::map<VkFormat, VkRenderPass> render_passes_;
std::map<std::pair<VkRenderPass, VkShaderModule>, VkPipeline> pipelines_;
};
} // namespace vulkan
} // namespace ui
} // namespace xe
#endif // XENIA_UI_VULKAN_BLITTER_H_
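For orientation, a minimal usage sketch of the interface above (not part of the
diff): `provider`, `command_buffer`, `fence`, `src_view`, `dst_framebuffer`,
and the dimensions are assumed to come from the surrounding renderer, and
`fence` must be the fence handed to the matching vkQueueSubmit.

Blitter blitter(provider);
if (blitter.Initialize() != VK_SUCCESS) {
  // Handle initialization failure.
}
VkViewport viewport = {0.0f, 0.0f, float(dst_w), float(dst_h), 0.0f, 1.0f};
VkRect2D scissor = {{0, 0}, {dst_w, dst_h}};
blitter.BlitTexture2D(command_buffer, fence, src_view,
                      /* src_rect= */ {{0, 0}, {src_w, src_h}},
                      /* src_extents= */ {src_w, src_h},
                      VK_FORMAT_R8G8B8A8_UNORM,
                      /* dst_rect= */ {{0, 0}, {dst_w, dst_h}},
                      /* dst_extents= */ {dst_w, dst_h}, dst_framebuffer,
                      viewport, scissor, VK_FILTER_LINEAR,
                      /* color_or_depth= */ true, /* swap_channels= */ false);
// After the submission that signals `fence` completes:
blitter.Scavenge();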

@ -1,314 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/vulkan/circular_buffer.h"
#include <algorithm>
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/ui/vulkan/vulkan_util.h"
namespace xe {
namespace ui {
namespace vulkan {
using util::CheckResult;
CircularBuffer::CircularBuffer(const VulkanProvider& provider,
VkBufferUsageFlags usage, VkDeviceSize capacity,
VkDeviceSize alignment)
: provider_(provider), capacity_(capacity) {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
VkResult status = VK_SUCCESS;
// Create our internal buffer.
VkBufferCreateInfo buffer_info;
buffer_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
buffer_info.pNext = nullptr;
buffer_info.flags = 0;
buffer_info.size = capacity;
buffer_info.usage = usage;
buffer_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
buffer_info.queueFamilyIndexCount = 0;
buffer_info.pQueueFamilyIndices = nullptr;
status = dfn.vkCreateBuffer(device, &buffer_info, nullptr, &gpu_buffer_);
CheckResult(status, "vkCreateBuffer");
if (status != VK_SUCCESS) {
assert_always();
}
VkMemoryRequirements reqs;
dfn.vkGetBufferMemoryRequirements(device, gpu_buffer_, &reqs);
alignment_ = xe::round_up(alignment, reqs.alignment);
}
CircularBuffer::~CircularBuffer() { Shutdown(); }
VkResult CircularBuffer::Initialize(VkDeviceMemory memory,
VkDeviceSize offset) {
assert_true(offset % alignment_ == 0);
gpu_memory_ = memory;
gpu_base_ = offset;
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
VkResult status = VK_SUCCESS;
// Bind the buffer to its backing memory.
status = dfn.vkBindBufferMemory(device, gpu_buffer_, gpu_memory_, gpu_base_);
CheckResult(status, "vkBindBufferMemory");
if (status != VK_SUCCESS) {
XELOGE("CircularBuffer::Initialize - Failed to bind memory!");
Shutdown();
return status;
}
// Map the memory so we can access it.
status = dfn.vkMapMemory(device, gpu_memory_, gpu_base_, capacity_, 0,
reinterpret_cast<void**>(&host_base_));
CheckResult(status, "vkMapMemory");
if (status != VK_SUCCESS) {
XELOGE("CircularBuffer::Initialize - Failed to map memory!");
Shutdown();
return status;
}
return VK_SUCCESS;
}
VkResult CircularBuffer::Initialize() {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
VkResult status = VK_SUCCESS;
VkMemoryRequirements reqs;
dfn.vkGetBufferMemoryRequirements(device, gpu_buffer_, &reqs);
// Allocate memory from the device to back the buffer.
owns_gpu_memory_ = true;
VkMemoryAllocateInfo memory_allocate_info;
memory_allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
memory_allocate_info.pNext = nullptr;
memory_allocate_info.allocationSize = reqs.size;
memory_allocate_info.memoryTypeIndex = ui::vulkan::util::ChooseHostMemoryType(
provider_, reqs.memoryTypeBits, false);
if (memory_allocate_info.memoryTypeIndex == UINT32_MAX) {
XELOGE("CircularBuffer::Initialize - Failed to get memory type!");
Shutdown();
return VK_ERROR_INITIALIZATION_FAILED;
}
status = dfn.vkAllocateMemory(device, &memory_allocate_info, nullptr,
&gpu_memory_);
if (status != VK_SUCCESS) {
XELOGE("CircularBuffer::Initialize - Failed to allocate memory!");
Shutdown();
return status;
}
capacity_ = reqs.size;
gpu_base_ = 0;
// Bind the buffer to its backing memory.
status = dfn.vkBindBufferMemory(device, gpu_buffer_, gpu_memory_, gpu_base_);
CheckResult(status, "vkBindBufferMemory");
if (status != VK_SUCCESS) {
XELOGE("CircularBuffer::Initialize - Failed to bind memory!");
Shutdown();
return status;
}
// Map the memory so we can access it.
status = dfn.vkMapMemory(device, gpu_memory_, gpu_base_, capacity_, 0,
reinterpret_cast<void**>(&host_base_));
CheckResult(status, "vkMapMemory");
if (status != VK_SUCCESS) {
XELOGE("CircularBuffer::Initialize - Failed to map memory!");
Shutdown();
return status;
}
return VK_SUCCESS;
}
void CircularBuffer::Shutdown() {
Clear();
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
if (host_base_) {
dfn.vkUnmapMemory(device, gpu_memory_);
host_base_ = nullptr;
}
if (gpu_buffer_) {
dfn.vkDestroyBuffer(device, gpu_buffer_, nullptr);
gpu_buffer_ = nullptr;
}
if (gpu_memory_ && owns_gpu_memory_) {
dfn.vkFreeMemory(device, gpu_memory_, nullptr);
gpu_memory_ = nullptr;
}
}
void CircularBuffer::GetBufferMemoryRequirements(VkMemoryRequirements* reqs) {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
dfn.vkGetBufferMemoryRequirements(device, gpu_buffer_, reqs);
}
bool CircularBuffer::CanAcquire(VkDeviceSize length) {
// Make sure the length is aligned.
length = xe::round_up(length, alignment_);
if (allocations_.empty()) {
// Read head has caught up to write head (entire buffer available for write)
assert_true(read_head_ == write_head_);
return capacity_ >= length;
} else if (write_head_ < read_head_) {
// Write head wrapped around and is behind read head.
// | write |---- read ----|
return (read_head_ - write_head_) >= length;
} else if (write_head_ > read_head_) {
// Read head behind write head.
// 1. Check if there's enough room from write -> capacity
// | |---- read ----| write |
if ((capacity_ - write_head_) >= length) {
return true;
}
// 2. Check if there's enough room from 0 -> read
// | write |---- read ----| |
if ((read_head_ - 0) >= length) {
return true;
}
}
return false;
}
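// Worked example (illustrative numbers): with capacity_ = 1024 and
// alignment_ = 256, suppose write_head_ = 768 and read_head_ = 256. A request
// for 200 bytes rounds up to 256 and fits at the tail (1024 - 768 = 256). A
// request for 300 rounds up to 512: the tail only has 256 bytes free and the
// wrap-around region only 256 (read_head_ - 0), so CanAcquire returns false
// until Scavenge() advances read_head_.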
CircularBuffer::Allocation* CircularBuffer::Acquire(VkDeviceSize length,
VkFence fence) {
VkDeviceSize aligned_length = xe::round_up(length, alignment_);
if (!CanAcquire(aligned_length)) {
return nullptr;
}
assert_true(write_head_ % alignment_ == 0);
if (write_head_ < read_head_) {
// Write head behind read head.
assert_true(read_head_ - write_head_ >= aligned_length);
Allocation alloc;
alloc.host_ptr = host_base_ + write_head_;
alloc.gpu_memory = gpu_memory_;
alloc.offset = gpu_base_ + write_head_;
alloc.length = length;
alloc.aligned_length = aligned_length;
alloc.fence = fence;
write_head_ += aligned_length;
allocations_.push(alloc);
return &allocations_.back();
} else {
// Write head equal to/after read head
if (capacity_ - write_head_ >= aligned_length) {
// Free space from write -> capacity
Allocation alloc;
alloc.host_ptr = host_base_ + write_head_;
alloc.gpu_memory = gpu_memory_;
alloc.offset = gpu_base_ + write_head_;
alloc.length = length;
alloc.aligned_length = aligned_length;
alloc.fence = fence;
write_head_ += aligned_length;
allocations_.push(alloc);
return &allocations_.back();
} else if ((read_head_ - 0) >= aligned_length) {
// Not enough space from write -> capacity, but there is enough free space
// from begin -> read
Allocation alloc;
alloc.host_ptr = host_base_ + 0;
alloc.gpu_memory = gpu_memory_;
alloc.offset = gpu_base_ + 0;
alloc.length = length;
alloc.aligned_length = aligned_length;
alloc.fence = fence;
write_head_ = aligned_length;
allocations_.push(alloc);
return &allocations_.back();
}
}
return nullptr;
}
void CircularBuffer::Flush(Allocation* allocation) {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
VkMappedMemoryRange range;
range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
range.pNext = nullptr;
range.memory = gpu_memory_;
range.offset = allocation->offset;  // Already absolute; includes gpu_base_.
range.size = allocation->length;
dfn.vkFlushMappedMemoryRanges(device, 1, &range);
}
void CircularBuffer::Flush(VkDeviceSize offset, VkDeviceSize length) {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
VkMappedMemoryRange range;
range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
range.pNext = nullptr;
range.memory = gpu_memory_;
range.offset = gpu_base_ + offset;
range.size = length;
dfn.vkFlushMappedMemoryRanges(device, 1, &range);
}
void CircularBuffer::Clear() {
allocations_ = std::queue<Allocation>{};
write_head_ = read_head_ = 0;
}
void CircularBuffer::Scavenge() {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
// Track the last fence observed as signaled so consecutive allocations on
// the same fence skip the vkGetFenceStatus query.
VkFence fence = nullptr;
while (!allocations_.empty()) {
Allocation& alloc = allocations_.front();
if (fence != alloc.fence &&
dfn.vkGetFenceStatus(device, alloc.fence) != VK_SUCCESS) {
// Don't bother freeing following allocations to ensure proper ordering.
break;
}
fence = alloc.fence;
if (capacity_ - read_head_ < alloc.aligned_length) {
// This allocation is stored at the beginning of the buffer.
read_head_ = alloc.aligned_length;
} else {
read_head_ += alloc.aligned_length;
}
allocations_.pop();
}
if (allocations_.empty()) {
// Reset R/W heads to work around fragmentation issues.
read_head_ = write_head_ = 0;
}
}
} // namespace vulkan
} // namespace ui
} // namespace xe

@ -1,92 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2015 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_
#define XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_
#include <queue>
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace ui {
namespace vulkan {
// A circular buffer, intended to hold (fairly) temporary memory that will be
// released when a fence is signaled. Best used when allocations are taken
// in-order with command buffer submission.
//
// Allocations wrap around the buffer (but are never split across its end);
// trailing older allocations are freed once their fences are signaled.
class CircularBuffer {
public:
CircularBuffer(const VulkanProvider& provider, VkBufferUsageFlags usage,
VkDeviceSize capacity, VkDeviceSize alignment = 256);
~CircularBuffer();
struct Allocation {
void* host_ptr;
VkDeviceMemory gpu_memory;
VkDeviceSize offset;
VkDeviceSize length;
VkDeviceSize aligned_length;
// Allocation usage fence. This allocation will be deleted when the fence
// becomes signaled.
VkFence fence;
};
VkResult Initialize(VkDeviceMemory memory, VkDeviceSize offset);
VkResult Initialize();
void Shutdown();
void GetBufferMemoryRequirements(VkMemoryRequirements* reqs);
VkDeviceSize alignment() const { return alignment_; }
VkDeviceSize capacity() const { return capacity_; }
VkBuffer gpu_buffer() const { return gpu_buffer_; }
VkDeviceMemory gpu_memory() const { return gpu_memory_; }
uint8_t* host_base() const { return host_base_; }
bool CanAcquire(VkDeviceSize length);
// Acquires space to hold memory. This allocation is only freed when the fence
// reaches the signaled state.
Allocation* Acquire(VkDeviceSize length, VkFence fence);
void Flush(Allocation* allocation);
void Flush(VkDeviceSize offset, VkDeviceSize length);
// Clears all allocations, regardless of whether they've been consumed or not.
void Clear();
// Frees any allocations whose fences have been signaled.
void Scavenge();
private:
// write_head_ and read_head_ are offsets relative to gpu_base_.
VkDeviceSize capacity_ = 0;
VkDeviceSize alignment_ = 0;
VkDeviceSize write_head_ = 0;
VkDeviceSize read_head_ = 0;
const VulkanProvider& provider_;
bool owns_gpu_memory_ = false;
VkBuffer gpu_buffer_ = nullptr;
VkDeviceMemory gpu_memory_ = nullptr;
VkDeviceSize gpu_base_ = 0;
uint8_t* host_base_ = nullptr;
std::queue<Allocation> allocations_;
};
} // namespace vulkan
} // namespace ui
} // namespace xe
#endif  // XENIA_UI_VULKAN_CIRCULAR_BUFFER_H_
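A hedged usage sketch for this class (not part of the diff; `provider`, `data`,
`size`, and `submit_fence` are assumed to exist in the caller):

CircularBuffer transient_buffer(provider, VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
                                4 * 1024 * 1024);
transient_buffer.Initialize();  // Self-allocating variant; gpu_base_ is 0.
if (auto* alloc = transient_buffer.Acquire(size, submit_fence)) {
  std::memcpy(alloc->host_ptr, data, size);
  transient_buffer.Flush(alloc);
  // Record commands that read transient_buffer.gpu_buffer() at alloc->offset,
  // then submit them so that submit_fence is signaled on completion.
}
transient_buffer.Scavenge();  // Call periodically to reclaim signaled space.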

@ -1,142 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/vulkan/fenced_pools.h"
#include "xenia/base/assert.h"
#include "xenia/base/math.h"
#include "xenia/ui/vulkan/vulkan_util.h"
namespace xe {
namespace ui {
namespace vulkan {
using util::CheckResult;
CommandBufferPool::CommandBufferPool(const VulkanProvider& provider,
uint32_t queue_family_index)
: BaseFencedPool(provider) {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
// Create the pool used for allocating buffers.
// They are marked as transient (short-lived) and cycled frequently.
VkCommandPoolCreateInfo cmd_pool_info;
cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
cmd_pool_info.pNext = nullptr;
cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_TRANSIENT_BIT |
VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
cmd_pool_info.queueFamilyIndex = queue_family_index;
auto err =
dfn.vkCreateCommandPool(device, &cmd_pool_info, nullptr, &command_pool_);
CheckResult(err, "vkCreateCommandPool");
// Allocate a bunch of command buffers to start.
constexpr uint32_t kDefaultCount = 32;
VkCommandBufferAllocateInfo command_buffer_info;
command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
command_buffer_info.pNext = nullptr;
command_buffer_info.commandPool = command_pool_;
command_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
command_buffer_info.commandBufferCount = kDefaultCount;
VkCommandBuffer command_buffers[kDefaultCount];
err = dfn.vkAllocateCommandBuffers(device, &command_buffer_info,
command_buffers);
CheckResult(err, "vkAllocateCommandBuffers");
for (size_t i = 0; i < xe::countof(command_buffers); ++i) {
PushEntry(command_buffers[i], nullptr);
}
}
CommandBufferPool::~CommandBufferPool() {
FreeAllEntries();
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
dfn.vkDestroyCommandPool(device, command_pool_, nullptr);
command_pool_ = nullptr;
}
VkCommandBuffer CommandBufferPool::AllocateEntry(void* data) {
// TODO(benvanik): allocate a bunch at once?
VkCommandBufferAllocateInfo command_buffer_info;
command_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
command_buffer_info.pNext = nullptr;
command_buffer_info.commandPool = command_pool_;
command_buffer_info.level =
VkCommandBufferLevel(reinterpret_cast<uintptr_t>(data));
command_buffer_info.commandBufferCount = 1;
VkCommandBuffer command_buffer;
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
auto err = dfn.vkAllocateCommandBuffers(device, &command_buffer_info,
&command_buffer);
CheckResult(err, "vkAllocateCommandBuffers");
return command_buffer;
}
void CommandBufferPool::FreeEntry(VkCommandBuffer handle) {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
dfn.vkFreeCommandBuffers(device, command_pool_, 1, &handle);
}
DescriptorPool::DescriptorPool(const VulkanProvider& provider,
uint32_t max_count,
std::vector<VkDescriptorPoolSize> pool_sizes)
: BaseFencedPool(provider) {
VkDescriptorPoolCreateInfo descriptor_pool_info;
descriptor_pool_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
descriptor_pool_info.pNext = nullptr;
descriptor_pool_info.flags =
VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT;
descriptor_pool_info.maxSets = max_count;
descriptor_pool_info.poolSizeCount = uint32_t(pool_sizes.size());
descriptor_pool_info.pPoolSizes = pool_sizes.data();
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
auto err = dfn.vkCreateDescriptorPool(device, &descriptor_pool_info, nullptr,
&descriptor_pool_);
CheckResult(err, "vkCreateDescriptorPool");
}
DescriptorPool::~DescriptorPool() {
FreeAllEntries();
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
dfn.vkDestroyDescriptorPool(device, descriptor_pool_, nullptr);
descriptor_pool_ = nullptr;
}
VkDescriptorSet DescriptorPool::AllocateEntry(void* data) {
VkDescriptorSetLayout layout = reinterpret_cast<VkDescriptorSetLayout>(data);
VkDescriptorSet descriptor_set = nullptr;
VkDescriptorSetAllocateInfo set_alloc_info;
set_alloc_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
set_alloc_info.pNext = nullptr;
set_alloc_info.descriptorPool = descriptor_pool_;
set_alloc_info.descriptorSetCount = 1;
set_alloc_info.pSetLayouts = &layout;
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
auto err =
dfn.vkAllocateDescriptorSets(device, &set_alloc_info, &descriptor_set);
CheckResult(err, "vkAllocateDescriptorSets");
return descriptor_set;
}
void DescriptorPool::FreeEntry(VkDescriptorSet handle) {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
dfn.vkFreeDescriptorSets(device, descriptor_pool_, 1, &handle);
}
} // namespace vulkan
} // namespace ui
} // namespace xe

@ -1,341 +0,0 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2016 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_VULKAN_FENCED_POOLS_H_
#define XENIA_UI_VULKAN_FENCED_POOLS_H_
#include <memory>
#include "xenia/base/assert.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
#include "xenia/ui/vulkan/vulkan_util.h"
namespace xe {
namespace ui {
namespace vulkan {
// Simple pool for Vulkan homogeneous objects that cannot be reused while
// in-flight.
// It batches pooled objects into groups and uses a vkQueueSubmit fence to
// indicate their availability. If no object is free when one is requested,
// a new one is allocated through the subclass's AllocateEntry().
template <typename T, typename HANDLE>
class BaseFencedPool {
public:
BaseFencedPool(const VulkanProvider& provider) : provider_(provider) {}
virtual ~BaseFencedPool() {
// TODO(benvanik): wait on fence until done.
assert_null(pending_batch_list_head_);
// Subclasses must call FreeAllEntries() to properly clean up things.
assert_null(free_batch_list_head_);
assert_null(free_entry_list_head_);
}
// True if one or more batches are still pending on the GPU.
bool has_pending() const { return pending_batch_list_head_ != nullptr; }
// True if a batch is open.
bool has_open_batch() const { return open_batch_ != nullptr; }
// Checks all pending batches for completion and scavenges their entries.
// This should be called as frequently as reasonable.
void Scavenge() {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
while (pending_batch_list_head_) {
auto batch = pending_batch_list_head_;
assert_not_null(batch->fence);
VkResult status = dfn.vkGetFenceStatus(device, batch->fence);
if (status == VK_SUCCESS || status == VK_ERROR_DEVICE_LOST) {
// Batch has completed. Reclaim.
pending_batch_list_head_ = batch->next;
if (batch == pending_batch_list_tail_) {
pending_batch_list_tail_ = nullptr;
}
batch->next = free_batch_list_head_;
free_batch_list_head_ = batch;
batch->entry_list_tail->next = free_entry_list_head_;
free_entry_list_head_ = batch->entry_list_head;
batch->entry_list_head = nullptr;
batch->entry_list_tail = nullptr;
} else {
// Batch is still in-flight. Since batches are executed in order we know
// no others after it could have completed, so early-exit.
return;
}
}
}
// Begins a new batch.
// All entries acquired within this batch will be marked as in-use until
// the fence returned is signalled.
// Pass in a fence to use an external fence. This assumes the fence has been
// reset.
VkFence BeginBatch(VkFence fence = nullptr) {
assert_null(open_batch_);
Batch* batch = nullptr;
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
if (free_batch_list_head_) {
// Reuse a batch.
batch = free_batch_list_head_;
free_batch_list_head_ = batch->next;
batch->next = nullptr;
if (batch->flags & kBatchOwnsFence && !fence) {
// Reset owned fence.
dfn.vkResetFences(device, 1, &batch->fence);
} else if ((batch->flags & kBatchOwnsFence) && fence) {
// Transfer owned -> external
dfn.vkDestroyFence(device, batch->fence, nullptr);
batch->fence = fence;
batch->flags &= ~kBatchOwnsFence;
} else if (!(batch->flags & kBatchOwnsFence) && !fence) {
// external -> owned
VkFenceCreateInfo info;
info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
info.pNext = nullptr;
info.flags = 0;
VkResult res = dfn.vkCreateFence(device, &info, nullptr, &batch->fence);
if (res != VK_SUCCESS) {
assert_always();
}
batch->flags |= kBatchOwnsFence;
} else {
// external -> external
batch->fence = fence;
}
} else {
// Allocate new batch.
batch = new Batch();
batch->next = nullptr;
batch->flags = 0;
if (!fence) {
VkFenceCreateInfo info;
info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
info.pNext = nullptr;
info.flags = 0;
VkResult res = dfn.vkCreateFence(device, &info, nullptr, &batch->fence);
if (res != VK_SUCCESS) {
assert_always();
}
batch->flags |= kBatchOwnsFence;
} else {
batch->fence = fence;
}
}
batch->entry_list_head = nullptr;
batch->entry_list_tail = nullptr;
open_batch_ = batch;
return batch->fence;
}
// Cancels an open batch, and releases all entries acquired within.
void CancelBatch() {
assert_not_null(open_batch_);
auto batch = open_batch_;
open_batch_ = nullptr;
// Relink the batch back into the free batch list.
batch->next = free_batch_list_head_;
free_batch_list_head_ = batch;
// Relink entries back into free entries list.
batch->entry_list_tail->next = free_entry_list_head_;
free_entry_list_head_ = batch->entry_list_head;
batch->entry_list_head = nullptr;
batch->entry_list_tail = nullptr;
}
// Ends the current batch.
void EndBatch() {
assert_not_null(open_batch_);
// Close and see if we have anything.
auto batch = open_batch_;
open_batch_ = nullptr;
if (!batch->entry_list_head) {
// Nothing to do.
batch->next = free_batch_list_head_;
free_batch_list_head_ = batch;
return;
}
// Append to the end of the batch list.
batch->next = nullptr;
if (!pending_batch_list_head_) {
pending_batch_list_head_ = batch;
}
if (pending_batch_list_tail_) {
pending_batch_list_tail_->next = batch;
pending_batch_list_tail_ = batch;
} else {
pending_batch_list_tail_ = batch;
}
}
protected:
// Attempts to acquire an entry from the pool in the current batch.
// If none are available a new one will be allocated.
HANDLE AcquireEntry(void* data) {
Entry* entry = nullptr;
if (free_entry_list_head_) {
// Slice off an entry from the free list.
Entry* prev = nullptr;
Entry* cur = free_entry_list_head_;
while (cur != nullptr) {
if (cur->data == data) {
if (prev) {
prev->next = cur->next;
} else {
free_entry_list_head_ = cur->next;
}
entry = cur;
break;
}
prev = cur;
cur = cur->next;
}
}
if (!entry) {
// No entry available; allocate new.
entry = new Entry();
entry->data = data;
entry->handle = static_cast<T*>(this)->AllocateEntry(data);
if (!entry->handle) {
delete entry;
return nullptr;
}
}
entry->next = nullptr;
if (!open_batch_->entry_list_head) {
open_batch_->entry_list_head = entry;
}
if (open_batch_->entry_list_tail) {
open_batch_->entry_list_tail->next = entry;
}
open_batch_->entry_list_tail = entry;
return entry->handle;
}
void PushEntry(HANDLE handle, void* data) {
auto entry = new Entry();
entry->next = free_entry_list_head_;
entry->data = data;
entry->handle = handle;
free_entry_list_head_ = entry;
}
void FreeAllEntries() {
const VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
// Run down free lists.
while (free_batch_list_head_) {
auto batch = free_batch_list_head_;
free_batch_list_head_ = batch->next;
if (batch->flags & kBatchOwnsFence) {
dfn.vkDestroyFence(device, batch->fence, nullptr);
batch->fence = nullptr;
}
delete batch;
}
while (free_entry_list_head_) {
auto entry = free_entry_list_head_;
free_entry_list_head_ = entry->next;
static_cast<T*>(this)->FreeEntry(entry->handle);
delete entry;
}
}
const VulkanProvider& provider_;
private:
struct Entry {
Entry* next;
void* data;
HANDLE handle;
};
struct Batch {
Batch* next;
Entry* entry_list_head;
Entry* entry_list_tail;
uint32_t flags;
VkFence fence;
};
static const uint32_t kBatchOwnsFence = 1;
Batch* free_batch_list_head_ = nullptr;
Entry* free_entry_list_head_ = nullptr;
Batch* pending_batch_list_head_ = nullptr;
Batch* pending_batch_list_tail_ = nullptr;
Batch* open_batch_ = nullptr;
};
class CommandBufferPool
: public BaseFencedPool<CommandBufferPool, VkCommandBuffer> {
public:
typedef BaseFencedPool<CommandBufferPool, VkCommandBuffer> Base;
CommandBufferPool(const VulkanProvider& provider,
uint32_t queue_family_index);
~CommandBufferPool() override;
VkCommandBuffer AcquireEntry(
VkCommandBufferLevel level = VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
return Base::AcquireEntry(reinterpret_cast<void*>(level));
}
protected:
friend class BaseFencedPool<CommandBufferPool, VkCommandBuffer>;
VkCommandBuffer AllocateEntry(void* data);
void FreeEntry(VkCommandBuffer handle);
VkCommandPool command_pool_ = nullptr;
};
class DescriptorPool : public BaseFencedPool<DescriptorPool, VkDescriptorSet> {
public:
typedef BaseFencedPool<DescriptorPool, VkDescriptorSet> Base;
DescriptorPool(const VulkanProvider& provider, uint32_t max_count,
std::vector<VkDescriptorPoolSize> pool_sizes);
~DescriptorPool() override;
VkDescriptorSet AcquireEntry(VkDescriptorSetLayout layout) {
return Base::AcquireEntry(layout);
}
// WARNING: Sets allocated directly from the Vulkan pool will not be tracked!
VkDescriptorPool descriptor_pool() { return descriptor_pool_; }
protected:
friend class BaseFencedPool<DescriptorPool, VkDescriptorSet>;
VkDescriptorSet AllocateEntry(void* data);
void FreeEntry(VkDescriptorSet handle);
VkDescriptorPool descriptor_pool_ = nullptr;
};
} // namespace vulkan
} // namespace ui
} // namespace xe
#endif // XENIA_UI_VULKAN_FENCED_POOLS_H_
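A hedged sketch of the intended call pattern (not part of the diff; `provider`
and `queue_family_index` are assumed to exist in the caller):

CommandBufferPool cmd_pool(provider, queue_family_index);
VkFence fence = cmd_pool.BeginBatch();  // Pool-owned fence, already reset.
VkCommandBuffer cmd = cmd_pool.AcquireEntry();
// ... record cmd, then pass `fence` to vkQueueSubmit ...
cmd_pool.EndBatch();
cmd_pool.Scavenge();  // Later: reclaims the batch once `fence` is signaled.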

@ -10,32 +10,28 @@ XE_UI_VULKAN_FUNCTION(vkCmdBindDescriptorSets)
XE_UI_VULKAN_FUNCTION(vkCmdBindIndexBuffer)
XE_UI_VULKAN_FUNCTION(vkCmdBindPipeline)
XE_UI_VULKAN_FUNCTION(vkCmdBindVertexBuffers)
XE_UI_VULKAN_FUNCTION(vkCmdBlitImage)
XE_UI_VULKAN_FUNCTION(vkCmdClearAttachments)
XE_UI_VULKAN_FUNCTION(vkCmdClearColorImage)
XE_UI_VULKAN_FUNCTION(vkCmdClearDepthStencilImage)
XE_UI_VULKAN_FUNCTION(vkCmdCopyBuffer)
XE_UI_VULKAN_FUNCTION(vkCmdCopyBufferToImage)
XE_UI_VULKAN_FUNCTION(vkCmdCopyImageToBuffer)
XE_UI_VULKAN_FUNCTION(vkCmdDispatch)
XE_UI_VULKAN_FUNCTION(vkCmdDraw)
XE_UI_VULKAN_FUNCTION(vkCmdDrawIndexed)
XE_UI_VULKAN_FUNCTION(vkCmdEndRenderPass)
XE_UI_VULKAN_FUNCTION(vkCmdExecuteCommands)
XE_UI_VULKAN_FUNCTION(vkCmdFillBuffer)
XE_UI_VULKAN_FUNCTION(vkCmdPipelineBarrier)
XE_UI_VULKAN_FUNCTION(vkCmdPushConstants)
XE_UI_VULKAN_FUNCTION(vkCmdResolveImage)
XE_UI_VULKAN_FUNCTION(vkCmdSetBlendConstants)
XE_UI_VULKAN_FUNCTION(vkCmdSetDepthBias)
XE_UI_VULKAN_FUNCTION(vkCmdSetDepthBounds)
XE_UI_VULKAN_FUNCTION(vkCmdSetLineWidth)
XE_UI_VULKAN_FUNCTION(vkCmdSetScissor)
XE_UI_VULKAN_FUNCTION(vkCmdSetStencilCompareMask)
XE_UI_VULKAN_FUNCTION(vkCmdSetStencilReference)
XE_UI_VULKAN_FUNCTION(vkCmdSetStencilWriteMask)
XE_UI_VULKAN_FUNCTION(vkCmdSetViewport)
XE_UI_VULKAN_FUNCTION(vkCreateBuffer)
XE_UI_VULKAN_FUNCTION(vkCreateBufferView)
XE_UI_VULKAN_FUNCTION(vkCreateCommandPool)
XE_UI_VULKAN_FUNCTION(vkCreateComputePipelines)
XE_UI_VULKAN_FUNCTION(vkCreateDescriptorPool)
XE_UI_VULKAN_FUNCTION(vkCreateDescriptorSetLayout)
XE_UI_VULKAN_FUNCTION(vkCreateFence)
@ -43,13 +39,13 @@ XE_UI_VULKAN_FUNCTION(vkCreateFramebuffer)
XE_UI_VULKAN_FUNCTION(vkCreateGraphicsPipelines)
XE_UI_VULKAN_FUNCTION(vkCreateImage)
XE_UI_VULKAN_FUNCTION(vkCreateImageView)
XE_UI_VULKAN_FUNCTION(vkCreatePipelineCache)
XE_UI_VULKAN_FUNCTION(vkCreatePipelineLayout)
XE_UI_VULKAN_FUNCTION(vkCreateRenderPass)
XE_UI_VULKAN_FUNCTION(vkCreateSampler)
XE_UI_VULKAN_FUNCTION(vkCreateSemaphore)
XE_UI_VULKAN_FUNCTION(vkCreateShaderModule)
XE_UI_VULKAN_FUNCTION(vkDestroyBuffer)
XE_UI_VULKAN_FUNCTION(vkDestroyBufferView)
XE_UI_VULKAN_FUNCTION(vkDestroyCommandPool)
XE_UI_VULKAN_FUNCTION(vkDestroyDescriptorPool)
XE_UI_VULKAN_FUNCTION(vkDestroyDescriptorSetLayout)
@ -58,7 +54,6 @@ XE_UI_VULKAN_FUNCTION(vkDestroyFramebuffer)
XE_UI_VULKAN_FUNCTION(vkDestroyImage)
XE_UI_VULKAN_FUNCTION(vkDestroyImageView)
XE_UI_VULKAN_FUNCTION(vkDestroyPipeline)
XE_UI_VULKAN_FUNCTION(vkDestroyPipelineCache)
XE_UI_VULKAN_FUNCTION(vkDestroyPipelineLayout)
XE_UI_VULKAN_FUNCTION(vkDestroyRenderPass)
XE_UI_VULKAN_FUNCTION(vkDestroySampler)
@ -66,23 +61,18 @@ XE_UI_VULKAN_FUNCTION(vkDestroySemaphore)
XE_UI_VULKAN_FUNCTION(vkDestroyShaderModule)
XE_UI_VULKAN_FUNCTION(vkEndCommandBuffer)
XE_UI_VULKAN_FUNCTION(vkFlushMappedMemoryRanges)
XE_UI_VULKAN_FUNCTION(vkFreeCommandBuffers)
XE_UI_VULKAN_FUNCTION(vkFreeDescriptorSets)
XE_UI_VULKAN_FUNCTION(vkFreeMemory)
XE_UI_VULKAN_FUNCTION(vkGetBufferMemoryRequirements)
XE_UI_VULKAN_FUNCTION(vkGetDeviceQueue)
XE_UI_VULKAN_FUNCTION(vkGetFenceStatus)
XE_UI_VULKAN_FUNCTION(vkGetImageMemoryRequirements)
XE_UI_VULKAN_FUNCTION(vkGetImageSubresourceLayout)
XE_UI_VULKAN_FUNCTION(vkGetPipelineCacheData)
XE_UI_VULKAN_FUNCTION(vkInvalidateMappedMemoryRanges)
XE_UI_VULKAN_FUNCTION(vkMapMemory)
XE_UI_VULKAN_FUNCTION(vkResetCommandBuffer)
XE_UI_VULKAN_FUNCTION(vkResetCommandPool)
XE_UI_VULKAN_FUNCTION(vkResetDescriptorPool)
XE_UI_VULKAN_FUNCTION(vkResetFences)
XE_UI_VULKAN_FUNCTION(vkQueueBindSparse)
XE_UI_VULKAN_FUNCTION(vkQueueSubmit)
XE_UI_VULKAN_FUNCTION(vkQueueWaitIdle)
XE_UI_VULKAN_FUNCTION(vkUnmapMemory)
XE_UI_VULKAN_FUNCTION(vkUpdateDescriptorSets)
XE_UI_VULKAN_FUNCTION(vkWaitForFences)

@ -1,2 +0,0 @@
// VK_AMD_shader_info functions used in Xenia.
XE_UI_VULKAN_FUNCTION(vkGetShaderInfoAMD)

@ -0,0 +1,4 @@
// VK_KHR_bind_memory2 functions used in Xenia.
// Promoted to Vulkan 1.1 core.
XE_UI_VULKAN_FUNCTION_PROMOTED(vkBindBufferMemory2KHR, vkBindBufferMemory2)
XE_UI_VULKAN_FUNCTION_PROMOTED(vkBindImageMemory2KHR, vkBindImageMemory2)

@ -0,0 +1,6 @@
// VK_KHR_get_memory_requirements2 functions used in Xenia.
// Promoted to Vulkan 1.1 core.
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetBufferMemoryRequirements2KHR,
vkGetBufferMemoryRequirements2)
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetImageMemoryRequirements2KHR,
vkGetImageMemoryRequirements2)

@ -0,0 +1,6 @@
// VK_KHR_maintenance4 functions used in Xenia.
// Promoted to Vulkan 1.3 core.
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetDeviceBufferMemoryRequirementsKHR,
vkGetDeviceBufferMemoryRequirements)
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetDeviceImageMemoryRequirementsKHR,
vkGetDeviceImageMemoryRequirements)
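These listing files follow an X-macro pattern; a sketch of how a loader might
consume them, assuming it defines the two macros roughly as below (the struct
and include names are illustrative, not taken from the diff):

struct DeviceFunctions {
  // Plain functions keep their own name; promoted functions are stored under
  // the core name so callers never branch on where the pointer came from.
#define XE_UI_VULKAN_FUNCTION(name) PFN_##name name;
#define XE_UI_VULKAN_FUNCTION_PROMOTED(extension_name, core_name) \
  PFN_##core_name core_name;
#include "device_khr_maintenance4.inc"
#undef XE_UI_VULKAN_FUNCTION_PROMOTED
#undef XE_UI_VULKAN_FUNCTION
};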

@ -6,7 +6,6 @@ XE_UI_VULKAN_FUNCTION(vkEnumeratePhysicalDevices)
XE_UI_VULKAN_FUNCTION(vkGetDeviceProcAddr)
XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceFeatures)
XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceFormatProperties)
XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceImageFormatProperties)
XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceMemoryProperties)
XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceProperties)
XE_UI_VULKAN_FUNCTION(vkGetPhysicalDeviceQueueFamilyProperties)

@ -1,4 +1,6 @@
// VK_KHR_get_physical_device_properties2 functions used in Xenia.
// Promoted to Vulkan 1.1 core.
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetPhysicalDeviceMemoryProperties2KHR,
vkGetPhysicalDeviceMemoryProperties2)
XE_UI_VULKAN_FUNCTION_PROMOTED(vkGetPhysicalDeviceProperties2KHR,
vkGetPhysicalDeviceProperties2)

@ -7,10 +7,8 @@ project("xenia-ui-vulkan")
kind("StaticLib")
language("C++")
links({
"fmt",
"xenia-base",
"xenia-ui",
"xenia-ui-spirv",
})
includedirs({
project_root.."/third_party/Vulkan-Headers/include",
@ -19,9 +17,7 @@ project("xenia-ui-vulkan")
local_platform_files("functions")
files({
"../shaders/bytecode/vulkan_spirv/*.h",
"shaders/bytecode/vulkan_spirv/*.h",
})
removefiles({"*_demo.cc"})
group("demos")
project("xenia-ui-window-vulkan-demo")
@ -33,7 +29,6 @@ project("xenia-ui-window-vulkan-demo")
"imgui",
"xenia-base",
"xenia-ui",
"xenia-ui-spirv",
"xenia-ui-vulkan",
})
includedirs({

@ -1,31 +0,0 @@
// NOTE: This file is compiled and embedded into the exe.
// Use `xenia-build genspirv` and check in any changes under bin/.
#version 450 core
precision highp float;
layout(push_constant) uniform PushConstants {
// normalized [x, y, w, h]
layout(offset = 0x00) vec4 src_uv;
layout(offset = 0x10) vec4 dst_uv;
} push_constants;
layout(location = 0) out vec2 vtx_uv;
void main() {
const vec2 vtx_arr[4] = vec2[4](
vec2(0, 0),
vec2(1, 0),
vec2(0, 1),
vec2(1, 1)
);
vec2 vfetch_pos = vtx_arr[gl_VertexIndex];
vec4 scaled_dst_uv = push_constants.dst_uv * vec4(2.0);
gl_Position =
vec4(scaled_dst_uv.xy - vec2(1.0) + vfetch_pos.xy * scaled_dst_uv.zw, 0.0,
1.0);
vtx_uv = vfetch_pos.xy * push_constants.src_uv.zw + push_constants.src_uv.xy;
}
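// Worked example (illustrative): with dst_uv = (0, 0, 1, 1), scaled_dst_uv is
// (0, 0, 2, 2) and the quad corners land at (-1, -1)..(1, 1), covering the
// whole target; dst_uv = (0.25, 0.25, 0.5, 0.5) maps the quad to the centered
// half-size rectangle (-0.5, -0.5)..(0.5, 0.5) in clip space.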

@ -1,20 +0,0 @@
// NOTE: This file is compiled and embedded into the exe.
// Use `xenia-build genspirv` and check in any changes under bin/.
#version 450 core
precision highp float;
layout(push_constant) uniform PushConstants {
layout(offset = 0x20) vec3 _pad;
layout(offset = 0x2C) int swap;
} push_constants;
layout(set = 0, binding = 0) uniform sampler2D src_texture;
layout(location = 0) in vec2 vtx_uv;
layout(location = 0) out vec4 oC;
void main() {
oC = texture(src_texture, vtx_uv);
if (push_constants.swap != 0) oC = oC.bgra;
}

@ -1,19 +0,0 @@
// NOTE: This file is compiled and embedded into the exe.
// Use `xenia-build genspirv` and check in any changes under bin/.
#version 450 core
precision highp float;
layout(push_constant) uniform PushConstants {
layout(offset = 0x20) vec3 _pad;
layout(offset = 0x2C) int swap;
} push_constants;
layout(set = 0, binding = 0) uniform sampler2D src_texture;
layout(location = 0) in vec2 vtx_uv;
layout(location = 0) out vec4 oC;
void main() {
gl_FragDepth = texture(src_texture, vtx_uv).r;
}

@ -1,2 +0,0 @@
DisableFormat: true
SortIncludes: false

@ -1,99 +0,0 @@
// Generated with `xb buildshaders`.
#if 0
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 10
; Bound: 24608
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %5663 "main" %4841 %5592
OpExecutionMode %5663 OriginUpperLeft
OpDecorate %4841 Location 0
OpDecorate %5164 DescriptorSet 0
OpDecorate %5164 Binding 0
OpDecorate %5592 Location 0
OpMemberDecorate %_struct_1019 0 Offset 32
OpMemberDecorate %_struct_1019 1 Offset 44
OpDecorate %_struct_1019 Block
%void = OpTypeVoid
%1282 = OpTypeFunction %void
%float = OpTypeFloat 32
%v4float = OpTypeVector %float 4
%_ptr_Output_v4float = OpTypePointer Output %v4float
%4841 = OpVariable %_ptr_Output_v4float Output
%150 = OpTypeImage %float 2D 0 0 0 1 Unknown
%510 = OpTypeSampledImage %150
%_ptr_UniformConstant_510 = OpTypePointer UniformConstant %510
%5164 = OpVariable %_ptr_UniformConstant_510 UniformConstant
%v2float = OpTypeVector %float 2
%_ptr_Input_v2float = OpTypePointer Input %v2float
%5592 = OpVariable %_ptr_Input_v2float Input
%v3float = OpTypeVector %float 3
%int = OpTypeInt 32 1
%_struct_1019 = OpTypeStruct %v3float %int
%_ptr_PushConstant__struct_1019 = OpTypePointer PushConstant %_struct_1019
%3463 = OpVariable %_ptr_PushConstant__struct_1019 PushConstant
%int_1 = OpConstant %int 1
%_ptr_PushConstant_int = OpTypePointer PushConstant %int
%int_0 = OpConstant %int 0
%bool = OpTypeBool
%5663 = OpFunction %void None %1282
%24607 = OpLabel
%21248 = OpLoad %510 %5164
%19293 = OpLoad %v2float %5592
%8148 = OpImageSampleImplicitLod %v4float %21248 %19293
OpStore %4841 %8148
%20291 = OpAccessChain %_ptr_PushConstant_int %3463 %int_1
%11639 = OpLoad %int %20291
%12913 = OpINotEqual %bool %11639 %int_0
OpSelectionMerge %19578 None
OpBranchConditional %12913 %13163 %19578
%13163 = OpLabel
%9669 = OpLoad %v4float %4841
%6737 = OpVectorShuffle %v4float %9669 %9669 2 1 0 3
OpStore %4841 %6737
OpBranch %19578
%19578 = OpLabel
OpReturn
OpFunctionEnd
#endif
const uint32_t blit_color_ps[] = {
0x07230203, 0x00010000, 0x0008000A, 0x00006020, 0x00000000, 0x00020011,
0x00000001, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E,
0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0007000F, 0x00000004,
0x0000161F, 0x6E69616D, 0x00000000, 0x000012E9, 0x000015D8, 0x00030010,
0x0000161F, 0x00000007, 0x00040047, 0x000012E9, 0x0000001E, 0x00000000,
0x00040047, 0x0000142C, 0x00000022, 0x00000000, 0x00040047, 0x0000142C,
0x00000021, 0x00000000, 0x00040047, 0x000015D8, 0x0000001E, 0x00000000,
0x00050048, 0x000003FB, 0x00000000, 0x00000023, 0x00000020, 0x00050048,
0x000003FB, 0x00000001, 0x00000023, 0x0000002C, 0x00030047, 0x000003FB,
0x00000002, 0x00020013, 0x00000008, 0x00030021, 0x00000502, 0x00000008,
0x00030016, 0x0000000D, 0x00000020, 0x00040017, 0x0000001D, 0x0000000D,
0x00000004, 0x00040020, 0x0000029A, 0x00000003, 0x0000001D, 0x0004003B,
0x0000029A, 0x000012E9, 0x00000003, 0x00090019, 0x00000096, 0x0000000D,
0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000,
0x0003001B, 0x000001FE, 0x00000096, 0x00040020, 0x0000047B, 0x00000000,
0x000001FE, 0x0004003B, 0x0000047B, 0x0000142C, 0x00000000, 0x00040017,
0x00000013, 0x0000000D, 0x00000002, 0x00040020, 0x00000290, 0x00000001,
0x00000013, 0x0004003B, 0x00000290, 0x000015D8, 0x00000001, 0x00040017,
0x00000018, 0x0000000D, 0x00000003, 0x00040015, 0x0000000C, 0x00000020,
0x00000001, 0x0004001E, 0x000003FB, 0x00000018, 0x0000000C, 0x00040020,
0x00000678, 0x00000009, 0x000003FB, 0x0004003B, 0x00000678, 0x00000D87,
0x00000009, 0x0004002B, 0x0000000C, 0x00000A0E, 0x00000001, 0x00040020,
0x00000289, 0x00000009, 0x0000000C, 0x0004002B, 0x0000000C, 0x00000A0B,
0x00000000, 0x00020014, 0x00000009, 0x00050036, 0x00000008, 0x0000161F,
0x00000000, 0x00000502, 0x000200F8, 0x0000601F, 0x0004003D, 0x000001FE,
0x00005300, 0x0000142C, 0x0004003D, 0x00000013, 0x00004B5D, 0x000015D8,
0x00050057, 0x0000001D, 0x00001FD4, 0x00005300, 0x00004B5D, 0x0003003E,
0x000012E9, 0x00001FD4, 0x00050041, 0x00000289, 0x00004F43, 0x00000D87,
0x00000A0E, 0x0004003D, 0x0000000C, 0x00002D77, 0x00004F43, 0x000500AB,
0x00000009, 0x00003271, 0x00002D77, 0x00000A0B, 0x000300F7, 0x00004C7A,
0x00000000, 0x000400FA, 0x00003271, 0x0000336B, 0x00004C7A, 0x000200F8,
0x0000336B, 0x0004003D, 0x0000001D, 0x000025C5, 0x000012E9, 0x0009004F,
0x0000001D, 0x00001A51, 0x000025C5, 0x000025C5, 0x00000002, 0x00000001,
0x00000000, 0x00000003, 0x0003003E, 0x000012E9, 0x00001A51, 0x000200F9,
0x00004C7A, 0x000200F8, 0x00004C7A, 0x000100FD, 0x00010038,
};


@ -1,70 +0,0 @@
// Generated with `xb buildshaders`.
#if 0
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 10
; Bound: 24608
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Fragment %5663 "main" %gl_FragDepth %5592 %4841
OpExecutionMode %5663 OriginUpperLeft
OpExecutionMode %5663 DepthReplacing
OpDecorate %gl_FragDepth BuiltIn FragDepth
OpDecorate %5164 DescriptorSet 0
OpDecorate %5164 Binding 0
OpDecorate %5592 Location 0
OpDecorate %4841 Location 0
%void = OpTypeVoid
%1282 = OpTypeFunction %void
%float = OpTypeFloat 32
%_ptr_Output_float = OpTypePointer Output %float
%gl_FragDepth = OpVariable %_ptr_Output_float Output
%150 = OpTypeImage %float 2D 0 0 0 1 Unknown
%510 = OpTypeSampledImage %150
%_ptr_UniformConstant_510 = OpTypePointer UniformConstant %510
%5164 = OpVariable %_ptr_UniformConstant_510 UniformConstant
%v2float = OpTypeVector %float 2
%_ptr_Input_v2float = OpTypePointer Input %v2float
%5592 = OpVariable %_ptr_Input_v2float Input
%v4float = OpTypeVector %float 4
%_ptr_Output_v4float = OpTypePointer Output %v4float
%4841 = OpVariable %_ptr_Output_v4float Output
%5663 = OpFunction %void None %1282
%24607 = OpLabel
%21248 = OpLoad %510 %5164
%19654 = OpLoad %v2float %5592
%23875 = OpImageSampleImplicitLod %v4float %21248 %19654
%15662 = OpCompositeExtract %float %23875 0
OpStore %gl_FragDepth %15662
OpReturn
OpFunctionEnd
#endif
const uint32_t blit_depth_ps[] = {
0x07230203, 0x00010000, 0x0008000A, 0x00006020, 0x00000000, 0x00020011,
0x00000001, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E,
0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0008000F, 0x00000004,
0x0000161F, 0x6E69616D, 0x00000000, 0x000011F3, 0x000015D8, 0x000012E9,
0x00030010, 0x0000161F, 0x00000007, 0x00030010, 0x0000161F, 0x0000000C,
0x00040047, 0x000011F3, 0x0000000B, 0x00000016, 0x00040047, 0x0000142C,
0x00000022, 0x00000000, 0x00040047, 0x0000142C, 0x00000021, 0x00000000,
0x00040047, 0x000015D8, 0x0000001E, 0x00000000, 0x00040047, 0x000012E9,
0x0000001E, 0x00000000, 0x00020013, 0x00000008, 0x00030021, 0x00000502,
0x00000008, 0x00030016, 0x0000000D, 0x00000020, 0x00040020, 0x0000028A,
0x00000003, 0x0000000D, 0x0004003B, 0x0000028A, 0x000011F3, 0x00000003,
0x00090019, 0x00000096, 0x0000000D, 0x00000001, 0x00000000, 0x00000000,
0x00000000, 0x00000001, 0x00000000, 0x0003001B, 0x000001FE, 0x00000096,
0x00040020, 0x0000047B, 0x00000000, 0x000001FE, 0x0004003B, 0x0000047B,
0x0000142C, 0x00000000, 0x00040017, 0x00000013, 0x0000000D, 0x00000002,
0x00040020, 0x00000290, 0x00000001, 0x00000013, 0x0004003B, 0x00000290,
0x000015D8, 0x00000001, 0x00040017, 0x0000001D, 0x0000000D, 0x00000004,
0x00040020, 0x0000029A, 0x00000003, 0x0000001D, 0x0004003B, 0x0000029A,
0x000012E9, 0x00000003, 0x00050036, 0x00000008, 0x0000161F, 0x00000000,
0x00000502, 0x000200F8, 0x0000601F, 0x0004003D, 0x000001FE, 0x00005300,
0x0000142C, 0x0004003D, 0x00000013, 0x00004CC6, 0x000015D8, 0x00050057,
0x0000001D, 0x00005D43, 0x00005300, 0x00004CC6, 0x00050051, 0x0000000D,
0x00003D2E, 0x00005D43, 0x00000000, 0x0003003E, 0x000011F3, 0x00003D2E,
0x000100FD, 0x00010038,
};


@ -1,149 +0,0 @@
// Generated with `xb buildshaders`.
#if 0
; SPIR-V
; Version: 1.0
; Generator: Khronos Glslang Reference Front End; 10
; Bound: 25137
; Schema: 0
OpCapability Shader
%1 = OpExtInstImport "GLSL.std.450"
OpMemoryModel Logical GLSL450
OpEntryPoint Vertex %5663 "main" %gl_VertexIndex %4930 %5592
OpDecorate %gl_VertexIndex BuiltIn VertexIndex
OpMemberDecorate %_struct_1080 0 Offset 0
OpMemberDecorate %_struct_1080 1 Offset 16
OpDecorate %_struct_1080 Block
OpMemberDecorate %_struct_1589 0 BuiltIn Position
OpMemberDecorate %_struct_1589 1 BuiltIn PointSize
OpMemberDecorate %_struct_1589 2 BuiltIn ClipDistance
OpMemberDecorate %_struct_1589 3 BuiltIn CullDistance
OpDecorate %_struct_1589 Block
OpDecorate %5592 Location 0
%void = OpTypeVoid
%1282 = OpTypeFunction %void
%float = OpTypeFloat 32
%v2float = OpTypeVector %float 2
%_ptr_Function_v2float = OpTypePointer Function %v2float
%uint = OpTypeInt 32 0
%uint_4 = OpConstant %uint 4
%_arr_v2float_uint_4 = OpTypeArray %v2float %uint_4
%float_0 = OpConstant %float 0
%1823 = OpConstantComposite %v2float %float_0 %float_0
%float_1 = OpConstant %float 1
%312 = OpConstantComposite %v2float %float_1 %float_0
%889 = OpConstantComposite %v2float %float_0 %float_1
%768 = OpConstantComposite %v2float %float_1 %float_1
%809 = OpConstantComposite %_arr_v2float_uint_4 %1823 %312 %889 %768
%int = OpTypeInt 32 1
%_ptr_Input_int = OpTypePointer Input %int
%gl_VertexIndex = OpVariable %_ptr_Input_int Input
%_ptr_Function__arr_v2float_uint_4 = OpTypePointer Function %_arr_v2float_uint_4
%float_2 = OpConstant %float 2
%v4float = OpTypeVector %float 4
%_struct_1080 = OpTypeStruct %v4float %v4float
%_ptr_PushConstant__struct_1080 = OpTypePointer PushConstant %_struct_1080
%3463 = OpVariable %_ptr_PushConstant__struct_1080 PushConstant
%int_1 = OpConstant %int 1
%_ptr_PushConstant_v4float = OpTypePointer PushConstant %v4float
%2243 = OpConstantComposite %v4float %float_2 %float_2 %float_2 %float_2
%uint_1 = OpConstant %uint 1
%_arr_float_uint_1 = OpTypeArray %float %uint_1
%_struct_1589 = OpTypeStruct %v4float %float %_arr_float_uint_1 %_arr_float_uint_1
%_ptr_Output__struct_1589 = OpTypePointer Output %_struct_1589
%4930 = OpVariable %_ptr_Output__struct_1589 Output
%int_0 = OpConstant %int 0
%_ptr_Output_v4float = OpTypePointer Output %v4float
%_ptr_Output_v2float = OpTypePointer Output %v2float
%5592 = OpVariable %_ptr_Output_v2float Output
%5663 = OpFunction %void None %1282
%24953 = OpLabel
%5238 = OpVariable %_ptr_Function__arr_v2float_uint_4 Function
%24173 = OpLoad %int %gl_VertexIndex
OpStore %5238 %809
%16679 = OpAccessChain %_ptr_Function_v2float %5238 %24173
%7372 = OpLoad %v2float %16679
%21446 = OpAccessChain %_ptr_PushConstant_v4float %3463 %int_1
%10986 = OpLoad %v4float %21446
%7772 = OpFMul %v4float %10986 %2243
%17065 = OpVectorShuffle %v2float %7772 %7772 0 1
%22600 = OpFSub %v2float %17065 %768
%7156 = OpVectorShuffle %v2float %7772 %7772 2 3
%20491 = OpFMul %v2float %7372 %7156
%18197 = OpFAdd %v2float %22600 %20491
%10599 = OpCompositeExtract %float %18197 0
%13956 = OpCompositeExtract %float %18197 1
%18260 = OpCompositeConstruct %v4float %10599 %13956 %float_0 %float_1
%8483 = OpAccessChain %_ptr_Output_v4float %4930 %int_0
OpStore %8483 %18260
%20171 = OpAccessChain %_ptr_PushConstant_v4float %3463 %int_0
%6318 = OpLoad %v4float %20171
%7688 = OpVectorShuffle %v2float %6318 %6318 2 3
%18797 = OpFMul %v2float %7372 %7688
%18691 = OpVectorShuffle %v2float %6318 %6318 0 1
%25136 = OpFAdd %v2float %18797 %18691
OpStore %5592 %25136
OpReturn
OpFunctionEnd
#endif
const uint32_t blit_vs[] = {
0x07230203, 0x00010000, 0x0008000A, 0x00006231, 0x00000000, 0x00020011,
0x00000001, 0x0006000B, 0x00000001, 0x4C534C47, 0x6474732E, 0x3035342E,
0x00000000, 0x0003000E, 0x00000000, 0x00000001, 0x0008000F, 0x00000000,
0x0000161F, 0x6E69616D, 0x00000000, 0x00001029, 0x00001342, 0x000015D8,
0x00040047, 0x00001029, 0x0000000B, 0x0000002A, 0x00050048, 0x00000438,
0x00000000, 0x00000023, 0x00000000, 0x00050048, 0x00000438, 0x00000001,
0x00000023, 0x00000010, 0x00030047, 0x00000438, 0x00000002, 0x00050048,
0x00000635, 0x00000000, 0x0000000B, 0x00000000, 0x00050048, 0x00000635,
0x00000001, 0x0000000B, 0x00000001, 0x00050048, 0x00000635, 0x00000002,
0x0000000B, 0x00000003, 0x00050048, 0x00000635, 0x00000003, 0x0000000B,
0x00000004, 0x00030047, 0x00000635, 0x00000002, 0x00040047, 0x000015D8,
0x0000001E, 0x00000000, 0x00020013, 0x00000008, 0x00030021, 0x00000502,
0x00000008, 0x00030016, 0x0000000D, 0x00000020, 0x00040017, 0x00000013,
0x0000000D, 0x00000002, 0x00040020, 0x00000290, 0x00000007, 0x00000013,
0x00040015, 0x0000000B, 0x00000020, 0x00000000, 0x0004002B, 0x0000000B,
0x00000A16, 0x00000004, 0x0004001C, 0x00000276, 0x00000013, 0x00000A16,
0x0004002B, 0x0000000D, 0x00000A0C, 0x00000000, 0x0005002C, 0x00000013,
0x0000071F, 0x00000A0C, 0x00000A0C, 0x0004002B, 0x0000000D, 0x0000008A,
0x3F800000, 0x0005002C, 0x00000013, 0x00000138, 0x0000008A, 0x00000A0C,
0x0005002C, 0x00000013, 0x00000379, 0x00000A0C, 0x0000008A, 0x0005002C,
0x00000013, 0x00000300, 0x0000008A, 0x0000008A, 0x0007002C, 0x00000276,
0x00000329, 0x0000071F, 0x00000138, 0x00000379, 0x00000300, 0x00040015,
0x0000000C, 0x00000020, 0x00000001, 0x00040020, 0x00000289, 0x00000001,
0x0000000C, 0x0004003B, 0x00000289, 0x00001029, 0x00000001, 0x00040020,
0x000004F3, 0x00000007, 0x00000276, 0x0004002B, 0x0000000D, 0x00000018,
0x40000000, 0x00040017, 0x0000001D, 0x0000000D, 0x00000004, 0x0004001E,
0x00000438, 0x0000001D, 0x0000001D, 0x00040020, 0x000006B5, 0x00000009,
0x00000438, 0x0004003B, 0x000006B5, 0x00000D87, 0x00000009, 0x0004002B,
0x0000000C, 0x00000A0E, 0x00000001, 0x00040020, 0x0000029A, 0x00000009,
0x0000001D, 0x0007002C, 0x0000001D, 0x000008C3, 0x00000018, 0x00000018,
0x00000018, 0x00000018, 0x0004002B, 0x0000000B, 0x00000A0D, 0x00000001,
0x0004001C, 0x000002E3, 0x0000000D, 0x00000A0D, 0x0006001E, 0x00000635,
0x0000001D, 0x0000000D, 0x000002E3, 0x000002E3, 0x00040020, 0x000008B2,
0x00000003, 0x00000635, 0x0004003B, 0x000008B2, 0x00001342, 0x00000003,
0x0004002B, 0x0000000C, 0x00000A0B, 0x00000000, 0x00040020, 0x0000029B,
0x00000003, 0x0000001D, 0x00040020, 0x00000291, 0x00000003, 0x00000013,
0x0004003B, 0x00000291, 0x000015D8, 0x00000003, 0x00050036, 0x00000008,
0x0000161F, 0x00000000, 0x00000502, 0x000200F8, 0x00006179, 0x0004003B,
0x000004F3, 0x00001476, 0x00000007, 0x0004003D, 0x0000000C, 0x00005E6D,
0x00001029, 0x0003003E, 0x00001476, 0x00000329, 0x00050041, 0x00000290,
0x00004127, 0x00001476, 0x00005E6D, 0x0004003D, 0x00000013, 0x00001CCC,
0x00004127, 0x00050041, 0x0000029A, 0x000053C6, 0x00000D87, 0x00000A0E,
0x0004003D, 0x0000001D, 0x00002AEA, 0x000053C6, 0x00050085, 0x0000001D,
0x00001E5C, 0x00002AEA, 0x000008C3, 0x0007004F, 0x00000013, 0x000042A9,
0x00001E5C, 0x00001E5C, 0x00000000, 0x00000001, 0x00050083, 0x00000013,
0x00005848, 0x000042A9, 0x00000300, 0x0007004F, 0x00000013, 0x00001BF4,
0x00001E5C, 0x00001E5C, 0x00000002, 0x00000003, 0x00050085, 0x00000013,
0x0000500B, 0x00001CCC, 0x00001BF4, 0x00050081, 0x00000013, 0x00004715,
0x00005848, 0x0000500B, 0x00050051, 0x0000000D, 0x00002967, 0x00004715,
0x00000000, 0x00050051, 0x0000000D, 0x00003684, 0x00004715, 0x00000001,
0x00070050, 0x0000001D, 0x00004754, 0x00002967, 0x00003684, 0x00000A0C,
0x0000008A, 0x00050041, 0x0000029B, 0x00002123, 0x00001342, 0x00000A0B,
0x0003003E, 0x00002123, 0x00004754, 0x00050041, 0x0000029A, 0x00004ECB,
0x00000D87, 0x00000A0B, 0x0004003D, 0x0000001D, 0x000018AE, 0x00004ECB,
0x0007004F, 0x00000013, 0x00001E08, 0x000018AE, 0x000018AE, 0x00000002,
0x00000003, 0x00050085, 0x00000013, 0x0000496D, 0x00001CCC, 0x00001E08,
0x0007004F, 0x00000013, 0x00004903, 0x000018AE, 0x000018AE, 0x00000000,
0x00000001, 0x00050081, 0x00000013, 0x00006230, 0x0000496D, 0x00004903,
0x0003003E, 0x000015D8, 0x00006230, 0x000100FD, 0x00010038,
};


@ -0,0 +1,119 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/vulkan/single_layout_descriptor_set_pool.h"
#include "xenia/base/assert.h"
#include "xenia/base/logging.h"
namespace xe {
namespace ui {
namespace vulkan {
SingleLayoutDescriptorSetPool::SingleLayoutDescriptorSetPool(
const VulkanProvider& provider, uint32_t pool_set_count,
uint32_t set_layout_descriptor_counts_count,
const VkDescriptorPoolSize* set_layout_descriptor_counts,
VkDescriptorSetLayout set_layout)
: provider_(provider),
pool_set_count_(pool_set_count),
set_layout_(set_layout) {
assert_not_zero(pool_set_count);
pool_descriptor_counts_.resize(set_layout_descriptor_counts_count);
for (uint32_t i = 0; i < set_layout_descriptor_counts_count; ++i) {
VkDescriptorPoolSize& pool_descriptor_type_count =
pool_descriptor_counts_[i];
const VkDescriptorPoolSize& set_layout_descriptor_type_count =
set_layout_descriptor_counts[i];
pool_descriptor_type_count.type = set_layout_descriptor_type_count.type;
pool_descriptor_type_count.descriptorCount =
set_layout_descriptor_type_count.descriptorCount * pool_set_count;
}
}
SingleLayoutDescriptorSetPool::~SingleLayoutDescriptorSetPool() {
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
if (current_pool_ != VK_NULL_HANDLE) {
dfn.vkDestroyDescriptorPool(device, current_pool_, nullptr);
}
for (VkDescriptorPool pool : full_pools_) {
dfn.vkDestroyDescriptorPool(device, pool, nullptr);
}
}
size_t SingleLayoutDescriptorSetPool::Allocate() {
if (!descriptor_sets_free_.empty()) {
size_t free_index = descriptor_sets_free_.back();
descriptor_sets_free_.pop_back();
return free_index;
}
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
// Two iterations so if vkAllocateDescriptorSets fails even with a non-zero
// current_pool_sets_remaining_, another attempt will be made in a new pool.
for (uint32_t i = 0; i < 2; ++i) {
if (current_pool_ != VK_NULL_HANDLE && !current_pool_sets_remaining_) {
full_pools_.push_back(current_pool_);
current_pool_ = VK_NULL_HANDLE;
}
if (current_pool_ == VK_NULL_HANDLE) {
VkDescriptorPoolCreateInfo pool_create_info;
pool_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
pool_create_info.pNext = nullptr;
pool_create_info.flags = 0;
pool_create_info.maxSets = pool_set_count_;
pool_create_info.poolSizeCount = uint32_t(pool_descriptor_counts_.size());
pool_create_info.pPoolSizes = pool_descriptor_counts_.data();
if (dfn.vkCreateDescriptorPool(device, &pool_create_info, nullptr,
&current_pool_) != VK_SUCCESS) {
XELOGE(
"SingleLayoutDescriptorSetPool: Failed to create a descriptor "
"pool");
return SIZE_MAX;
}
current_pool_sets_remaining_ = pool_set_count_;
}
VkDescriptorSetAllocateInfo descriptor_set_allocate_info;
descriptor_set_allocate_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
descriptor_set_allocate_info.pNext = nullptr;
descriptor_set_allocate_info.descriptorPool = current_pool_;
descriptor_set_allocate_info.descriptorSetCount = 1;
descriptor_set_allocate_info.pSetLayouts = &set_layout_;
VkDescriptorSet descriptor_set;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
XELOGE(
"SingleLayoutDescriptorSetPool: Failed to allocate a descriptor set");
if (current_pool_sets_remaining_ >= pool_set_count_) {
// Failed to allocate in a new pool - something completely wrong, don't
// store empty pools as full.
dfn.vkDestroyDescriptorPool(device, current_pool_, nullptr);
current_pool_ = VK_NULL_HANDLE;
return SIZE_MAX;
}
full_pools_.push_back(current_pool_);
current_pool_ = VK_NULL_HANDLE;
// Retry in a new pool on the next iteration.
continue;
}
--current_pool_sets_remaining_;
descriptor_sets_.push_back(descriptor_set);
return descriptor_sets_.size() - 1;
}
// Both attempts have failed.
return SIZE_MAX;
}
} // namespace vulkan
} // namespace ui
} // namespace xe


@ -0,0 +1,63 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_VULKAN_SINGLE_LAYOUT_DESCRIPTOR_SET_POOL_H_
#define XENIA_UI_VULKAN_SINGLE_LAYOUT_DESCRIPTOR_SET_POOL_H_
#include <cstddef>
#include <cstdint>
#include <vector>
#include "xenia/base/assert.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace ui {
namespace vulkan {
class SingleLayoutDescriptorSetPool {
public:
// set_layout_descriptor_counts must contain the numbers of descriptors of
// each type in a single set with the layout (the multiplication by the pool
// set count will be done internally). The descriptor set layout must not be
// destroyed until this object is also destroyed.
SingleLayoutDescriptorSetPool(
const VulkanProvider& provider, uint32_t pool_set_count,
uint32_t set_layout_descriptor_counts_count,
const VkDescriptorPoolSize* set_layout_descriptor_counts,
VkDescriptorSetLayout set_layout);
~SingleLayoutDescriptorSetPool();
// Returns SIZE_MAX in case of a failure.
size_t Allocate();
void Free(size_t index) {
assert_true(index < descriptor_sets_.size());
descriptor_sets_free_.push_back(index);
}
VkDescriptorSet Get(size_t index) const { return descriptor_sets_[index]; }
private:
const VulkanProvider& provider_;
uint32_t pool_set_count_;
std::vector<VkDescriptorPoolSize> pool_descriptor_counts_;
VkDescriptorSetLayout set_layout_;
std::vector<VkDescriptorPool> full_pools_;
VkDescriptorPool current_pool_ = VK_NULL_HANDLE;
uint32_t current_pool_sets_remaining_ = 0;
std::vector<VkDescriptorSet> descriptor_sets_;
std::vector<size_t> descriptor_sets_free_;
};
} // namespace vulkan
} // namespace ui
} // namespace xe
#endif  // XENIA_UI_VULKAN_SINGLE_LAYOUT_DESCRIPTOR_SET_POOL_H_
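
A rough caller-side sketch of this API (not part of this diff; `provider`, `texture_set_layout`, and all counts are assumptions): describe the per-set descriptor counts once, then recycle sets through the index-based free list.

// Hypothetical usage sketch; the names and numbers are illustrative only.
VkDescriptorPoolSize set_descriptor_count;
set_descriptor_count.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
set_descriptor_count.descriptorCount = 1;  // Per single set with the layout.
xe::ui::vulkan::SingleLayoutDescriptorSetPool pool(
    provider, /* pool_set_count= */ 256,
    /* set_layout_descriptor_counts_count= */ 1, &set_descriptor_count,
    texture_set_layout);
size_t set_index = pool.Allocate();
if (set_index != SIZE_MAX) {
  VkDescriptorSet set = pool.Get(set_index);
  // ... vkUpdateDescriptorSets / vkCmdBindDescriptorSets using `set` ...
  pool.Free(set_index);  // Pushes the index onto the free list for reuse.
}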


@ -0,0 +1,216 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/vulkan/single_type_descriptor_set_allocator.h"
#include "xenia/base/logging.h"
#include "xenia/ui/vulkan/vulkan_util.h"
namespace xe {
namespace ui {
namespace vulkan {
void SingleTypeDescriptorSetAllocator::Reset() {
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
ui::vulkan::util::DestroyAndNullHandle(dfn.vkDestroyDescriptorPool, device,
page_usable_latest_.pool);
for (const std::pair<uint32_t, Page>& page_pair : pages_usable_) {
dfn.vkDestroyDescriptorPool(device, page_pair.second.pool, nullptr);
}
pages_usable_.clear();
for (VkDescriptorPool pool : pages_full_) {
dfn.vkDestroyDescriptorPool(device, pool, nullptr);
}
pages_full_.clear();
}
VkDescriptorSet SingleTypeDescriptorSetAllocator::Allocate(
VkDescriptorSetLayout descriptor_set_layout, uint32_t descriptor_count) {
assert_not_zero(descriptor_count);
if (descriptor_count == 0) {
return VK_NULL_HANDLE;
}
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider_.dfn();
VkDevice device = provider_.device();
VkDescriptorSetAllocateInfo descriptor_set_allocate_info;
descriptor_set_allocate_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
descriptor_set_allocate_info.pNext = nullptr;
descriptor_set_allocate_info.descriptorSetCount = 1;
descriptor_set_allocate_info.pSetLayouts = &descriptor_set_layout;
VkDescriptorSet descriptor_set;
if (descriptor_count > descriptor_pool_size_.descriptorCount) {
// Can't allocate in the pool, need a dedicated allocation.
VkDescriptorPoolSize dedicated_descriptor_pool_size;
dedicated_descriptor_pool_size.type = descriptor_pool_size_.type;
dedicated_descriptor_pool_size.descriptorCount = descriptor_count;
VkDescriptorPoolCreateInfo dedicated_descriptor_pool_create_info;
dedicated_descriptor_pool_create_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
dedicated_descriptor_pool_create_info.pNext = nullptr;
dedicated_descriptor_pool_create_info.flags = 0;
dedicated_descriptor_pool_create_info.maxSets = 1;
dedicated_descriptor_pool_create_info.poolSizeCount = 1;
dedicated_descriptor_pool_create_info.pPoolSizes =
&dedicated_descriptor_pool_size;
VkDescriptorPool dedicated_descriptor_pool;
if (dfn.vkCreateDescriptorPool(
device, &dedicated_descriptor_pool_create_info, nullptr,
&dedicated_descriptor_pool) != VK_SUCCESS) {
XELOGE(
"SingleTypeDescriptorSetAllocator: Failed to create a dedicated pool "
"for {} descriptors",
dedicated_descriptor_pool_size.descriptorCount);
return VK_NULL_HANDLE;
}
descriptor_set_allocate_info.descriptorPool = dedicated_descriptor_pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
XELOGE(
"SingleTypeDescriptorSetAllocator: Failed to allocate {} descriptors "
"in a dedicated pool",
descriptor_count);
dfn.vkDestroyDescriptorPool(device, dedicated_descriptor_pool, nullptr);
return VK_NULL_HANDLE;
}
pages_full_.push_back(dedicated_descriptor_pool);
return descriptor_set;
}
// Try allocating from the latest page an allocation has happened from, to
// avoid detaching from the map and re-attaching for every allocation.
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
assert_not_zero(page_usable_latest_.descriptors_remaining);
assert_not_zero(page_usable_latest_.descriptor_sets_remaining);
if (page_usable_latest_.descriptors_remaining >= descriptor_count) {
descriptor_set_allocate_info.descriptorPool = page_usable_latest_.pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) == VK_SUCCESS) {
page_usable_latest_.descriptors_remaining -= descriptor_count;
--page_usable_latest_.descriptor_sets_remaining;
if (!page_usable_latest_.descriptors_remaining ||
!page_usable_latest_.descriptor_sets_remaining) {
pages_full_.push_back(page_usable_latest_.pool);
page_usable_latest_.pool = VK_NULL_HANDLE;
}
return descriptor_set;
}
// Allocation failed internally even though there should be enough space;
// don't try to allocate from this pool again at all.
pages_full_.push_back(page_usable_latest_.pool);
page_usable_latest_.pool = VK_NULL_HANDLE;
}
}
// If allocating from the latest page wasn't possible, pick any page that has
// free space. Prefer the pages with the most free space, as they are more
// likely to fit further allocations later.
while (!pages_usable_.empty()) {
auto page_usable_last_it = std::prev(pages_usable_.cend());
if (page_usable_last_it->second.descriptors_remaining < descriptor_count) {
// All other pages_usable_ entries have fewer free descriptors too (the
// remaining count is the map key).
break;
}
// Remove the page from the map unconditionally. After a successful
// allocation it will have a different number of free descriptors, and thus a
// new map key (though it will actually become page_usable_latest_ instead),
// or it will become full; and if the allocation fails internally even though
// there still should be enough space, the page should never be allocated
// from again.
Page map_page = page_usable_last_it->second;
pages_usable_.erase(page_usable_last_it);
descriptor_set_allocate_info.descriptorPool = map_page.pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
pages_full_.push_back(map_page.pool);
continue;
}
map_page.descriptors_remaining -= descriptor_count;
--map_page.descriptor_sets_remaining;
if (!map_page.descriptors_remaining ||
!map_page.descriptor_sets_remaining) {
pages_full_.push_back(map_page.pool);
} else {
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
// Make the page with more free descriptors the next to allocate from.
if (map_page.descriptors_remaining >
page_usable_latest_.descriptors_remaining) {
pages_usable_.emplace(page_usable_latest_.descriptors_remaining,
page_usable_latest_);
page_usable_latest_ = map_page;
} else {
pages_usable_.emplace(map_page.descriptors_remaining, map_page);
}
} else {
page_usable_latest_ = map_page;
}
}
return descriptor_set;
}
// Try allocating from a new page.
VkDescriptorPoolCreateInfo new_descriptor_pool_create_info;
new_descriptor_pool_create_info.sType =
VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
new_descriptor_pool_create_info.pNext = nullptr;
new_descriptor_pool_create_info.flags = 0;
new_descriptor_pool_create_info.maxSets = descriptor_sets_per_page_;
new_descriptor_pool_create_info.poolSizeCount = 1;
new_descriptor_pool_create_info.pPoolSizes = &descriptor_pool_size_;
VkDescriptorPool new_descriptor_pool;
if (dfn.vkCreateDescriptorPool(device, &new_descriptor_pool_create_info,
nullptr, &new_descriptor_pool) != VK_SUCCESS) {
XELOGE(
"SingleTypeDescriptorSetAllocator: Failed to create a pool for {} sets "
"with {} descriptors",
descriptor_sets_per_page_, descriptor_pool_size_.descriptorCount);
return VK_NULL_HANDLE;
}
descriptor_set_allocate_info.descriptorPool = new_descriptor_pool;
if (dfn.vkAllocateDescriptorSets(device, &descriptor_set_allocate_info,
&descriptor_set) != VK_SUCCESS) {
XELOGE(
"SingleTypeDescriptorSetAllocator: Failed to allocate {} descriptors",
descriptor_count);
dfn.vkDestroyDescriptorPool(device, new_descriptor_pool, nullptr);
return VK_NULL_HANDLE;
}
Page new_page;
new_page.pool = new_descriptor_pool;
new_page.descriptors_remaining =
descriptor_pool_size_.descriptorCount - descriptor_count;
new_page.descriptor_sets_remaining = descriptor_sets_per_page_ - 1;
if (!new_page.descriptors_remaining || !new_page.descriptor_sets_remaining) {
pages_full_.push_back(new_page.pool);
} else {
if (page_usable_latest_.pool != VK_NULL_HANDLE) {
// Make the page with more free descriptors the next to allocate from.
if (new_page.descriptors_remaining >
page_usable_latest_.descriptors_remaining) {
pages_usable_.emplace(page_usable_latest_.descriptors_remaining,
page_usable_latest_);
page_usable_latest_ = new_page;
} else {
pages_usable_.emplace(new_page.descriptors_remaining, new_page);
}
} else {
page_usable_latest_ = new_page;
}
}
return descriptor_set;
}
} // namespace vulkan
} // namespace ui
} // namespace xe


@ -0,0 +1,84 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
#define XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
#include <algorithm>
#include <cstdint>
#include <map>
#include <vector>
#include "xenia/base/assert.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace ui {
namespace vulkan {
// Allocates multiple descriptors of a single type in descriptor set layouts
// consisting of descriptors of only that type. There's no way to free these
// descriptors within the SingleTypeDescriptorSetAllocator; per-layout free
// lists should be used externally.
class SingleTypeDescriptorSetAllocator {
public:
explicit SingleTypeDescriptorSetAllocator(
const ui::vulkan::VulkanProvider& provider,
VkDescriptorType descriptor_type, uint32_t descriptors_per_page,
uint32_t descriptor_sets_per_page)
: provider_(provider),
descriptor_sets_per_page_(descriptor_sets_per_page) {
assert_not_zero(descriptor_sets_per_page_);
descriptor_pool_size_.type = descriptor_type;
// Sets with 0 descriptors are not allocated via this allocator, so it would
// be pointless for the descriptor count to be below the set count.
descriptor_pool_size_.descriptorCount =
std::max(descriptors_per_page, descriptor_sets_per_page);
}
SingleTypeDescriptorSetAllocator(
const SingleTypeDescriptorSetAllocator& allocator) = delete;
SingleTypeDescriptorSetAllocator& operator=(
const SingleTypeDescriptorSetAllocator& allocator) = delete;
~SingleTypeDescriptorSetAllocator() { Reset(); }
void Reset();
VkDescriptorSet Allocate(VkDescriptorSetLayout descriptor_set_layout,
uint32_t descriptor_count);
private:
struct Page {
VkDescriptorPool pool;
uint32_t descriptors_remaining;
uint32_t descriptor_sets_remaining;
};
const ui::vulkan::VulkanProvider& provider_;
VkDescriptorPoolSize descriptor_pool_size_;
uint32_t descriptor_sets_per_page_;
std::vector<VkDescriptorPool> pages_full_;
// Because allocations must be contiguous, overflow may happen even if a page
// still has free descriptors, so multiple pages may have free space.
// To avoid removing and re-adding a page to the map that keeps them sorted
// (the key is the number of free descriptors remaining, which changes at
// every allocation from a page), one page is kept outside the map, and
// allocation attempts are made from that page first.
std::multimap<uint32_t, Page> pages_usable_;
// Doesn't exist if page_usable_latest_.pool == VK_NULL_HANDLE.
Page page_usable_latest_ = {};
};
} // namespace vulkan
} // namespace ui
} // namespace xe
#endif // XENIA_UI_VULKAN_SINGLE_TYPE_DESCRIPTOR_SET_ALLOCATOR_H_
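
A minimal sketch of how a caller might use the allocator (assumed names and page sizes, not taken from this diff); since individual sets can't be freed, everything is released at once with Reset():

// Hypothetical usage sketch; `provider` and `storage_buffer_set_layout` are
// assumed to exist.
xe::ui::vulkan::SingleTypeDescriptorSetAllocator allocator(
    provider, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
    /* descriptors_per_page= */ 1024, /* descriptor_sets_per_page= */ 64);
VkDescriptorSet set = allocator.Allocate(storage_buffer_set_layout,
                                         /* descriptor_count= */ 4);
if (set == VK_NULL_HANDLE) {
  // Pool creation or set allocation failed; nothing to clean up here.
}
// ...
allocator.Reset();  // Destroys all pages; every allocated set becomes invalid.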


@ -0,0 +1,123 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#include "xenia/ui/vulkan/spirv_tools_context.h"
#include <cstdlib>
#include <filesystem>
#include <string>
#include "xenia/base/logging.h"
#include "xenia/base/platform.h"
#if XE_PLATFORM_LINUX
#include <dlfcn.h>
#elif XE_PLATFORM_WIN32
#include "xenia/base/platform_win.h"
#endif
namespace xe {
namespace ui {
namespace vulkan {
bool SpirvToolsContext::Initialize(unsigned int spirv_version) {
const char* vulkan_sdk_env = std::getenv("VULKAN_SDK");
if (!vulkan_sdk_env) {
XELOGE("SPIRV-Tools: Failed to get the VULKAN_SDK environment variable");
Shutdown();
return false;
}
std::filesystem::path vulkan_sdk_path(vulkan_sdk_env);
#if XE_PLATFORM_LINUX
library_ = dlopen((vulkan_sdk_path / "bin/libSPIRV-Tools-shared.so").c_str(),
RTLD_NOW | RTLD_LOCAL);
if (!library_) {
XELOGE(
"SPIRV-Tools: Failed to load $VULKAN_SDK/bin/libSPIRV-Tools-shared.so");
Shutdown();
return false;
}
#elif XE_PLATFORM_WIN32
library_ = LoadLibraryW(
(vulkan_sdk_path / "Bin/SPIRV-Tools-shared.dll").wstring().c_str());
if (!library_) {
XELOGE(
"SPIRV-Tools: Failed to load %VULKAN_SDK%/Bin/SPIRV-Tools-shared.dll");
Shutdown();
return false;
}
#else
#error No SPIRV-Tools library loading provided for the target platform.
#endif
if (!LoadLibraryFunction(fn_spvContextCreate_, "spvContextCreate") ||
!LoadLibraryFunction(fn_spvContextDestroy_, "spvContextDestroy") ||
!LoadLibraryFunction(fn_spvValidateBinary_, "spvValidateBinary") ||
!LoadLibraryFunction(fn_spvDiagnosticDestroy_, "spvDiagnosticDestroy")) {
XELOGE("SPIRV-Tools: Failed to get library function pointers");
Shutdown();
return false;
}
spv_target_env target_env;
if (spirv_version >= 0x10500) {
target_env = SPV_ENV_VULKAN_1_2;
} else if (spirv_version >= 0x10400) {
target_env = SPV_ENV_VULKAN_1_1_SPIRV_1_4;
} else if (spirv_version >= 0x10300) {
target_env = SPV_ENV_VULKAN_1_1;
} else {
target_env = SPV_ENV_VULKAN_1_0;
}
context_ = fn_spvContextCreate_(target_env);
if (!context_) {
XELOGE("SPIRV-Tools: Failed to create a Vulkan 1.0 context");
Shutdown();
return false;
}
return true;
}
void SpirvToolsContext::Shutdown() {
if (context_) {
fn_spvContextDestroy_(context_);
context_ = nullptr;
}
if (library_) {
#if XE_PLATFORM_LINUX
dlclose(library_);
#elif XE_PLATFORM_WIN32
FreeLibrary(library_);
#endif
library_ = nullptr;
}
}
spv_result_t SpirvToolsContext::Validate(const uint32_t* words,
size_t num_words,
std::string* error) const {
if (error) {
error->clear();
}
if (!context_) {
return SPV_UNSUPPORTED;
}
spv_diagnostic diagnostic = nullptr;
spv_result_t result =
fn_spvValidateBinary_(context_, words, num_words, &diagnostic);
if (diagnostic) {
if (error && diagnostic->error) {
*error = diagnostic->error;
}
fn_spvDiagnosticDestroy_(diagnostic);
}
return result;
}
} // namespace vulkan
} // namespace ui
} // namespace xe


@ -0,0 +1,72 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2020 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
#ifndef XENIA_UI_VULKAN_SPIRV_TOOLS_CONTEXT_H_
#define XENIA_UI_VULKAN_SPIRV_TOOLS_CONTEXT_H_
#include <cstdint>
#include <string>
#include "third_party/SPIRV-Tools/include/spirv-tools/libspirv.h"
#include "xenia/base/platform.h"
#if XE_PLATFORM_LINUX
#include <dlfcn.h>
#elif XE_PLATFORM_WIN32
#include "xenia/base/platform_win.h"
#endif
namespace xe {
namespace ui {
namespace vulkan {
class SpirvToolsContext {
public:
SpirvToolsContext() {}
SpirvToolsContext(const SpirvToolsContext& context) = delete;
SpirvToolsContext& operator=(const SpirvToolsContext& context) = delete;
~SpirvToolsContext() { Shutdown(); }
bool Initialize(unsigned int spirv_version);
void Shutdown();
spv_result_t Validate(const uint32_t* words, size_t num_words,
std::string* error) const;
private:
#if XE_PLATFORM_LINUX
void* library_ = nullptr;
#elif XE_PLATFORM_WIN32
HMODULE library_ = nullptr;
#endif
template <typename FunctionPointer>
bool LoadLibraryFunction(FunctionPointer& function, const char* name) {
#if XE_PLATFORM_LINUX
function = reinterpret_cast<FunctionPointer>(dlsym(library_, name));
#elif XE_PLATFORM_WIN32
function =
reinterpret_cast<FunctionPointer>(GetProcAddress(library_, name));
#else
#error No SPIRV-Tools LoadLibraryFunction provided for the target platform.
#endif
return function != nullptr;
}
decltype(&spvContextCreate) fn_spvContextCreate_ = nullptr;
decltype(&spvContextDestroy) fn_spvContextDestroy_ = nullptr;
decltype(&spvValidateBinary) fn_spvValidateBinary_ = nullptr;
decltype(&spvDiagnosticDestroy) fn_spvDiagnosticDestroy_ = nullptr;
spv_context context_ = nullptr;
};
} // namespace vulkan
} // namespace ui
} // namespace xe
#endif // XENIA_UI_VULKAN_SPIRV_TOOLS_CONTEXT_H_
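
A sketch of validating a shader through this wrapper (`shader_code` and `shader_word_count` are assumptions); per the mapping in Initialize, passing SPIR-V version 0x10500 (1.5) selects the Vulkan 1.2 target environment:

// Hypothetical usage sketch; requires VULKAN_SDK to point at an SDK that
// ships the SPIRV-Tools shared library.
xe::ui::vulkan::SpirvToolsContext spirv_tools;
if (spirv_tools.Initialize(0x10500)) {
  std::string error;
  if (spirv_tools.Validate(shader_code, shader_word_count, &error) !=
      SPV_SUCCESS) {
    XELOGE("SPIRV-Tools validation failed: {}", error);
  }
}
// The destructor calls Shutdown(), destroying the context and unloading the
// library.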


@ -0,0 +1,108 @@
/**
******************************************************************************
* Xenia : Xbox 360 Emulator Research Project *
******************************************************************************
* Copyright 2022 Ben Vanik. All rights reserved. *
* Released under the BSD license - see LICENSE in the root for more details. *
******************************************************************************
*/
// Implementing VMA in this translation unit.
#define VMA_IMPLEMENTATION
#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
#include <cstring>
#include "xenia/base/logging.h"
#include "xenia/ui/vulkan/vulkan_provider.h"
namespace xe {
namespace ui {
namespace vulkan {
VmaAllocator CreateVmaAllocator(const VulkanProvider& provider,
bool externally_synchronized) {
const VulkanProvider::LibraryFunctions& lfn = provider.lfn();
const VulkanProvider::InstanceFunctions& ifn = provider.ifn();
const VulkanProvider::DeviceFunctions& dfn = provider.dfn();
const VulkanProvider::InstanceExtensions& instance_extensions =
provider.instance_extensions();
const VulkanProvider::DeviceExtensions& device_extensions =
provider.device_extensions();
VmaVulkanFunctions vma_vulkan_functions = {};
VmaAllocatorCreateInfo allocator_create_info = {};
vma_vulkan_functions.vkGetInstanceProcAddr = lfn.vkGetInstanceProcAddr;
vma_vulkan_functions.vkGetDeviceProcAddr = ifn.vkGetDeviceProcAddr;
vma_vulkan_functions.vkGetPhysicalDeviceProperties =
ifn.vkGetPhysicalDeviceProperties;
vma_vulkan_functions.vkGetPhysicalDeviceMemoryProperties =
ifn.vkGetPhysicalDeviceMemoryProperties;
vma_vulkan_functions.vkAllocateMemory = dfn.vkAllocateMemory;
vma_vulkan_functions.vkFreeMemory = dfn.vkFreeMemory;
vma_vulkan_functions.vkMapMemory = dfn.vkMapMemory;
vma_vulkan_functions.vkUnmapMemory = dfn.vkUnmapMemory;
vma_vulkan_functions.vkFlushMappedMemoryRanges =
dfn.vkFlushMappedMemoryRanges;
vma_vulkan_functions.vkInvalidateMappedMemoryRanges =
dfn.vkInvalidateMappedMemoryRanges;
vma_vulkan_functions.vkBindBufferMemory = dfn.vkBindBufferMemory;
vma_vulkan_functions.vkBindImageMemory = dfn.vkBindImageMemory;
vma_vulkan_functions.vkGetBufferMemoryRequirements =
dfn.vkGetBufferMemoryRequirements;
vma_vulkan_functions.vkGetImageMemoryRequirements =
dfn.vkGetImageMemoryRequirements;
vma_vulkan_functions.vkCreateBuffer = dfn.vkCreateBuffer;
vma_vulkan_functions.vkDestroyBuffer = dfn.vkDestroyBuffer;
vma_vulkan_functions.vkCreateImage = dfn.vkCreateImage;
vma_vulkan_functions.vkDestroyImage = dfn.vkDestroyImage;
vma_vulkan_functions.vkCmdCopyBuffer = dfn.vkCmdCopyBuffer;
if (device_extensions.khr_get_memory_requirements2) {
vma_vulkan_functions.vkGetBufferMemoryRequirements2KHR =
dfn.vkGetBufferMemoryRequirements2KHR;
vma_vulkan_functions.vkGetImageMemoryRequirements2KHR =
dfn.vkGetImageMemoryRequirements2KHR;
if (device_extensions.khr_dedicated_allocation) {
allocator_create_info.flags |=
VMA_ALLOCATOR_CREATE_KHR_DEDICATED_ALLOCATION_BIT;
}
}
if (device_extensions.khr_bind_memory2) {
vma_vulkan_functions.vkBindBufferMemory2KHR = dfn.vkBindBufferMemory2KHR;
vma_vulkan_functions.vkBindImageMemory2KHR = dfn.vkBindImageMemory2KHR;
allocator_create_info.flags |= VMA_ALLOCATOR_CREATE_KHR_BIND_MEMORY2_BIT;
}
if (instance_extensions.khr_get_physical_device_properties2) {
vma_vulkan_functions.vkGetPhysicalDeviceMemoryProperties2KHR =
ifn.vkGetPhysicalDeviceMemoryProperties2KHR;
if (device_extensions.ext_memory_budget) {
allocator_create_info.flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT;
}
}
if (device_extensions.khr_maintenance4) {
vma_vulkan_functions.vkGetDeviceImageMemoryRequirements =
dfn.vkGetDeviceImageMemoryRequirementsKHR;
}
if (externally_synchronized) {
allocator_create_info.flags |=
VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
}
allocator_create_info.physicalDevice = provider.physical_device();
allocator_create_info.device = provider.device();
allocator_create_info.pVulkanFunctions = &vma_vulkan_functions;
allocator_create_info.instance = provider.instance();
allocator_create_info.vulkanApiVersion =
provider.device_properties().apiVersion;
VmaAllocator allocator;
if (vmaCreateAllocator(&allocator_create_info, &allocator) != VK_SUCCESS) {
XELOGE("Failed to create a Vulkan Memory Allocator instance");
return VK_NULL_HANDLE;
}
return allocator;
}
} // namespace vulkan
} // namespace ui
} // namespace xe
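
A minimal sketch of creating and destroying the allocator (caller names assumed). Because the full function table above is passed in, VMA never has to resolve Vulkan entry points itself:

// Hypothetical usage sketch; `provider` is an initialized VulkanProvider.
// Pass true for externally_synchronized only when every use of the allocator
// is externally synchronized, letting VMA skip its internal mutexes.
VmaAllocator allocator = xe::ui::vulkan::CreateVmaAllocator(
    provider, /* externally_synchronized= */ true);
if (allocator != VK_NULL_HANDLE) {
  // ... vmaCreateBuffer / vmaCreateImage against `allocator` ...
  vmaDestroyAllocator(allocator);
}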


@ -29,33 +29,8 @@ namespace xe {
namespace ui {
namespace vulkan {
inline void FillVMAVulkanFunctions(VmaVulkanFunctions* vma_funcs,
const VulkanProvider& provider) {
const VulkanProvider::LibraryFunctions& lfn = provider.lfn();
const VulkanProvider::InstanceFunctions& ifn = provider.ifn();
const VulkanProvider::DeviceFunctions& dfn = provider.dfn();
vma_funcs->vkGetInstanceProcAddr = lfn.vkGetInstanceProcAddr;
vma_funcs->vkGetDeviceProcAddr = ifn.vkGetDeviceProcAddr;
vma_funcs->vkGetPhysicalDeviceProperties = ifn.vkGetPhysicalDeviceProperties;
vma_funcs->vkGetPhysicalDeviceMemoryProperties =
ifn.vkGetPhysicalDeviceMemoryProperties;
vma_funcs->vkAllocateMemory = dfn.vkAllocateMemory;
vma_funcs->vkFreeMemory = dfn.vkFreeMemory;
vma_funcs->vkMapMemory = dfn.vkMapMemory;
vma_funcs->vkUnmapMemory = dfn.vkUnmapMemory;
vma_funcs->vkFlushMappedMemoryRanges = dfn.vkFlushMappedMemoryRanges;
vma_funcs->vkInvalidateMappedMemoryRanges =
dfn.vkInvalidateMappedMemoryRanges;
vma_funcs->vkBindBufferMemory = dfn.vkBindBufferMemory;
vma_funcs->vkBindImageMemory = dfn.vkBindImageMemory;
vma_funcs->vkGetBufferMemoryRequirements = dfn.vkGetBufferMemoryRequirements;
vma_funcs->vkGetImageMemoryRequirements = dfn.vkGetImageMemoryRequirements;
vma_funcs->vkCreateBuffer = dfn.vkCreateBuffer;
vma_funcs->vkDestroyBuffer = dfn.vkDestroyBuffer;
vma_funcs->vkCreateImage = dfn.vkCreateImage;
vma_funcs->vkDestroyImage = dfn.vkDestroyImage;
vma_funcs->vkCmdCopyBuffer = dfn.vkCmdCopyBuffer;
}
VmaAllocator CreateVmaAllocator(const VulkanProvider& provider,
bool externally_synchronized);
} // namespace vulkan
} // namespace ui


@ -29,13 +29,8 @@
#include "xenia/base/platform_win.h"
#endif
// Implement AMD's VMA here.
#define VMA_IMPLEMENTATION
#include "xenia/ui/vulkan/vulkan_mem_alloc.h"
// TODO(Triang3l): Disable Vulkan validation before releasing a stable version.
DEFINE_bool(
vulkan_validation, true,
vulkan_validation, false,
"Enable Vulkan validation (VK_LAYER_KHRONOS_validation). Messages will be "
"written to the OS debug log without vulkan_debug_messenger or to the "
"Xenia log with it.",
@ -548,22 +543,10 @@ bool VulkanProvider::Initialize() {
++i) {
VkPhysicalDevice physical_device_current = physical_devices[i];
// Get physical device features and check if the needed ones are supported.
// Need this before obtaining the queues as sparse binding is an optional
// feature.
// Get physical device features. Need this before obtaining the queues as
// sparse binding is an optional feature.
ifn_.vkGetPhysicalDeviceFeatures(physical_device_current,
&device_features_);
// Passing indices directly from guest memory, where they are big-endian; a
// workaround using fetch from shared memory for 32-bit indices that need
// swapping isn't implemented yet. Not supported only Qualcomm Adreno 4xx.
if (!device_features_.fullDrawIndexUint32) {
continue;
}
// TODO(Triang3l): Make geometry shaders optional by providing compute
// shader fallback (though that would require vertex shader stores).
if (!device_features_.geometryShader) {
continue;
}
// Get the needed queues:
// - Graphics and compute.
@ -704,11 +687,17 @@ bool VulkanProvider::Initialize() {
}
std::memset(&device_extensions_, 0, sizeof(device_extensions_));
if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) {
device_extensions_.khr_bind_memory2 = true;
device_extensions_.khr_dedicated_allocation = true;
device_extensions_.khr_get_memory_requirements2 = true;
device_extensions_.khr_sampler_ycbcr_conversion = true;
if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 2, 0)) {
device_extensions_.khr_image_format_list = true;
device_extensions_.khr_shader_float_controls = true;
device_extensions_.khr_spirv_1_4 = true;
if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) {
device_extensions_.khr_maintenance4 = true;
}
}
}
device_extensions_enabled.clear();
@ -717,15 +706,28 @@ bool VulkanProvider::Initialize() {
// core to device_extensions_enabled. Adding literals to
// device_extensions_enabled for the most C string lifetime safety.
static const std::pair<const char*, size_t> kUsedDeviceExtensions[] = {
{"VK_AMD_shader_info", offsetof(DeviceExtensions, amd_shader_info)},
{"VK_EXT_fragment_shader_interlock",
offsetof(DeviceExtensions, ext_fragment_shader_interlock)},
{"VK_EXT_memory_budget", offsetof(DeviceExtensions, ext_memory_budget)},
{"VK_EXT_shader_stencil_export",
offsetof(DeviceExtensions, ext_shader_stencil_export)},
{"VK_KHR_bind_memory2", offsetof(DeviceExtensions, khr_bind_memory2)},
{"VK_KHR_dedicated_allocation",
offsetof(DeviceExtensions, khr_dedicated_allocation)},
{"VK_KHR_get_memory_requirements2",
offsetof(DeviceExtensions, khr_get_memory_requirements2)},
{"VK_KHR_image_format_list",
offsetof(DeviceExtensions, khr_image_format_list)},
{"VK_KHR_maintenance4", offsetof(DeviceExtensions, khr_maintenance4)},
{"VK_KHR_portability_subset",
offsetof(DeviceExtensions, khr_portability_subset)},
// While vkGetPhysicalDeviceFormatProperties should be used to check the
// format support (device support for Y'CbCr formats is not required by
// this extension or by Vulkan 1.1), still adding
// VK_KHR_sampler_ycbcr_conversion to this list to enable this extension
// on the device on Vulkan 1.0.
{"VK_KHR_sampler_ycbcr_conversion",
offsetof(DeviceExtensions, khr_sampler_ycbcr_conversion)},
{"VK_KHR_shader_float_controls",
offsetof(DeviceExtensions, khr_shader_float_controls)},
{"VK_KHR_spirv_1_4", offsetof(DeviceExtensions, khr_spirv_1_4)},
@ -917,10 +919,47 @@ bool VulkanProvider::Initialize() {
}
}
// Extensions - disable the specific extension if failed to get its functions.
if (device_extensions_.amd_shader_info) {
if (device_extensions_.khr_bind_memory2) {
bool functions_loaded = true;
#include "xenia/ui/vulkan/functions/device_amd_shader_info.inc"
device_extensions_.amd_shader_info = functions_loaded;
if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) {
#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_PROMOTE
#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc"
#undef XE_UI_VULKAN_FUNCTION_PROMOTED
} else {
#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_DONT_PROMOTE
#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc"
#undef XE_UI_VULKAN_FUNCTION_PROMOTED
}
device_extensions_.khr_bind_memory2 = functions_loaded;
}
if (device_extensions_.khr_get_memory_requirements2) {
bool functions_loaded = true;
if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 1, 0)) {
#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_PROMOTE
#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc"
#undef XE_UI_VULKAN_FUNCTION_PROMOTED
} else {
#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_DONT_PROMOTE
#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc"
#undef XE_UI_VULKAN_FUNCTION_PROMOTED
}
device_extensions_.khr_get_memory_requirements2 = functions_loaded;
// VK_KHR_dedicated_allocation can still work without the dedicated
// allocation preference getter even though it requires
// VK_KHR_get_memory_requirements2 to be supported and enabled.
}
if (device_extensions_.khr_maintenance4) {
bool functions_loaded = true;
if (device_properties_.apiVersion >= VK_MAKE_API_VERSION(0, 1, 3, 0)) {
#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_PROMOTE
#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc"
#undef XE_UI_VULKAN_FUNCTION_PROMOTED
} else {
#define XE_UI_VULKAN_FUNCTION_PROMOTED XE_UI_VULKAN_FUNCTION_DONT_PROMOTE
#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc"
#undef XE_UI_VULKAN_FUNCTION_PROMOTED
}
device_extensions_.khr_maintenance4 = functions_loaded;
}
if (device_extensions_.khr_swapchain) {
bool functions_loaded = true;
@ -954,14 +993,22 @@ bool VulkanProvider::Initialize() {
VK_VERSION_MINOR(device_properties_.apiVersion),
VK_VERSION_PATCH(device_properties_.apiVersion));
XELOGVK("Vulkan device extensions:");
XELOGVK("* VK_AMD_shader_info: {}",
device_extensions_.amd_shader_info ? "yes" : "no");
XELOGVK("* VK_EXT_fragment_shader_interlock: {}",
device_extensions_.ext_fragment_shader_interlock ? "yes" : "no");
XELOGVK("* VK_EXT_memory_budget: {}",
device_extensions_.ext_memory_budget ? "yes" : "no");
XELOGVK("* VK_EXT_shader_stencil_export: {}",
device_extensions_.ext_shader_stencil_export ? "yes" : "no");
XELOGVK("* VK_KHR_bind_memory2: {}",
device_extensions_.khr_bind_memory2 ? "yes" : "no");
XELOGVK("* VK_KHR_dedicated_allocation: {}",
device_extensions_.khr_dedicated_allocation ? "yes" : "no");
XELOGVK("* VK_KHR_get_memory_requirements2: {}",
device_extensions_.khr_get_memory_requirements2 ? "yes" : "no");
XELOGVK("* VK_KHR_image_format_list: {}",
device_extensions_.khr_image_format_list ? "yes" : "no");
XELOGVK("* VK_KHR_maintenance4: {}",
device_extensions_.khr_maintenance4 ? "yes" : "no");
XELOGVK("* VK_KHR_portability_subset: {}",
device_extensions_.khr_portability_subset ? "yes" : "no");
if (device_extensions_.khr_portability_subset) {
@ -990,6 +1037,8 @@ bool VulkanProvider::Initialize() {
XELOGVK(" * Triangle fans: {}",
device_portability_subset_features_.triangleFans ? "yes" : "no");
}
XELOGVK("* VK_KHR_sampler_ycbcr_conversion: {}",
device_extensions_.khr_sampler_ycbcr_conversion ? "yes" : "no");
XELOGVK("* VK_KHR_shader_float_controls: {}",
device_extensions_.khr_shader_float_controls ? "yes" : "no");
if (device_extensions_.khr_shader_float_controls) {


@ -131,14 +131,23 @@ class VulkanProvider : public GraphicsProvider {
return device_features_;
}
struct DeviceExtensions {
bool amd_shader_info;
bool ext_fragment_shader_interlock;
bool ext_memory_budget;
bool ext_shader_stencil_export;
// Core since 1.1.0.
bool khr_bind_memory2;
// Core since 1.1.0.
bool khr_dedicated_allocation;
// Core since 1.1.0.
bool khr_get_memory_requirements2;
// Core since 1.2.0.
bool khr_image_format_list;
// Core since 1.3.0.
bool khr_maintenance4;
// Requires the VK_KHR_get_physical_device_properties2 instance extension.
bool khr_portability_subset;
// Core since 1.1.0.
bool khr_sampler_ycbcr_conversion;
// Core since 1.2.0.
bool khr_shader_float_controls;
// Core since 1.2.0.
@ -215,9 +224,14 @@ class VulkanProvider : public GraphicsProvider {
VkDevice device() const { return device_; }
struct DeviceFunctions {
#define XE_UI_VULKAN_FUNCTION(name) PFN_##name name;
#define XE_UI_VULKAN_FUNCTION_PROMOTED(extension_name, core_name) \
PFN_##extension_name extension_name;
#include "xenia/ui/vulkan/functions/device_1_0.inc"
#include "xenia/ui/vulkan/functions/device_amd_shader_info.inc"
#include "xenia/ui/vulkan/functions/device_khr_bind_memory2.inc"
#include "xenia/ui/vulkan/functions/device_khr_get_memory_requirements2.inc"
#include "xenia/ui/vulkan/functions/device_khr_maintenance4.inc"
#include "xenia/ui/vulkan/functions/device_khr_swapchain.inc"
#undef XE_UI_VULKAN_FUNCTION_PROMOTED
#undef XE_UI_VULKAN_FUNCTION
};
const DeviceFunctions& dfn() const { return dfn_; }
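
For illustration, an .inc file included under these macros presumably lists one extension/core name pair per line; a sketch of what device_khr_bind_memory2.inc could contain (an assumption based on the macro shape, not the actual file contents):

// Hypothetical .inc contents:
XE_UI_VULKAN_FUNCTION_PROMOTED(vkBindBufferMemory2KHR, vkBindBufferMemory2)
XE_UI_VULKAN_FUNCTION_PROMOTED(vkBindImageMemory2KHR, vkBindImageMemory2)

In the DeviceFunctions struct this declares the PFN_*KHR members, while in VulkanProvider::Initialize the XE_UI_VULKAN_FUNCTION_PROMOTE definition would resolve the core name on 1.1+ devices and XE_UI_VULKAN_FUNCTION_DONT_PROMOTE the KHR-suffixed name, so the same member works on both paths.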


@ -189,6 +189,53 @@ bool CreateDedicatedAllocationImage(const VulkanProvider& provider,
return true;
}
VkPipeline CreateComputePipeline(
const VulkanProvider& provider, VkPipelineLayout layout,
VkShaderModule shader, const VkSpecializationInfo* specialization_info,
const char* entry_point) {
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
VkComputePipelineCreateInfo pipeline_create_info;
pipeline_create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
pipeline_create_info.pNext = nullptr;
pipeline_create_info.flags = 0;
pipeline_create_info.stage.sType =
VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
pipeline_create_info.stage.pNext = nullptr;
pipeline_create_info.stage.flags = 0;
pipeline_create_info.stage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
pipeline_create_info.stage.module = shader;
pipeline_create_info.stage.pName = entry_point;
pipeline_create_info.stage.pSpecializationInfo = specialization_info;
pipeline_create_info.layout = layout;
pipeline_create_info.basePipelineHandle = VK_NULL_HANDLE;
pipeline_create_info.basePipelineIndex = -1;
VkPipeline pipeline;
if (dfn.vkCreateComputePipelines(device, VK_NULL_HANDLE, 1,
&pipeline_create_info, nullptr,
&pipeline) != VK_SUCCESS) {
return VK_NULL_HANDLE;
}
return pipeline;
}
VkPipeline CreateComputePipeline(
const VulkanProvider& provider, VkPipelineLayout layout,
const uint32_t* shader_code, size_t shader_code_size_bytes,
const VkSpecializationInfo* specialization_info, const char* entry_point) {
VkShaderModule shader =
CreateShaderModule(provider, shader_code, shader_code_size_bytes);
if (shader == VK_NULL_HANDLE) {
return VK_NULL_HANDLE;
}
const ui::vulkan::VulkanProvider::DeviceFunctions& dfn = provider.dfn();
VkDevice device = provider.device();
VkPipeline pipeline = CreateComputePipeline(provider, layout, shader,
specialization_info, entry_point);
dfn.vkDestroyShaderModule(device, shader, nullptr);
return pipeline;
}
} // namespace util
} // namespace vulkan
} // namespace ui
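
A caller-side sketch of the second overload (`pipeline_layout` and `shader_dwords` are assumptions); it wraps the SPIR-V blob in a transient VkShaderModule that is destroyed once the pipeline has been created:

// Hypothetical usage sketch; shader_dwords is a SPIR-V blob such as the
// arrays generated by `xb buildshaders`.
VkPipeline pipeline = xe::ui::vulkan::util::CreateComputePipeline(
    provider, pipeline_layout, shader_dwords, sizeof(shader_dwords),
    /* specialization_info= */ nullptr, /* entry_point= */ "main");
if (pipeline == VK_NULL_HANDLE) {
  // Shader module or pipeline creation failed.
}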

Some files were not shown because too many files have changed in this diff.